summary refs log tree commit diff
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r-- fs/proc/array.c 20
-rw-r--r-- fs/proc/base.c 296
-rw-r--r-- fs/proc/fd.c 14
-rw-r--r-- fs/proc/generic.c 17
-rw-r--r-- fs/proc/inode.c 5
-rw-r--r-- fs/proc/internal.h 27
-rw-r--r-- fs/proc/kcore.c 6
-rw-r--r-- fs/proc/loadavg.c 2
-rw-r--r-- fs/proc/proc_net.c 7
-rw-r--r-- fs/proc/proc_sysctl.c 72
-rw-r--r-- fs/proc/root.c 11
-rw-r--r-- fs/proc/stat.c 67
-rw-r--r-- fs/proc/task_mmu.c 5
-rw-r--r-- fs/proc/task_nommu.c 4
-rw-r--r-- fs/proc/uptime.c 7
-rw-r--r-- fs/proc/vmcore.c 8
16 files changed, 298 insertions, 270 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 51a4213afa2e..88c355574aa0 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -60,6 +60,10 @@
#include <linux/tty.h>
#include <linux/string.h>
#include <linux/mman.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/numa_balancing.h>
+#include <linux/sched/task.h>
+#include <linux/sched/cputime.h>
#include <linux/proc_fs.h>
#include <linux/ioport.h>
#include <linux/uaccess.h>
@@ -401,8 +405,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
unsigned long long start_time;
unsigned long cmin_flt = 0, cmaj_flt = 0;
unsigned long min_flt = 0, maj_flt = 0;
- cputime_t cutime, cstime, utime, stime;
- cputime_t cgtime, gtime;
+ u64 cutime, cstime, utime, stime;
+ u64 cgtime, gtime;
unsigned long rsslim = 0;
char tcomm[sizeof(task->comm)];
unsigned long flags;
@@ -497,10 +501,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_put_decimal_ull(m, " ", cmin_flt);
seq_put_decimal_ull(m, " ", maj_flt);
seq_put_decimal_ull(m, " ", cmaj_flt);
- seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime));
- seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime));
- seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime));
- seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime));
+ seq_put_decimal_ull(m, " ", nsec_to_clock_t(utime));
+ seq_put_decimal_ull(m, " ", nsec_to_clock_t(stime));
+ seq_put_decimal_ll(m, " ", nsec_to_clock_t(cutime));
+ seq_put_decimal_ll(m, " ", nsec_to_clock_t(cstime));
seq_put_decimal_ll(m, " ", priority);
seq_put_decimal_ll(m, " ", nice);
seq_put_decimal_ll(m, " ", num_threads);
@@ -542,8 +546,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_put_decimal_ull(m, " ", task->rt_priority);
seq_put_decimal_ull(m, " ", task->policy);
seq_put_decimal_ull(m, " ", delayacct_blkio_ticks(task));
- seq_put_decimal_ull(m, " ", cputime_to_clock_t(gtime));
- seq_put_decimal_ll(m, " ", cputime_to_clock_t(cgtime));
+ seq_put_decimal_ull(m, " ", nsec_to_clock_t(gtime));
+ seq_put_decimal_ll(m, " ", nsec_to_clock_t(cgtime));
if (mm && permitted) {
seq_put_decimal_ull(m, " ", mm->start_data);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 87c9a9aacda3..c87b6b9a8a76 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -85,6 +85,11 @@
#include <linux/user_namespace.h>
#include <linux/fs_struct.h>
#include <linux/slab.h>
+#include <linux/sched/autogroup.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/stat.h>
#include <linux/flex_array.h>
#include <linux/posix-timers.h>
#ifdef CONFIG_HARDWALL
@@ -292,101 +297,69 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
}
} else {
/*
- * Command line (1 string) occupies ARGV and maybe
- * extends into ENVP.
- */
- if (len1 + len2 <= *pos)
- goto skip_argv_envp;
- if (len1 <= *pos)
- goto skip_argv;
-
- p = arg_start + *pos;
- len = len1 - *pos;
- while (count > 0 && len > 0) {
- unsigned int _count, l;
- int nr_read;
- bool final;
-
- _count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
- if (nr_read < 0)
- rv = nr_read;
- if (nr_read <= 0)
- goto out_free_page;
-
- /*
- * Command line can be shorter than whole ARGV
- * even if last "marker" byte says it is not.
- */
- final = false;
- l = strnlen(page, nr_read);
- if (l < nr_read) {
- nr_read = l;
- final = true;
- }
-
- if (copy_to_user(buf, page, nr_read)) {
- rv = -EFAULT;
- goto out_free_page;
- }
-
- p += nr_read;
- len -= nr_read;
- buf += nr_read;
- count -= nr_read;
- rv += nr_read;
-
- if (final)
- goto out_free_page;
- }
-skip_argv:
- /*
* Command line (1 string) occupies ARGV and
* extends into ENVP.
*/
- if (len1 <= *pos) {
- p = env_start + *pos - len1;
- len = len1 + len2 - *pos;
- } else {
- p = env_start;
- len = len2;
+ struct {
+ unsigned long p;
+ unsigned long len;
+ } cmdline[2] = {
+ { .p = arg_start, .len = len1 },
+ { .p = env_start, .len = len2 },
+ };
+ loff_t pos1 = *pos;
+ unsigned int i;
+
+ i = 0;
+ while (i < 2 && pos1 >= cmdline[i].len) {
+ pos1 -= cmdline[i].len;
+ i++;
}
- while (count > 0 && len > 0) {
- unsigned int _count, l;
- int nr_read;
- bool final;
-
- _count = min3(count, len, PAGE_SIZE);
- nr_read = access_remote_vm(mm, p, page, _count, 0);
- if (nr_read < 0)
- rv = nr_read;
- if (nr_read <= 0)
- goto out_free_page;
-
- /* Find EOS. */
- final = false;
- l = strnlen(page, nr_read);
- if (l < nr_read) {
- nr_read = l;
- final = true;
+ while (i < 2) {
+ p = cmdline[i].p + pos1;
+ len = cmdline[i].len - pos1;
+ while (count > 0 && len > 0) {
+ unsigned int _count, l;
+ int nr_read;
+ bool final;
+
+ _count = min3(count, len, PAGE_SIZE);
+ nr_read = access_remote_vm(mm, p, page, _count, 0);
+ if (nr_read < 0)
+ rv = nr_read;
+ if (nr_read <= 0)
+ goto out_free_page;
+
+ /*
+ * Command line can be shorter than whole ARGV
+ * even if last "marker" byte says it is not.
+ */
+ final = false;
+ l = strnlen(page, nr_read);
+ if (l < nr_read) {
+ nr_read = l;
+ final = true;
+ }
+
+ if (copy_to_user(buf, page, nr_read)) {
+ rv = -EFAULT;
+ goto out_free_page;
+ }
+
+ p += nr_read;
+ len -= nr_read;
+ buf += nr_read;
+ count -= nr_read;
+ rv += nr_read;
+
+ if (final)
+ goto out_free_page;
}
- if (copy_to_user(buf, page, nr_read)) {
- rv = -EFAULT;
- goto out_free_page;
- }
-
- p += nr_read;
- len -= nr_read;
- buf += nr_read;
- count -= nr_read;
- rv += nr_read;
-
- if (final)
- goto out_free_page;
+ /* Only first chunk can be read partially. */
+ pos1 = 0;
+ i++;
}
-skip_argv_envp:
- ;
}
out_free_page:
@@ -729,11 +702,11 @@ static int proc_pid_permission(struct inode *inode, int mask)
task = get_proc_task(inode);
if (!task)
return -ESRCH;
- has_perms = has_pid_permissions(pid, task, 1);
+ has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
put_task_struct(task);
if (!has_perms) {
- if (pid->hide_pid == 2) {
+ if (pid->hide_pid == HIDEPID_INVISIBLE) {
/*
* Let's make getdents(), stat(), and open()
* consistent with each other. If a process
@@ -798,7 +771,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
if (!IS_ERR_OR_NULL(mm)) {
/* ensure this mm_struct can't be freed */
- atomic_inc(&mm->mm_count);
+ mmgrab(mm);
/* but do not pin its memory */
mmput(mm);
}
@@ -845,7 +818,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
return -ENOMEM;
copied = 0;
- if (!atomic_inc_not_zero(&mm->mm_users))
+ if (!mmget_not_zero(mm))
goto free;
/* Maybe we should limit FOLL_FORCE to actual ptrace users? */
@@ -953,7 +926,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
return -ENOMEM;
ret = 0;
- if (!atomic_inc_not_zero(&mm->mm_users))
+ if (!mmget_not_zero(mm))
goto free;
down_read(&mm->mmap_sem);
@@ -1096,7 +1069,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
if (p) {
if (atomic_read(&p->mm->mm_users) > 1) {
mm = p->mm;
- atomic_inc(&mm->mm_count);
+ mmgrab(mm);
}
task_unlock(p);
}
@@ -1667,12 +1640,63 @@ const struct inode_operations proc_pid_link_inode_operations = {
/* building an inode */
+void task_dump_owner(struct task_struct *task, mode_t mode,
+ kuid_t *ruid, kgid_t *rgid)
+{
+ /* Depending on the state of dumpable, compute who should own a
+ * proc file for a task.
+ */
+ const struct cred *cred;
+ kuid_t uid;
+ kgid_t gid;
+
+ /* Default to the task's effective ownership */
+ rcu_read_lock();
+ cred = __task_cred(task);
+ uid = cred->euid;
+ gid = cred->egid;
+ rcu_read_unlock();
+
+ /*
+ * Before the /proc/pid/status file was created the only way to read
+ * the effective uid of a process was to stat /proc/pid. Reading
+ * /proc/pid/status is slow enough that procps and other packages
+ * kept stating /proc/pid. To keep the rules in /proc simple I have
+ * made this apply to all per process world readable and executable
+ * directories.
+ */
+ if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) {
+ struct mm_struct *mm;
+ task_lock(task);
+ mm = task->mm;
+ /* Make non-dumpable tasks owned by some root */
+ if (mm) {
+ if (get_dumpable(mm) != SUID_DUMP_USER) {
+ struct user_namespace *user_ns = mm->user_ns;
+
+ uid = make_kuid(user_ns, 0);
+ if (!uid_valid(uid))
+ uid = GLOBAL_ROOT_UID;
+
+ gid = make_kgid(user_ns, 0);
+ if (!gid_valid(gid))
+ gid = GLOBAL_ROOT_GID;
+ }
+ } else {
+ uid = GLOBAL_ROOT_UID;
+ gid = GLOBAL_ROOT_GID;
+ }
+ task_unlock(task);
+ }
+ *ruid = uid;
+ *rgid = gid;
+}
+
struct inode *proc_pid_make_inode(struct super_block * sb,
struct task_struct *task, umode_t mode)
{
struct inode * inode;
struct proc_inode *ei;
- const struct cred *cred;
/* We need a new inode */
@@ -1694,13 +1718,7 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
if (!ei->pid)
goto out_unlock;
- if (task_dumpable(task)) {
- rcu_read_lock();
- cred = __task_cred(task);
- inode->i_uid = cred->euid;
- inode->i_gid = cred->egid;
- rcu_read_unlock();
- }
+ task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
security_task_to_inode(task, inode);
out:
@@ -1711,12 +1729,12 @@ out_unlock:
return NULL;
}
-int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int pid_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct task_struct *task;
- const struct cred *cred;
- struct pid_namespace *pid = dentry->d_sb->s_fs_info;
+ struct pid_namespace *pid = path->dentry->d_sb->s_fs_info;
generic_fillattr(inode, stat);
@@ -1725,7 +1743,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
stat->gid = GLOBAL_ROOT_GID;
task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task) {
- if (!has_pid_permissions(pid, task, 2)) {
+ if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
rcu_read_unlock();
/*
* This doesn't prevent learning whether PID exists,
@@ -1733,12 +1751,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
*/
return -ENOENT;
}
- if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
- task_dumpable(task)) {
- cred = __task_cred(task);
- stat->uid = cred->euid;
- stat->gid = cred->egid;
- }
+ task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
}
rcu_read_unlock();
return 0;
@@ -1754,18 +1767,11 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
* Rewrite the inode's ownerships here because the owning task may have
* performed a setuid(), etc.
*
- * Before the /proc/pid/status file was created the only way to read
- * the effective uid of a /process was to stat /proc/pid. Reading
- * /proc/pid/status is slow enough that procps and other packages
- * kept stating /proc/pid. To keep the rules in /proc simple I have
- * made this apply to all per process world readable and executable
- * directories.
*/
int pid_revalidate(struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
struct task_struct *task;
- const struct cred *cred;
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -1774,17 +1780,8 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
task = get_proc_task(inode);
if (task) {
- if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
- task_dumpable(task)) {
- rcu_read_lock();
- cred = __task_cred(task);
- inode->i_uid = cred->euid;
- inode->i_gid = cred->egid;
- rcu_read_unlock();
- } else {
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- }
+ task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
+
inode->i_mode &= ~(S_ISUID | S_ISGID);
security_task_to_inode(task, inode);
put_task_struct(task);
@@ -1881,7 +1878,6 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
bool exact_vma_exists = false;
struct mm_struct *mm = NULL;
struct task_struct *task;
- const struct cred *cred;
struct inode *inode;
int status = 0;
@@ -1906,16 +1902,8 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
mmput(mm);
if (exact_vma_exists) {
- if (task_dumpable(task)) {
- rcu_read_lock();
- cred = __task_cred(task);
- inode->i_uid = cred->euid;
- inode->i_gid = cred->egid;
- rcu_read_unlock();
- } else {
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- }
+ task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
+
security_task_to_inode(task, inode);
status = 1;
}
@@ -2179,7 +2167,7 @@ static const struct file_operations proc_map_files_operations = {
.llseek = generic_file_llseek,
};
-#ifdef CONFIG_CHECKPOINT_RESTORE
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
struct timers_private {
struct pid *pid;
struct task_struct *task;
@@ -2488,6 +2476,12 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
length = -ESRCH;
if (!task)
goto out_no_task;
+
+ /* A task may only write its own attributes. */
+ length = -EACCES;
+ if (current != task)
+ goto out;
+
if (count > PAGE_SIZE)
count = PAGE_SIZE;
@@ -2503,14 +2497,13 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
}
/* Guard against adverse ptrace interaction */
- length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
+ length = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
if (length < 0)
goto out_free;
- length = security_setprocattr(task,
- (char*)file->f_path.dentry->d_name.name,
+ length = security_setprocattr(file->f_path.dentry->d_name.name,
page, count);
- mutex_unlock(&task->signal->cred_guard_mutex);
+ mutex_unlock(&current->signal->cred_guard_mutex);
out_free:
kfree(page);
out:
@@ -2936,7 +2929,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
-#ifdef CONFIG_CHECKPOINT_RESTORE
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
REG("timers", S_IRUGO, proc_timers_operations),
#endif
REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
@@ -3181,7 +3174,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
int len;
cond_resched();
- if (!has_pid_permissions(ns, iter.task, 2))
+ if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
continue;
len = snprintf(name, sizeof(name), "%d", iter.tgid);
@@ -3524,9 +3517,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
return 0;
}
-static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+static int proc_task_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct task_struct *p = get_proc_task(inode);
generic_fillattr(inode, stat);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 4274f83bf100..c330495c3115 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -1,4 +1,4 @@
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/dcache.h>
#include <linux/path.h>
@@ -84,7 +84,6 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
{
struct files_struct *files;
struct task_struct *task;
- const struct cred *cred;
struct inode *inode;
unsigned int fd;
@@ -108,16 +107,7 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
rcu_read_unlock();
put_files_struct(files);
- if (task_dumpable(task)) {
- rcu_read_lock();
- cred = __task_cred(task);
- inode->i_uid = cred->euid;
- inode->i_gid = cred->egid;
- rcu_read_unlock();
- } else {
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- }
+ task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
if (S_ISLNK(inode->i_mode)) {
unsigned i_mode = S_IFLNK;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f6a01f09f79d..ee27feb34cf4 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -57,9 +57,9 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
struct rb_node *node = dir->subdir.rb_node;
while (node) {
- struct proc_dir_entry *de = container_of(node,
- struct proc_dir_entry,
- subdir_node);
+ struct proc_dir_entry *de = rb_entry(node,
+ struct proc_dir_entry,
+ subdir_node);
int result = proc_match(len, name, de);
if (result < 0)
@@ -80,8 +80,9 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
/* Figure out where to put new node */
while (*new) {
- struct proc_dir_entry *this =
- container_of(*new, struct proc_dir_entry, subdir_node);
+ struct proc_dir_entry *this = rb_entry(*new,
+ struct proc_dir_entry,
+ subdir_node);
int result = proc_match(de->namelen, de->name, this);
parent = *new;
@@ -117,10 +118,10 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
return 0;
}
-static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int proc_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct proc_dir_entry *de = PDE(inode);
if (de && de->nlink)
set_nlink(inode, de->nlink);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 842a5ff5b85c..2cc7a8030275 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -43,10 +43,11 @@ static void proc_evict_inode(struct inode *inode)
de = PDE(inode);
if (de)
pde_put(de);
+
head = PROC_I(inode)->sysctl;
if (head) {
RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
- sysctl_head_put(head);
+ proc_sys_evict_inode(inode, head);
}
}
@@ -106,7 +107,7 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root)
if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
- if (pid->hide_pid != 0)
+ if (pid->hide_pid != HIDEPID_OFF)
seq_printf(seq, ",hidepid=%u", pid->hide_pid);
return 0;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 2de5194ba378..c5ae09b6c726 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -14,6 +14,8 @@
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/binfmts.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/task.h>
struct ctl_table_header;
struct mempolicy;
@@ -65,6 +67,7 @@ struct proc_inode {
struct proc_dir_entry *pde;
struct ctl_table_header *sysctl;
struct ctl_table *sysctl_entry;
+ struct list_head sysctl_inodes;
const struct proc_ns_operations *ns_ops;
struct inode vfs_inode;
};
@@ -97,20 +100,8 @@ static inline struct task_struct *get_proc_task(struct inode *inode)
return get_pid_task(proc_pid(inode), PIDTYPE_PID);
}
-static inline int task_dumpable(struct task_struct *task)
-{
- int dumpable = 0;
- struct mm_struct *mm;
-
- task_lock(task);
- mm = task->mm;
- if (mm)
- dumpable = get_dumpable(mm);
- task_unlock(task);
- if (dumpable == SUID_DUMP_USER)
- return 1;
- return 0;
-}
+void task_dump_owner(struct task_struct *task, mode_t mode,
+ kuid_t *ruid, kgid_t *rgid);
static inline unsigned name_to_int(const struct qstr *qstr)
{
@@ -160,7 +151,7 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
* base.c
*/
extern const struct dentry_operations pid_dentry_operations;
-extern int pid_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern int proc_setattr(struct dentry *, struct iattr *);
extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
extern int pid_revalidate(struct dentry *, unsigned int);
@@ -249,10 +240,12 @@ extern void proc_thread_self_init(void);
*/
#ifdef CONFIG_PROC_SYSCTL
extern int proc_sys_init(void);
-extern void sysctl_head_put(struct ctl_table_header *);
+extern void proc_sys_evict_inode(struct inode *inode,
+ struct ctl_table_header *head);
#else
static inline void proc_sys_init(void) { }
-static inline void sysctl_head_put(struct ctl_table_header *head) { }
+static inline void proc_sys_evict_inode(struct inode *inode,
+ struct ctl_table_header *head) { }
#endif
/*
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 0b80ad87b4d6..4ee55274f155 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -28,6 +28,7 @@
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/memory.h>
+#include <linux/sched/task.h>
#include <asm/sections.h>
#include "internal.h"
@@ -373,7 +374,10 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
phdr->p_vaddr = (size_t)m->addr;
- phdr->p_paddr = 0;
+ if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
+ phdr->p_paddr = __pa(m->addr);
+ else
+ phdr->p_paddr = (elf_addr_t)-1;
phdr->p_filesz = phdr->p_memsz = m->size;
phdr->p_align = PAGE_SIZE;
}
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index aec66e6c2060..983fce5c2418 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -3,6 +3,8 @@
#include <linux/pid_namespace.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/stat.h>
#include <linux/seq_file.h>
#include <linux/seqlock.h>
#include <linux/time.h>
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index ffd72a6c6e04..d72fc40241d9 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/sched/task.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/mount.h>
@@ -140,10 +141,10 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir,
return de;
}
-static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
+static int proc_tgid_net_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct net *net;
net = get_proc_task_net(inode);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d4e37acd4821..8f91ec66baa3 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -8,6 +8,7 @@
#include <linux/printk.h>
#include <linux/security.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/module.h>
@@ -190,6 +191,7 @@ static void init_header(struct ctl_table_header *head,
head->set = set;
head->parent = NULL;
head->node = node;
+ INIT_LIST_HEAD(&head->inodes);
if (node) {
struct ctl_table *entry;
for (entry = table; entry->procname; entry++, node++)
@@ -259,6 +261,27 @@ static void unuse_table(struct ctl_table_header *p)
complete(p->unregistering);
}
+/* called without sysctl_lock held: start_unregistering() drops it first */
+static void proc_sys_prune_dcache(struct ctl_table_header *head)
+{
+ struct inode *inode, *prev = NULL;
+ struct proc_inode *ei;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
+ inode = igrab(&ei->vfs_inode);
+ if (inode) {
+ rcu_read_unlock();
+ iput(prev);
+ prev = inode;
+ d_prune_aliases(inode);
+ rcu_read_lock();
+ }
+ }
+ rcu_read_unlock();
+ iput(prev);
+}
+
/* called under sysctl_lock, will reacquire if has to wait */
static void start_unregistering(struct ctl_table_header *p)
{
@@ -272,31 +295,22 @@ static void start_unregistering(struct ctl_table_header *p)
p->unregistering = &wait;
spin_unlock(&sysctl_lock);
wait_for_completion(&wait);
- spin_lock(&sysctl_lock);
} else {
/* anything non-NULL; we'll never dereference it */
p->unregistering = ERR_PTR(-EINVAL);
+ spin_unlock(&sysctl_lock);
}
/*
+ * Prune dentries for unregistered sysctls: namespaced sysctls
+ * can have duplicate names and contaminate dcache very badly.
+ */
+ proc_sys_prune_dcache(p);
+ /*
* do not remove from the list until nobody holds it; walking the
* list in do_sysctl() relies on that.
*/
- erase_header(p);
-}
-
-static void sysctl_head_get(struct ctl_table_header *head)
-{
- spin_lock(&sysctl_lock);
- head->count++;
- spin_unlock(&sysctl_lock);
-}
-
-void sysctl_head_put(struct ctl_table_header *head)
-{
spin_lock(&sysctl_lock);
- if (!--head->count)
- kfree_rcu(head, rcu);
- spin_unlock(&sysctl_lock);
+ erase_header(p);
}
static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
@@ -440,10 +454,20 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
inode->i_ino = get_next_ino();
- sysctl_head_get(head);
ei = PROC_I(inode);
+
+ spin_lock(&sysctl_lock);
+ if (unlikely(head->unregistering)) {
+ spin_unlock(&sysctl_lock);
+ iput(inode);
+ inode = NULL;
+ goto out;
+ }
ei->sysctl = head;
ei->sysctl_entry = table;
+ list_add_rcu(&ei->sysctl_inodes, &head->inodes);
+ head->count++;
+ spin_unlock(&sysctl_lock);
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
inode->i_mode = table->mode;
@@ -466,6 +490,15 @@ out:
return inode;
}
+void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
+{
+ spin_lock(&sysctl_lock);
+ list_del_rcu(&PROC_I(inode)->sysctl_inodes);
+ if (!--head->count)
+ kfree_rcu(head, rcu);
+ spin_unlock(&sysctl_lock);
+}
+
static struct ctl_table_header *grab_header(struct inode *inode)
{
struct ctl_table_header *head = PROC_I(inode)->sysctl;
@@ -769,9 +802,10 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
return 0;
}
-static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+static int proc_sys_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- struct inode *inode = d_inode(dentry);
+ struct inode *inode = d_inode(path->dentry);
struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 1988440b2049..deecb397daa3 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -14,12 +14,14 @@
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/sched/stat.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/user_namespace.h>
#include <linux/mount.h>
#include <linux/pid_namespace.h>
#include <linux/parser.h>
+#include <linux/cred.h>
#include "internal.h"
@@ -58,7 +60,8 @@ int proc_parse_options(char *options, struct pid_namespace *pid)
case Opt_hidepid:
if (match_int(&args[0], &option))
return 0;
- if (option < 0 || option > 2) {
+ if (option < HIDEPID_OFF ||
+ option > HIDEPID_INVISIBLE) {
pr_err("proc: hidepid value must be between 0 and 2.\n");
return 0;
}
@@ -148,10 +151,10 @@ void __init proc_root_init(void)
proc_sys_init();
}
-static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
-)
+static int proc_root_getattr(const struct path *path, struct kstat *stat,
+ u32 request_mask, unsigned int query_flags)
{
- generic_fillattr(d_inode(dentry), stat);
+ generic_fillattr(d_inode(path->dentry), stat);
stat->nlink = proc_root.nlink + nr_processes();
return 0;
}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index d700c42b3572..bd4e55f4aa20 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -5,11 +5,12 @@
#include <linux/kernel_stat.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
+#include <linux/sched/stat.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/irqnr.h>
-#include <linux/cputime.h>
+#include <linux/sched/cputime.h>
#include <linux/tick.h>
#ifndef arch_irq_stat_cpu
@@ -21,9 +22,9 @@
#ifdef arch_idle_time
-static cputime64_t get_idle_time(int cpu)
+static u64 get_idle_time(int cpu)
{
- cputime64_t idle;
+ u64 idle;
idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
@@ -31,9 +32,9 @@ static cputime64_t get_idle_time(int cpu)
return idle;
}
-static cputime64_t get_iowait_time(int cpu)
+static u64 get_iowait_time(int cpu)
{
- cputime64_t iowait;
+ u64 iowait;
iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
if (cpu_online(cpu) && nr_iowait_cpu(cpu))
@@ -45,32 +46,32 @@ static cputime64_t get_iowait_time(int cpu)
static u64 get_idle_time(int cpu)
{
- u64 idle, idle_time = -1ULL;
+ u64 idle, idle_usecs = -1ULL;
if (cpu_online(cpu))
- idle_time = get_cpu_idle_time_us(cpu, NULL);
+ idle_usecs = get_cpu_idle_time_us(cpu, NULL);
- if (idle_time == -1ULL)
+ if (idle_usecs == -1ULL)
/* !NO_HZ or cpu offline so we can rely on cpustat.idle */
idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
else
- idle = usecs_to_cputime64(idle_time);
+ idle = idle_usecs * NSEC_PER_USEC;
return idle;
}
static u64 get_iowait_time(int cpu)
{
- u64 iowait, iowait_time = -1ULL;
+ u64 iowait, iowait_usecs = -1ULL;
if (cpu_online(cpu))
- iowait_time = get_cpu_iowait_time_us(cpu, NULL);
+ iowait_usecs = get_cpu_iowait_time_us(cpu, NULL);
- if (iowait_time == -1ULL)
+ if (iowait_usecs == -1ULL)
/* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
else
- iowait = usecs_to_cputime64(iowait_time);
+ iowait = iowait_usecs * NSEC_PER_USEC;
return iowait;
}
@@ -115,16 +116,16 @@ static int show_stat(struct seq_file *p, void *v)
}
sum += arch_irq_stat();
- seq_put_decimal_ull(p, "cpu ", cputime64_to_clock_t(user));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
+ seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(user));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(system));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
seq_putc(p, '\n');
for_each_online_cpu(i) {
@@ -140,16 +141,16 @@ static int show_stat(struct seq_file *p, void *v)
guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
seq_printf(p, "cpu%d", i);
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(user));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(user));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(system));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
seq_putc(p, '\n');
}
seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8f96a49178d0..f08bd31c1081 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -11,6 +11,7 @@
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/swap.h>
+#include <linux/sched/mm.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
@@ -167,7 +168,7 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
return ERR_PTR(-ESRCH);
mm = priv->mm;
- if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+ if (!mm || !mmget_not_zero(mm))
return NULL;
down_read(&mm->mmap_sem);
@@ -1352,7 +1353,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long end_vaddr;
int ret = 0, copied = 0;
- if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+ if (!mm || !mmget_not_zero(mm))
goto out;
ret = -EINVAL;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 37175621e890..23266694db11 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -7,6 +7,8 @@
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
+#include <linux/sched/mm.h>
+
#include "internal.h"
/*
@@ -219,7 +221,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
return ERR_PTR(-ESRCH);
mm = priv->mm;
- if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+ if (!mm || !mmget_not_zero(mm))
return NULL;
down_read(&mm->mmap_sem);
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 33de567c25af..7981c4ffe787 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -5,23 +5,20 @@
#include <linux/seq_file.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
-#include <linux/cputime.h>
static int uptime_proc_show(struct seq_file *m, void *v)
{
struct timespec uptime;
struct timespec idle;
- u64 idletime;
u64 nsec;
u32 rem;
int i;
- idletime = 0;
+ nsec = 0;
for_each_possible_cpu(i)
- idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
+ nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
get_monotonic_boottime(&uptime);
- nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
idle.tv_nsec = rem;
seq_printf(m, "%lu.%02lu %lu.%02lu\n",
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 5105b1599981..885d445afa0d 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -265,10 +265,10 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
* On s390 the fault handler is used for memory regions that can't be mapped
* directly with remap_pfn_range().
*/
-static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int mmap_vmcore_fault(struct vm_fault *vmf)
{
#ifdef CONFIG_S390
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
pgoff_t index = vmf->pgoff;
struct page *page;
loff_t offset;
@@ -388,7 +388,7 @@ static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
}
return 0;
fail:
- do_munmap(vma->vm_mm, from, len);
+ do_munmap(vma->vm_mm, from, len, NULL);
return -EAGAIN;
}
@@ -481,7 +481,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
return 0;
fail:
- do_munmap(vma->vm_mm, vma->vm_start, len);
+ do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
return -EAGAIN;
}
#else