summaryrefslogtreecommitdiff
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/Kconfig6
-rw-r--r--fs/proc/array.c52
-rw-r--r--fs/proc/base.c264
-rw-r--r--fs/proc/bootconfig.c15
-rw-r--r--fs/proc/cpuinfo.c1
-rw-r--r--fs/proc/devices.c1
-rw-r--r--fs/proc/generic.c40
-rw-r--r--fs/proc/inode.c293
-rw-r--r--fs/proc/internal.h10
-rw-r--r--fs/proc/kcore.c3
-rw-r--r--fs/proc/kmsg.c1
-rw-r--r--fs/proc/meminfo.c16
-rw-r--r--fs/proc/nommu.c1
-rw-r--r--fs/proc/proc_net.c19
-rw-r--r--fs/proc/proc_sysctl.c241
-rw-r--r--fs/proc/root.c164
-rw-r--r--fs/proc/self.c10
-rw-r--r--fs/proc/stat.c1
-rw-r--r--fs/proc/task_mmu.c153
-rw-r--r--fs/proc/task_nommu.c18
-rw-r--r--fs/proc/thread_self.c10
-rw-r--r--fs/proc/vmcore.c6
22 files changed, 859 insertions, 466 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 27ef84d99f59..c930001056f9 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -23,7 +23,7 @@ config PROC_FS
/proc" or the equivalent line in /etc/fstab does the job.
The /proc file system is explained in the file
- <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
+ <file:Documentation/filesystems/proc.rst> and on the proc(5) manpage
("man 5 proc").
This option will enlarge your kernel by about 67 KB. Several
@@ -66,7 +66,7 @@ config PROC_SYSCTL
depends on PROC_FS
select SYSCTL
default y
- ---help---
+ help
The sysctl interface provides a means of dynamically changing
certain kernel parameters and variables on the fly without requiring
a recompile of the kernel or reboot of the system. The primary
@@ -95,7 +95,7 @@ config PROC_CHILDREN
default n
help
Provides a fast way to retrieve first level children pids of a task. See
- <file:Documentation/filesystems/proc.txt> for more information.
+ <file:Documentation/filesystems/proc.rst> for more information.
Say Y if you are running any user-space software which takes benefit from
this interface. For example, rkt is such a piece of software.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 5efaf3708ec6..65ec2029fa80 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -92,7 +92,6 @@
#include <linux/user_namespace.h>
#include <linux/fs_struct.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
#include "internal.h"
@@ -248,8 +247,8 @@ void render_sigset_t(struct seq_file *m, const char *header,
seq_putc(m, '\n');
}
-static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
- sigset_t *catch)
+static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *sigign,
+ sigset_t *sigcatch)
{
struct k_sigaction *k;
int i;
@@ -257,9 +256,9 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
k = p->sighand->action;
for (i = 1; i <= _NSIG; ++i, ++k) {
if (k->sa.sa_handler == SIG_IGN)
- sigaddset(ign, i);
+ sigaddset(sigign, i);
else if (k->sa.sa_handler != SIG_DFL)
- sigaddset(catch, i);
+ sigaddset(sigcatch, i);
}
}
@@ -342,6 +341,8 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p));
#ifdef CONFIG_SECCOMP
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
+ seq_put_decimal_ull(m, "\nSeccomp_filters:\t",
+ atomic_read(&p->seccomp.filter_count));
#endif
seq_puts(m, "\nSpeculation_Store_Bypass:\t");
switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) {
@@ -635,28 +636,35 @@ int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- unsigned long size = 0, resident = 0, shared = 0, text = 0, data = 0;
struct mm_struct *mm = get_task_mm(task);
if (mm) {
+ unsigned long size;
+ unsigned long resident = 0;
+ unsigned long shared = 0;
+ unsigned long text = 0;
+ unsigned long data = 0;
+
size = task_statm(mm, &shared, &text, &data, &resident);
mmput(mm);
- }
- /*
- * For quick read, open code by putting numbers directly
- * expected format is
- * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
- * size, resident, shared, text, data);
- */
- seq_put_decimal_ull(m, "", size);
- seq_put_decimal_ull(m, " ", resident);
- seq_put_decimal_ull(m, " ", shared);
- seq_put_decimal_ull(m, " ", text);
- seq_put_decimal_ull(m, " ", 0);
- seq_put_decimal_ull(m, " ", data);
- seq_put_decimal_ull(m, " ", 0);
- seq_putc(m, '\n');
+ /*
+ * For quick read, open code by putting numbers directly
+ * expected format is
+ * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n",
+ * size, resident, shared, text, data);
+ */
+ seq_put_decimal_ull(m, "", size);
+ seq_put_decimal_ull(m, " ", resident);
+ seq_put_decimal_ull(m, " ", shared);
+ seq_put_decimal_ull(m, " ", text);
+ seq_put_decimal_ull(m, " ", 0);
+ seq_put_decimal_ull(m, " ", data);
+ seq_put_decimal_ull(m, " ", 0);
+ seq_putc(m, '\n');
+ } else {
+ seq_write(m, "0 0 0 0 0 0 0\n", 14);
+ }
return 0;
}
@@ -721,7 +729,7 @@ static int children_seq_show(struct seq_file *seq, void *v)
{
struct inode *inode = file_inode(seq->file);
- seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode)));
+ seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode->i_sb)));
return 0;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c7c64272b0fa..617db4e0faa0 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -405,11 +405,11 @@ print0:
static int lock_trace(struct task_struct *task)
{
- int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
+ int err = mutex_lock_killable(&task->signal->exec_update_mutex);
if (err)
return err;
if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
- mutex_unlock(&task->signal->cred_guard_mutex);
+ mutex_unlock(&task->signal->exec_update_mutex);
return -EPERM;
}
return 0;
@@ -417,7 +417,7 @@ static int lock_trace(struct task_struct *task)
static void unlock_trace(struct task_struct *task)
{
- mutex_unlock(&task->signal->cred_guard_mutex);
+ mutex_unlock(&task->signal->exec_update_mutex);
}
#ifdef CONFIG_STACKTRACE
@@ -551,8 +551,17 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
{
unsigned long totalpages = totalram_pages() + total_swap_pages;
unsigned long points = 0;
+ long badness;
+
+ badness = oom_badness(task, totalpages);
+ /*
+ * Special case OOM_SCORE_ADJ_MIN for all others scale the
+ * badness value into [0, 2000] range which we have been
+ * exporting for a long time so userspace might depend on it.
+ */
+ if (badness != LONG_MIN)
+ points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3;
- points = oom_badness(task, totalpages) * 1000 / totalpages;
seq_printf(m, "%lu\n", points);
return 0;
@@ -697,13 +706,21 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)
* May current process learn task's sched/cmdline info (for hide_pid_min=1)
* or euid/egid (for hide_pid_min=2)?
*/
-static bool has_pid_permissions(struct pid_namespace *pid,
+static bool has_pid_permissions(struct proc_fs_info *fs_info,
struct task_struct *task,
- int hide_pid_min)
+ enum proc_hidepid hide_pid_min)
{
- if (pid->hide_pid < hide_pid_min)
+ /*
+ * If 'hidpid' mount option is set force a ptrace check,
+ * we indicate that we are using a filesystem syscall
+ * by passing PTRACE_MODE_READ_FSCREDS
+ */
+ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE)
+ return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
+
+ if (fs_info->hide_pid < hide_pid_min)
return true;
- if (in_group_p(pid->pid_gid))
+ if (in_group_p(fs_info->pid_gid))
return true;
return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
}
@@ -711,18 +728,18 @@ static bool has_pid_permissions(struct pid_namespace *pid,
static int proc_pid_permission(struct inode *inode, int mask)
{
- struct pid_namespace *pid = proc_pid_ns(inode);
+ struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct task_struct *task;
bool has_perms;
task = get_proc_task(inode);
if (!task)
return -ESRCH;
- has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
+ has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS);
put_task_struct(task);
if (!has_perms) {
- if (pid->hide_pid == HIDEPID_INVISIBLE) {
+ if (fs_info->hide_pid == HIDEPID_INVISIBLE) {
/*
* Let's make getdents(), stat(), and open()
* consistent with each other. If a process
@@ -746,7 +763,7 @@ static const struct inode_operations proc_def_inode_operations = {
static int proc_single_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns = proc_pid_ns(inode);
+ struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
struct pid *pid = proc_pid(inode);
struct task_struct *task;
int ret;
@@ -1415,7 +1432,7 @@ static const struct file_operations proc_fail_nth_operations = {
static int sched_show(struct seq_file *m, void *v)
{
struct inode *inode = m->private;
- struct pid_namespace *ns = proc_pid_ns(inode);
+ struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
struct task_struct *p;
p = get_proc_task(inode);
@@ -1573,6 +1590,7 @@ static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
noffsets = 0;
for (pos = kbuf; pos; pos = next_line) {
struct proc_timens_offset *off = &offsets[noffsets];
+ char clock[10];
int err;
/* Find the end of line and ensure we don't look past it */
@@ -1584,10 +1602,21 @@ static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
next_line = NULL;
}
- err = sscanf(pos, "%u %lld %lu", &off->clockid,
+ err = sscanf(pos, "%9s %lld %lu", clock,
&off->val.tv_sec, &off->val.tv_nsec);
if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC)
goto out;
+
+ clock[sizeof(clock) - 1] = 0;
+ if (strcmp(clock, "monotonic") == 0 ||
+ strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0)
+ off->clockid = CLOCK_MONOTONIC;
+ else if (strcmp(clock, "boottime") == 0 ||
+ strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0)
+ off->clockid = CLOCK_BOOTTIME;
+ else
+ goto out;
+
noffsets++;
if (noffsets == ARRAY_SIZE(offsets)) {
if (next_line)
@@ -1834,11 +1863,25 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
*rgid = gid;
}
+void proc_pid_evict_inode(struct proc_inode *ei)
+{
+ struct pid *pid = ei->pid;
+
+ if (S_ISDIR(ei->vfs_inode.i_mode)) {
+ spin_lock(&pid->lock);
+ hlist_del_init_rcu(&ei->sibling_inodes);
+ spin_unlock(&pid->lock);
+ }
+
+ put_pid(pid);
+}
+
struct inode *proc_pid_make_inode(struct super_block * sb,
struct task_struct *task, umode_t mode)
{
struct inode * inode;
struct proc_inode *ei;
+ struct pid *pid;
/* We need a new inode */
@@ -1856,10 +1899,18 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
/*
* grab the reference to task.
*/
- ei->pid = get_task_pid(task, PIDTYPE_PID);
- if (!ei->pid)
+ pid = get_task_pid(task, PIDTYPE_PID);
+ if (!pid)
goto out_unlock;
+ /* Let the pid remember us for quick removal */
+ ei->pid = pid;
+ if (S_ISDIR(mode)) {
+ spin_lock(&pid->lock);
+ hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
+ spin_unlock(&pid->lock);
+ }
+
task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
security_task_to_inode(task, inode);
@@ -1875,7 +1926,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- struct pid_namespace *pid = proc_pid_ns(inode);
+ struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct task_struct *task;
generic_fillattr(inode, stat);
@@ -1885,7 +1936,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
rcu_read_lock();
task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task) {
- if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) {
+ if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) {
rcu_read_unlock();
/*
* This doesn't prevent learning whether PID exists,
@@ -2070,11 +2121,11 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
goto out;
if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
- status = down_read_killable(&mm->mmap_sem);
+ status = mmap_read_lock_killable(mm);
if (!status) {
exact_vma_exists = !!find_exact_vma(mm, vm_start,
vm_end);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
}
}
@@ -2121,7 +2172,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
if (rc)
goto out_mmput;
- rc = down_read_killable(&mm->mmap_sem);
+ rc = mmap_read_lock_killable(mm);
if (rc)
goto out_mmput;
@@ -2132,7 +2183,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
path_get(path);
rc = 0;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
out_mmput:
mmput(mm);
@@ -2147,16 +2198,16 @@ struct map_files_info {
};
/*
- * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the
- * symlinks may be used to bypass permissions on ancestor directories in the
- * path to the file in question.
+ * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due
+ * to concerns about how the symlinks may be used to bypass permissions on
+ * ancestor directories in the path to the file in question.
*/
static const char *
proc_map_files_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- if (!capable(CAP_SYS_ADMIN))
+ if (!checkpoint_restore_ns_capable(&init_user_ns))
return ERR_PTR(-EPERM);
return proc_pid_get_link(dentry, inode, done);
@@ -2222,7 +2273,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
goto out_put_task;
result = ERR_PTR(-EINTR);
- if (down_read_killable(&mm->mmap_sem))
+ if (mmap_read_lock_killable(mm))
goto out_put_mm;
result = ERR_PTR(-ENOENT);
@@ -2235,7 +2286,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
(void *)(unsigned long)vma->vm_file->f_mode);
out_no_vma:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
out_put_mm:
mmput(mm);
out_put_task:
@@ -2280,7 +2331,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
if (!mm)
goto out_put_task;
- ret = down_read_killable(&mm->mmap_sem);
+ ret = mmap_read_lock_killable(mm);
if (ret) {
mmput(mm);
goto out_put_task;
@@ -2291,11 +2342,11 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
/*
* We need two passes here:
*
- * 1) Collect vmas of mapped files with mmap_sem taken
- * 2) Release mmap_sem and instantiate entries
+ * 1) Collect vmas of mapped files with mmap_lock taken
+ * 2) Release mmap_lock and instantiate entries
*
* otherwise we get lockdep complained, since filldir()
- * routine might require mmap_sem taken in might_fault().
+ * routine might require mmap_lock taken in might_fault().
*/
for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
@@ -2307,7 +2358,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL);
if (!p) {
ret = -ENOMEM;
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
mmput(mm);
goto out_put_task;
}
@@ -2316,7 +2367,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
p->end = vma->vm_end;
p->mode = vma->vm_file->f_mode;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
mmput(mm);
for (i = 0; i < nr_files; i++) {
@@ -2436,7 +2487,7 @@ static int proc_timers_open(struct inode *inode, struct file *file)
return -ENOMEM;
tp->pid = proc_pid(inode);
- tp->ns = proc_pid_ns(inode);
+ tp->ns = proc_pid_ns(inode->i_sb);
return 0;
}
@@ -2736,6 +2787,15 @@ static const struct pid_entry smack_attr_dir_stuff[] = {
LSM_DIR_OPS(smack);
#endif
+#ifdef CONFIG_SECURITY_APPARMOR
+static const struct pid_entry apparmor_attr_dir_stuff[] = {
+ ATTR("apparmor", "current", 0666),
+ ATTR("apparmor", "prev", 0444),
+ ATTR("apparmor", "exec", 0666),
+};
+LSM_DIR_OPS(apparmor);
+#endif
+
static const struct pid_entry attr_dir_stuff[] = {
ATTR(NULL, "current", 0666),
ATTR(NULL, "prev", 0444),
@@ -2747,6 +2807,10 @@ static const struct pid_entry attr_dir_stuff[] = {
DIR("smack", 0555,
proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
#endif
+#ifdef CONFIG_SECURITY_APPARMOR
+ DIR("apparmor", 0555,
+ proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops),
+#endif
};
static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
@@ -2861,7 +2925,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
unsigned long flags;
int result;
- result = mutex_lock_killable(&task->signal->cred_guard_mutex);
+ result = mutex_lock_killable(&task->signal->exec_update_mutex);
if (result)
return result;
@@ -2897,7 +2961,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
result = 0;
out_unlock:
- mutex_unlock(&task->signal->cred_guard_mutex);
+ mutex_unlock(&task->signal->exec_update_mutex);
return result;
}
@@ -3230,90 +3294,28 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
.permission = proc_pid_permission,
};
-static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
-{
- struct dentry *dentry, *leader, *dir;
- char buf[10 + 1];
- struct qstr name;
-
- name.name = buf;
- name.len = snprintf(buf, sizeof(buf), "%u", pid);
- /* no ->d_hash() rejects on procfs */
- dentry = d_hash_and_lookup(mnt->mnt_root, &name);
- if (dentry) {
- d_invalidate(dentry);
- dput(dentry);
- }
-
- if (pid == tgid)
- return;
-
- name.name = buf;
- name.len = snprintf(buf, sizeof(buf), "%u", tgid);
- leader = d_hash_and_lookup(mnt->mnt_root, &name);
- if (!leader)
- goto out;
-
- name.name = "task";
- name.len = strlen(name.name);
- dir = d_hash_and_lookup(leader, &name);
- if (!dir)
- goto out_put_leader;
-
- name.name = buf;
- name.len = snprintf(buf, sizeof(buf), "%u", pid);
- dentry = d_hash_and_lookup(dir, &name);
- if (dentry) {
- d_invalidate(dentry);
- dput(dentry);
- }
-
- dput(dir);
-out_put_leader:
- dput(leader);
-out:
- return;
-}
-
/**
- * proc_flush_task - Remove dcache entries for @task from the /proc dcache.
- * @task: task that should be flushed.
- *
- * When flushing dentries from proc, one needs to flush them from global
- * proc (proc_mnt) and from all the namespaces' procs this task was seen
- * in. This call is supposed to do all of this job.
+ * proc_flush_pid - Remove dcache entries for @pid from the /proc dcache.
+ * @pid: pid that should be flushed.
*
- * Looks in the dcache for
- * /proc/@pid
- * /proc/@tgid/task/@pid
- * if either directory is present flushes it and all of it'ts children
- * from the dcache.
+ * This function walks a list of inodes (that belong to any proc
+ * filesystem) that are attached to the pid and flushes them from
+ * the dentry cache.
*
* It is safe and reasonable to cache /proc entries for a task until
* that task exits. After that they just clog up the dcache with
* useless entries, possibly causing useful dcache entries to be
- * flushed instead. This routine is proved to flush those useless
- * dcache entries at process exit time.
+ * flushed instead. This routine is provided to flush those useless
+ * dcache entries when a process is reaped.
*
* NOTE: This routine is just an optimization so it does not guarantee
- * that no dcache entries will exist at process exit time it
- * just makes it very unlikely that any will persist.
+ * that no dcache entries will exist after a process is reaped
+ * it just makes it very unlikely that any will persist.
*/
-void proc_flush_task(struct task_struct *task)
+void proc_flush_pid(struct pid *pid)
{
- int i;
- struct pid *pid, *tgid;
- struct upid *upid;
-
- pid = task_pid(task);
- tgid = task_tgid(task);
-
- for (i = 0; i <= pid->level; i++) {
- upid = &pid->numbers[i];
- proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
- tgid->numbers[i].nr);
- }
+ proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock);
}
static struct dentry *proc_pid_instantiate(struct dentry * dentry,
@@ -3340,6 +3342,7 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
{
struct task_struct *task;
unsigned tgid;
+ struct proc_fs_info *fs_info;
struct pid_namespace *ns;
struct dentry *result = ERR_PTR(-ENOENT);
@@ -3347,7 +3350,8 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
if (tgid == ~0U)
goto out;
- ns = dentry->d_sb->s_fs_info;
+ fs_info = proc_sb_info(dentry->d_sb);
+ ns = fs_info->pid_ns;
rcu_read_lock();
task = find_task_by_pid_ns(tgid, ns);
if (task)
@@ -3356,7 +3360,14 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
if (!task)
goto out;
+ /* Limit procfs to only ptraceable tasks */
+ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) {
+ if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS))
+ goto out_put_task;
+ }
+
result = proc_pid_instantiate(dentry, task, NULL);
+out_put_task:
put_task_struct(task);
out:
return result;
@@ -3382,20 +3393,8 @@ retry:
pid = find_ge_pid(iter.tgid, ns);
if (pid) {
iter.tgid = pid_nr_ns(pid, ns);
- iter.task = pid_task(pid, PIDTYPE_PID);
- /* What we to know is if the pid we have find is the
- * pid of a thread_group_leader. Testing for task
- * being a thread_group_leader is the obvious thing
- * todo but there is a window when it fails, due to
- * the pid transfer logic in de_thread.
- *
- * So we perform the straight forward test of seeing
- * if the pid we have found is the pid of a thread
- * group leader, and don't worry if the task we have
- * found doesn't happen to be a thread group leader.
- * As we don't care in the case of readdir.
- */
- if (!iter.task || !has_group_leader_pid(iter.task)) {
+ iter.task = pid_task(pid, PIDTYPE_TGID);
+ if (!iter.task) {
iter.tgid += 1;
goto retry;
}
@@ -3411,20 +3410,21 @@ retry:
int proc_pid_readdir(struct file *file, struct dir_context *ctx)
{
struct tgid_iter iter;
- struct pid_namespace *ns = proc_pid_ns(file_inode(file));
+ struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
+ struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb);
loff_t pos = ctx->pos;
if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
return 0;
if (pos == TGID_OFFSET - 2) {
- struct inode *inode = d_inode(ns->proc_self);
+ struct inode *inode = d_inode(fs_info->proc_self);
if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
return 0;
ctx->pos = pos = pos + 1;
}
if (pos == TGID_OFFSET - 1) {
- struct inode *inode = d_inode(ns->proc_thread_self);
+ struct inode *inode = d_inode(fs_info->proc_thread_self);
if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
return 0;
ctx->pos = pos = pos + 1;
@@ -3438,7 +3438,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
unsigned int len;
cond_resched();
- if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE))
+ if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE))
continue;
len = snprintf(name, sizeof(name), "%u", iter.tgid);
@@ -3638,6 +3638,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
struct task_struct *task;
struct task_struct *leader = get_proc_task(dir);
unsigned tid;
+ struct proc_fs_info *fs_info;
struct pid_namespace *ns;
struct dentry *result = ERR_PTR(-ENOENT);
@@ -3648,7 +3649,8 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
if (tid == ~0U)
goto out;
- ns = dentry->d_sb->s_fs_info;
+ fs_info = proc_sb_info(dentry->d_sb);
+ ns = fs_info->pid_ns;
rcu_read_lock();
task = find_task_by_pid_ns(tid, ns);
if (task)
@@ -3762,7 +3764,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
/* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
*/
- ns = proc_pid_ns(inode);
+ ns = proc_pid_ns(inode->i_sb);
tid = (int)file->f_version;
file->f_version = 0;
for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c
index 9955d75c0585..ad31ec4ad627 100644
--- a/fs/proc/bootconfig.c
+++ b/fs/proc/bootconfig.c
@@ -26,8 +26,9 @@ static int boot_config_proc_show(struct seq_file *m, void *v)
static int __init copy_xbc_key_value_list(char *dst, size_t size)
{
struct xbc_node *leaf, *vnode;
- const char *val;
char *key, *end = dst + size;
+ const char *val;
+ char q;
int ret = 0;
key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL);
@@ -41,16 +42,20 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size)
break;
dst += ret;
vnode = xbc_node_get_child(leaf);
- if (vnode && xbc_node_is_array(vnode)) {
+ if (vnode) {
xbc_array_for_each_value(vnode, val) {
- ret = snprintf(dst, rest(dst, end), "\"%s\"%s",
- val, vnode->next ? ", " : "\n");
+ if (strchr(val, '"'))
+ q = '\'';
+ else
+ q = '"';
+ ret = snprintf(dst, rest(dst, end), "%c%s%c%s",
+ q, val, q, vnode->next ? ", " : "\n");
if (ret < 0)
goto out;
dst += ret;
}
} else {
- ret = snprintf(dst, rest(dst, end), "\"%s\"\n", val);
+ ret = snprintf(dst, rest(dst, end), "\"\"\n");
if (ret < 0)
break;
dst += ret;
diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c
index c1dea9b8222e..d0989a443c77 100644
--- a/fs/proc/cpuinfo.c
+++ b/fs/proc/cpuinfo.c
@@ -17,6 +17,7 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
}
static const struct proc_ops cpuinfo_proc_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
.proc_open = cpuinfo_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index 37d38697eaf8..837971e74109 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/blkdev.h>
static int devinfo_show(struct seq_file *f, void *v)
{
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 3faed94e4b65..2f9fa179194d 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -269,6 +269,11 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
+ struct proc_fs_info *fs_info = proc_sb_info(dir->i_sb);
+
+ if (fs_info->pidonly == PROC_PIDONLY_ON)
+ return ERR_PTR(-ENOENT);
+
return proc_lookup_de(dir, dentry, PDE(dir));
}
@@ -325,6 +330,10 @@ int proc_readdir_de(struct file *file, struct dir_context *ctx,
int proc_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
+ struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
+
+ if (fs_info->pidonly == PROC_PIDONLY_ON)
+ return 1;
return proc_readdir_de(file, ctx, PDE(inode));
}
@@ -531,6 +540,12 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
return p;
}
+static inline void pde_set_flags(struct proc_dir_entry *pde)
+{
+ if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT)
+ pde->flags |= PROC_ENTRY_PERMANENT;
+}
+
struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
struct proc_dir_entry *parent,
const struct proc_ops *proc_ops, void *data)
@@ -541,6 +556,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
if (!p)
return NULL;
p->proc_ops = proc_ops;
+ pde_set_flags(p);
return proc_register(parent, p);
}
EXPORT_SYMBOL(proc_create_data);
@@ -572,6 +588,7 @@ static int proc_seq_release(struct inode *inode, struct file *file)
}
static const struct proc_ops proc_seq_ops = {
+ /* not permanent -- can call into arbitrary seq_operations */
.proc_open = proc_seq_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
@@ -602,6 +619,7 @@ static int proc_single_open(struct inode *inode, struct file *file)
}
static const struct proc_ops proc_single_ops = {
+ /* not permanent -- can call into arbitrary ->single_show */
.proc_open = proc_single_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
@@ -662,9 +680,13 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
de = pde_subdir_find(parent, fn, len);
if (de) {
- rb_erase(&de->subdir_node, &parent->subdir);
- if (S_ISDIR(de->mode)) {
- parent->nlink--;
+ if (unlikely(pde_is_permanent(de))) {
+ WARN(1, "removing permanent /proc entry '%s'", de->name);
+ de = NULL;
+ } else {
+ rb_erase(&de->subdir_node, &parent->subdir);
+ if (S_ISDIR(de->mode))
+ parent->nlink--;
}
}
write_unlock(&proc_subdir_lock);
@@ -700,12 +722,24 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
write_unlock(&proc_subdir_lock);
return -ENOENT;
}
+ if (unlikely(pde_is_permanent(root))) {
+ write_unlock(&proc_subdir_lock);
+ WARN(1, "removing permanent /proc entry '%s/%s'",
+ root->parent->name, root->name);
+ return -EINVAL;
+ }
rb_erase(&root->subdir_node, &parent->subdir);
de = root;
while (1) {
next = pde_subdir_first(de);
if (next) {
+ if (unlikely(pde_is_permanent(root))) {
+ write_unlock(&proc_subdir_lock);
+ WARN(1, "removing permanent /proc entry '%s/%s'",
+ next->parent->name, next->name);
+ return -EINVAL;
+ }
rb_erase(&next->subdir_node, &de->subdir);
de = next;
continue;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 6da18316d209..28d6105e908e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -24,6 +24,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/mount.h>
+#include <linux/bug.h>
#include <linux/uaccess.h>
@@ -33,21 +34,27 @@ static void proc_evict_inode(struct inode *inode)
{
struct proc_dir_entry *de;
struct ctl_table_header *head;
+ struct proc_inode *ei = PROC_I(inode);
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
/* Stop tracking associated processes */
- put_pid(PROC_I(inode)->pid);
+ if (ei->pid) {
+ proc_pid_evict_inode(ei);
+ ei->pid = NULL;
+ }
/* Let go of any associated proc directory entry */
- de = PDE(inode);
- if (de)
+ de = ei->pde;
+ if (de) {
pde_put(de);
+ ei->pde = NULL;
+ }
- head = PROC_I(inode)->sysctl;
+ head = ei->sysctl;
if (head) {
- RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
+ RCU_INIT_POINTER(ei->sysctl, NULL);
proc_sys_evict_inode(inode, head);
}
}
@@ -68,6 +75,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
ei->pde = NULL;
ei->sysctl = NULL;
ei->sysctl_entry = NULL;
+ INIT_HLIST_NODE(&ei->sibling_inodes);
ei->ns_ops = NULL;
return &ei->vfs_inode;
}
@@ -102,15 +110,84 @@ void __init proc_init_kmemcache(void)
BUILD_BUG_ON(sizeof(struct proc_dir_entry) >= SIZEOF_PDE);
}
+void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock)
+{
+ struct inode *inode;
+ struct proc_inode *ei;
+ struct hlist_node *node;
+ struct super_block *old_sb = NULL;
+
+ rcu_read_lock();
+ for (;;) {
+ struct super_block *sb;
+ node = hlist_first_rcu(inodes);
+ if (!node)
+ break;
+ ei = hlist_entry(node, struct proc_inode, sibling_inodes);
+ spin_lock(lock);
+ hlist_del_init_rcu(&ei->sibling_inodes);
+ spin_unlock(lock);
+
+ inode = &ei->vfs_inode;
+ sb = inode->i_sb;
+ if ((sb != old_sb) && !atomic_inc_not_zero(&sb->s_active))
+ continue;
+ inode = igrab(inode);
+ rcu_read_unlock();
+ if (sb != old_sb) {
+ if (old_sb)
+ deactivate_super(old_sb);
+ old_sb = sb;
+ }
+ if (unlikely(!inode)) {
+ rcu_read_lock();
+ continue;
+ }
+
+ if (S_ISDIR(inode->i_mode)) {
+ struct dentry *dir = d_find_any_alias(inode);
+ if (dir) {
+ d_invalidate(dir);
+ dput(dir);
+ }
+ } else {
+ struct dentry *dentry;
+ while ((dentry = d_find_alias(inode))) {
+ d_invalidate(dentry);
+ dput(dentry);
+ }
+ }
+ iput(inode);
+
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+ if (old_sb)
+ deactivate_super(old_sb);
+}
+
+static inline const char *hidepid2str(enum proc_hidepid v)
+{
+ switch (v) {
+ case HIDEPID_OFF: return "off";
+ case HIDEPID_NO_ACCESS: return "noaccess";
+ case HIDEPID_INVISIBLE: return "invisible";
+ case HIDEPID_NOT_PTRACEABLE: return "ptraceable";
+ }
+ WARN_ONCE(1, "bad hide_pid value: %d\n", v);
+ return "unknown";
+}
+
static int proc_show_options(struct seq_file *seq, struct dentry *root)
{
- struct super_block *sb = root->d_sb;
- struct pid_namespace *pid = sb->s_fs_info;
+ struct proc_fs_info *fs_info = proc_sb_info(root->d_sb);
- if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID))
- seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid));
- if (pid->hide_pid != HIDEPID_OFF)
- seq_printf(seq, ",hidepid=%u", pid->hide_pid);
+ if (!gid_eq(fs_info->pid_gid, GLOBAL_ROOT_GID))
+ seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, fs_info->pid_gid));
+ if (fs_info->hide_pid != HIDEPID_OFF)
+ seq_printf(seq, ",hidepid=%s", hidepid2str(fs_info->hide_pid));
+ if (fs_info->pidonly != PROC_PIDONLY_OFF)
+ seq_printf(seq, ",subset=pid");
return 0;
}
@@ -139,6 +216,7 @@ static void unuse_pde(struct proc_dir_entry *pde)
/* pde is locked on entry, unlocked on exit */
static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
+ __releases(&pde->pde_unload_lock)
{
/*
* close() (proc_reg_release()) can't delete an entry and proceed:
@@ -195,135 +273,204 @@ void proc_entry_rundown(struct proc_dir_entry *de)
spin_unlock(&de->pde_unload_lock);
}
+static loff_t pde_lseek(struct proc_dir_entry *pde, struct file *file, loff_t offset, int whence)
+{
+ typeof_member(struct proc_ops, proc_lseek) lseek;
+
+ lseek = pde->proc_ops->proc_lseek;
+ if (!lseek)
+ lseek = default_llseek;
+ return lseek(file, offset, whence);
+}
+
static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
loff_t rv = -EINVAL;
- if (use_pde(pde)) {
- typeof_member(struct proc_ops, proc_lseek) lseek;
- lseek = pde->proc_ops->proc_lseek;
- if (!lseek)
- lseek = default_llseek;
- rv = lseek(file, offset, whence);
+ if (pde_is_permanent(pde)) {
+ return pde_lseek(pde, file, offset, whence);
+ } else if (use_pde(pde)) {
+ rv = pde_lseek(pde, file, offset, whence);
unuse_pde(pde);
}
return rv;
}
+static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+ typeof_member(struct proc_ops, proc_read) read;
+
+ read = pde->proc_ops->proc_read;
+ if (read)
+ return read(file, buf, count, ppos);
+ return -EIO;
+}
+
static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
- if (use_pde(pde)) {
- typeof_member(struct proc_ops, proc_read) read;
- read = pde->proc_ops->proc_read;
- if (read)
- rv = read(file, buf, count, ppos);
+ if (pde_is_permanent(pde)) {
+ return pde_read(pde, file, buf, count, ppos);
+ } else if (use_pde(pde)) {
+ rv = pde_read(pde, file, buf, count, ppos);
unuse_pde(pde);
}
return rv;
}
+static ssize_t pde_write(struct proc_dir_entry *pde, struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+{
+ typeof_member(struct proc_ops, proc_write) write;
+
+ write = pde->proc_ops->proc_write;
+ if (write)
+ return write(file, buf, count, ppos);
+ return -EIO;
+}
+
static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
- if (use_pde(pde)) {
- typeof_member(struct proc_ops, proc_write) write;
- write = pde->proc_ops->proc_write;
- if (write)
- rv = write(file, buf, count, ppos);
+ if (pde_is_permanent(pde)) {
+ return pde_write(pde, file, buf, count, ppos);
+ } else if (use_pde(pde)) {
+ rv = pde_write(pde, file, buf, count, ppos);
unuse_pde(pde);
}
return rv;
}
+static __poll_t pde_poll(struct proc_dir_entry *pde, struct file *file, struct poll_table_struct *pts)
+{
+ typeof_member(struct proc_ops, proc_poll) poll;
+
+ poll = pde->proc_ops->proc_poll;
+ if (poll)
+ return poll(file, pts);
+ return DEFAULT_POLLMASK;
+}
+
static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
__poll_t rv = DEFAULT_POLLMASK;
- if (use_pde(pde)) {
- typeof_member(struct proc_ops, proc_poll) poll;
- poll = pde->proc_ops->proc_poll;
- if (poll)
- rv = poll(file, pts);
+ if (pde_is_permanent(pde)) {
+ return pde_poll(pde, file, pts);
+ } else if (use_pde(pde)) {
+ rv = pde_poll(pde, file, pts);
unuse_pde(pde);
}
return rv;
}
+static long pde_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg)
+{
+ typeof_member(struct proc_ops, proc_ioctl) ioctl;
+
+ ioctl = pde->proc_ops->proc_ioctl;
+ if (ioctl)
+ return ioctl(file, cmd, arg);
+ return -ENOTTY;
+}
+
static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
long rv = -ENOTTY;
- if (use_pde(pde)) {
- typeof_member(struct proc_ops, proc_ioctl) ioctl;
- ioctl = pde->proc_ops->proc_ioctl;
- if (ioctl)
- rv = ioctl(file, cmd, arg);
+ if (pde_is_permanent(pde)) {
+ return pde_ioctl(pde, file, cmd, arg);
+ } else if (use_pde(pde)) {
+ rv = pde_ioctl(pde, file, cmd, arg);
unuse_pde(pde);
}
return rv;
}
#ifdef CONFIG_COMPAT
+static long pde_compat_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg)
+{
+ typeof_member(struct proc_ops, proc_compat_ioctl) compat_ioctl;
+
+ compat_ioctl = pde->proc_ops->proc_compat_ioctl;
+ if (compat_ioctl)
+ return compat_ioctl(file, cmd, arg);
+ return -ENOTTY;
+}
+
static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
long rv = -ENOTTY;
- if (use_pde(pde)) {
- typeof_member(struct proc_ops, proc_compat_ioctl) compat_ioctl;
-
- compat_ioctl = pde->proc_ops->proc_compat_ioctl;
- if (compat_ioctl)
- rv = compat_ioctl(file, cmd, arg);
+ if (pde_is_permanent(pde)) {
+ return pde_compat_ioctl(pde, file, cmd, arg);
+ } else if (use_pde(pde)) {
+ rv = pde_compat_ioctl(pde, file, cmd, arg);
unuse_pde(pde);
}
return rv;
}
#endif
+static int pde_mmap(struct proc_dir_entry *pde, struct file *file, struct vm_area_struct *vma)
+{
+ typeof_member(struct proc_ops, proc_mmap) mmap;
+
+ mmap = pde->proc_ops->proc_mmap;
+ if (mmap)
+ return mmap(file, vma);
+ return -EIO;
+}
+
static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
int rv = -EIO;
- if (use_pde(pde)) {
- typeof_member(struct proc_ops, proc_mmap) mmap;
- mmap = pde->proc_ops->proc_mmap;
- if (mmap)
- rv = mmap(file, vma);
+ if (pde_is_permanent(pde)) {
+ return pde_mmap(pde, file, vma);
+ } else if (use_pde(pde)) {
+ rv = pde_mmap(pde, file, vma);
unuse_pde(pde);
}
return rv;
}
static unsigned long
-proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
+pde_get_unmapped_area(struct proc_dir_entry *pde, struct file *file, unsigned long orig_addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- unsigned long rv = -EIO;
+ typeof_member(struct proc_ops, proc_get_unmapped_area) get_area;
- if (use_pde(pde)) {
- typeof_member(struct proc_ops, proc_get_unmapped_area) get_area;
-
- get_area = pde->proc_ops->proc_get_unmapped_area;
+ get_area = pde->proc_ops->proc_get_unmapped_area;
#ifdef CONFIG_MMU
- if (!get_area)
- get_area = current->mm->get_unmapped_area;
+ if (!get_area)
+ get_area = current->mm->get_unmapped_area;
#endif
+ if (get_area)
+ return get_area(file, orig_addr, len, pgoff, flags);
+ return orig_addr;
+}
- if (get_area)
- rv = get_area(file, orig_addr, len, pgoff, flags);
- else
- rv = orig_addr;
+static unsigned long
+proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ struct proc_dir_entry *pde = PDE(file_inode(file));
+ unsigned long rv = -EIO;
+
+ if (pde_is_permanent(pde)) {
+ return pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags);
+ } else if (use_pde(pde)) {
+ rv = pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags);
unuse_pde(pde);
}
return rv;
@@ -331,12 +478,23 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
static int proc_reg_open(struct inode *inode, struct file *file)
{
+ struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
typeof_member(struct proc_ops, proc_open) open;
typeof_member(struct proc_ops, proc_release) release;
struct pde_opener *pdeo;
+ if (pde_is_permanent(pde)) {
+ open = pde->proc_ops->proc_open;
+ if (open)
+ rv = open(inode, file);
+ return rv;
+ }
+
+ if (fs_info->pidonly == PROC_PIDONLY_ON)
+ return -ENOENT;
+
/*
* Ensure that
* 1) PDE's ->release hook will be called no matter what
@@ -386,6 +544,17 @@ static int proc_reg_release(struct inode *inode, struct file *file)
{
struct proc_dir_entry *pde = PDE(inode);
struct pde_opener *pdeo;
+
+ if (pde_is_permanent(pde)) {
+ typeof_member(struct proc_ops, proc_release) release;
+
+ release = pde->proc_ops->proc_release;
+ if (release) {
+ return release(inode, file);
+ }
+ return 0;
+ }
+
spin_lock(&pde->pde_unload_lock);
list_for_each_entry(pdeo, &pde->pde_openers, lh) {
if (pdeo->file == file) {
@@ -448,7 +617,7 @@ const struct inode_operations proc_link_inode_operations = {
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{
- struct inode *inode = new_inode_pseudo(sb);
+ struct inode *inode = new_inode(sb);
if (inode) {
inode->i_ino = de->low_ino;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 41587276798e..917cc85e3466 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,6 +61,7 @@ struct proc_dir_entry {
struct rb_node subdir_node;
char *name;
umode_t mode;
+ u8 flags;
u8 namelen;
char inline_name[];
} __randomize_layout;
@@ -73,6 +74,11 @@ struct proc_dir_entry {
0)
#define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry))
+static inline bool pde_is_permanent(const struct proc_dir_entry *pde)
+{
+ return pde->flags & PROC_ENTRY_PERMANENT;
+}
+
extern struct kmem_cache *proc_dir_entry_cache;
void pde_free(struct proc_dir_entry *pde);
@@ -91,7 +97,7 @@ struct proc_inode {
struct proc_dir_entry *pde;
struct ctl_table_header *sysctl;
struct ctl_table *sysctl_entry;
- struct hlist_node sysctl_inodes;
+ struct hlist_node sibling_inodes;
const struct proc_ns_operations *ns_ops;
struct inode vfs_inode;
} __randomize_layout;
@@ -158,6 +164,7 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
extern const struct dentry_operations pid_dentry_operations;
extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern int proc_setattr(struct dentry *, struct iattr *);
+extern void proc_pid_evict_inode(struct proc_inode *);
extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
extern void pid_update_inode(struct task_struct *, struct inode *);
extern int pid_delete_dentry(const struct dentry *);
@@ -210,6 +217,7 @@ extern const struct inode_operations proc_pid_link_inode_operations;
extern const struct super_operations proc_sops;
void proc_init_kmemcache(void);
+void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock);
void set_proc_pid_nlink(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
extern void proc_entry_rundown(struct proc_dir_entry *);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 8ba492d44e68..e502414b3556 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -512,7 +512,8 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
* Using bounce buffer to bypass the
* hardened user copy kernel text checks.
*/
- if (probe_kernel_read(buf, (void *) start, tsz)) {
+ if (copy_from_kernel_nofault(buf, (void *)start,
+ tsz)) {
if (clear_user(buffer, tsz)) {
ret = -EFAULT;
goto out;
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ec1b7d2fb773..b38ad552887f 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -50,6 +50,7 @@ static __poll_t kmsg_poll(struct file *file, poll_table *wait)
static const struct proc_ops kmsg_proc_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
.proc_read = kmsg_read,
.proc_poll = kmsg_poll,
.proc_open = kmsg_open,
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..887a5532e449 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -17,7 +17,6 @@
#include <linux/cma.h>
#endif
#include <asm/page.h>
-#include <asm/pgtable.h>
#include "internal.h"
void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
@@ -42,7 +41,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
si_meminfo(&i);
si_swapinfo(&i);
- committed = percpu_counter_read_positive(&vm_committed_as);
+ committed = vm_memory_committed();
cached = global_node_page_state(NR_FILE_PAGES) -
total_swapcache_pages() - i.bufferram;
@@ -53,8 +52,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
available = si_mem_available();
- sreclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE);
- sunreclaim = global_node_page_state(NR_SLAB_UNRECLAIMABLE);
+ sreclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B);
+ sunreclaim = global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B);
show_val_kb(m, "MemTotal: ", i.totalram);
show_val_kb(m, "MemFree: ", i.freeram);
@@ -102,12 +101,15 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SReclaimable: ", sreclaimable);
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
- global_zone_page_state(NR_KERNEL_STACK_KB));
+ global_node_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_node_page_state(NR_KERNEL_SCS_KB));
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));
- show_val_kb(m, "NFS_Unstable: ",
- global_node_page_state(NR_UNSTABLE_NFS));
+ show_val_kb(m, "NFS_Unstable: ", 0);
show_val_kb(m, "Bounce: ",
global_zone_page_state(NR_BOUNCE));
show_val_kb(m, "WritebackTmp: ",
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 14c2badb8fd9..13452b32e2bd 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -22,7 +22,6 @@
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
-#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/div64.h>
#include "internal.h"
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 4888c5224442..ed8a6306990c 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -98,6 +98,25 @@ static const struct proc_ops proc_net_seq_ops = {
.proc_release = seq_release_net,
};
+int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+#ifdef CONFIG_NET_NS
+ struct seq_net_private *p = priv_data;
+
+ p->net = get_net(current->nsproxy->net_ns);
+#endif
+ return 0;
+}
+
+void bpf_iter_fini_seq_net(void *priv_data)
+{
+#ifdef CONFIG_NET_NS
+ struct seq_net_private *p = priv_data;
+
+ put_net(p->net);
+#endif
+}
+
struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
struct proc_dir_entry *parent, const struct seq_operations *ops,
unsigned int state_size, void *data)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index c75bb4632ed1..6c1166ccdaea 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -14,6 +14,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bpf-cgroup.h>
+#include <linux/mount.h>
#include "internal.h"
static const struct dentry_operations proc_sys_dentry_operations;
@@ -267,42 +268,9 @@ static void unuse_table(struct ctl_table_header *p)
complete(p->unregistering);
}
-static void proc_sys_prune_dcache(struct ctl_table_header *head)
+static void proc_sys_invalidate_dcache(struct ctl_table_header *head)
{
- struct inode *inode;
- struct proc_inode *ei;
- struct hlist_node *node;
- struct super_block *sb;
-
- rcu_read_lock();
- for (;;) {
- node = hlist_first_rcu(&head->inodes);
- if (!node)
- break;
- ei = hlist_entry(node, struct proc_inode, sysctl_inodes);
- spin_lock(&sysctl_lock);
- hlist_del_init_rcu(&ei->sysctl_inodes);
- spin_unlock(&sysctl_lock);
-
- inode = &ei->vfs_inode;
- sb = inode->i_sb;
- if (!atomic_inc_not_zero(&sb->s_active))
- continue;
- inode = igrab(inode);
- rcu_read_unlock();
- if (unlikely(!inode)) {
- deactivate_super(sb);
- rcu_read_lock();
- continue;
- }
-
- d_prune_aliases(inode);
- iput(inode);
- deactivate_super(sb);
-
- rcu_read_lock();
- }
- rcu_read_unlock();
+ proc_invalidate_siblings_dcache(&head->inodes, &sysctl_lock);
}
/* called under sysctl_lock, will reacquire if has to wait */
@@ -324,10 +292,10 @@ static void start_unregistering(struct ctl_table_header *p)
spin_unlock(&sysctl_lock);
}
/*
- * Prune dentries for unregistered sysctls: namespaced sysctls
+ * Invalidate dentries for unregistered sysctls: namespaced sysctls
* can have duplicate names and contaminate dcache very badly.
*/
- proc_sys_prune_dcache(p);
+ proc_sys_invalidate_dcache(p);
/*
* do not remove from the list until nobody holds it; walking the
* list in do_sysctl() relies on that.
@@ -483,7 +451,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
}
ei->sysctl = head;
ei->sysctl_entry = table;
- hlist_add_head_rcu(&ei->sysctl_inodes, &head->inodes);
+ hlist_add_head_rcu(&ei->sibling_inodes, &head->inodes);
head->count++;
spin_unlock(&sysctl_lock);
@@ -514,7 +482,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
{
spin_lock(&sysctl_lock);
- hlist_del_init_rcu(&PROC_I(inode)->sysctl_inodes);
+ hlist_del_init_rcu(&PROC_I(inode)->sibling_inodes);
if (!--head->count)
kfree_rcu(head, rcu);
spin_unlock(&sysctl_lock);
@@ -572,13 +540,13 @@ out:
return err;
}
-static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
+static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
size_t count, loff_t *ppos, int write)
{
struct inode *inode = file_inode(filp);
struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry;
- void *new_buf = NULL;
+ void *kbuf;
ssize_t error;
if (IS_ERR(head))
@@ -597,27 +565,42 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
if (!table->proc_handler)
goto out;
- error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
- ppos, &new_buf);
- if (error)
+ /* don't even try if the size is too large */
+ error = -ENOMEM;
+ if (count >= KMALLOC_MAX_SIZE)
goto out;
- /* careful: calling conventions are nasty here */
- if (new_buf) {
- mm_segment_t old_fs;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- error = table->proc_handler(table, write, (void __user *)new_buf,
- &count, ppos);
- set_fs(old_fs);
- kfree(new_buf);
+ if (write) {
+ kbuf = memdup_user_nul(ubuf, count);
+ if (IS_ERR(kbuf)) {
+ error = PTR_ERR(kbuf);
+ goto out;
+ }
} else {
- error = table->proc_handler(table, write, buf, &count, ppos);
+ kbuf = kzalloc(count, GFP_KERNEL);
+ if (!kbuf)
+ goto out;
}
- if (!error)
- error = count;
+ error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count,
+ ppos);
+ if (error)
+ goto out_free_buf;
+
+ /* careful: calling conventions are nasty here */
+ error = table->proc_handler(table, write, kbuf, &count, ppos);
+ if (error)
+ goto out_free_buf;
+
+ if (!write) {
+ error = -EFAULT;
+ if (copy_to_user(ubuf, kbuf, count))
+ goto out_free_buf;
+ }
+
+ error = count;
+out_free_buf:
+ kfree(kbuf);
out:
sysctl_head_finish(head);
@@ -1725,3 +1708,147 @@ int __init proc_sys_init(void)
return sysctl_init();
}
+
+struct sysctl_alias {
+ const char *kernel_param;
+ const char *sysctl_param;
+};
+
+/*
+ * Historically some settings had both sysctl and a command line parameter.
+ * With the generic sysctl. parameter support, we can handle them at a single
+ * place and only keep the historical name for compatibility. This is not meant
+ * to add brand new aliases. When adding existing aliases, consider whether
+ * the possibly different moment of changing the value (e.g. from early_param
+ * to the moment do_sysctl_args() is called) is an issue for the specific
+ * parameter.
+ */
+static const struct sysctl_alias sysctl_aliases[] = {
+ {"hardlockup_all_cpu_backtrace", "kernel.hardlockup_all_cpu_backtrace" },
+ {"hung_task_panic", "kernel.hung_task_panic" },
+ {"numa_zonelist_order", "vm.numa_zonelist_order" },
+ {"softlockup_all_cpu_backtrace", "kernel.softlockup_all_cpu_backtrace" },
+ {"softlockup_panic", "kernel.softlockup_panic" },
+ { }
+};
+
+static const char *sysctl_find_alias(char *param)
+{
+ const struct sysctl_alias *alias;
+
+ for (alias = &sysctl_aliases[0]; alias->kernel_param != NULL; alias++) {
+ if (strcmp(alias->kernel_param, param) == 0)
+ return alias->sysctl_param;
+ }
+
+ return NULL;
+}
+
+/* Set sysctl value passed on kernel command line. */
+static int process_sysctl_arg(char *param, char *val,
+ const char *unused, void *arg)
+{
+ char *path;
+ struct vfsmount **proc_mnt = arg;
+ struct file_system_type *proc_fs_type;
+ struct file *file;
+ int len;
+ int err;
+ loff_t pos = 0;
+ ssize_t wret;
+
+ if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) {
+ param += sizeof("sysctl") - 1;
+
+ if (param[0] != '/' && param[0] != '.')
+ return 0;
+
+ param++;
+ } else {
+ param = (char *) sysctl_find_alias(param);
+ if (!param)
+ return 0;
+ }
+
+ /*
+ * To set sysctl options, we use a temporary mount of proc, look up the
+ * respective sys/ file and write to it. To avoid mounting it when no
+ * options were given, we mount it only when the first sysctl option is
+ * found. Why not a persistent mount? There are problems with a
+ * persistent mount of proc in that it forces userspace not to use any
+ * proc mount options.
+ */
+ if (!*proc_mnt) {
+ proc_fs_type = get_fs_type("proc");
+ if (!proc_fs_type) {
+ pr_err("Failed to find procfs to set sysctl from command line\n");
+ return 0;
+ }
+ *proc_mnt = kern_mount(proc_fs_type);
+ put_filesystem(proc_fs_type);
+ if (IS_ERR(*proc_mnt)) {
+ pr_err("Failed to mount procfs to set sysctl from command line\n");
+ return 0;
+ }
+ }
+
+ path = kasprintf(GFP_KERNEL, "sys/%s", param);
+ if (!path)
+ panic("%s: Failed to allocate path for %s\n", __func__, param);
+ strreplace(path, '.', '/');
+
+ file = file_open_root((*proc_mnt)->mnt_root, *proc_mnt, path, O_WRONLY, 0);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ if (err == -ENOENT)
+ pr_err("Failed to set sysctl parameter '%s=%s': parameter not found\n",
+ param, val);
+ else if (err == -EACCES)
+ pr_err("Failed to set sysctl parameter '%s=%s': permission denied (read-only?)\n",
+ param, val);
+ else
+ pr_err("Error %pe opening proc file to set sysctl parameter '%s=%s'\n",
+ file, param, val);
+ goto out;
+ }
+ len = strlen(val);
+ wret = kernel_write(file, val, len, &pos);
+ if (wret < 0) {
+ err = wret;
+ if (err == -EINVAL)
+ pr_err("Failed to set sysctl parameter '%s=%s': invalid value\n",
+ param, val);
+ else
+ pr_err("Error %pe writing to proc file to set sysctl parameter '%s=%s'\n",
+ ERR_PTR(err), param, val);
+ } else if (wret != len) {
+ pr_err("Wrote only %zd bytes of %d writing to proc file %s to set sysctl parameter '%s=%s\n",
+ wret, len, path, param, val);
+ }
+
+ err = filp_close(file, NULL);
+ if (err)
+ pr_err("Error %pe closing proc file to set sysctl parameter '%s=%s\n",
+ ERR_PTR(err), param, val);
+out:
+ kfree(path);
+ return 0;
+}
+
+void do_sysctl_args(void)
+{
+ char *command_line;
+ struct vfsmount *proc_mnt = NULL;
+
+ command_line = kstrdup(saved_command_line, GFP_KERNEL);
+ if (!command_line)
+ panic("%s: Failed to allocate copy of command line\n", __func__);
+
+ parse_args("Setting sysctl args", command_line,
+ NULL, 0, -1, -1, &proc_mnt, process_sysctl_arg);
+
+ if (proc_mnt)
+ kern_unmount(proc_mnt);
+
+ kfree(command_line);
+}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 608233dfd29c..5e444d4f9717 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -32,21 +32,86 @@
struct proc_fs_context {
struct pid_namespace *pid_ns;
unsigned int mask;
- int hidepid;
+ enum proc_hidepid hidepid;
int gid;
+ enum proc_pidonly pidonly;
};
enum proc_param {
Opt_gid,
Opt_hidepid,
+ Opt_subset,
};
static const struct fs_parameter_spec proc_fs_parameters[] = {
fsparam_u32("gid", Opt_gid),
- fsparam_u32("hidepid", Opt_hidepid),
+ fsparam_string("hidepid", Opt_hidepid),
+ fsparam_string("subset", Opt_subset),
{}
};
+static inline int valid_hidepid(unsigned int value)
+{
+ return (value == HIDEPID_OFF ||
+ value == HIDEPID_NO_ACCESS ||
+ value == HIDEPID_INVISIBLE ||
+ value == HIDEPID_NOT_PTRACEABLE);
+}
+
+static int proc_parse_hidepid_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct proc_fs_context *ctx = fc->fs_private;
+ struct fs_parameter_spec hidepid_u32_spec = fsparam_u32("hidepid", Opt_hidepid);
+ struct fs_parse_result result;
+ int base = (unsigned long)hidepid_u32_spec.data;
+
+ if (param->type != fs_value_is_string)
+ return invalf(fc, "proc: unexpected type of hidepid value\n");
+
+ if (!kstrtouint(param->string, base, &result.uint_32)) {
+ if (!valid_hidepid(result.uint_32))
+ return invalf(fc, "proc: unknown value of hidepid - %s\n", param->string);
+ ctx->hidepid = result.uint_32;
+ return 0;
+ }
+
+ if (!strcmp(param->string, "off"))
+ ctx->hidepid = HIDEPID_OFF;
+ else if (!strcmp(param->string, "noaccess"))
+ ctx->hidepid = HIDEPID_NO_ACCESS;
+ else if (!strcmp(param->string, "invisible"))
+ ctx->hidepid = HIDEPID_INVISIBLE;
+ else if (!strcmp(param->string, "ptraceable"))
+ ctx->hidepid = HIDEPID_NOT_PTRACEABLE;
+ else
+ return invalf(fc, "proc: unknown value of hidepid - %s\n", param->string);
+
+ return 0;
+}
+
+static int proc_parse_subset_param(struct fs_context *fc, char *value)
+{
+ struct proc_fs_context *ctx = fc->fs_private;
+
+ while (value) {
+ char *ptr = strchr(value, ',');
+
+ if (ptr != NULL)
+ *ptr++ = '\0';
+
+ if (*value != '\0') {
+ if (!strcmp(value, "pid")) {
+ ctx->pidonly = PROC_PIDONLY_ON;
+ } else {
+ return invalf(fc, "proc: unsupported subset option - %s\n", value);
+ }
+ }
+ value = ptr;
+ }
+
+ return 0;
+}
+
static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct proc_fs_context *ctx = fc->fs_private;
@@ -63,10 +128,13 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
break;
case Opt_hidepid:
- ctx->hidepid = result.uint_32;
- if (ctx->hidepid < HIDEPID_OFF ||
- ctx->hidepid > HIDEPID_INVISIBLE)
- return invalfc(fc, "hidepid value must be between 0 and 2.\n");
+ if (proc_parse_hidepid_param(fc, param))
+ return -EINVAL;
+ break;
+
+ case Opt_subset:
+ if (proc_parse_subset_param(fc, param->string) < 0)
+ return -EINVAL;
break;
default:
@@ -77,26 +145,33 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
return 0;
}
-static void proc_apply_options(struct super_block *s,
+static void proc_apply_options(struct proc_fs_info *fs_info,
struct fs_context *fc,
- struct pid_namespace *pid_ns,
struct user_namespace *user_ns)
{
struct proc_fs_context *ctx = fc->fs_private;
if (ctx->mask & (1 << Opt_gid))
- pid_ns->pid_gid = make_kgid(user_ns, ctx->gid);
+ fs_info->pid_gid = make_kgid(user_ns, ctx->gid);
if (ctx->mask & (1 << Opt_hidepid))
- pid_ns->hide_pid = ctx->hidepid;
+ fs_info->hide_pid = ctx->hidepid;
+ if (ctx->mask & (1 << Opt_subset))
+ fs_info->pidonly = ctx->pidonly;
}
static int proc_fill_super(struct super_block *s, struct fs_context *fc)
{
- struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info);
+ struct proc_fs_context *ctx = fc->fs_private;
struct inode *root_inode;
+ struct proc_fs_info *fs_info;
int ret;
- proc_apply_options(s, fc, pid_ns, current_user_ns());
+ fs_info = kzalloc(sizeof(*fs_info), GFP_KERNEL);
+ if (!fs_info)
+ return -ENOMEM;
+
+ fs_info->pid_ns = get_pid_ns(ctx->pid_ns);
+ proc_apply_options(fs_info, fc, current_user_ns());
/* User space would break if executables or devices appear on proc */
s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
@@ -106,6 +181,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
s->s_magic = PROC_SUPER_MAGIC;
s->s_op = &proc_sops;
s->s_time_gran = 1;
+ s->s_fs_info = fs_info;
/*
* procfs isn't actually a stacking filesystem; however, there is
@@ -113,7 +189,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
* top of it
*/
s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
-
+
/* procfs dentries and inodes don't require IO to create */
s->s_shrink.seeks = 0;
@@ -140,19 +216,17 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
static int proc_reconfigure(struct fs_context *fc)
{
struct super_block *sb = fc->root->d_sb;
- struct pid_namespace *pid = sb->s_fs_info;
+ struct proc_fs_info *fs_info = proc_sb_info(sb);
sync_filesystem(sb);
- proc_apply_options(sb, fc, pid, current_user_ns());
+ proc_apply_options(fs_info, fc, current_user_ns());
return 0;
}
static int proc_get_tree(struct fs_context *fc)
{
- struct proc_fs_context *ctx = fc->fs_private;
-
- return get_tree_keyed(fc, proc_fill_super, ctx->pid_ns);
+ return get_tree_nodev(fc, proc_fill_super);
}
static void proc_fs_context_free(struct fs_context *fc)
@@ -188,15 +262,19 @@ static int proc_init_fs_context(struct fs_context *fc)
static void proc_kill_sb(struct super_block *sb)
{
- struct pid_namespace *ns;
+ struct proc_fs_info *fs_info = proc_sb_info(sb);
+
+ if (!fs_info) {
+ kill_anon_super(sb);
+ return;
+ }
+
+ dput(fs_info->proc_self);
+ dput(fs_info->proc_thread_self);
- ns = (struct pid_namespace *)sb->s_fs_info;
- if (ns->proc_self)
- dput(ns->proc_self);
- if (ns->proc_thread_self)
- dput(ns->proc_thread_self);
kill_anon_super(sb);
- put_pid_ns(ns);
+ put_pid_ns(fs_info->pid_ns);
+ kfree(fs_info);
}
static struct file_system_type proc_fs_type = {
@@ -292,39 +370,3 @@ struct proc_dir_entry proc_root = {
.subdir = RB_ROOT,
.name = "/proc",
};
-
-int pid_ns_prepare_proc(struct pid_namespace *ns)
-{
- struct proc_fs_context *ctx;
- struct fs_context *fc;
- struct vfsmount *mnt;
-
- fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT);
- if (IS_ERR(fc))
- return PTR_ERR(fc);
-
- if (fc->user_ns != ns->user_ns) {
- put_user_ns(fc->user_ns);
- fc->user_ns = get_user_ns(ns->user_ns);
- }
-
- ctx = fc->fs_private;
- if (ctx->pid_ns != ns) {
- put_pid_ns(ctx->pid_ns);
- get_pid_ns(ns);
- ctx->pid_ns = ns;
- }
-
- mnt = fc_mount(fc);
- put_fs_context(fc);
- if (IS_ERR(mnt))
- return PTR_ERR(mnt);
-
- ns->proc_mnt = mnt;
- return 0;
-}
-
-void pid_ns_release_proc(struct pid_namespace *ns)
-{
- kern_unmount(ns->proc_mnt);
-}
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 57c0a1047250..72cd69bcaf4a 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -12,7 +12,7 @@ static const char *proc_self_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct pid_namespace *ns = proc_pid_ns(inode);
+ struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
@@ -36,14 +36,14 @@ static unsigned self_inum __ro_after_init;
int proc_setup_self(struct super_block *s)
{
struct inode *root_inode = d_inode(s->s_root);
- struct pid_namespace *ns = proc_pid_ns(root_inode);
+ struct proc_fs_info *fs_info = proc_sb_info(s);
struct dentry *self;
int ret = -ENOMEM;
-
+
inode_lock(root_inode);
self = d_alloc_name(s->s_root, "self");
if (self) {
- struct inode *inode = new_inode_pseudo(s);
+ struct inode *inode = new_inode(s);
if (inode) {
inode->i_ino = self_inum;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
@@ -62,7 +62,7 @@ int proc_setup_self(struct super_block *s)
if (ret)
pr_err("proc_fill_super: can't allocate /proc/self\n");
else
- ns->proc_self = self;
+ fs_info->proc_self = self;
return ret;
}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 0449edf460f5..46b3293015fe 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -224,6 +224,7 @@ static int stat_open(struct inode *inode, struct file *file)
}
static const struct proc_ops stat_proc_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
.proc_open = stat_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3ba9ae83bff5..5066b0251ed8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -123,38 +123,14 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
}
#endif
-static void vma_stop(struct proc_maps_private *priv)
-{
- struct mm_struct *mm = priv->mm;
-
- release_task_mempolicy(priv);
- up_read(&mm->mmap_sem);
- mmput(mm);
-}
-
-static struct vm_area_struct *
-m_next_vma(struct proc_maps_private *priv, struct vm_area_struct *vma)
-{
- if (vma == priv->tail_vma)
- return NULL;
- return vma->vm_next ?: priv->tail_vma;
-}
-
-static void m_cache_vma(struct seq_file *m, struct vm_area_struct *vma)
-{
- if (m->count < m->size) /* vma is copied successfully */
- m->version = m_next_vma(m->private, vma) ? vma->vm_end : -1UL;
-}
-
static void *m_start(struct seq_file *m, loff_t *ppos)
{
struct proc_maps_private *priv = m->private;
- unsigned long last_addr = m->version;
+ unsigned long last_addr = *ppos;
struct mm_struct *mm;
struct vm_area_struct *vma;
- unsigned int pos = *ppos;
- /* See m_cache_vma(). Zero at the start or after lseek. */
+ /* See m_next(). Zero at the start or after lseek. */
if (last_addr == -1UL)
return NULL;
@@ -163,64 +139,59 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
return ERR_PTR(-ESRCH);
mm = priv->mm;
- if (!mm || !mmget_not_zero(mm))
+ if (!mm || !mmget_not_zero(mm)) {
+ put_task_struct(priv->task);
+ priv->task = NULL;
return NULL;
+ }
- if (down_read_killable(&mm->mmap_sem)) {
+ if (mmap_read_lock_killable(mm)) {
mmput(mm);
+ put_task_struct(priv->task);
+ priv->task = NULL;
return ERR_PTR(-EINTR);
}
hold_task_mempolicy(priv);
priv->tail_vma = get_gate_vma(mm);
- if (last_addr) {
- vma = find_vma(mm, last_addr - 1);
- if (vma && vma->vm_start <= last_addr)
- vma = m_next_vma(priv, vma);
- if (vma)
- return vma;
- }
-
- m->version = 0;
- if (pos < mm->map_count) {
- for (vma = mm->mmap; pos; pos--) {
- m->version = vma->vm_start;
- vma = vma->vm_next;
- }
+ vma = find_vma(mm, last_addr);
+ if (vma)
return vma;
- }
-
- /* we do not bother to update m->version in this case */
- if (pos == mm->map_count && priv->tail_vma)
- return priv->tail_vma;
- vma_stop(priv);
- return NULL;
+ return priv->tail_vma;
}
-static void *m_next(struct seq_file *m, void *v, loff_t *pos)
+static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
{
struct proc_maps_private *priv = m->private;
- struct vm_area_struct *next;
+ struct vm_area_struct *next, *vma = v;
+
+ if (vma == priv->tail_vma)
+ next = NULL;
+ else if (vma->vm_next)
+ next = vma->vm_next;
+ else
+ next = priv->tail_vma;
+
+ *ppos = next ? next->vm_start : -1UL;
- (*pos)++;
- next = m_next_vma(priv, v);
- if (!next)
- vma_stop(priv);
return next;
}
static void m_stop(struct seq_file *m, void *v)
{
struct proc_maps_private *priv = m->private;
+ struct mm_struct *mm = priv->mm;
- if (!IS_ERR_OR_NULL(v))
- vma_stop(priv);
- if (priv->task) {
- put_task_struct(priv->task);
- priv->task = NULL;
- }
+ if (!priv->task)
+ return;
+
+ release_task_mempolicy(priv);
+ mmap_read_unlock(mm);
+ mmput(mm);
+ put_task_struct(priv->task);
+ priv->task = NULL;
}
static int proc_maps_open(struct inode *inode, struct file *file,
@@ -363,7 +334,6 @@ done:
static int show_map(struct seq_file *m, void *v)
{
show_map_vma(m, v);
- m_cache_vma(m, v);
return 0;
}
@@ -576,10 +546,17 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
- struct page *page;
+ struct page *page = NULL;
+
+ if (pmd_present(*pmd)) {
+ /* FOLL_DUMP will return -EFAULT on huge zero page */
+ page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+ } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
+ swp_entry_t entry = pmd_to_swp_entry(*pmd);
- /* FOLL_DUMP will return -EFAULT on huge zero page */
- page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+ if (is_migration_entry(entry))
+ page = migration_entry_to_page(entry);
+ }
if (IS_ERR_OR_NULL(page))
return;
if (PageAnon(page))
@@ -608,8 +585,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
- if (pmd_present(*pmd))
- smaps_pmd_entry(pmd, addr, walk);
+ smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
goto out;
}
@@ -617,7 +593,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (pmd_trans_unstable(pmd))
goto out;
/*
- * The mmap_sem held all the way back in m_start() is what
+ * The mmap_lock held all the way back in m_start() is what
* keeps khugepaged out of here and from collapsing things
* in here.
*/
@@ -652,9 +628,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_GROWSDOWN)] = "gd",
[ilog2(VM_PFNMAP)] = "pf",
[ilog2(VM_DENYWRITE)] = "dw",
-#ifdef CONFIG_X86_INTEL_MPX
- [ilog2(VM_MPX)] = "mp",
-#endif
[ilog2(VM_LOCKED)] = "lo",
[ilog2(VM_IO)] = "io",
[ilog2(VM_SEQ_READ)] = "sr",
@@ -668,6 +641,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_ARCH_1)] = "ar",
[ilog2(VM_WIPEONFORK)] = "wf",
[ilog2(VM_DONTDUMP)] = "dd",
+#ifdef CONFIG_ARM64_BTI
+ [ilog2(VM_ARM64_BTI)] = "bt",
+#endif
#ifdef CONFIG_MEM_SOFT_DIRTY
[ilog2(VM_SOFTDIRTY)] = "sd",
#endif
@@ -776,7 +752,7 @@ static void smap_gather_stats(struct vm_area_struct *vma,
}
}
#endif
- /* mmap_sem is held in m_start */
+ /* mmap_lock is held in m_start */
walk_page_vma(vma, &smaps_walk_ops, mss);
}
@@ -810,7 +786,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
- SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
+ SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
mss->private_hugetlb >> 10, 7);
@@ -840,15 +816,13 @@ static int show_smap(struct seq_file *m, void *v)
__show_smap(m, &mss, false);
- seq_printf(m, "THPeligible: %d\n",
+ seq_printf(m, "THPeligible: %d\n",
transparent_hugepage_enabled(vma));
if (arch_pkeys_enabled())
seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
show_smap_vma_flags(m, vma);
- m_cache_vma(m, vma);
-
return 0;
}
@@ -873,7 +847,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
memset(&mss, 0, sizeof(mss));
- ret = down_read_killable(&mm->mmap_sem);
+ ret = mmap_read_lock_killable(mm);
if (ret)
goto out_put_mm;
@@ -892,7 +866,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
__show_smap(m, &mss, true);
release_task_mempolicy(priv);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
out_put_mm:
mmput(mm);
@@ -1166,7 +1140,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
};
if (type == CLEAR_REFS_MM_HIWATER_RSS) {
- if (down_write_killable(&mm->mmap_sem)) {
+ if (mmap_write_lock_killable(mm)) {
count = -EINTR;
goto out_mm;
}
@@ -1176,11 +1150,11 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
* resident set size to this mm's current rss value.
*/
reset_mm_hiwater_rss(mm);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
goto out_mm;
}
- if (down_read_killable(&mm->mmap_sem)) {
+ if (mmap_read_lock_killable(mm)) {
count = -EINTR;
goto out_mm;
}
@@ -1189,8 +1163,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (!(vma->vm_flags & VM_SOFTDIRTY))
continue;
- up_read(&mm->mmap_sem);
- if (down_write_killable(&mm->mmap_sem)) {
+ mmap_read_unlock(mm);
+ if (mmap_write_lock_killable(mm)) {
count = -EINTR;
goto out_mm;
}
@@ -1209,14 +1183,14 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
* failed like if
* get_proc_task() fails?
*/
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
goto out_mm;
}
for (vma = mm->mmap; vma; vma = vma->vm_next) {
vma->vm_flags &= ~VM_SOFTDIRTY;
vma_set_page_prot(vma);
}
- downgrade_write(&mm->mmap_sem);
+ mmap_write_downgrade(mm);
break;
}
@@ -1229,7 +1203,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
if (type == CLEAR_REFS_SOFT_DIRTY)
mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb, 0, -1);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
out_mm:
mmput(mm);
}
@@ -1590,11 +1564,11 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
/* overflow ? */
if (end < start_vaddr || end > end_vaddr)
end = end_vaddr;
- ret = down_read_killable(&mm->mmap_sem);
+ ret = mmap_read_lock_killable(mm);
if (ret)
goto out_free;
ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
start_vaddr = end;
len = min(count, PM_ENTRY_BYTES * pm.pos);
@@ -1853,7 +1827,7 @@ static int show_numa_map(struct seq_file *m, void *v)
if (is_vm_hugetlb_page(vma))
seq_puts(m, " huge");
- /* mmap_sem is held by m_start */
+ /* mmap_lock is held by m_start */
walk_page_vma(vma, &show_numa_ops, md);
if (!md->pages)
@@ -1887,7 +1861,6 @@ static int show_numa_map(struct seq_file *m, void *v)
seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
out:
seq_putc(m, '\n');
- m_cache_vma(m, vma);
return 0;
}
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 7907e6419e57..a6d21fc0033c 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -25,7 +25,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
struct rb_node *p;
unsigned long bytes = 0, sbytes = 0, slack = 0, size;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
@@ -77,7 +77,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
"Shared:\t%8lu bytes\n",
bytes, slack, sbytes);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
}
unsigned long task_vsize(struct mm_struct *mm)
@@ -86,12 +86,12 @@ unsigned long task_vsize(struct mm_struct *mm)
struct rb_node *p;
unsigned long vsize = 0;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
vsize += vma->vm_end - vma->vm_start;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return vsize;
}
@@ -104,7 +104,7 @@ unsigned long task_statm(struct mm_struct *mm,
struct rb_node *p;
unsigned long size = kobjsize(mm);
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
size += kobjsize(vma);
@@ -119,7 +119,7 @@ unsigned long task_statm(struct mm_struct *mm,
>> PAGE_SHIFT;
*data = (PAGE_ALIGN(mm->start_stack) - (mm->start_data & PAGE_MASK))
>> PAGE_SHIFT;
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
size >>= PAGE_SHIFT;
size += *text + *data;
*resident = size;
@@ -211,7 +211,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
if (!mm || !mmget_not_zero(mm))
return NULL;
- if (down_read_killable(&mm->mmap_sem)) {
+ if (mmap_read_lock_killable(mm)) {
mmput(mm);
return ERR_PTR(-EINTR);
}
@@ -221,7 +221,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
if (n-- == 0)
return p;
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
mmput(mm);
return NULL;
}
@@ -231,7 +231,7 @@ static void m_stop(struct seq_file *m, void *_vml)
struct proc_maps_private *priv = m->private;
if (!IS_ERR_OR_NULL(_vml)) {
- up_read(&priv->mm->mmap_sem);
+ mmap_read_unlock(priv->mm);
mmput(priv->mm);
}
if (priv->task) {
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index f61ae53533f5..a553273fbd41 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -12,7 +12,7 @@ static const char *proc_thread_self_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
- struct pid_namespace *ns = proc_pid_ns(inode);
+ struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
pid_t tgid = task_tgid_nr_ns(current, ns);
pid_t pid = task_pid_nr_ns(current, ns);
char *name;
@@ -36,14 +36,14 @@ static unsigned thread_self_inum __ro_after_init;
int proc_setup_thread_self(struct super_block *s)
{
struct inode *root_inode = d_inode(s->s_root);
- struct pid_namespace *ns = proc_pid_ns(root_inode);
+ struct proc_fs_info *fs_info = proc_sb_info(s);
struct dentry *thread_self;
int ret = -ENOMEM;
inode_lock(root_inode);
thread_self = d_alloc_name(s->s_root, "thread-self");
if (thread_self) {
- struct inode *inode = new_inode_pseudo(s);
+ struct inode *inode = new_inode(s);
if (inode) {
inode->i_ino = thread_self_inum;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
@@ -60,9 +60,9 @@ int proc_setup_thread_self(struct super_block *s)
inode_unlock(root_inode);
if (ret)
- pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
+ pr_err("proc_fill_super: can't allocate /proc/thread-self\n");
else
- ns->proc_thread_self = thread_self;
+ fs_info->proc_thread_self = thread_self;
return ret;
}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 7dc800cce354..c3a345c28a93 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -27,7 +27,6 @@
#include <linux/pagemap.h>
#include <linux/uaccess.h>
#include <linux/mem_encrypt.h>
-#include <asm/pgtable.h>
#include <asm/io.h>
#include "internal.h"
@@ -266,7 +265,8 @@ static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
if (start < offset + dump->size) {
tsz = min(offset + (u64)dump->size - start, (u64)size);
buf = dump->buf + start - offset;
- if (remap_vmalloc_range_partial(vma, dst, buf, tsz)) {
+ if (remap_vmalloc_range_partial(vma, dst, buf, 0,
+ tsz)) {
ret = -EFAULT;
goto out_unlock;
}
@@ -624,7 +624,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size);
kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz;
if (remap_vmalloc_range_partial(vma, vma->vm_start + len,
- kaddr, tsz))
+ kaddr, 0, tsz))
goto fail;
size -= tsz;