diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/Kconfig | 15 | ||||
-rw-r--r-- | fs/proc/array.c | 83 | ||||
-rw-r--r-- | fs/proc/base.c | 420 | ||||
-rw-r--r-- | fs/proc/cmdline.c | 14 | ||||
-rw-r--r-- | fs/proc/consoles.c | 14 | ||||
-rw-r--r-- | fs/proc/devices.c | 14 | ||||
-rw-r--r-- | fs/proc/fd.c | 140 | ||||
-rw-r--r-- | fs/proc/generic.c | 151 | ||||
-rw-r--r-- | fs/proc/internal.h | 21 | ||||
-rw-r--r-- | fs/proc/interrupts.c | 14 | ||||
-rw-r--r-- | fs/proc/kcore.c | 23 | ||||
-rw-r--r-- | fs/proc/loadavg.c | 14 | ||||
-rw-r--r-- | fs/proc/meminfo.c | 14 | ||||
-rw-r--r-- | fs/proc/namespaces.c | 24 | ||||
-rw-r--r-- | fs/proc/nommu.c | 14 | ||||
-rw-r--r-- | fs/proc/page.c | 2 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 104 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 15 | ||||
-rw-r--r-- | fs/proc/proc_tty.c | 22 | ||||
-rw-r--r-- | fs/proc/self.c | 4 | ||||
-rw-r--r-- | fs/proc/softirqs.c | 14 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 43 | ||||
-rw-r--r-- | fs/proc/thread_self.c | 4 | ||||
-rw-r--r-- | fs/proc/uptime.c | 14 | ||||
-rw-r--r-- | fs/proc/version.c | 14 | ||||
-rw-r--r-- | fs/proc/vmcore.c | 386 |
26 files changed, 964 insertions, 633 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 1ade1206bb89..0eaeb41453f5 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -43,6 +43,21 @@ config PROC_VMCORE help Exports the dump image of crashed kernel in ELF format. +config PROC_VMCORE_DEVICE_DUMP + bool "Device Hardware/Firmware Log Collection" + depends on PROC_VMCORE + default n + help + After kernel panic, device drivers can collect the device + specific snapshot of their hardware or firmware before the + underlying devices are initialized in crash recovery kernel. + Note that the device driver must be present in the crash + recovery kernel's initramfs to collect its underlying device + snapshot. + + If you say Y here, the collected device dumps will be added + as ELF notes to /proc/vmcore. + config PROC_SYSCTL bool "Sysctl support (/proc/sys)" if EXPERT depends on PROC_FS diff --git a/fs/proc/array.c b/fs/proc/array.c index ae2c807fd719..0ceb3b6b37e7 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -85,6 +85,7 @@ #include <linux/delayacct.h> #include <linux/seq_file.h> #include <linux/pid_namespace.h> +#include <linux/prctl.h> #include <linux/ptrace.h> #include <linux/tracehook.h> #include <linux/string_helpers.h> @@ -95,22 +96,29 @@ #include <asm/processor.h> #include "internal.h" -static inline void task_name(struct seq_file *m, struct task_struct *p) +void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape) { char *buf; size_t size; - char tcomm[sizeof(p->comm)]; + char tcomm[64]; int ret; - get_task_comm(tcomm, p); - - seq_puts(m, "Name:\t"); + if (p->flags & PF_WQ_WORKER) + wq_worker_comm(tcomm, sizeof(tcomm), p); + else + __get_task_comm(tcomm, sizeof(tcomm), p); size = seq_get_buf(m, &buf); - ret = string_escape_str(tcomm, buf, size, ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); - seq_commit(m, ret < size ? ret : -1); + if (escape) { + ret = string_escape_str(tcomm, buf, size, + ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); + if (ret >= size) + ret = -1; + } else { + ret = strscpy(buf, tcomm, size); + } - seq_putc(m, '\n'); + seq_commit(m, ret); } /* @@ -260,7 +268,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) unsigned long flags; sigset_t pending, shpending, blocked, ignored, caught; int num_threads = 0; - unsigned long qsize = 0; + unsigned int qsize = 0; unsigned long qlim = 0; sigemptyset(&pending); @@ -335,6 +343,30 @@ static inline void task_seccomp(struct seq_file *m, struct task_struct *p) #ifdef CONFIG_SECCOMP seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode); #endif + seq_printf(m, "\nSpeculation_Store_Bypass:\t"); + switch (arch_prctl_spec_ctrl_get(p, PR_SPEC_STORE_BYPASS)) { + case -EINVAL: + seq_printf(m, "unknown"); + break; + case PR_SPEC_NOT_AFFECTED: + seq_printf(m, "not vulnerable"); + break; + case PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE: + seq_printf(m, "thread force mitigated"); + break; + case PR_SPEC_PRCTL | PR_SPEC_DISABLE: + seq_printf(m, "thread mitigated"); + break; + case PR_SPEC_PRCTL | PR_SPEC_ENABLE: + seq_printf(m, "thread vulnerable"); + break; + case PR_SPEC_DISABLE: + seq_printf(m, "globally mitigated"); + break; + default: + seq_printf(m, "vulnerable"); + break; + } seq_putc(m, '\n'); } @@ -365,7 +397,10 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, { struct mm_struct *mm = get_task_mm(task); - task_name(m, task); + seq_puts(m, "Name:\t"); + proc_task_name(m, task, true); + seq_putc(m, '\n'); + task_state(m, ns, pid, task); if (mm) { @@ -400,7 +435,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, u64 cutime, cstime, utime, stime; u64 cgtime, gtime; unsigned long rsslim = 0; - char tcomm[sizeof(task->comm)]; unsigned long flags; state = *get_task_state(task); @@ -427,8 +461,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, } } - get_task_comm(tcomm, task); - sigemptyset(&sigign); sigemptyset(&sigcatch); cutime = cstime = utime = stime = 0; @@ -495,7 +527,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns)); seq_puts(m, " ("); - seq_puts(m, tcomm); + proc_task_name(m, task, false); seq_puts(m, ") "); seq_putc(m, state); seq_put_decimal_ll(m, " ", ppid); @@ -677,25 +709,22 @@ out: static int children_seq_show(struct seq_file *seq, void *v) { - struct inode *inode = seq->private; - pid_t pid; - - pid = pid_nr_ns(v, inode->i_sb->s_fs_info); - seq_printf(seq, "%d ", pid); + struct inode *inode = file_inode(seq->file); + seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode))); return 0; } static void *children_seq_start(struct seq_file *seq, loff_t *pos) { - return get_children_pid(seq->private, NULL, *pos); + return get_children_pid(file_inode(seq->file), NULL, *pos); } static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct pid *pid; - pid = get_children_pid(seq->private, v, *pos + 1); + pid = get_children_pid(file_inode(seq->file), v, *pos + 1); put_pid(v); ++*pos; @@ -716,17 +745,7 @@ static const struct seq_operations children_seq_ops = { static int children_seq_open(struct inode *inode, struct file *file) { - struct seq_file *m; - int ret; - - ret = seq_open(file, &children_seq_ops); - if (ret) - return ret; - - m = file->private_data; - m->private = inode; - - return ret; + return seq_open(file, &children_seq_ops); } const struct file_operations proc_tid_children_operations = { diff --git a/fs/proc/base.c b/fs/proc/base.c index 1b2ede6abcdf..4aa9ce5df02f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -205,171 +205,129 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, - size_t _count, loff_t *pos) +static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, + size_t count, loff_t *ppos) { - struct task_struct *tsk; - struct mm_struct *mm; - char *page; - unsigned long count = _count; unsigned long arg_start, arg_end, env_start, env_end; - unsigned long len1, len2, len; - unsigned long p; - char c; - ssize_t rv; - - BUG_ON(*pos < 0); + unsigned long pos, len; + char *page; - tsk = get_proc_task(file_inode(file)); - if (!tsk) - return -ESRCH; - mm = get_task_mm(tsk); - put_task_struct(tsk); - if (!mm) - return 0; /* Check if process spawned far enough to have cmdline. */ - if (!mm->env_end) { - rv = 0; - goto out_mmput; - } - - page = (char *)__get_free_page(GFP_KERNEL); - if (!page) { - rv = -ENOMEM; - goto out_mmput; - } + if (!mm->env_end) + return 0; - down_read(&mm->mmap_sem); + spin_lock(&mm->arg_lock); arg_start = mm->arg_start; arg_end = mm->arg_end; env_start = mm->env_start; env_end = mm->env_end; - up_read(&mm->mmap_sem); - - BUG_ON(arg_start > arg_end); - BUG_ON(env_start > env_end); + spin_unlock(&mm->arg_lock); - len1 = arg_end - arg_start; - len2 = env_end - env_start; + if (arg_start >= arg_end) + return 0; - /* Empty ARGV. */ - if (len1 == 0) { - rv = 0; - goto out_free_page; - } /* - * Inherently racy -- command line shares address space - * with code and data. + * We have traditionally allowed the user to re-write + * the argument strings and overflow the end result + * into the environment section. But only do that if + * the environment area is contiguous to the arguments. */ - rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); - if (rv <= 0) - goto out_free_page; - - rv = 0; - - if (c == '\0') { - /* Command line (set of strings) occupies whole ARGV. */ - if (len1 <= *pos) - goto out_free_page; - - p = arg_start + *pos; - len = len1 - *pos; - while (count > 0 && len > 0) { - unsigned int _count; - int nr_read; - - _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); - if (nr_read < 0) - rv = nr_read; - if (nr_read <= 0) - goto out_free_page; - - if (copy_to_user(buf, page, nr_read)) { - rv = -EFAULT; - goto out_free_page; - } + if (env_start != arg_end || env_start >= env_end) + env_start = env_end = arg_end; - p += nr_read; - len -= nr_read; - buf += nr_read; - count -= nr_read; - rv += nr_read; - } - } else { - /* - * Command line (1 string) occupies ARGV and - * extends into ENVP. - */ - struct { - unsigned long p; - unsigned long len; - } cmdline[2] = { - { .p = arg_start, .len = len1 }, - { .p = env_start, .len = len2 }, - }; - loff_t pos1 = *pos; - unsigned int i; + /* We're not going to care if "*ppos" has high bits set */ + pos = arg_start + *ppos; - i = 0; - while (i < 2 && pos1 >= cmdline[i].len) { - pos1 -= cmdline[i].len; - i++; + /* .. but we do check the result is in the proper range */ + if (pos < arg_start || pos >= env_end) + return 0; + + /* .. and we never go past env_end */ + if (env_end - pos < count) + count = env_end - pos; + + page = (char *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + len = 0; + while (count) { + int got; + size_t size = min_t(size_t, PAGE_SIZE, count); + + got = access_remote_vm(mm, pos, page, size, FOLL_ANON); + if (got <= 0) + break; + + /* Don't walk past a NUL character once you hit arg_end */ + if (pos + got >= arg_end) { + int n = 0; + + /* + * If we started before 'arg_end' but ended up + * at or after it, we start the NUL character + * check at arg_end-1 (where we expect the normal + * EOF to be). + * + * NOTE! This is smaller than 'got', because + * pos + got >= arg_end + */ + if (pos < arg_end) + n = arg_end - pos - 1; + + /* Cut off at first NUL after 'n' */ + got = n + strnlen(page+n, got-n); + if (!got) + break; } - while (i < 2) { - p = cmdline[i].p + pos1; - len = cmdline[i].len - pos1; - while (count > 0 && len > 0) { - unsigned int _count, l; - int nr_read; - bool final; - - _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); - if (nr_read < 0) - rv = nr_read; - if (nr_read <= 0) - goto out_free_page; - - /* - * Command line can be shorter than whole ARGV - * even if last "marker" byte says it is not. - */ - final = false; - l = strnlen(page, nr_read); - if (l < nr_read) { - nr_read = l; - final = true; - } - - if (copy_to_user(buf, page, nr_read)) { - rv = -EFAULT; - goto out_free_page; - } - - p += nr_read; - len -= nr_read; - buf += nr_read; - count -= nr_read; - rv += nr_read; - - if (final) - goto out_free_page; - } - /* Only first chunk can be read partially. */ - pos1 = 0; - i++; + got -= copy_to_user(buf, page, got); + if (unlikely(!got)) { + if (!len) + len = -EFAULT; + break; } + pos += got; + buf += got; + len += got; + count -= got; } -out_free_page: free_page((unsigned long)page); -out_mmput: + return len; +} + +static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf, + size_t count, loff_t *pos) +{ + struct mm_struct *mm; + ssize_t ret; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + ret = get_mm_cmdline(mm, buf, count, pos); mmput(mm); - if (rv > 0) - *pos += rv; - return rv; + return ret; +} + +static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + struct task_struct *tsk; + ssize_t ret; + + BUG_ON(*pos < 0); + + tsk = get_proc_task(file_inode(file)); + if (!tsk) + return -ESRCH; + ret = get_task_cmdline(tsk, buf, count, pos); + put_task_struct(tsk); + if (ret > 0) + *pos += ret; + return ret; } static const struct file_operations proc_pid_cmdline_ops = { @@ -430,7 +388,6 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, struct stack_trace trace; unsigned long *entries; int err; - int i; entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); if (!entries) @@ -443,6 +400,8 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, err = lock_trace(task); if (!err) { + unsigned int i; + save_stack_trace_tsk(task, &trace); for (i = 0; i < trace.nr_entries; i++) { @@ -698,7 +657,7 @@ static bool has_pid_permissions(struct pid_namespace *pid, static int proc_pid_permission(struct inode *inode, int mask) { - struct pid_namespace *pid = inode->i_sb->s_fs_info; + struct pid_namespace *pid = proc_pid_ns(inode); struct task_struct *task; bool has_perms; @@ -733,13 +692,11 @@ static const struct inode_operations proc_def_inode_operations = { static int proc_single_show(struct seq_file *m, void *v) { struct inode *inode = m->private; - struct pid_namespace *ns; - struct pid *pid; + struct pid_namespace *ns = proc_pid_ns(inode); + struct pid *pid = proc_pid(inode); struct task_struct *task; int ret; - ns = inode->i_sb->s_fs_info; - pid = proc_pid(inode); task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; @@ -929,10 +886,10 @@ static ssize_t environ_read(struct file *file, char __user *buf, if (!mmget_not_zero(mm)) goto free; - down_read(&mm->mmap_sem); + spin_lock(&mm->arg_lock); env_start = mm->env_start; env_end = mm->env_end; - up_read(&mm->mmap_sem); + spin_unlock(&mm->arg_lock); while (count > 0) { size_t this_len, max_len; @@ -946,7 +903,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); - retval = access_remote_vm(mm, (env_start + src), page, this_len, 0); + retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); if (retval <= 0) { ret = retval; @@ -1410,7 +1367,7 @@ static const struct file_operations proc_fail_nth_operations = { static int sched_show(struct seq_file *m, void *v) { struct inode *inode = m->private; - struct pid_namespace *ns = inode->i_sb->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(inode); struct task_struct *p; p = get_proc_task(inode); @@ -1565,9 +1522,8 @@ static int comm_show(struct seq_file *m, void *v) if (!p) return -ESRCH; - task_lock(p); - seq_printf(m, "%s\n", p->comm); - task_unlock(p); + proc_task_name(m, p, false); + seq_putc(m, '\n'); put_task_struct(p); @@ -1782,14 +1738,14 @@ int pid_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); + struct pid_namespace *pid = proc_pid_ns(inode); struct task_struct *task; - struct pid_namespace *pid = path->dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); - rcu_read_lock(); stat->uid = GLOBAL_ROOT_UID; stat->gid = GLOBAL_ROOT_GID; + rcu_read_lock(); task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { @@ -1809,15 +1765,22 @@ int pid_getattr(const struct path *path, struct kstat *stat, /* dentry stuff */ /* - * Exceptional case: normally we are not allowed to unhash a busy - * directory. In this case, however, we can do it - no aliasing problems - * due to the way we treat inodes. - * + * Set <pid>/... inode ownership (can change due to setuid(), etc.) + */ +void pid_update_inode(struct task_struct *task, struct inode *inode) +{ + task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); + + inode->i_mode &= ~(S_ISUID | S_ISGID); + security_task_to_inode(task, inode); +} + +/* * Rewrite the inode's ownerships here because the owning task may have * performed a setuid(), etc. * */ -int pid_revalidate(struct dentry *dentry, unsigned int flags) +static int pid_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; @@ -1829,10 +1792,7 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) task = get_proc_task(inode); if (task) { - task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); - - inode->i_mode &= ~(S_ISUID | S_ISGID); - security_task_to_inode(task, inode); + pid_update_inode(task, inode); put_task_struct(task); return 1; } @@ -1874,14 +1834,14 @@ const struct dentry_operations pid_dentry_operations = * by stat. */ bool proc_fill_cache(struct file *file, struct dir_context *ctx, - const char *name, int len, + const char *name, unsigned int len, instantiate_t instantiate, struct task_struct *task, const void *ptr) { struct dentry *child, *dir = file->f_path.dentry; struct qstr qname = QSTR_INIT(name, len); struct inode *inode; - unsigned type; - ino_t ino; + unsigned type = DT_UNKNOWN; + ino_t ino = 1; child = d_hash_and_lookup(dir, &qname); if (!child) { @@ -1890,11 +1850,14 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, if (IS_ERR(child)) goto end_instantiate; if (d_in_lookup(child)) { - int err = instantiate(d_inode(dir), child, task, ptr); + struct dentry *res; + res = instantiate(child, task, ptr); d_lookup_done(child); - if (err < 0) { + if (unlikely(res)) { dput(child); - goto end_instantiate; + child = res; + if (IS_ERR(child)) + goto end_instantiate; } } } @@ -1902,10 +1865,8 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, ino = inode->i_ino; type = inode->i_mode >> 12; dput(child); - return dir_emit(ctx, name, len, ino, type); - end_instantiate: - return dir_emit(ctx, name, len, 1, DT_UNKNOWN); + return dir_emit(ctx, name, len, ino, type); } /* @@ -2067,19 +2028,19 @@ static const struct inode_operations proc_map_files_link_inode_operations = { .setattr = proc_setattr, }; -static int -proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, +static struct dentry * +proc_map_files_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { fmode_t mode = (fmode_t)(unsigned long)ptr; struct proc_inode *ei; struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | ((mode & FMODE_READ ) ? S_IRUSR : 0) | ((mode & FMODE_WRITE) ? S_IWUSR : 0)); if (!inode) - return -ENOENT; + return ERR_PTR(-ENOENT); ei = PROC_I(inode); ei->op.proc_get_link = map_files_get_link; @@ -2088,9 +2049,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, inode->i_size = 64; d_set_d_op(dentry, &tid_map_files_dentry_operations); - d_add(dentry, inode); - - return 0; + return d_splice_alias(inode, dentry); } static struct dentry *proc_map_files_lookup(struct inode *dir, @@ -2099,19 +2058,19 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, unsigned long vm_start, vm_end; struct vm_area_struct *vma; struct task_struct *task; - int result; + struct dentry *result; struct mm_struct *mm; - result = -ENOENT; + result = ERR_PTR(-ENOENT); task = get_proc_task(dir); if (!task) goto out; - result = -EACCES; + result = ERR_PTR(-EACCES); if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; - result = -ENOENT; + result = ERR_PTR(-ENOENT); if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) goto out_put_task; @@ -2125,7 +2084,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, goto out_no_vma; if (vma->vm_file) - result = proc_map_files_instantiate(dir, dentry, task, + result = proc_map_files_instantiate(dentry, task, (void *)(unsigned long)vma->vm_file->f_mode); out_no_vma: @@ -2134,7 +2093,7 @@ out_no_vma: out_put_task: put_task_struct(task); out: - return ERR_PTR(result); + return result; } static const struct inode_operations proc_map_files_inode_operations = { @@ -2337,7 +2296,7 @@ static int proc_timers_open(struct inode *inode, struct file *file) return -ENOMEM; tp->pid = proc_pid(inode); - tp->ns = inode->i_sb->s_fs_info; + tp->ns = proc_pid_ns(inode); return 0; } @@ -2435,16 +2394,16 @@ static const struct file_operations proc_pid_set_timerslack_ns_operations = { .release = single_release, }; -static int proc_pident_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) +static struct dentry *proc_pident_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) { const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; - inode = proc_pid_make_inode(dir->i_sb, task, p->mode); + inode = proc_pid_make_inode(dentry->d_sb, task, p->mode); if (!inode) - goto out; + return ERR_PTR(-ENOENT); ei = PROC_I(inode); if (S_ISDIR(inode->i_mode)) @@ -2454,13 +2413,9 @@ static int proc_pident_instantiate(struct inode *dir, if (p->fop) inode->i_fop = p->fop; ei->op = p->op; + pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, 0)) - return 0; -out: - return -ENOENT; + return d_splice_alias(inode, dentry); } static struct dentry *proc_pident_lookup(struct inode *dir, @@ -2468,11 +2423,9 @@ static struct dentry *proc_pident_lookup(struct inode *dir, const struct pid_entry *ents, unsigned int nents) { - int error; struct task_struct *task = get_proc_task(dir); const struct pid_entry *p, *last; - - error = -ENOENT; + struct dentry *res = ERR_PTR(-ENOENT); if (!task) goto out_no_task; @@ -2491,11 +2444,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir, if (p >= last) goto out; - error = proc_pident_instantiate(dir, dentry, task, p); + res = proc_pident_instantiate(dentry, task, p); out: put_task_struct(task); out_no_task: - return ERR_PTR(error); + return res; } static int proc_pident_readdir(struct file *file, struct dir_context *ctx, @@ -3138,38 +3091,32 @@ void proc_flush_task(struct task_struct *task) } } -static int proc_pid_instantiate(struct inode *dir, - struct dentry * dentry, +static struct dentry *proc_pid_instantiate(struct dentry * dentry, struct task_struct *task, const void *ptr) { struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) - goto out; + return ERR_PTR(-ENOENT); inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; set_nlink(inode, nlink_tgid); + pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); - - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, 0)) - return 0; -out: - return -ENOENT; + return d_splice_alias(inode, dentry); } struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - int result = -ENOENT; struct task_struct *task; unsigned tgid; struct pid_namespace *ns; + struct dentry *result = ERR_PTR(-ENOENT); tgid = name_to_int(&dentry->d_name); if (tgid == ~0U) @@ -3184,10 +3131,10 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsign if (!task) goto out; - result = proc_pid_instantiate(dir, dentry, task, NULL); + result = proc_pid_instantiate(dentry, task, NULL); put_task_struct(task); out: - return ERR_PTR(result); + return result; } /* @@ -3239,7 +3186,7 @@ retry: int proc_pid_readdir(struct file *file, struct dir_context *ctx) { struct tgid_iter iter; - struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(file_inode(file)); loff_t pos = ctx->pos; if (pos >= PID_MAX_LIMIT + TGID_OFFSET) @@ -3263,7 +3210,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { char name[10 + 1]; - int len; + unsigned int len; cond_resched(); if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) @@ -3435,37 +3382,32 @@ static const struct inode_operations proc_tid_base_inode_operations = { .setattr = proc_setattr, }; -static int proc_task_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) +static struct dentry *proc_task_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) { struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); - + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) - goto out; + return ERR_PTR(-ENOENT); + inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; - inode->i_flags|=S_IMMUTABLE; + inode->i_flags |= S_IMMUTABLE; set_nlink(inode, nlink_tid); + pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); - - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, 0)) - return 0; -out: - return -ENOENT; + return d_splice_alias(inode, dentry); } static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - int result = -ENOENT; struct task_struct *task; struct task_struct *leader = get_proc_task(dir); unsigned tid; struct pid_namespace *ns; + struct dentry *result = ERR_PTR(-ENOENT); if (!leader) goto out_no_task; @@ -3485,13 +3427,13 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry if (!same_thread_group(leader, task)) goto out_drop_task; - result = proc_task_instantiate(dir, dentry, task, NULL); + result = proc_task_instantiate(dentry, task, NULL); out_drop_task: put_task_struct(task); out: put_task_struct(leader); out_no_task: - return ERR_PTR(result); + return result; } /* @@ -3588,14 +3530,14 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ - ns = inode->i_sb->s_fs_info; + ns = proc_pid_ns(inode); tid = (int)file->f_version; file->f_version = 0; for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { char name[10 + 1]; - int len; + unsigned int len; tid = task_pid_nr_ns(task, ns); len = snprintf(name, sizeof(name), "%u", tid); if (!proc_fill_cache(file, ctx, name, len, diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c index 8233e7af9389..fa762c5fbcb2 100644 --- a/fs/proc/cmdline.c +++ b/fs/proc/cmdline.c @@ -11,21 +11,9 @@ static int cmdline_proc_show(struct seq_file *m, void *v) return 0; } -static int cmdline_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, cmdline_proc_show, NULL); -} - -static const struct file_operations cmdline_proc_fops = { - .open = cmdline_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_cmdline_init(void) { - proc_create("cmdline", 0, NULL, &cmdline_proc_fops); + proc_create_single("cmdline", 0, NULL, cmdline_proc_show); return 0; } fs_initcall(proc_cmdline_init); diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index a8ac48aebd59..954caf0b7fee 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -91,21 +91,9 @@ static const struct seq_operations consoles_op = { .show = show_console_dev }; -static int consoles_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &consoles_op); -} - -static const struct file_operations proc_consoles_operations = { - .open = consoles_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_consoles_init(void) { - proc_create("consoles", 0, NULL, &proc_consoles_operations); + proc_create_seq("consoles", 0, NULL, &consoles_op); return 0; } fs_initcall(proc_consoles_init); diff --git a/fs/proc/devices.c b/fs/proc/devices.c index 2c7f22b14489..37d38697eaf8 100644 --- a/fs/proc/devices.c +++ b/fs/proc/devices.c @@ -51,21 +51,9 @@ static const struct seq_operations devinfo_ops = { .show = devinfo_show }; -static int devinfo_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &devinfo_ops); -} - -static const struct file_operations proc_devinfo_operations = { - .open = devinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_devices_init(void) { - proc_create("devices", 0, NULL, &proc_devinfo_operations); + proc_create_seq("devices", 0, NULL, &devinfo_ops); return 0; } fs_initcall(proc_devices_init); diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 6b80cd1e419a..81882a13212d 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -81,9 +81,41 @@ static const struct file_operations proc_fdinfo_file_operations = { .release = single_release, }; +static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode) +{ + struct files_struct *files = get_files_struct(task); + struct file *file; + + if (!files) + return false; + + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) + *mode = file->f_mode; + rcu_read_unlock(); + put_files_struct(files); + return !!file; +} + +static void tid_fd_update_inode(struct task_struct *task, struct inode *inode, + fmode_t f_mode) +{ + task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); + + if (S_ISLNK(inode->i_mode)) { + unsigned i_mode = S_IFLNK; + if (f_mode & FMODE_READ) + i_mode |= S_IRUSR | S_IXUSR; + if (f_mode & FMODE_WRITE) + i_mode |= S_IWUSR | S_IXUSR; + inode->i_mode = i_mode; + } + security_task_to_inode(task, inode); +} + static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) { - struct files_struct *files; struct task_struct *task; struct inode *inode; unsigned int fd; @@ -96,35 +128,11 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) fd = proc_fd(inode); if (task) { - files = get_files_struct(task); - if (files) { - struct file *file; - - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - unsigned f_mode = file->f_mode; - - rcu_read_unlock(); - put_files_struct(files); - - task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); - - if (S_ISLNK(inode->i_mode)) { - unsigned i_mode = S_IFLNK; - if (f_mode & FMODE_READ) - i_mode |= S_IRUSR | S_IXUSR; - if (f_mode & FMODE_WRITE) - i_mode |= S_IWUSR | S_IXUSR; - inode->i_mode = i_mode; - } - - security_task_to_inode(task, inode); - put_task_struct(task); - return 1; - } - rcu_read_unlock(); - put_files_struct(files); + fmode_t f_mode; + if (tid_fd_mode(task, fd, &f_mode)) { + tid_fd_update_inode(task, inode, f_mode); + put_task_struct(task); + return 1; } put_task_struct(task); } @@ -166,34 +174,33 @@ static int proc_fd_link(struct dentry *dentry, struct path *path) return ret; } -static int -proc_fd_instantiate(struct inode *dir, struct dentry *dentry, - struct task_struct *task, const void *ptr) +struct fd_data { + fmode_t mode; + unsigned fd; +}; + +static struct dentry *proc_fd_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) { - unsigned fd = (unsigned long)ptr; + const struct fd_data *data = ptr; struct proc_inode *ei; struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK); + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK); if (!inode) - goto out; + return ERR_PTR(-ENOENT); ei = PROC_I(inode); - ei->fd = fd; + ei->fd = data->fd; inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; ei->op.proc_get_link = proc_fd_link; + tid_fd_update_inode(task, inode, data->mode); d_set_d_op(dentry, &tid_fd_dentry_operations); - d_add(dentry, inode); - - /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, 0)) - return 0; - out: - return -ENOENT; + return d_splice_alias(inode, dentry); } static struct dentry *proc_lookupfd_common(struct inode *dir, @@ -201,19 +208,21 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, instantiate_t instantiate) { struct task_struct *task = get_proc_task(dir); - int result = -ENOENT; - unsigned fd = name_to_int(&dentry->d_name); + struct fd_data data = {.fd = name_to_int(&dentry->d_name)}; + struct dentry *result = ERR_PTR(-ENOENT); if (!task) goto out_no_task; - if (fd == ~0U) + if (data.fd == ~0U) + goto out; + if (!tid_fd_mode(task, data.fd, &data.mode)) goto out; - result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); + result = instantiate(dentry, task, &data); out: put_task_struct(task); out_no_task: - return ERR_PTR(result); + return result; } static int proc_readfd_common(struct file *file, struct dir_context *ctx, @@ -236,17 +245,22 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, for (fd = ctx->pos - 2; fd < files_fdtable(files)->max_fds; fd++, ctx->pos++) { + struct file *f; + struct fd_data data; char name[10 + 1]; - int len; + unsigned int len; - if (!fcheck_files(files, fd)) + f = fcheck_files(files, fd); + if (!f) continue; + data.mode = f->f_mode; rcu_read_unlock(); + data.fd = fd; len = snprintf(name, sizeof(name), "%u", fd); if (!proc_fill_cache(file, ctx, name, len, instantiate, p, - (void *)(unsigned long)fd)) + &data)) goto out_fd_loop; cond_resched(); rcu_read_lock(); @@ -304,31 +318,25 @@ const struct inode_operations proc_fd_inode_operations = { .setattr = proc_setattr, }; -static int -proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, - struct task_struct *task, const void *ptr) +static struct dentry *proc_fdinfo_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) { - unsigned fd = (unsigned long)ptr; + const struct fd_data *data = ptr; struct proc_inode *ei; struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task, S_IFREG | S_IRUSR); + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFREG | S_IRUSR); if (!inode) - goto out; + return ERR_PTR(-ENOENT); ei = PROC_I(inode); - ei->fd = fd; + ei->fd = data->fd; inode->i_fop = &proc_fdinfo_file_operations; + tid_fd_update_inode(task, inode, 0); d_set_d_op(dentry, &tid_fd_dentry_operations); - d_add(dentry, inode); - - /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, 0)) - return 0; - out: - return -ENOENT; + return d_splice_alias(inode, dentry); } static struct dentry * diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 2078e70e1595..7b4d9714f248 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -25,6 +25,7 @@ #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/uaccess.h> +#include <linux/seq_file.h> #include "internal.h" @@ -256,8 +257,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, if (!inode) return ERR_PTR(-ENOMEM); d_set_d_op(dentry, &proc_misc_dentry_ops); - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } read_unlock(&proc_subdir_lock); return ERR_PTR(-ENOENT); @@ -346,13 +346,12 @@ static const struct inode_operations proc_dir_inode_operations = { .setattr = proc_notify_change, }; -static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) +/* returns the registered entry, or frees dp and returns NULL on failure */ +struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, + struct proc_dir_entry *dp) { - int ret; - - ret = proc_alloc_inum(&dp->low_ino); - if (ret) - return ret; + if (proc_alloc_inum(&dp->low_ino)) + goto out_free_entry; write_lock(&proc_subdir_lock); dp->parent = dir; @@ -360,12 +359,16 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp WARN(1, "proc_dir_entry '%s/%s' already registered\n", dir->name, dp->name); write_unlock(&proc_subdir_lock); - proc_free_inum(dp->low_ino); - return -EEXIST; + goto out_free_inum; } write_unlock(&proc_subdir_lock); - return 0; + return dp; +out_free_inum: + proc_free_inum(dp->low_ino); +out_free_entry: + pde_free(dp); + return NULL; } static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, @@ -443,10 +446,7 @@ struct proc_dir_entry *proc_symlink(const char *name, if (ent->data) { strcpy((char*)ent->data,dest); ent->proc_iops = &proc_link_inode_operations; - if (proc_register(parent, ent) < 0) { - pde_free(ent); - ent = NULL; - } + ent = proc_register(parent, ent); } else { pde_free(ent); ent = NULL; @@ -470,11 +470,9 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, ent->proc_fops = &proc_dir_operations; ent->proc_iops = &proc_dir_inode_operations; parent->nlink++; - if (proc_register(parent, ent) < 0) { - pde_free(ent); + ent = proc_register(parent, ent); + if (!ent) parent->nlink--; - ent = NULL; - } } return ent; } @@ -505,47 +503,47 @@ struct proc_dir_entry *proc_create_mount_point(const char *name) ent->proc_fops = NULL; ent->proc_iops = NULL; parent->nlink++; - if (proc_register(parent, ent) < 0) { - pde_free(ent); + ent = proc_register(parent, ent); + if (!ent) parent->nlink--; - ent = NULL; - } } return ent; } EXPORT_SYMBOL(proc_create_mount_point); -struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, - struct proc_dir_entry *parent, - const struct file_operations *proc_fops, - void *data) +struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, + struct proc_dir_entry **parent, void *data) { - struct proc_dir_entry *pde; + struct proc_dir_entry *p; + if ((mode & S_IFMT) == 0) mode |= S_IFREG; - - if (!S_ISREG(mode)) { - WARN_ON(1); /* use proc_mkdir() */ + if ((mode & S_IALLUGO) == 0) + mode |= S_IRUGO; + if (WARN_ON_ONCE(!S_ISREG(mode))) return NULL; + + p = __proc_create(parent, name, mode, 1); + if (p) { + p->proc_iops = &proc_file_inode_operations; + p->data = data; } + return p; +} + +struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, + struct proc_dir_entry *parent, + const struct file_operations *proc_fops, void *data) +{ + struct proc_dir_entry *p; BUG_ON(proc_fops == NULL); - if ((mode & S_IALLUGO) == 0) - mode |= S_IRUGO; - pde = __proc_create(&parent, name, mode, 1); - if (!pde) - goto out; - pde->proc_fops = proc_fops; - pde->data = data; - pde->proc_iops = &proc_file_inode_operations; - if (proc_register(parent, pde) < 0) - goto out_free; - return pde; -out_free: - pde_free(pde); -out: - return NULL; + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = proc_fops; + return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_data); @@ -557,6 +555,67 @@ struct proc_dir_entry *proc_create(const char *name, umode_t mode, } EXPORT_SYMBOL(proc_create); +static int proc_seq_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *de = PDE(inode); + + if (de->state_size) + return seq_open_private(file, de->seq_ops, de->state_size); + return seq_open(file, de->seq_ops); +} + +static const struct file_operations proc_seq_fops = { + .open = proc_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, + struct proc_dir_entry *parent, const struct seq_operations *ops, + unsigned int state_size, void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_seq_fops; + p->seq_ops = ops; + p->state_size = state_size; + return proc_register(parent, p); +} +EXPORT_SYMBOL(proc_create_seq_private); + +static int proc_single_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *de = PDE(inode); + + return single_open(file, de->single_show, de->data); +} + +static const struct file_operations proc_single_fops = { + .open = proc_single_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, + struct proc_dir_entry *parent, + int (*show)(struct seq_file *, void *), void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_single_fops; + p->single_show = show; + return proc_register(parent, p); +} +EXPORT_SYMBOL(proc_create_single_data); + void proc_set_size(struct proc_dir_entry *de, loff_t size) { de->size = size; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 0f1692e63cb6..50cb22a08c2f 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -44,7 +44,12 @@ struct proc_dir_entry { struct completion *pde_unload_completion; const struct inode_operations *proc_iops; const struct file_operations *proc_fops; + union { + const struct seq_operations *seq_ops; + int (*single_show)(struct seq_file *, void *); + }; void *data; + unsigned int state_size; unsigned int low_ino; nlink_t nlink; kuid_t uid; @@ -57,9 +62,9 @@ struct proc_dir_entry { umode_t mode; u8 namelen; #ifdef CONFIG_64BIT -#define SIZEOF_PDE_INLINE_NAME (192-139) +#define SIZEOF_PDE_INLINE_NAME (192-155) #else -#define SIZEOF_PDE_INLINE_NAME (128-87) +#define SIZEOF_PDE_INLINE_NAME (128-95) #endif char inline_name[SIZEOF_PDE_INLINE_NAME]; } __randomize_layout; @@ -131,6 +136,8 @@ unsigned name_to_int(const struct qstr *qstr); */ extern const struct file_operations proc_tid_children_operations; +extern void proc_task_name(struct seq_file *m, struct task_struct *p, + bool escape); extern int proc_tid_stat(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *, @@ -147,21 +154,25 @@ extern const struct dentry_operations pid_dentry_operations; extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int proc_setattr(struct dentry *, struct iattr *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); -extern int pid_revalidate(struct dentry *, unsigned int); +extern void pid_update_inode(struct task_struct *, struct inode *); extern int pid_delete_dentry(const struct dentry *); extern int proc_pid_readdir(struct file *, struct dir_context *); extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int); extern loff_t mem_lseek(struct file *, loff_t, int); /* Lookups */ -typedef int instantiate_t(struct inode *, struct dentry *, +typedef struct dentry *instantiate_t(struct dentry *, struct task_struct *, const void *); -extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int, +bool proc_fill_cache(struct file *, struct dir_context *, const char *, unsigned int, instantiate_t, struct task_struct *, const void *); /* * generic.c */ +struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, + struct proc_dir_entry **parent, void *data); +struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, + struct proc_dir_entry *dp); extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *); extern int proc_readdir(struct file *, struct dir_context *); diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c index 6a6bee9c603c..cb0edc7cbf09 100644 --- a/fs/proc/interrupts.c +++ b/fs/proc/interrupts.c @@ -34,21 +34,9 @@ static const struct seq_operations int_seq_ops = { .show = show_interrupts }; -static int interrupts_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &int_seq_ops); -} - -static const struct file_operations proc_interrupts_operations = { - .open = interrupts_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_interrupts_init(void) { - proc_create("interrupts", 0, NULL, &proc_interrupts_operations); + proc_create_seq("interrupts", 0, NULL, &int_seq_ops); return 0; } fs_initcall(proc_interrupts_init); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index d1e82761de81..e64ecb9f2720 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg) { struct list_head *head = (struct list_head *)arg; struct kcore_list *ent; + struct page *p; + + if (!pfn_valid(pfn)) + return 1; + + p = pfn_to_page(pfn); + if (!memmap_valid_within(pfn, p, page_zone(p))) + return 1; ent = kmalloc(sizeof(*ent), GFP_KERNEL); if (!ent) return -ENOMEM; - ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT)); + ent->addr = (unsigned long)page_to_virt(p); ent->size = nr_pages << PAGE_SHIFT; - /* Sanity check: Can happen in 32bit arch...maybe */ - if (ent->addr < (unsigned long) __va(0)) + if (!virt_addr_valid(ent->addr)) goto free_out; /* cut not-mapped area. ....from ppc-32 code. */ if (ULONG_MAX - ent->addr < ent->size) ent->size = ULONG_MAX - ent->addr; - /* cut when vmalloc() area is higher than direct-map area */ - if (VMALLOC_START > (unsigned long)__va(0)) { - if (ent->addr > VMALLOC_START) - goto free_out; + /* + * We've already checked virt_addr_valid so we know this address + * is a valid pointer, therefore we can check against it to determine + * if we need to trim + */ + if (VMALLOC_START > ent->addr) { if (VMALLOC_START - ent->addr < ent->size) ent->size = VMALLOC_START - ent->addr; } diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index b572cc865b92..d06694757201 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -28,21 +28,9 @@ static int loadavg_proc_show(struct seq_file *m, void *v) return 0; } -static int loadavg_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, loadavg_proc_show, NULL); -} - -static const struct file_operations loadavg_proc_fops = { - .open = loadavg_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_loadavg_init(void) { - proc_create("loadavg", 0, NULL, &loadavg_proc_fops); + proc_create_single("loadavg", 0, NULL, loadavg_proc_show); return 0; } fs_initcall(proc_loadavg_init); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 65a72ab57471..2fb04846ed11 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -149,21 +149,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) return 0; } -static int meminfo_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, meminfo_proc_show, NULL); -} - -static const struct file_operations meminfo_proc_fops = { - .open = meminfo_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_meminfo_init(void) { - proc_create("meminfo", 0, NULL, &meminfo_proc_fops); + proc_create_single("meminfo", 0, NULL, meminfo_proc_show); return 0; } fs_initcall(proc_meminfo_init); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 59b17e509f46..dd2b35f78b09 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -87,28 +87,24 @@ static const struct inode_operations proc_ns_link_inode_operations = { .setattr = proc_setattr, }; -static int proc_ns_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) +static struct dentry *proc_ns_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) { const struct proc_ns_operations *ns_ops = ptr; struct inode *inode; struct proc_inode *ei; - inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | S_IRWXUGO); + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO); if (!inode) - goto out; + return ERR_PTR(-ENOENT); ei = PROC_I(inode); inode->i_op = &proc_ns_link_inode_operations; ei->ns_ops = ns_ops; + pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, 0)) - return 0; -out: - return -ENOENT; + return d_splice_alias(inode, dentry); } static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx) @@ -147,12 +143,10 @@ const struct file_operations proc_ns_dir_operations = { static struct dentry *proc_ns_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - int error; struct task_struct *task = get_proc_task(dir); const struct proc_ns_operations **entry, **last; unsigned int len = dentry->d_name.len; - - error = -ENOENT; + struct dentry *res = ERR_PTR(-ENOENT); if (!task) goto out_no_task; @@ -167,11 +161,11 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, if (entry == last) goto out; - error = proc_ns_instantiate(dir, dentry, task, *entry); + res = proc_ns_instantiate(dentry, task, *entry); out: put_task_struct(task); out_no_task: - return ERR_PTR(error); + return res; } const struct inode_operations proc_ns_dir_inode_operations = { diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 75634379f82e..3b63be64e436 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -113,21 +113,9 @@ static const struct seq_operations proc_nommu_region_list_seqop = { .show = nommu_region_list_show }; -static int proc_nommu_region_list_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &proc_nommu_region_list_seqop); -} - -static const struct file_operations proc_nommu_region_list_operations = { - .open = proc_nommu_region_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static int __init proc_nommu_init(void) { - proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations); + proc_create_seq("maps", S_IRUGO, NULL, &proc_nommu_region_list_seqop); return 0; } diff --git a/fs/proc/page.c b/fs/proc/page.c index 1491918a33c3..792c78a49174 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -154,6 +154,8 @@ u64 stable_page_flags(struct page *page) if (PageBalloon(page)) u |= 1 << KPF_BALLOON; + if (PageTable(page)) + u |= 1 << KPF_PGTABLE; if (page_is_idle(page)) u |= 1 << KPF_IDLE; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 1763f370489d..7d94fa005b0d 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -38,20 +38,20 @@ static struct net *get_proc_net(const struct inode *inode) return maybe_get_net(PDE_NET(PDE(inode))); } -int seq_open_net(struct inode *ino, struct file *f, - const struct seq_operations *ops, int size) +static int seq_open_net(struct inode *inode, struct file *file) { - struct net *net; + unsigned int state_size = PDE(inode)->state_size; struct seq_net_private *p; + struct net *net; - BUG_ON(size < sizeof(*p)); + WARN_ON_ONCE(state_size < sizeof(*p)); - net = get_proc_net(ino); - if (net == NULL) + net = get_proc_net(inode); + if (!net) return -ENXIO; - p = __seq_open_private(f, ops, size); - if (p == NULL) { + p = __seq_open_private(file, PDE(inode)->seq_ops, state_size); + if (!p) { put_net(net); return -ENOMEM; } @@ -60,51 +60,83 @@ int seq_open_net(struct inode *ino, struct file *f, #endif return 0; } -EXPORT_SYMBOL_GPL(seq_open_net); -int single_open_net(struct inode *inode, struct file *file, - int (*show)(struct seq_file *, void *)) +static int seq_release_net(struct inode *ino, struct file *f) { - int err; - struct net *net; - - err = -ENXIO; - net = get_proc_net(inode); - if (net == NULL) - goto err_net; - - err = single_open(file, show, net); - if (err < 0) - goto err_open; + struct seq_file *seq = f->private_data; + put_net(seq_file_net(seq)); + seq_release_private(ino, f); return 0; +} -err_open: - put_net(net); -err_net: - return err; +static const struct file_operations proc_net_seq_fops = { + .open = seq_open_net, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, + struct proc_dir_entry *parent, const struct seq_operations *ops, + unsigned int state_size, void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_net_seq_fops; + p->seq_ops = ops; + p->state_size = state_size; + return proc_register(parent, p); } -EXPORT_SYMBOL_GPL(single_open_net); +EXPORT_SYMBOL_GPL(proc_create_net_data); -int seq_release_net(struct inode *ino, struct file *f) +static int single_open_net(struct inode *inode, struct file *file) { - struct seq_file *seq; + struct proc_dir_entry *de = PDE(inode); + struct net *net; + int err; - seq = f->private_data; + net = get_proc_net(inode); + if (!net) + return -ENXIO; - put_net(seq_file_net(seq)); - seq_release_private(ino, f); - return 0; + err = single_open(file, de->single_show, net); + if (err) + put_net(net); + return err; } -EXPORT_SYMBOL_GPL(seq_release_net); -int single_release_net(struct inode *ino, struct file *f) +static int single_release_net(struct inode *ino, struct file *f) { struct seq_file *seq = f->private_data; put_net(seq->private); return single_release(ino, f); } -EXPORT_SYMBOL_GPL(single_release_net); + +static const struct file_operations proc_net_single_fops = { + .open = single_open_net, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release_net, +}; + +struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, + struct proc_dir_entry *parent, + int (*show)(struct seq_file *, void *), void *data) +{ + struct proc_dir_entry *p; + + p = proc_create_reg(name, mode, &parent, data); + if (!p) + return NULL; + p->proc_fops = &proc_net_single_fops; + p->single_show = show; + return proc_register(parent, p); +} +EXPORT_SYMBOL_GPL(proc_create_net_single); static struct net *get_proc_task_net(struct inode *dir) { diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 8989936f2995..4d765e5e91ed 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -554,9 +554,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, if (!inode) goto out; - err = NULL; d_set_d_op(dentry, &proc_sys_dentry_operations); - d_add(dentry, inode); + err = d_splice_alias(inode, dentry); out: if (h) @@ -684,6 +683,7 @@ static bool proc_sys_fill_cache(struct file *file, if (IS_ERR(child)) return false; if (d_in_lookup(child)) { + struct dentry *res; inode = proc_sys_make_inode(dir->d_sb, head, table); if (!inode) { d_lookup_done(child); @@ -691,7 +691,16 @@ static bool proc_sys_fill_cache(struct file *file, return false; } d_set_d_op(child, &proc_sys_dentry_operations); - d_add(child, inode); + res = d_splice_alias(inode, child); + d_lookup_done(child); + if (unlikely(res)) { + if (IS_ERR(res)) { + dput(child); + return false; + } + dput(child); + child = res; + } } } inode = d_inode(child); diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index d0cf1c50bb6c..c69ff191e5d8 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -126,18 +126,6 @@ static const struct seq_operations tty_drivers_op = { .show = show_tty_driver }; -static int tty_drivers_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &tty_drivers_op); -} - -static const struct file_operations proc_tty_drivers_operations = { - .open = tty_drivers_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - /* * This function is called by tty_register_driver() to handle * registering the driver's /proc handler into /proc/tty/driver/<foo> @@ -147,11 +135,11 @@ void proc_tty_register_driver(struct tty_driver *driver) struct proc_dir_entry *ent; if (!driver->driver_name || driver->proc_entry || - !driver->ops->proc_fops) + !driver->ops->proc_show) return; - ent = proc_create_data(driver->driver_name, 0, proc_tty_driver, - driver->ops->proc_fops, driver); + ent = proc_create_single_data(driver->driver_name, 0, proc_tty_driver, + driver->ops->proc_show, driver); driver->proc_entry = ent; } @@ -186,6 +174,6 @@ void __init proc_tty_init(void) * entry. */ proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL); - proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops); - proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations); + proc_create_seq("tty/ldiscs", 0, NULL, &tty_ldiscs_seq_ops); + proc_create_seq("tty/drivers", 0, NULL, &tty_drivers_op); } diff --git a/fs/proc/self.c b/fs/proc/self.c index 4d7d061696b3..127265e5c55f 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -12,7 +12,7 @@ static const char *proc_self_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct pid_namespace *ns = inode->i_sb->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(inode); pid_t tgid = task_tgid_nr_ns(current, ns); char *name; @@ -36,7 +36,7 @@ static unsigned self_inum __ro_after_init; int proc_setup_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); - struct pid_namespace *ns = s->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(root_inode); struct dentry *self; inode_lock(root_inode); diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c index 24072cc06e65..12901dcf57e2 100644 --- a/fs/proc/softirqs.c +++ b/fs/proc/softirqs.c @@ -25,21 +25,9 @@ static int show_softirqs(struct seq_file *p, void *v) return 0; } -static int softirqs_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_softirqs, NULL); -} - -static const struct file_operations proc_softirqs_operations = { - .open = softirqs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_softirqs_init(void) { - proc_create("softirqs", 0, NULL, &proc_softirqs_operations); + proc_create_single("softirqs", 0, NULL, show_softirqs); return 0; } fs_initcall(proc_softirqs_init); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index c486ad4b43f0..597969db9e90 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -18,6 +18,7 @@ #include <linux/page_idle.h> #include <linux/shmem_fs.h> #include <linux/uaccess.h> +#include <linux/pkeys.h> #include <asm/elf.h> #include <asm/tlb.h> @@ -673,13 +674,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_MERGEABLE)] = "mg", [ilog2(VM_UFFD_MISSING)]= "um", [ilog2(VM_UFFD_WP)] = "uw", -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +#ifdef CONFIG_ARCH_HAS_PKEYS /* These come out via ProtectionKey: */ [ilog2(VM_PKEY_BIT0)] = "", [ilog2(VM_PKEY_BIT1)] = "", [ilog2(VM_PKEY_BIT2)] = "", [ilog2(VM_PKEY_BIT3)] = "", +#if VM_PKEY_BIT4 + [ilog2(VM_PKEY_BIT4)] = "", #endif +#endif /* CONFIG_ARCH_HAS_PKEYS */ }; size_t i; @@ -727,10 +731,6 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, } #endif /* HUGETLB_PAGE */ -void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma) -{ -} - #define SEQ_PUT_DEC(str, val) \ seq_put_decimal_ull_width(m, str, (val) >> 10, 8) static int show_smap(struct seq_file *m, void *v, int is_pid) @@ -835,7 +835,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) seq_puts(m, " kB\n"); } if (!rollup_mode) { - arch_show_smap(m, vma); + if (arch_pkeys_enabled()) + seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); show_smap_vma_flags(m, vma); } m_cache_vma(m, vma); @@ -937,7 +938,7 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, /* * The soft-dirty tracker uses #PF-s to catch writes * to pages, so write-protect the pte as well. See the - * Documentation/vm/soft-dirty.txt for full description + * Documentation/admin-guide/mm/soft-dirty.rst for full description * of how soft-dirty works. */ pte_t ptent = *pte; @@ -1258,8 +1259,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (pte_swp_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; entry = pte_to_swp_entry(pte); - frame = swp_type(entry) | - (swp_offset(entry) << MAX_SWAPFILES_SHIFT); + if (pm->show_pfn) + frame = swp_type(entry) | + (swp_offset(entry) << MAX_SWAPFILES_SHIFT); flags |= PM_SWAP; if (is_migration_entry(entry)) page = migration_entry_to_page(entry); @@ -1310,11 +1312,14 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION else if (is_swap_pmd(pmd)) { swp_entry_t entry = pmd_to_swp_entry(pmd); - unsigned long offset = swp_offset(entry); + unsigned long offset; - offset += (addr & ~PMD_MASK) >> PAGE_SHIFT; - frame = swp_type(entry) | - (offset << MAX_SWAPFILES_SHIFT); + if (pm->show_pfn) { + offset = swp_offset(entry) + + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + frame = swp_type(entry) | + (offset << MAX_SWAPFILES_SHIFT); + } flags |= PM_SWAP; if (pmd_swp_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; @@ -1332,10 +1337,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, err = add_to_pagemap(addr, &pme, pm); if (err) break; - if (pm->show_pfn && (flags & PM_PRESENT)) - frame++; - else if (flags & PM_SWAP) - frame += (1 << MAX_SWAPFILES_SHIFT); + if (pm->show_pfn) { + if (flags & PM_PRESENT) + frame++; + else if (flags & PM_SWAP) + frame += (1 << MAX_SWAPFILES_SHIFT); + } } spin_unlock(ptl); return err; @@ -1421,7 +1428,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask, * Bits 0-54 page frame number (PFN) if present * Bits 0-4 swap type if swapped * Bits 5-54 swap offset if swapped - * Bit 55 pte is soft-dirty (see Documentation/vm/soft-dirty.txt) + * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst) * Bit 56 page exclusively mapped * Bits 57-60 zero * Bit 61 page is file-page or shared-anon diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index 9d2efaca499f..b905010ca9eb 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -12,7 +12,7 @@ static const char *proc_thread_self_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct pid_namespace *ns = inode->i_sb->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(inode); pid_t tgid = task_tgid_nr_ns(current, ns); pid_t pid = task_pid_nr_ns(current, ns); char *name; @@ -36,7 +36,7 @@ static unsigned thread_self_inum __ro_after_init; int proc_setup_thread_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); - struct pid_namespace *ns = s->s_fs_info; + struct pid_namespace *ns = proc_pid_ns(root_inode); struct dentry *thread_self; inode_lock(root_inode); diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 95a708d83721..3bd12f955867 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -30,21 +30,9 @@ static int uptime_proc_show(struct seq_file *m, void *v) return 0; } -static int uptime_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, uptime_proc_show, NULL); -} - -static const struct file_operations uptime_proc_fops = { - .open = uptime_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_uptime_init(void) { - proc_create("uptime", 0, NULL, &uptime_proc_fops); + proc_create_single("uptime", 0, NULL, uptime_proc_show); return 0; } fs_initcall(proc_uptime_init); diff --git a/fs/proc/version.c b/fs/proc/version.c index 94901e8e700d..b449f186577f 100644 --- a/fs/proc/version.c +++ b/fs/proc/version.c @@ -15,21 +15,9 @@ static int version_proc_show(struct seq_file *m, void *v) return 0; } -static int version_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, version_proc_show, NULL); -} - -static const struct file_operations version_proc_fops = { - .open = version_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_version_init(void) { - proc_create("version", 0, NULL, &version_proc_fops); + proc_create_single("version", 0, NULL, version_proc_show); return 0; } fs_initcall(proc_version_init); diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index a45f0af22a60..cfb6674331fd 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/crash_dump.h> #include <linux/list.h> +#include <linux/mutex.h> #include <linux/vmalloc.h> #include <linux/pagemap.h> #include <linux/uaccess.h> @@ -38,12 +39,23 @@ static size_t elfcorebuf_sz_orig; static char *elfnotes_buf; static size_t elfnotes_sz; +/* Size of all notes minus the device dump notes */ +static size_t elfnotes_orig_sz; /* Total size of vmcore file. */ static u64 vmcore_size; static struct proc_dir_entry *proc_vmcore; +#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP +/* Device Dump list and mutex to synchronize access to list */ +static LIST_HEAD(vmcoredd_list); +static DEFINE_MUTEX(vmcoredd_mutex); +#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ + +/* Device Dump Size */ +static size_t vmcoredd_orig_sz; + /* * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error * The called function has to take care of module refcounting. @@ -178,6 +190,77 @@ static int copy_to(void *target, void *src, size_t size, int userbuf) return 0; } +#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP +static int vmcoredd_copy_dumps(void *dst, u64 start, size_t size, int userbuf) +{ + struct vmcoredd_node *dump; + u64 offset = 0; + int ret = 0; + size_t tsz; + char *buf; + + mutex_lock(&vmcoredd_mutex); + list_for_each_entry(dump, &vmcoredd_list, list) { + if (start < offset + dump->size) { + tsz = min(offset + (u64)dump->size - start, (u64)size); + buf = dump->buf + start - offset; + if (copy_to(dst, buf, tsz, userbuf)) { + ret = -EFAULT; + goto out_unlock; + } + + size -= tsz; + start += tsz; + dst += tsz; + + /* Leave now if buffer filled already */ + if (!size) + goto out_unlock; + } + offset += dump->size; + } + +out_unlock: + mutex_unlock(&vmcoredd_mutex); + return ret; +} + +static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst, + u64 start, size_t size) +{ + struct vmcoredd_node *dump; + u64 offset = 0; + int ret = 0; + size_t tsz; + char *buf; + + mutex_lock(&vmcoredd_mutex); + list_for_each_entry(dump, &vmcoredd_list, list) { + if (start < offset + dump->size) { + tsz = min(offset + (u64)dump->size - start, (u64)size); + buf = dump->buf + start - offset; + if (remap_vmalloc_range_partial(vma, dst, buf, tsz)) { + ret = -EFAULT; + goto out_unlock; + } + + size -= tsz; + start += tsz; + dst += tsz; + + /* Leave now if buffer filled already */ + if (!size) + goto out_unlock; + } + offset += dump->size; + } + +out_unlock: + mutex_unlock(&vmcoredd_mutex); + return ret; +} +#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ + /* Read from the ELF header and then the crash dump. On error, negative value is * returned otherwise number of bytes read are returned. */ @@ -215,10 +298,41 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos, if (*fpos < elfcorebuf_sz + elfnotes_sz) { void *kaddr; + /* We add device dumps before other elf notes because the + * other elf notes may not fill the elf notes buffer + * completely and we will end up with zero-filled data + * between the elf notes and the device dumps. Tools will + * then try to decode this zero-filled data as valid notes + * and we don't want that. Hence, adding device dumps before + * the other elf notes ensure that zero-filled data can be + * avoided. + */ +#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP + /* Read device dumps */ + if (*fpos < elfcorebuf_sz + vmcoredd_orig_sz) { + tsz = min(elfcorebuf_sz + vmcoredd_orig_sz - + (size_t)*fpos, buflen); + start = *fpos - elfcorebuf_sz; + if (vmcoredd_copy_dumps(buffer, start, tsz, userbuf)) + return -EFAULT; + + buflen -= tsz; + *fpos += tsz; + buffer += tsz; + acc += tsz; + + /* leave now if filled buffer already */ + if (!buflen) + return acc; + } +#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ + + /* Read remaining elf notes */ tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); - kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; + kaddr = elfnotes_buf + *fpos - elfcorebuf_sz - vmcoredd_orig_sz; if (copy_to(buffer, kaddr, tsz, userbuf)) return -EFAULT; + buflen -= tsz; *fpos += tsz; buffer += tsz; @@ -302,10 +416,8 @@ static const struct vm_operations_struct vmcore_mmap_ops = { }; /** - * alloc_elfnotes_buf - allocate buffer for ELF note segment in - * vmalloc memory - * - * @notes_sz: size of buffer + * vmcore_alloc_buf - allocate buffer in vmalloc memory + * @sizez: size of buffer * * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap * the buffer to user-space by means of remap_vmalloc_range(). @@ -313,12 +425,12 @@ static const struct vm_operations_struct vmcore_mmap_ops = { * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is * disabled and there's no need to allow users to mmap the buffer. */ -static inline char *alloc_elfnotes_buf(size_t notes_sz) +static inline char *vmcore_alloc_buf(size_t size) { #ifdef CONFIG_MMU - return vmalloc_user(notes_sz); + return vmalloc_user(size); #else - return vzalloc(notes_sz); + return vzalloc(size); #endif } @@ -446,11 +558,46 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) if (start < elfcorebuf_sz + elfnotes_sz) { void *kaddr; + /* We add device dumps before other elf notes because the + * other elf notes may not fill the elf notes buffer + * completely and we will end up with zero-filled data + * between the elf notes and the device dumps. Tools will + * then try to decode this zero-filled data as valid notes + * and we don't want that. Hence, adding device dumps before + * the other elf notes ensure that zero-filled data can be + * avoided. This also ensures that the device dumps and + * other elf notes can be properly mmaped at page aligned + * address. + */ +#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP + /* Read device dumps */ + if (start < elfcorebuf_sz + vmcoredd_orig_sz) { + u64 start_off; + + tsz = min(elfcorebuf_sz + vmcoredd_orig_sz - + (size_t)start, size); + start_off = start - elfcorebuf_sz; + if (vmcoredd_mmap_dumps(vma, vma->vm_start + len, + start_off, tsz)) + goto fail; + + size -= tsz; + start += tsz; + len += tsz; + + /* leave now if filled buffer already */ + if (!size) + return 0; + } +#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ + + /* Read remaining elf notes */ tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); - kaddr = elfnotes_buf + start - elfcorebuf_sz; + kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz; if (remap_vmalloc_range_partial(vma, vma->vm_start + len, kaddr, tsz)) goto fail; + size -= tsz; start += tsz; len += tsz; @@ -502,8 +649,8 @@ static struct vmcore* __init get_new_element(void) return kzalloc(sizeof(struct vmcore), GFP_KERNEL); } -static u64 __init get_vmcore_size(size_t elfsz, size_t elfnotesegsz, - struct list_head *vc_list) +static u64 get_vmcore_size(size_t elfsz, size_t elfnotesegsz, + struct list_head *vc_list) { u64 size; struct vmcore *m; @@ -665,7 +812,7 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, return rc; *notes_sz = roundup(phdr_sz, PAGE_SIZE); - *notes_buf = alloc_elfnotes_buf(*notes_sz); + *notes_buf = vmcore_alloc_buf(*notes_sz); if (!*notes_buf) return -ENOMEM; @@ -698,6 +845,11 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, /* Modify e_phnum to reflect merged headers. */ ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; + /* Store the size of all notes. We need this to update the note + * header when the device dumps will be added. + */ + elfnotes_orig_sz = phdr.p_memsz; + return 0; } @@ -851,7 +1003,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, return rc; *notes_sz = roundup(phdr_sz, PAGE_SIZE); - *notes_buf = alloc_elfnotes_buf(*notes_sz); + *notes_buf = vmcore_alloc_buf(*notes_sz); if (!*notes_buf) return -ENOMEM; @@ -884,6 +1036,11 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, /* Modify e_phnum to reflect merged headers. */ ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; + /* Store the size of all notes. We need this to update the note + * header when the device dumps will be added. + */ + elfnotes_orig_sz = phdr.p_memsz; + return 0; } @@ -976,8 +1133,8 @@ static int __init process_ptload_program_headers_elf32(char *elfptr, } /* Sets offset fields of vmcore elements. */ -static void __init set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz, - struct list_head *vc_list) +static void set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz, + struct list_head *vc_list) { loff_t vmcore_off; struct vmcore *m; @@ -1145,6 +1302,202 @@ static int __init parse_crash_elf_headers(void) return 0; } +#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP +/** + * vmcoredd_write_header - Write vmcore device dump header at the + * beginning of the dump's buffer. + * @buf: Output buffer where the note is written + * @data: Dump info + * @size: Size of the dump + * + * Fills beginning of the dump's buffer with vmcore device dump header. + */ +static void vmcoredd_write_header(void *buf, struct vmcoredd_data *data, + u32 size) +{ + struct vmcoredd_header *vdd_hdr = (struct vmcoredd_header *)buf; + + vdd_hdr->n_namesz = sizeof(vdd_hdr->name); + vdd_hdr->n_descsz = size + sizeof(vdd_hdr->dump_name); + vdd_hdr->n_type = NT_VMCOREDD; + + strncpy((char *)vdd_hdr->name, VMCOREDD_NOTE_NAME, + sizeof(vdd_hdr->name)); + memcpy(vdd_hdr->dump_name, data->dump_name, sizeof(vdd_hdr->dump_name)); +} + +/** + * vmcoredd_update_program_headers - Update all Elf program headers + * @elfptr: Pointer to elf header + * @elfnotesz: Size of elf notes aligned to page size + * @vmcoreddsz: Size of device dumps to be added to elf note header + * + * Determine type of Elf header (Elf64 or Elf32) and update the elf note size. + * Also update the offsets of all the program headers after the elf note header. + */ +static void vmcoredd_update_program_headers(char *elfptr, size_t elfnotesz, + size_t vmcoreddsz) +{ + unsigned char *e_ident = (unsigned char *)elfptr; + u64 start, end, size; + loff_t vmcore_off; + u32 i; + + vmcore_off = elfcorebuf_sz + elfnotesz; + + if (e_ident[EI_CLASS] == ELFCLASS64) { + Elf64_Ehdr *ehdr = (Elf64_Ehdr *)elfptr; + Elf64_Phdr *phdr = (Elf64_Phdr *)(elfptr + sizeof(Elf64_Ehdr)); + + /* Update all program headers */ + for (i = 0; i < ehdr->e_phnum; i++, phdr++) { + if (phdr->p_type == PT_NOTE) { + /* Update note size */ + phdr->p_memsz = elfnotes_orig_sz + vmcoreddsz; + phdr->p_filesz = phdr->p_memsz; + continue; + } + + start = rounddown(phdr->p_offset, PAGE_SIZE); + end = roundup(phdr->p_offset + phdr->p_memsz, + PAGE_SIZE); + size = end - start; + phdr->p_offset = vmcore_off + (phdr->p_offset - start); + vmcore_off += size; + } + } else { + Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfptr; + Elf32_Phdr *phdr = (Elf32_Phdr *)(elfptr + sizeof(Elf32_Ehdr)); + + /* Update all program headers */ + for (i = 0; i < ehdr->e_phnum; i++, phdr++) { + if (phdr->p_type == PT_NOTE) { + /* Update note size */ + phdr->p_memsz = elfnotes_orig_sz + vmcoreddsz; + phdr->p_filesz = phdr->p_memsz; + continue; + } + + start = rounddown(phdr->p_offset, PAGE_SIZE); + end = roundup(phdr->p_offset + phdr->p_memsz, + PAGE_SIZE); + size = end - start; + phdr->p_offset = vmcore_off + (phdr->p_offset - start); + vmcore_off += size; + } + } +} + +/** + * vmcoredd_update_size - Update the total size of the device dumps and update + * Elf header + * @dump_size: Size of the current device dump to be added to total size + * + * Update the total size of all the device dumps and update the Elf program + * headers. Calculate the new offsets for the vmcore list and update the + * total vmcore size. + */ +static void vmcoredd_update_size(size_t dump_size) +{ + vmcoredd_orig_sz += dump_size; + elfnotes_sz = roundup(elfnotes_orig_sz, PAGE_SIZE) + vmcoredd_orig_sz; + vmcoredd_update_program_headers(elfcorebuf, elfnotes_sz, + vmcoredd_orig_sz); + + /* Update vmcore list offsets */ + set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); + + vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz, + &vmcore_list); + proc_vmcore->size = vmcore_size; +} + +/** + * vmcore_add_device_dump - Add a buffer containing device dump to vmcore + * @data: dump info. + * + * Allocate a buffer and invoke the calling driver's dump collect routine. + * Write Elf note at the beginning of the buffer to indicate vmcore device + * dump and add the dump to global list. + */ +int vmcore_add_device_dump(struct vmcoredd_data *data) +{ + struct vmcoredd_node *dump; + void *buf = NULL; + size_t data_size; + int ret; + + if (!data || !strlen(data->dump_name) || + !data->vmcoredd_callback || !data->size) + return -EINVAL; + + dump = vzalloc(sizeof(*dump)); + if (!dump) { + ret = -ENOMEM; + goto out_err; + } + + /* Keep size of the buffer page aligned so that it can be mmaped */ + data_size = roundup(sizeof(struct vmcoredd_header) + data->size, + PAGE_SIZE); + + /* Allocate buffer for driver's to write their dumps */ + buf = vmcore_alloc_buf(data_size); + if (!buf) { + ret = -ENOMEM; + goto out_err; + } + + vmcoredd_write_header(buf, data, data_size - + sizeof(struct vmcoredd_header)); + + /* Invoke the driver's dump collection routing */ + ret = data->vmcoredd_callback(data, buf + + sizeof(struct vmcoredd_header)); + if (ret) + goto out_err; + + dump->buf = buf; + dump->size = data_size; + + /* Add the dump to driver sysfs list */ + mutex_lock(&vmcoredd_mutex); + list_add_tail(&dump->list, &vmcoredd_list); + mutex_unlock(&vmcoredd_mutex); + + vmcoredd_update_size(data_size); + return 0; + +out_err: + if (buf) + vfree(buf); + + if (dump) + vfree(dump); + + return ret; +} +EXPORT_SYMBOL(vmcore_add_device_dump); +#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ + +/* Free all dumps in vmcore device dump list */ +static void vmcore_free_device_dumps(void) +{ +#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP + mutex_lock(&vmcoredd_mutex); + while (!list_empty(&vmcoredd_list)) { + struct vmcoredd_node *dump; + + dump = list_first_entry(&vmcoredd_list, struct vmcoredd_node, + list); + list_del(&dump->list); + vfree(dump->buf); + vfree(dump); + } + mutex_unlock(&vmcoredd_mutex); +#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ +} + /* Init function for vmcore module. */ static int __init vmcore_init(void) { @@ -1192,4 +1545,7 @@ void vmcore_cleanup(void) kfree(m); } free_elfcorebuf(); + + /* clear vmcore device dump list */ + vmcore_free_device_dumps(); } |