diff options
Diffstat (limited to 'fs/coredump.c')
-rw-r--r-- | fs/coredump.c | 218 |
1 files changed, 170 insertions, 48 deletions
diff --git a/fs/coredump.c b/fs/coredump.c index 9d235fa14ab9..64894ba6efca 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -18,6 +18,7 @@ #include <linux/personality.h> #include <linux/binfmts.h> #include <linux/coredump.h> +#include <linux/sort.h> #include <linux/sched/coredump.h> #include <linux/sched/signal.h> #include <linux/sched/task_stack.h> @@ -42,6 +43,8 @@ #include <linux/timekeeping.h> #include <linux/sysctl.h> #include <linux/elf.h> +#include <linux/pidfs.h> +#include <uapi/linux/pidfd.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> @@ -56,10 +59,22 @@ static bool dump_vma_snapshot(struct coredump_params *cprm); static void free_vma_snapshot(struct coredump_params *cprm); +#define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024) +/* Define a reasonable max cap */ +#define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024) +/* + * File descriptor number for the pidfd for the thread-group leader of + * the coredumping task installed into the usermode helper's file + * descriptor table. + */ +#define COREDUMP_PIDFD_NUMBER 3 + static int core_uses_pid; static unsigned int core_pipe_limit; +static unsigned int core_sort_vma; static char core_pattern[CORENAME_MAX_SIZE] = "core"; static int core_name_size = CORENAME_MAX_SIZE; +unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT; struct core_name { char *corename; @@ -332,6 +347,27 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, case 'C': err = cn_printf(cn, "%d", cprm->cpu); break; + /* pidfd number */ + case 'F': { + /* + * Installing a pidfd only makes sense if + * we actually spawn a usermode helper. + */ + if (!ispipe) + break; + + /* + * Note that we'll install a pidfd for the + * thread-group leader. We know that task + * linkage hasn't been removed yet and even if + * this @current isn't the actual thread-group + * leader we know that the thread-group leader + * cannot be reaped until @current has exited. + */ + cprm->pid = task_tgid(current); + err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER); + break; + } default: break; } @@ -356,24 +392,21 @@ out: return ispipe; } -static int zap_process(struct task_struct *start, int exit_code) +static int zap_process(struct signal_struct *signal, int exit_code) { struct task_struct *t; int nr = 0; - /* Allow SIGKILL, see prepare_signal() */ - start->signal->flags = SIGNAL_GROUP_EXIT; - start->signal->group_exit_code = exit_code; - start->signal->group_stop_count = 0; + signal->flags = SIGNAL_GROUP_EXIT; + signal->group_exit_code = exit_code; + signal->group_stop_count = 0; - for_each_thread(start, t) { + __for_each_thread(signal, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); if (t != current && !(t->flags & PF_POSTCOREDUMP)) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); - /* The vhost_worker does not particpate in coredumps */ - if ((t->flags & (PF_USER_WORKER | PF_IO_WORKER)) != PF_USER_WORKER) - nr++; + nr++; } } @@ -388,8 +421,9 @@ static int zap_threads(struct task_struct *tsk, spin_lock_irq(&tsk->sighand->siglock); if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) { + /* Allow SIGKILL, see prepare_signal() */ signal->core_state = core_state; - nr = zap_process(tsk, exit_code); + nr = zap_process(signal, exit_code); clear_tsk_thread_flag(tsk, TIF_SIGPENDING); tsk->flags |= PF_DUMPCORE; atomic_set(&core_state->nr_threads, nr); @@ -488,7 +522,7 @@ static void wait_for_dump_helpers(struct file *file) } /* - * umh_pipe_setup + * umh_coredump_setup * helper function to customize the process used * to collect the core in userspace. Specifically * it sets up a pipe and installs it as fd 0 (stdin) @@ -498,11 +532,32 @@ static void wait_for_dump_helpers(struct file *file) * is a special value that we use to trap recursive * core dumps */ -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) +static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) { struct file *files[2]; struct coredump_params *cp = (struct coredump_params *)info->data; - int err = create_pipe_files(files, 0); + int err; + + if (cp->pid) { + struct file *pidfs_file __free(fput) = NULL; + + pidfs_file = pidfs_alloc_file(cp->pid, O_RDWR); + if (IS_ERR(pidfs_file)) + return PTR_ERR(pidfs_file); + + /* + * Usermode helpers are childen of either + * system_unbound_wq or of kthreadd. So we know that + * we're starting off with a clean file descriptor + * table. So we should always be able to use + * COREDUMP_PIDFD_NUMBER as our file descriptor value. + */ + err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); + if (err < 0) + return err; + } + + err = create_pipe_files(files, 0); if (err) return err; @@ -510,10 +565,13 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) err = replace_fd(0, files[0], 0); fput(files[0]); + if (err < 0) + return err; + /* and disallow core files too */ current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; - return err; + return 0; } void do_coredump(const kernel_siginfo_t *siginfo) @@ -583,13 +641,12 @@ void do_coredump(const kernel_siginfo_t *siginfo) struct subprocess_info *sub_info; if (ispipe < 0) { - printk(KERN_WARNING "format_corename failed\n"); - printk(KERN_WARNING "Aborting core\n"); + coredump_report_failure("format_corename failed, aborting core"); goto fail_unlock; } if (cprm.limit == 1) { - /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. + /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. * * Normally core limits are irrelevant to pipes, since * we're not writing to the file system, but we use @@ -604,27 +661,21 @@ void do_coredump(const kernel_siginfo_t *siginfo) * right pid if a thread in a multi-threaded * core_pattern process dies. */ - printk(KERN_WARNING - "Process %d(%s) has RLIMIT_CORE set to 1\n", - task_tgid_vnr(current), current->comm); - printk(KERN_WARNING "Aborting core\n"); + coredump_report_failure("RLIMIT_CORE is set to 1, aborting core"); goto fail_unlock; } cprm.limit = RLIM_INFINITY; dump_count = atomic_inc_return(&core_dump_count); if (core_pipe_limit && (core_pipe_limit < dump_count)) { - printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", - task_tgid_vnr(current), current->comm); - printk(KERN_WARNING "Skipping core dump\n"); + coredump_report_failure("over core_pipe_limit, skipping core dump"); goto fail_dropcount; } helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), GFP_KERNEL); if (!helper_argv) { - printk(KERN_WARNING "%s failed to allocate memory\n", - __func__); + coredump_report_failure("%s failed to allocate memory", __func__); goto fail_dropcount; } for (argi = 0; argi < argc; argi++) @@ -634,15 +685,14 @@ void do_coredump(const kernel_siginfo_t *siginfo) retval = -ENOMEM; sub_info = call_usermodehelper_setup(helper_argv[0], helper_argv, NULL, GFP_KERNEL, - umh_pipe_setup, NULL, &cprm); + umh_coredump_setup, NULL, &cprm); if (sub_info) retval = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); kfree(helper_argv); if (retval) { - printk(KERN_INFO "Core dump to |%s pipe failed\n", - cn.corename); + coredump_report_failure("|%s pipe failed", cn.corename); goto close_fail; } } else { @@ -655,10 +705,8 @@ void do_coredump(const kernel_siginfo_t *siginfo) goto fail_unlock; if (need_suid_safe && cn.corename[0] != '/') { - printk(KERN_WARNING "Pid %d(%s) can only dump core "\ - "to fully qualified path!\n", - task_tgid_vnr(current), current->comm); - printk(KERN_WARNING "Skipping core dump\n"); + coredump_report_failure( + "this process can only dump core to a fully qualified path, skipping core dump"); goto fail_unlock; } @@ -727,13 +775,13 @@ void do_coredump(const kernel_siginfo_t *siginfo) idmap = file_mnt_idmap(cprm.file); if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) { - pr_info_ratelimited("Core dump to %s aborted: cannot preserve file owner\n", - cn.corename); + coredump_report_failure("Core dump to %s aborted: " + "cannot preserve file owner", cn.corename); goto close_fail; } if ((inode->i_mode & 0677) != 0600) { - pr_info_ratelimited("Core dump to %s aborted: cannot preserve file permissions\n", - cn.corename); + coredump_report_failure("Core dump to %s aborted: " + "cannot preserve file permissions", cn.corename); goto close_fail; } if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) @@ -754,7 +802,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) * have this set to NULL. */ if (!cprm.file) { - pr_info("Core dump to |%s disabled\n", cn.corename); + coredump_report_failure("Core dump to |%s disabled", cn.corename); goto close_fail; } if (!dump_vma_snapshot(&cprm)) @@ -872,6 +920,9 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) loff_t pos; ssize_t n; + if (!page) + return 0; + if (cprm->to_skip) { if (!__dump_skip(cprm, cprm->to_skip)) return 0; @@ -884,7 +935,6 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) pos = file->f_pos; bvec_set_page(&bvec, page, PAGE_SIZE, 0); iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE); - iov_iter_set_copy_mc(&iter); n = __kernel_write_iter(cprm->file, &iter, &pos); if (n != PAGE_SIZE) return 0; @@ -895,10 +945,44 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) return 1; } +/* + * If we might get machine checks from kernel accesses during the + * core dump, let's get those errors early rather than during the + * IO. This is not performance-critical enough to warrant having + * all the machine check logic in the iovec paths. + */ +#ifdef copy_mc_to_kernel + +#define dump_page_alloc() alloc_page(GFP_KERNEL) +#define dump_page_free(x) __free_page(x) +static struct page *dump_page_copy(struct page *src, struct page *dst) +{ + void *buf = kmap_local_page(src); + size_t left = copy_mc_to_kernel(page_address(dst), buf, PAGE_SIZE); + kunmap_local(buf); + return left ? NULL : dst; +} + +#else + +/* We just want to return non-NULL; it's never used. */ +#define dump_page_alloc() ERR_PTR(-EINVAL) +#define dump_page_free(x) ((void)(x)) +static inline struct page *dump_page_copy(struct page *src, struct page *dst) +{ + return src; +} +#endif + int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len) { unsigned long addr; + struct page *dump_page; + + dump_page = dump_page_alloc(); + if (!dump_page) + return 0; for (addr = start; addr < start + len; addr += PAGE_SIZE) { struct page *page; @@ -912,14 +996,17 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start, */ page = get_dump_page(addr); if (page) { - int stop = !dump_emit_page(cprm, page); + int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page)); put_page(page); - if (stop) + if (stop) { + dump_page_free(dump_page); return 0; + } } else { dump_skip(cprm, PAGE_SIZE); } } + dump_page_free(dump_page); return 1; } #endif @@ -941,15 +1028,14 @@ void validate_coredump_safety(void) { if (suid_dumpable == SUID_DUMP_ROOT && core_pattern[0] != '/' && core_pattern[0] != '|') { - pr_warn( -"Unsafe core_pattern used with fs.suid_dumpable=2.\n" -"Pipe handler or fully qualified core dump path required.\n" -"Set kernel.core_pattern before fs.suid_dumpable.\n" - ); + + coredump_report_failure("Unsafe core_pattern used with fs.suid_dumpable=2: " + "pipe handler or fully qualified core dump path required. " + "Set kernel.core_pattern before fs.suid_dumpable."); } } -static int proc_dostring_coredump(struct ctl_table *table, int write, +static int proc_dostring_coredump(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int error = proc_dostring(table, write, buffer, lenp, ppos); @@ -959,6 +1045,9 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, return error; } +static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT; +static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX; + static struct ctl_table coredump_sysctls[] = { { .procname = "core_uses_pid", @@ -981,7 +1070,24 @@ static struct ctl_table coredump_sysctls[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { } + { + .procname = "core_file_note_size_limit", + .data = &core_file_note_size_limit, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = (unsigned int *)&core_file_note_size_min, + .extra2 = (unsigned int *)&core_file_note_size_max, + }, + { + .procname = "core_sort_vma", + .data = &core_sort_vma, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, }; static int __init init_fs_coredump_sysctls(void) @@ -1138,6 +1244,18 @@ static void free_vma_snapshot(struct coredump_params *cprm) } } +static int cmp_vma_size(const void *vma_meta_lhs_ptr, const void *vma_meta_rhs_ptr) +{ + const struct core_vma_metadata *vma_meta_lhs = vma_meta_lhs_ptr; + const struct core_vma_metadata *vma_meta_rhs = vma_meta_rhs_ptr; + + if (vma_meta_lhs->dump_size < vma_meta_rhs->dump_size) + return -1; + if (vma_meta_lhs->dump_size > vma_meta_rhs->dump_size) + return 1; + return 0; +} + /* * Under the mmap_lock, take a snapshot of relevant information about the task's * VMAs. @@ -1200,5 +1318,9 @@ static bool dump_vma_snapshot(struct coredump_params *cprm) cprm->vma_data_size += m->dump_size; } + if (core_sort_vma) + sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta), + cmp_vma_size, NULL); + return true; } |