summaryrefslogtreecommitdiff
path: root/fs/coredump.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/coredump.c')
-rw-r--r--fs/coredump.c218
1 files changed, 170 insertions, 48 deletions
diff --git a/fs/coredump.c b/fs/coredump.c
index 9d235fa14ab9..64894ba6efca 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -18,6 +18,7 @@
#include <linux/personality.h>
#include <linux/binfmts.h>
#include <linux/coredump.h>
+#include <linux/sort.h>
#include <linux/sched/coredump.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
@@ -42,6 +43,8 @@
#include <linux/timekeeping.h>
#include <linux/sysctl.h>
#include <linux/elf.h>
+#include <linux/pidfs.h>
+#include <uapi/linux/pidfd.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -56,10 +59,22 @@
static bool dump_vma_snapshot(struct coredump_params *cprm);
static void free_vma_snapshot(struct coredump_params *cprm);
+#define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024)
+/* Define a reasonable max cap */
+#define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024)
+/*
+ * File descriptor number for the pidfd for the thread-group leader of
+ * the coredumping task installed into the usermode helper's file
+ * descriptor table.
+ */
+#define COREDUMP_PIDFD_NUMBER 3
+
static int core_uses_pid;
static unsigned int core_pipe_limit;
+static unsigned int core_sort_vma;
static char core_pattern[CORENAME_MAX_SIZE] = "core";
static int core_name_size = CORENAME_MAX_SIZE;
+unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT;
struct core_name {
char *corename;
@@ -332,6 +347,27 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
case 'C':
err = cn_printf(cn, "%d", cprm->cpu);
break;
+ /* pidfd number */
+ case 'F': {
+ /*
+ * Installing a pidfd only makes sense if
+ * we actually spawn a usermode helper.
+ */
+ if (!ispipe)
+ break;
+
+ /*
+ * Note that we'll install a pidfd for the
+ * thread-group leader. We know that task
+ * linkage hasn't been removed yet and even if
+ * this @current isn't the actual thread-group
+ * leader we know that the thread-group leader
+ * cannot be reaped until @current has exited.
+ */
+ cprm->pid = task_tgid(current);
+ err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER);
+ break;
+ }
default:
break;
}
@@ -356,24 +392,21 @@ out:
return ispipe;
}
-static int zap_process(struct task_struct *start, int exit_code)
+static int zap_process(struct signal_struct *signal, int exit_code)
{
struct task_struct *t;
int nr = 0;
- /* Allow SIGKILL, see prepare_signal() */
- start->signal->flags = SIGNAL_GROUP_EXIT;
- start->signal->group_exit_code = exit_code;
- start->signal->group_stop_count = 0;
+ signal->flags = SIGNAL_GROUP_EXIT;
+ signal->group_exit_code = exit_code;
+ signal->group_stop_count = 0;
- for_each_thread(start, t) {
+ __for_each_thread(signal, t) {
task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
- /* The vhost_worker does not particpate in coredumps */
- if ((t->flags & (PF_USER_WORKER | PF_IO_WORKER)) != PF_USER_WORKER)
- nr++;
+ nr++;
}
}
@@ -388,8 +421,9 @@ static int zap_threads(struct task_struct *tsk,
spin_lock_irq(&tsk->sighand->siglock);
if (!(signal->flags & SIGNAL_GROUP_EXIT) && !signal->group_exec_task) {
+ /* Allow SIGKILL, see prepare_signal() */
signal->core_state = core_state;
- nr = zap_process(tsk, exit_code);
+ nr = zap_process(signal, exit_code);
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
tsk->flags |= PF_DUMPCORE;
atomic_set(&core_state->nr_threads, nr);
@@ -488,7 +522,7 @@ static void wait_for_dump_helpers(struct file *file)
}
/*
- * umh_pipe_setup
+ * umh_coredump_setup
* helper function to customize the process used
* to collect the core in userspace. Specifically
* it sets up a pipe and installs it as fd 0 (stdin)
@@ -498,11 +532,32 @@ static void wait_for_dump_helpers(struct file *file)
* is a special value that we use to trap recursive
* core dumps
*/
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
{
struct file *files[2];
struct coredump_params *cp = (struct coredump_params *)info->data;
- int err = create_pipe_files(files, 0);
+ int err;
+
+ if (cp->pid) {
+ struct file *pidfs_file __free(fput) = NULL;
+
+ pidfs_file = pidfs_alloc_file(cp->pid, O_RDWR);
+ if (IS_ERR(pidfs_file))
+ return PTR_ERR(pidfs_file);
+
+ /*
+ * Usermode helpers are childen of either
+ * system_unbound_wq or of kthreadd. So we know that
+ * we're starting off with a clean file descriptor
+ * table. So we should always be able to use
+ * COREDUMP_PIDFD_NUMBER as our file descriptor value.
+ */
+ err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0);
+ if (err < 0)
+ return err;
+ }
+
+ err = create_pipe_files(files, 0);
if (err)
return err;
@@ -510,10 +565,13 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
err = replace_fd(0, files[0], 0);
fput(files[0]);
+ if (err < 0)
+ return err;
+
/* and disallow core files too */
current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
- return err;
+ return 0;
}
void do_coredump(const kernel_siginfo_t *siginfo)
@@ -583,13 +641,12 @@ void do_coredump(const kernel_siginfo_t *siginfo)
struct subprocess_info *sub_info;
if (ispipe < 0) {
- printk(KERN_WARNING "format_corename failed\n");
- printk(KERN_WARNING "Aborting core\n");
+ coredump_report_failure("format_corename failed, aborting core");
goto fail_unlock;
}
if (cprm.limit == 1) {
- /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
+ /* See umh_coredump_setup() which sets RLIMIT_CORE = 1.
*
* Normally core limits are irrelevant to pipes, since
* we're not writing to the file system, but we use
@@ -604,27 +661,21 @@ void do_coredump(const kernel_siginfo_t *siginfo)
* right pid if a thread in a multi-threaded
* core_pattern process dies.
*/
- printk(KERN_WARNING
- "Process %d(%s) has RLIMIT_CORE set to 1\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Aborting core\n");
+ coredump_report_failure("RLIMIT_CORE is set to 1, aborting core");
goto fail_unlock;
}
cprm.limit = RLIM_INFINITY;
dump_count = atomic_inc_return(&core_dump_count);
if (core_pipe_limit && (core_pipe_limit < dump_count)) {
- printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
+ coredump_report_failure("over core_pipe_limit, skipping core dump");
goto fail_dropcount;
}
helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
GFP_KERNEL);
if (!helper_argv) {
- printk(KERN_WARNING "%s failed to allocate memory\n",
- __func__);
+ coredump_report_failure("%s failed to allocate memory", __func__);
goto fail_dropcount;
}
for (argi = 0; argi < argc; argi++)
@@ -634,15 +685,14 @@ void do_coredump(const kernel_siginfo_t *siginfo)
retval = -ENOMEM;
sub_info = call_usermodehelper_setup(helper_argv[0],
helper_argv, NULL, GFP_KERNEL,
- umh_pipe_setup, NULL, &cprm);
+ umh_coredump_setup, NULL, &cprm);
if (sub_info)
retval = call_usermodehelper_exec(sub_info,
UMH_WAIT_EXEC);
kfree(helper_argv);
if (retval) {
- printk(KERN_INFO "Core dump to |%s pipe failed\n",
- cn.corename);
+ coredump_report_failure("|%s pipe failed", cn.corename);
goto close_fail;
}
} else {
@@ -655,10 +705,8 @@ void do_coredump(const kernel_siginfo_t *siginfo)
goto fail_unlock;
if (need_suid_safe && cn.corename[0] != '/') {
- printk(KERN_WARNING "Pid %d(%s) can only dump core "\
- "to fully qualified path!\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
+ coredump_report_failure(
+ "this process can only dump core to a fully qualified path, skipping core dump");
goto fail_unlock;
}
@@ -727,13 +775,13 @@ void do_coredump(const kernel_siginfo_t *siginfo)
idmap = file_mnt_idmap(cprm.file);
if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode),
current_fsuid())) {
- pr_info_ratelimited("Core dump to %s aborted: cannot preserve file owner\n",
- cn.corename);
+ coredump_report_failure("Core dump to %s aborted: "
+ "cannot preserve file owner", cn.corename);
goto close_fail;
}
if ((inode->i_mode & 0677) != 0600) {
- pr_info_ratelimited("Core dump to %s aborted: cannot preserve file permissions\n",
- cn.corename);
+ coredump_report_failure("Core dump to %s aborted: "
+ "cannot preserve file permissions", cn.corename);
goto close_fail;
}
if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
@@ -754,7 +802,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
* have this set to NULL.
*/
if (!cprm.file) {
- pr_info("Core dump to |%s disabled\n", cn.corename);
+ coredump_report_failure("Core dump to |%s disabled", cn.corename);
goto close_fail;
}
if (!dump_vma_snapshot(&cprm))
@@ -872,6 +920,9 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
loff_t pos;
ssize_t n;
+ if (!page)
+ return 0;
+
if (cprm->to_skip) {
if (!__dump_skip(cprm, cprm->to_skip))
return 0;
@@ -884,7 +935,6 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
pos = file->f_pos;
bvec_set_page(&bvec, page, PAGE_SIZE, 0);
iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
- iov_iter_set_copy_mc(&iter);
n = __kernel_write_iter(cprm->file, &iter, &pos);
if (n != PAGE_SIZE)
return 0;
@@ -895,10 +945,44 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
return 1;
}
+/*
+ * If we might get machine checks from kernel accesses during the
+ * core dump, let's get those errors early rather than during the
+ * IO. This is not performance-critical enough to warrant having
+ * all the machine check logic in the iovec paths.
+ */
+#ifdef copy_mc_to_kernel
+
+#define dump_page_alloc() alloc_page(GFP_KERNEL)
+#define dump_page_free(x) __free_page(x)
+static struct page *dump_page_copy(struct page *src, struct page *dst)
+{
+ void *buf = kmap_local_page(src);
+ size_t left = copy_mc_to_kernel(page_address(dst), buf, PAGE_SIZE);
+ kunmap_local(buf);
+ return left ? NULL : dst;
+}
+
+#else
+
+/* We just want to return non-NULL; it's never used. */
+#define dump_page_alloc() ERR_PTR(-EINVAL)
+#define dump_page_free(x) ((void)(x))
+static inline struct page *dump_page_copy(struct page *src, struct page *dst)
+{
+ return src;
+}
+#endif
+
int dump_user_range(struct coredump_params *cprm, unsigned long start,
unsigned long len)
{
unsigned long addr;
+ struct page *dump_page;
+
+ dump_page = dump_page_alloc();
+ if (!dump_page)
+ return 0;
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
struct page *page;
@@ -912,14 +996,17 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
*/
page = get_dump_page(addr);
if (page) {
- int stop = !dump_emit_page(cprm, page);
+ int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page));
put_page(page);
- if (stop)
+ if (stop) {
+ dump_page_free(dump_page);
return 0;
+ }
} else {
dump_skip(cprm, PAGE_SIZE);
}
}
+ dump_page_free(dump_page);
return 1;
}
#endif
@@ -941,15 +1028,14 @@ void validate_coredump_safety(void)
{
if (suid_dumpable == SUID_DUMP_ROOT &&
core_pattern[0] != '/' && core_pattern[0] != '|') {
- pr_warn(
-"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
-"Pipe handler or fully qualified core dump path required.\n"
-"Set kernel.core_pattern before fs.suid_dumpable.\n"
- );
+
+ coredump_report_failure("Unsafe core_pattern used with fs.suid_dumpable=2: "
+ "pipe handler or fully qualified core dump path required. "
+ "Set kernel.core_pattern before fs.suid_dumpable.");
}
}
-static int proc_dostring_coredump(struct ctl_table *table, int write,
+static int proc_dostring_coredump(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int error = proc_dostring(table, write, buffer, lenp, ppos);
@@ -959,6 +1045,9 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
return error;
}
+static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT;
+static const unsigned int core_file_note_size_max = CORE_FILE_NOTE_SIZE_MAX;
+
static struct ctl_table coredump_sysctls[] = {
{
.procname = "core_uses_pid",
@@ -981,7 +1070,24 @@ static struct ctl_table coredump_sysctls[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
+ {
+ .procname = "core_file_note_size_limit",
+ .data = &core_file_note_size_limit,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = (unsigned int *)&core_file_note_size_min,
+ .extra2 = (unsigned int *)&core_file_note_size_max,
+ },
+ {
+ .procname = "core_sort_vma",
+ .data = &core_sort_vma,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
};
static int __init init_fs_coredump_sysctls(void)
@@ -1138,6 +1244,18 @@ static void free_vma_snapshot(struct coredump_params *cprm)
}
}
+static int cmp_vma_size(const void *vma_meta_lhs_ptr, const void *vma_meta_rhs_ptr)
+{
+ const struct core_vma_metadata *vma_meta_lhs = vma_meta_lhs_ptr;
+ const struct core_vma_metadata *vma_meta_rhs = vma_meta_rhs_ptr;
+
+ if (vma_meta_lhs->dump_size < vma_meta_rhs->dump_size)
+ return -1;
+ if (vma_meta_lhs->dump_size > vma_meta_rhs->dump_size)
+ return 1;
+ return 0;
+}
+
/*
* Under the mmap_lock, take a snapshot of relevant information about the task's
* VMAs.
@@ -1200,5 +1318,9 @@ static bool dump_vma_snapshot(struct coredump_params *cprm)
cprm->vma_data_size += m->dump_size;
}
+ if (core_sort_vma)
+ sort(cprm->vma_meta, cprm->vma_count, sizeof(*cprm->vma_meta),
+ cmp_vma_size, NULL);
+
return true;
}