From c18258c6f0848f97e85287f6271c511a092bb784 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 27 Sep 2006 01:51:06 -0700 Subject: [PATCH] pid: Implement transfer_pid and use it to simplify de_thread In de_thread we move pids from one process to another, a rather ugly case. The function transfer_pid makes it clear what we are doing, and makes the action atomic. This is useful we ever want to atomically traverse the process group and session lists, in a rcu safe manner. Even if the atomic properties this change should be a win as transfer_pid should be less code to execute than executing both attach_pid and detach_pid, and this should make de_thread slightly smaller as only a single function call needs to be emitted. The only downside is that the code might be slower to execute as the odds are against transfer_pid being in cache. Signed-off-by: Eric W. Biederman Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 54135df2a966..b7aa3d6422d6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -696,23 +696,20 @@ static int de_thread(struct task_struct *tsk) */ /* Become a process group leader with the old leader's pid. - * Note: The old leader also uses thispid until release_task + * The old leader becomes a thread of the this thread group. + * Note: The old leader also uses this pid until release_task * is called. Odd but simple and correct. */ detach_pid(current, PIDTYPE_PID); current->pid = leader->pid; attach_pid(current, PIDTYPE_PID, current->pid); - attach_pid(current, PIDTYPE_PGID, current->signal->pgrp); - attach_pid(current, PIDTYPE_SID, current->signal->session); + transfer_pid(leader, current, PIDTYPE_PGID); + transfer_pid(leader, current, PIDTYPE_SID); list_replace_rcu(&leader->tasks, ¤t->tasks); current->group_leader = current; leader->group_leader = current; - /* Reduce leader to a thread */ - detach_pid(leader, PIDTYPE_PGID); - detach_pid(leader, PIDTYPE_SID); - current->exit_signal = SIGCHLD; BUG_ON(leader->exit_state != EXIT_ZOMBIE); -- cgit v1.2.3 From aafe6c2a2b6bce5a3a4913ce5c07e85ea143144d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 27 Sep 2006 01:51:13 -0700 Subject: [PATCH] de_thread: Use tsk not current Ingo Oeser pointed out that because current expands to an inline function it is more space efficient and somewhat faster to simply keep a cached copy of current in another variable. This patch implements that for the de_thread function. (akpm: saves nearly 100 bytes of text on x86) Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index b7aa3d6422d6..97df6e0aeaee 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -595,7 +595,7 @@ static int de_thread(struct task_struct *tsk) if (!newsighand) return -ENOMEM; - if (thread_group_empty(current)) + if (thread_group_empty(tsk)) goto no_thread_group; /* @@ -620,17 +620,17 @@ static int de_thread(struct task_struct *tsk) * Reparenting needs write_lock on tasklist_lock, * so it is safe to do it under read_lock. */ - if (unlikely(current->group_leader == child_reaper)) - child_reaper = current; + if (unlikely(tsk->group_leader == child_reaper)) + child_reaper = tsk; - zap_other_threads(current); + zap_other_threads(tsk); read_unlock(&tasklist_lock); /* * Account for the thread group leader hanging around: */ count = 1; - if (!thread_group_leader(current)) { + if (!thread_group_leader(tsk)) { count = 2; /* * The SIGALRM timer survives the exec, but needs to point @@ -639,14 +639,14 @@ static int de_thread(struct task_struct *tsk) * synchronize with any firing (by calling del_timer_sync) * before we can safely let the old group leader die. */ - sig->tsk = current; + sig->tsk = tsk; spin_unlock_irq(lock); if (hrtimer_cancel(&sig->real_timer)) hrtimer_restart(&sig->real_timer); spin_lock_irq(lock); } while (atomic_read(&sig->count) > count) { - sig->group_exit_task = current; + sig->group_exit_task = tsk; sig->notify_count = count; __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock_irq(lock); @@ -662,13 +662,13 @@ static int de_thread(struct task_struct *tsk) * do is to wait for the thread group leader to become inactive, * and to assume its PID: */ - if (!thread_group_leader(current)) { + if (!thread_group_leader(tsk)) { /* * Wait for the thread group leader to be a zombie. * It should already be zombie at this point, most * of the time. */ - leader = current->group_leader; + leader = tsk->group_leader; while (leader->exit_state != EXIT_ZOMBIE) yield(); @@ -682,12 +682,12 @@ static int de_thread(struct task_struct *tsk) * When we take on its identity by switching to its PID, we * also take its birthdate (always earlier than our own). */ - current->start_time = leader->start_time; + tsk->start_time = leader->start_time; write_lock_irq(&tasklist_lock); - BUG_ON(leader->tgid != current->tgid); - BUG_ON(current->pid == current->tgid); + BUG_ON(leader->tgid != tsk->tgid); + BUG_ON(tsk->pid == tsk->tgid); /* * An exec() starts a new thread group with the * TGID of the previous thread group. Rehash the @@ -700,17 +700,17 @@ static int de_thread(struct task_struct *tsk) * Note: The old leader also uses this pid until release_task * is called. Odd but simple and correct. */ - detach_pid(current, PIDTYPE_PID); - current->pid = leader->pid; - attach_pid(current, PIDTYPE_PID, current->pid); - transfer_pid(leader, current, PIDTYPE_PGID); - transfer_pid(leader, current, PIDTYPE_SID); - list_replace_rcu(&leader->tasks, ¤t->tasks); + detach_pid(tsk, PIDTYPE_PID); + tsk->pid = leader->pid; + attach_pid(tsk, PIDTYPE_PID, tsk->pid); + transfer_pid(leader, tsk, PIDTYPE_PGID); + transfer_pid(leader, tsk, PIDTYPE_SID); + list_replace_rcu(&leader->tasks, &tsk->tasks); - current->group_leader = current; - leader->group_leader = current; + tsk->group_leader = tsk; + leader->group_leader = tsk; - current->exit_signal = SIGCHLD; + tsk->exit_signal = SIGCHLD; BUG_ON(leader->exit_state != EXIT_ZOMBIE); leader->exit_state = EXIT_DEAD; @@ -750,7 +750,7 @@ no_thread_group: spin_lock(&oldsighand->siglock); spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING); - rcu_assign_pointer(current->sighand, newsighand); + rcu_assign_pointer(tsk->sighand, newsighand); recalc_sigpending(); spin_unlock(&newsighand->siglock); @@ -761,7 +761,7 @@ no_thread_group: kmem_cache_free(sighand_cachep, oldsighand); } - BUG_ON(!thread_group_leader(current)); + BUG_ON(!thread_group_leader(tsk)); return 0; } -- cgit v1.2.3 From 3b9b8ab65d8eed784b9164d03807cb2bda7b5cd6 Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Fri, 29 Sep 2006 02:00:05 -0700 Subject: [PATCH] Fix unserialized task->files changing Fixed race on put_files_struct on exec with proc. Restoring files on current on error path may lead to proc having a pointer to already kfree-d files_struct. ->files changing at exit.c and khtread.c are safe as exit_files() makes all things under lock. Found during OpenVZ stress testing. [akpm@osdl.org: add export] Signed-off-by: Pavel Emelianov Signed-off-by: Kirill Korotaev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 6 ++---- fs/binfmt_misc.c | 6 ++---- fs/exec.c | 3 +-- include/linux/file.h | 1 + kernel/exit.c | 12 ++++++++++++ 5 files changed, 18 insertions(+), 10 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index dfd8cfb7fb5d..bb43da5cde5c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1038,10 +1038,8 @@ out_free_interp: out_free_file: sys_close(elf_exec_fileno); out_free_fh: - if (files) { - put_files_struct(current->files); - current->files = files; - } + if (files) + reset_files_struct(current, files); out_free_ph: kfree(elf_phdata); goto out; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 66ba137f8661..1713c48fef54 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -215,10 +215,8 @@ _error: bprm->interp_flags = 0; bprm->interp_data = 0; _unshare: - if (files) { - put_files_struct(current->files); - current->files = files; - } + if (files) + reset_files_struct(current, files); goto _ret; } diff --git a/fs/exec.c b/fs/exec.c index 97df6e0aeaee..a8efe35176b0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -898,8 +898,7 @@ int flush_old_exec(struct linux_binprm * bprm) return 0; mmap_failed: - put_files_struct(current->files); - current->files = files; + reset_files_struct(current, files); out: return retval; } diff --git a/include/linux/file.h b/include/linux/file.h index 9f7c2513866f..74183e6f7f45 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -112,5 +112,6 @@ struct task_struct; struct files_struct *get_files_struct(struct task_struct *); void FASTCALL(put_files_struct(struct files_struct *fs)); +void reset_files_struct(struct task_struct *, struct files_struct *); #endif /* __LINUX_FILE_H */ diff --git a/kernel/exit.c b/kernel/exit.c index d891883420f7..4b6fb054b25d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -487,6 +487,18 @@ void fastcall put_files_struct(struct files_struct *files) EXPORT_SYMBOL(put_files_struct); +void reset_files_struct(struct task_struct *tsk, struct files_struct *files) +{ + struct files_struct *old; + + old = tsk->files; + task_lock(tsk); + tsk->files = files; + task_unlock(tsk); + put_files_struct(old); +} +EXPORT_SYMBOL(reset_files_struct); + static inline void __exit_files(struct task_struct *tsk) { struct files_struct * files = tsk->files; -- cgit v1.2.3 From 8f0ab5147951267134612570604cf8341901a80c Mon Sep 17 00:00:00 2001 From: Jay Lan Date: Sat, 30 Sep 2006 23:28:59 -0700 Subject: [PATCH] csa: convert CONFIG tag for extended accounting routines There were a few accounting data/macros that are used in CSA but are #ifdef'ed inside CONFIG_BSD_PROCESS_ACCT. This patch is to change those ifdef's from CONFIG_BSD_PROCESS_ACCT to CONFIG_TASK_XACCT. A few defines are moved from kernel/acct.c and include/linux/acct.h to kernel/tsacct.c and include/linux/tsacct_kern.h. Signed-off-by: Jay Lan Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jes Sorensen Cc: Chris Sturtivant Cc: Tony Ernst Cc: Guillaume Thouvenin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/compat.c | 2 +- fs/exec.c | 2 +- include/linux/acct.h | 4 ---- include/linux/sched.h | 2 +- include/linux/tsacct_kern.h | 6 ++++++ kernel/acct.c | 30 ------------------------------ kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sched.c | 2 +- kernel/tsacct.c | 30 ++++++++++++++++++++++++++++++ 10 files changed, 42 insertions(+), 38 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/compat.c b/fs/compat.c index 6b90bf35f61d..13fb08d096c4 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include /* siocdevprivate_ioctl */ diff --git a/fs/exec.c b/fs/exec.c index a8efe35176b0..0db3fc3c5f0f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/acct.h b/include/linux/acct.h index e86bae7324d2..0496d1f09952 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -124,16 +124,12 @@ extern void acct_auto_close(struct super_block *sb); extern void acct_init_pacct(struct pacct_struct *pacct); extern void acct_collect(long exitcode, int group_dead); extern void acct_process(void); -extern void acct_update_integrals(struct task_struct *tsk); -extern void acct_clear_integrals(struct task_struct *tsk); #else #define acct_auto_close_mnt(x) do { } while (0) #define acct_auto_close(x) do { } while (0) #define acct_init_pacct(x) do { } while (0) #define acct_collect(x,y) do { } while (0) #define acct_process() do { } while (0) -#define acct_update_integrals(x) do { } while (0) -#define acct_clear_integrals(task) do { } while (0) #endif /* diff --git a/include/linux/sched.h b/include/linux/sched.h index fc4a9873ec10..4ddeb0f982fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -981,7 +981,7 @@ struct task_struct { wait_queue_t *io_wait; /* i/o counters(bytes read/written, #syscalls */ u64 rchar, wchar, syscr, syscw; -#if defined(CONFIG_BSD_PROCESS_ACCT) +#if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ clock_t acct_stimexpd; /* clock_t-converted stime since last update */ diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h index 74102dcae67a..7e50ac795b0b 100644 --- a/include/linux/tsacct_kern.h +++ b/include/linux/tsacct_kern.h @@ -18,9 +18,15 @@ static inline void bacct_add_tsk(struct taskstats *stats, struct task_struct *ts #ifdef CONFIG_TASK_XACCT extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p); +extern void acct_update_integrals(struct task_struct *tsk); +extern void acct_clear_integrals(struct task_struct *tsk); #else static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) {} +static inline void acct_update_integrals(struct task_struct *tsk) +{} +static inline void acct_clear_integrals(struct task_struct *tsk) +{} #endif /* CONFIG_TASK_XACCT */ #endif diff --git a/kernel/acct.c b/kernel/acct.c index f4330acead46..0aad5ca36a81 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -602,33 +602,3 @@ void acct_process(void) do_acct_process(file); fput(file); } - - -/** - * acct_update_integrals - update mm integral fields in task_struct - * @tsk: task_struct for accounting - */ -void acct_update_integrals(struct task_struct *tsk) -{ - if (likely(tsk->mm)) { - long delta = - cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd; - - if (delta == 0) - return; - tsk->acct_stimexpd = tsk->stime; - tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); - tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; - } -} - -/** - * acct_clear_integrals - clear the mm integral fields in task_struct - * @tsk: task_struct whose accounting fields are cleared - */ -void acct_clear_integrals(struct task_struct *tsk) -{ - tsk->acct_stimexpd = 0; - tsk->acct_rss_mem1 = 0; - tsk->acct_vm_mem1 = 0; -} diff --git a/kernel/exit.c b/kernel/exit.c index c189de2927ab..3b47f26985f2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 1c999f3e0b47..89f666491d1f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched.c b/kernel/sched.c index 74f169ac0773..2bbd948f0169 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 410483490cf6..47c71daa416f 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -88,4 +88,34 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) stats->read_syscalls = p->syscr; stats->write_syscalls = p->syscw; } + + +/** + * acct_update_integrals - update mm integral fields in task_struct + * @tsk: task_struct for accounting + */ +void acct_update_integrals(struct task_struct *tsk) +{ + if (likely(tsk->mm)) { + long delta = + cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd; + + if (delta == 0) + return; + tsk->acct_stimexpd = tsk->stime; + tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); + tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; + } +} + +/** + * acct_clear_integrals - clear the mm integral fields in task_struct + * @tsk: task_struct whose accounting fields are cleared + */ +void acct_clear_integrals(struct task_struct *tsk) +{ + tsk->acct_stimexpd = 0; + tsk->acct_rss_mem1 = 0; + tsk->acct_vm_mem1 = 0; +} #endif -- cgit v1.2.3 From d025c9db7f31fc0554ce7fb2dfc78d35a77f3487 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 30 Sep 2006 23:29:28 -0700 Subject: [PATCH] Support piping into commands in /proc/sys/kernel/core_pattern Using the infrastructure created in previous patches implement support to pipe core dumps into programs. This is done by overloading the existing core_pattern sysctl with a new syntax: |program When the first character of the pattern is a '|' the kernel will instead threat the rest of the pattern as a command to run. The core dump will be written to the standard input of that program instead of to a file. This is useful for having automatic core dump analysis without filling up disks. The program can do some simple analysis and save only a summary of the core dump. The core dump proces will run with the privileges and in the name space of the process that caused the core dump. I also increased the core pattern size to 128 bytes so that longer command lines fit. Most of the changes comes from allowing core dumps without seeks. They are fairly straight forward though. One small incompatibility is that if someone had a core pattern previously that started with '|' they will get suddenly new behaviour. I think that's unlikely to be a real problem though. Additional background: > Very nice, do you happen to have a program that can accept this kind of > input for crash dumps? I'm guessing that the embedded people will > really want this functionality. I had a cheesy demo/prototype. Basically it wrote the dump to a file again, ran gdb on it to get a backtrace and wrote the summary to a shared directory. Then there was a simple CGI script to generate a "top 10" crashes HTML listing. Unfortunately this still had the disadvantage to needing full disk space for a dump except for deleting it afterwards (in fact it was worse because over the pipe holes didn't work so if you have a holey address map it would require more space). Fortunately gdb seems to be happy to handle /proc/pid/fd/xxx input pipes as cores (at least it worked with zsh's =(cat core) syntax), so it would be likely possible to do it without temporary space with a simple wrapper that calls it in the right way. I ran out of time before doing that though. The demo prototype scripts weren't very good. If there is really interest I can dig them out (they are currently on a laptop disk on the desk with the laptop itself being in service), but I would recommend to rewrite them for any serious application of this and fix the disk space problem. Also to be really useful it should probably find a way to automatically fetch the debuginfos (I cheated and just installed them in advance). If nobody else does it I can probably do the rewrite myself again at some point. My hope at some point was that desktops would support it in their builtin crash reporters, but at least the KDE people I talked too seemed to be happy with their user space only solution. Alan sayeth: I don't believe that piping as such as neccessarily the right model, but the ability to intercept and processes core dumps from user space is asked for by many enterprise users as well. They want to know about, capture, analyse and process core dumps, often centrally and in automated form. [akpm@osdl.org: loff_t != unsigned long] Signed-off-by: Andi Kleen Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 77 +++++++++++++++++++++++++++++++++------------------------ fs/exec.c | 23 +++++++++++++---- kernel/kmod.c | 4 +++ kernel/sysctl.c | 2 +- 4 files changed, 68 insertions(+), 38 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index bad52433de69..06435f3665f4 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1151,11 +1151,23 @@ static int dump_write(struct file *file, const void *addr, int nr) static int dump_seek(struct file *file, loff_t off) { - if (file->f_op->llseek) { - if (file->f_op->llseek(file, off, 0) != off) + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { + if (file->f_op->llseek(file, off, 1) != off) return 0; - } else - file->f_pos = off; + } else { + char *buf = (char *)get_zeroed_page(GFP_KERNEL); + if (!buf) + return 0; + while (off > 0) { + unsigned long n = off; + if (n > PAGE_SIZE) + n = PAGE_SIZE; + if (!dump_write(file, buf, n)) + return 0; + off -= n; + } + free_page((unsigned long)buf); + } return 1; } @@ -1203,30 +1215,35 @@ static int notesize(struct memelfnote *en) return sz; } -#define DUMP_WRITE(addr, nr) \ - do { if (!dump_write(file, (addr), (nr))) return 0; } while(0) -#define DUMP_SEEK(off) \ - do { if (!dump_seek(file, (off))) return 0; } while(0) +#define DUMP_WRITE(addr, nr, foffset) \ + do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0) -static int writenote(struct memelfnote *men, struct file *file) +static int alignfile(struct file *file, loff_t *foffset) { - struct elf_note en; + char buf[4] = { 0, }; + DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset); + return 1; +} +static int writenote(struct memelfnote *men, struct file *file, + loff_t *foffset) +{ + struct elf_note en; en.n_namesz = strlen(men->name) + 1; en.n_descsz = men->datasz; en.n_type = men->type; - DUMP_WRITE(&en, sizeof(en)); - DUMP_WRITE(men->name, en.n_namesz); - /* XXX - cast from long long to long to avoid need for libgcc.a */ - DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */ - DUMP_WRITE(men->data, men->datasz); - DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */ + DUMP_WRITE(&en, sizeof(en), foffset); + DUMP_WRITE(men->name, en.n_namesz, foffset); + if (!alignfile(file, foffset)) + return 0; + DUMP_WRITE(men->data, men->datasz, foffset); + if (!alignfile(file, foffset)) + return 0; return 1; } #undef DUMP_WRITE -#undef DUMP_SEEK #define DUMP_WRITE(addr, nr) \ if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ @@ -1426,7 +1443,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) int i; struct vm_area_struct *vma; struct elfhdr *elf = NULL; - loff_t offset = 0, dataoff; + loff_t offset = 0, dataoff, foffset; unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; int numnote; struct memelfnote *notes = NULL; @@ -1569,7 +1586,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) DUMP_WRITE(&phdr, sizeof(phdr)); } - /* Page-align dumped data */ + foffset = offset; + dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); /* Write program headers for segments dump */ @@ -1594,6 +1612,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) phdr.p_align = ELF_EXEC_PAGESIZE; DUMP_WRITE(&phdr, sizeof(phdr)); + foffset += sizeof(phdr); } #ifdef ELF_CORE_WRITE_EXTRA_PHDRS @@ -1602,7 +1621,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) /* write out the notes section */ for (i = 0; i < numnote; i++) - if (!writenote(notes + i, file)) + if (!writenote(notes + i, file, &foffset)) goto end_coredump; /* write out the thread status notes section */ @@ -1611,11 +1630,12 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) list_entry(t, struct elf_thread_status, list); for (i = 0; i < tmp->num_notes; i++) - if (!writenote(&tmp->notes[i], file)) + if (!writenote(&tmp->notes[i], file, &foffset)) goto end_coredump; } - - DUMP_SEEK(dataoff); + + /* Align to page */ + DUMP_SEEK(dataoff - foffset); for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { unsigned long addr; @@ -1631,10 +1651,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) if (get_user_pages(current, current->mm, addr, 1, 0, 1, &page, &vma) <= 0) { - DUMP_SEEK(file->f_pos + PAGE_SIZE); + DUMP_SEEK(PAGE_SIZE); } else { if (page == ZERO_PAGE(addr)) { - DUMP_SEEK(file->f_pos + PAGE_SIZE); + DUMP_SEEK(PAGE_SIZE); } else { void *kaddr; flush_cache_page(vma, addr, @@ -1658,13 +1678,6 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) ELF_CORE_WRITE_EXTRA_DATA; #endif - if (file->f_pos != offset) { - /* Sanity check */ - printk(KERN_WARNING - "elf_core_dump: file->f_pos (%Ld) != offset (%Ld)\n", - file->f_pos, offset); - } - end_coredump: set_fs(fs); diff --git a/fs/exec.c b/fs/exec.c index 0db3fc3c5f0f..6270f8f20a63 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -58,7 +58,7 @@ #endif int core_uses_pid; -char core_pattern[65] = "core"; +char core_pattern[128] = "core"; int suid_dumpable = 0; EXPORT_SYMBOL(suid_dumpable); @@ -1463,6 +1463,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) int retval = 0; int fsuid = current->fsuid; int flag = 0; + int ispipe = 0; binfmt = current->binfmt; if (!binfmt || !binfmt->core_dump) @@ -1504,22 +1505,34 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) lock_kernel(); format_corename(corename, core_pattern, signr); unlock_kernel(); - file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); + if (corename[0] == '|') { + /* SIGPIPE can happen, but it's just never processed */ + if(call_usermodehelper_pipe(corename+1, NULL, NULL, &file)) { + printk(KERN_INFO "Core dump to %s pipe failed\n", + corename); + goto fail_unlock; + } + ispipe = 1; + } else + file = filp_open(corename, + O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600); if (IS_ERR(file)) goto fail_unlock; inode = file->f_dentry->d_inode; if (inode->i_nlink > 1) goto close_fail; /* multiple links - don't dump */ - if (d_unhashed(file->f_dentry)) + if (!ispipe && d_unhashed(file->f_dentry)) goto close_fail; - if (!S_ISREG(inode->i_mode)) + /* AK: actually i see no reason to not allow this for named pipes etc., + but keep the previous behaviour for now. */ + if (!ispipe && !S_ISREG(inode->i_mode)) goto close_fail; if (!file->f_op) goto close_fail; if (!file->f_op->write) goto close_fail; - if (do_truncate(file->f_dentry, 0, 0, file) != 0) + if (!ispipe && do_truncate(file->f_dentry, 0, 0, file) != 0) goto close_fail; retval = binfmt->core_dump(signr, regs, file); diff --git a/kernel/kmod.c b/kernel/kmod.c index 5c63c53014a9..f8121b95183f 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -35,6 +35,7 @@ #include #include #include +#include #include extern int max_threads; @@ -158,6 +159,9 @@ static int ____call_usermodehelper(void *data) FD_SET(0, fdt->open_fds); FD_CLR(0, fdt->close_on_exec); spin_unlock(&f->file_lock); + + /* and disallow core files too */ + current->signal->rlim[RLIMIT_CORE] = (struct rlimit){0, 0}; } /* We can run anywhere, unlike our parent keventd(). */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c57c4532e296..ba42694f0453 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -294,7 +294,7 @@ static ctl_table kern_table[] = { .ctl_name = KERN_CORE_PATTERN, .procname = "core_pattern", .data = core_pattern, - .maxlen = 64, + .maxlen = 128, .mode = 0644, .proc_handler = &proc_dostring, .strategy = &sysctl_string, -- cgit v1.2.3