From 5b3efd500854d45d305b53c54c97db5970959980 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 11 Feb 2010 11:50:59 -0800 Subject: x86, ptrace: regset extensions to support xstate Add the xstate regset support which helps extend the kernel ptrace and the core-dump interfaces to support AVX state etc. This regset interface is designed to support all the future state that gets supported using xsave/xrstor infrastructure. Looking at the memory layout saved by "xsave", one can't say which state is represented in the memory layout. This is because if a particular state is in init state, in the xsave hdr it can be represented by bit '0'. And hence we can't really say by the xsave header whether a state is in init state or the state is not saved in the memory layout. And hence the xsave memory layout available through this regset interface uses SW usable bytes [464..511] to convey what state is represented in the memory layout. First 8 bytes of the sw_usable_bytes[464..471] will be set to OS enabled xstate mask (which is the same as the 64-bit mask returned by the xgetbv's xCR0). The note NT_X86_XSTATE represents the extended state information in the core file, using the above mentioned memory layout. Signed-off-by: Suresh Siddha LKML-Reference: <20100211195614.802495327@sbs-t61.sc.intel.com> Signed-off-by: Hongjiu Lu Cc: Roland McGrath Signed-off-by: H. 
Peter Anvin --- include/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/elf.h') diff --git a/include/linux/elf.h b/include/linux/elf.h index 0cc4d55151b7..a8c4af073ce9 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -361,6 +361,7 @@ typedef struct elf64_shdr { #define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ +#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ #define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ -- cgit v1.2.3 From 2225a122ae26d542bdce523d9d87a4a7ba10e07b Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 11 Feb 2010 11:51:00 -0800 Subject: ptrace: Add support for generic PTRACE_GETREGSET/PTRACE_SETREGSET Generic support for PTRACE_GETREGSET/PTRACE_SETREGSET commands which export the regsets supported by each architecture using the corresponding NT_* types. These NT_* types are already part of the userland ABI, used in representing the architecture specific register sets as different NOTES in an ELF core file. The 'addr' parameter for the ptrace system call encodes the REGSET type (using the corresponding NT_* type) and the 'data' parameter points to the struct iovec having the user buffer and the length of that buffer. struct iovec iov = { buf, len}; ret = ptrace(PTRACE_GETREGSET/PTRACE_SETREGSET, pid, NT_XXX_TYPE, &iov); On successful completion, iov.len will be updated by the kernel specifying how much the kernel has written/read to/from the user's iov.buf. x86 extended state registers are primarily exported using this interface. Signed-off-by: Suresh Siddha LKML-Reference: <20100211195614.886724710@sbs-t61.sc.intel.com> Acked-by: Hongjiu Lu Cc: Roland McGrath Signed-off-by: H. 
Peter Anvin --- include/linux/elf.h | 6 +++- include/linux/ptrace.h | 15 +++++++++ kernel/ptrace.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) (limited to 'include/linux/elf.h') diff --git a/include/linux/elf.h b/include/linux/elf.h index a8c4af073ce9..d8e6e61ad9ff 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -349,7 +349,11 @@ typedef struct elf64_shdr { #define ELF_OSABI ELFOSABI_NONE #endif -/* Notes used in ET_CORE */ +/* + * Notes used in ET_CORE. Architectures export some of the arch register sets + * using the corresponding note types via the PTRACE_GETREGSET and + * PTRACE_SETREGSET requests. + */ #define NT_PRSTATUS 1 #define NT_PRFPREG 2 #define NT_PRPSINFO 3 diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 56f2d63a5cbb..dbfa821d5a6e 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -27,6 +27,21 @@ #define PTRACE_GETSIGINFO 0x4202 #define PTRACE_SETSIGINFO 0x4203 +/* + * Generic ptrace interface that exports the architecture specific regsets + * using the corresponding NT_* types (which are also used in the core dump). + * + * This interface usage is as follows: + * struct iovec iov = { buf, len}; + * + * ret = ptrace(PTRACE_GETREGSET/PTRACE_SETREGSET, pid, NT_XXX_TYPE, &iov); + * + * On the successful completion, iov.len will be updated by the kernel, + * specifying how much the kernel has written/read to/from the user's iov.buf. 
+ */ +#define PTRACE_GETREGSET 0x4204 +#define PTRACE_SETREGSET 0x4205 + /* options set using PTRACE_SETOPTIONS */ #define PTRACE_O_TRACESYSGOOD 0x00000001 #define PTRACE_O_TRACEFORK 0x00000002 diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 23bd09cd042e..13b4554d8fbb 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -22,6 +22,7 @@ #include #include #include +#include /* @@ -511,6 +512,47 @@ static int ptrace_resume(struct task_struct *child, long request, long data) return 0; } +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK + +static const struct user_regset * +find_regset(const struct user_regset_view *view, unsigned int type) +{ + const struct user_regset *regset; + int n; + + for (n = 0; n < view->n; ++n) { + regset = view->regsets + n; + if (regset->core_note_type == type) + return regset; + } + + return NULL; +} + +static int ptrace_regset(struct task_struct *task, int req, unsigned int type, + struct iovec *kiov) +{ + const struct user_regset_view *view = task_user_regset_view(task); + const struct user_regset *regset = find_regset(view, type); + int regset_no; + + if (!regset || (kiov->iov_len % regset->size) != 0) + return -EIO; + + regset_no = regset - view->regsets; + kiov->iov_len = min(kiov->iov_len, + (__kernel_size_t) (regset->n * regset->size)); + + if (req == PTRACE_GETREGSET) + return copy_regset_to_user(task, view, regset_no, 0, + kiov->iov_len, kiov->iov_base); + else + return copy_regset_from_user(task, view, regset_no, 0, + kiov->iov_len, kiov->iov_base); +} + +#endif + int ptrace_request(struct task_struct *child, long request, long addr, long data) { @@ -573,6 +615,26 @@ int ptrace_request(struct task_struct *child, long request, return 0; return ptrace_resume(child, request, SIGKILL); +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK + case PTRACE_GETREGSET: + case PTRACE_SETREGSET: + { + struct iovec kiov; + struct iovec __user *uiov = (struct iovec __user *) data; + + if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov))) + return -EFAULT; + + if 
(__get_user(kiov.iov_base, &uiov->iov_base) || + __get_user(kiov.iov_len, &uiov->iov_len)) + return -EFAULT; + + ret = ptrace_regset(child, request, addr, &kiov); + if (!ret) + ret = __put_user(kiov.iov_len, &uiov->iov_len); + break; + } +#endif default: break; } @@ -711,6 +773,32 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, else ret = ptrace_setsiginfo(child, &siginfo); break; +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK + case PTRACE_GETREGSET: + case PTRACE_SETREGSET: + { + struct iovec kiov; + struct compat_iovec __user *uiov = + (struct compat_iovec __user *) datap; + compat_uptr_t ptr; + compat_size_t len; + + if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov))) + return -EFAULT; + + if (__get_user(ptr, &uiov->iov_base) || + __get_user(len, &uiov->iov_len)) + return -EFAULT; + + kiov.iov_base = compat_ptr(ptr); + kiov.iov_len = len; + + ret = ptrace_regset(child, request, addr, &kiov); + if (!ret) + ret = __put_user(kiov.iov_len, &uiov->iov_len); + break; + } +#endif default: ret = ptrace_request(child, request, addr, data); -- cgit v1.2.3 From 73bfa5f2f71efcdcaad8d18cbed96b9d7ed86948 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Fri, 26 Feb 2010 22:37:52 +0100 Subject: [S390] Define new s390 ELF note sections in elf.h S390 ELF core dump currently only contains the PSW, the general purpose registers, the floating point registers and the access registers stored in PRSTATUS/PRFPREG note sections. For analyzing s390 kernel problems additional registers are important. In order to be able to include these registers to a kernel ELF core dump, this patch adds the following five new note sections to elf.h: * NT_S390_TIMER: S390 timer register * NT_S390_TODCMP: S390 TOD comparator register * NT_S390_TODPREG: S390 TOD programmable register * NT_S390_CTRS: S390 control registers * NT_S390_PREFIX: S390 prefix register The new note sections have been already defined and accepted in the upstream binutils package. 
Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- include/linux/elf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/elf.h') diff --git a/include/linux/elf.h b/include/linux/elf.h index 0cc4d55151b7..39ad4b230a4a 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -362,6 +362,11 @@ typedef struct elf64_shdr { #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ #define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ +#define NT_S390_TIMER 0x301 /* s390 timer register */ +#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */ +#define NT_S390_TODPREG 0x303 /* s390 TOD programmable register */ +#define NT_S390_CTRS 0x304 /* s390 control registers */ +#define NT_S390_PREFIX 0x305 /* s390 prefix register */ /* Note header in a PT_NOTE section */ -- cgit v1.2.3 From 1fcccbac89f5bbc5e41aa72086960059fce372da Mon Sep 17 00:00:00 2001 From: Daisuke HATAYAMA Date: Fri, 5 Mar 2010 13:44:07 -0800 Subject: elf coredump: replace ELF_CORE_EXTRA_* macros by functions elf_core_dump() and elf_fdpic_core_dump() use #ifdef and the corresponding macro for hiding _multiline_ logic in functions. This patch removes #ifdef and replaces ELF_CORE_EXTRA_* by corresponding functions. For architectures not implementing ELF_CORE_EXTRA_*, we use weak functions in order to reduce a range of modification. This cleanup is for my next patches, but I think this cleanup itself is worth doing regardless of my final purpose. 
Signed-off-by: Daisuke HATAYAMA Cc: "Luck, Tony" Cc: Jeff Dike Cc: David Howells Cc: Greg Ungerer Cc: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Alexander Viro Cc: Andi Kleen Cc: Alan Cox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/elf.h | 48 -------------------------------- arch/ia64/kernel/Makefile | 2 ++ arch/ia64/kernel/elfcore.c | 64 +++++++++++++++++++++++++++++++++++++++++++ arch/um/sys-i386/Makefile | 2 ++ arch/um/sys-i386/asm/elf.h | 43 ----------------------------- arch/um/sys-i386/elfcore.c | 67 +++++++++++++++++++++++++++++++++++++++++++++ fs/binfmt_elf.c | 14 ++++------ fs/binfmt_elf_fdpic.c | 14 ++++------ fs/compat_binfmt_elf.c | 2 ++ include/linux/elf.h | 2 ++ include/linux/elfcore.h | 16 +++++++++++ kernel/Makefile | 3 ++ kernel/elfcore.c | 23 ++++++++++++++++ 13 files changed, 191 insertions(+), 109 deletions(-) create mode 100644 arch/ia64/kernel/elfcore.c create mode 100644 arch/um/sys-i386/elfcore.c create mode 100644 kernel/elfcore.c (limited to 'include/linux/elf.h') diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index 4c41656ede87..b5298eb09adb 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -219,54 +219,6 @@ do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long) GATE_EHDR); \ } while (0) - -/* - * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out - * extra segments containing the gate DSO contents. Dumping its - * contents makes post-mortem fully interpretable later without matching up - * the same kernel and hardware config to see what PC values meant. - * Dumping its extra ELF program headers includes all the other information - * a debugger needs to easily find how the gate DSO was being used. 
- */ -#define ELF_CORE_EXTRA_PHDRS (GATE_EHDR->e_phnum) -#define ELF_CORE_WRITE_EXTRA_PHDRS \ -do { \ - const struct elf_phdr *const gate_phdrs = \ - (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); \ - int i; \ - Elf64_Off ofs = 0; \ - for (i = 0; i < GATE_EHDR->e_phnum; ++i) { \ - struct elf_phdr phdr = gate_phdrs[i]; \ - if (phdr.p_type == PT_LOAD) { \ - phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); \ - phdr.p_filesz = phdr.p_memsz; \ - if (ofs == 0) { \ - ofs = phdr.p_offset = offset; \ - offset += phdr.p_filesz; \ - } \ - else \ - phdr.p_offset = ofs; \ - } \ - else \ - phdr.p_offset += ofs; \ - phdr.p_paddr = 0; /* match other core phdrs */ \ - DUMP_WRITE(&phdr, sizeof(phdr)); \ - } \ -} while (0) -#define ELF_CORE_WRITE_EXTRA_DATA \ -do { \ - const struct elf_phdr *const gate_phdrs = \ - (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); \ - int i; \ - for (i = 0; i < GATE_EHDR->e_phnum; ++i) { \ - if (gate_phdrs[i].p_type == PT_LOAD) { \ - DUMP_WRITE((void *) gate_phdrs[i].p_vaddr, \ - PAGE_ALIGN(gate_phdrs[i].p_memsz)); \ - break; \ - } \ - } \ -} while (0) - /* * format for entries in the Global Offset Table */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 4138282aefa8..db10b1e378b0 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -45,6 +45,8 @@ endif obj-$(CONFIG_DMAR) += pci-dma.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o +obj-$(CONFIG_BINFMT_ELF) += elfcore.o + # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. 
CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c new file mode 100644 index 000000000000..57a2298a8581 --- /dev/null +++ b/arch/ia64/kernel/elfcore.c @@ -0,0 +1,64 @@ +#include +#include +#include +#include + +#include + + +Elf64_Half elf_core_extra_phdrs(void) +{ + return GATE_EHDR->e_phnum; +} + +int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + Elf64_Off ofs = 0; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + struct elf_phdr phdr = gate_phdrs[i]; + + if (phdr.p_type == PT_LOAD) { + phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); + phdr.p_filesz = phdr.p_memsz; + if (ofs == 0) { + ofs = phdr.p_offset = offset; + offset += phdr.p_filesz; + } else { + phdr.p_offset = ofs; + } + } else { + phdr.p_offset += ofs; + } + phdr.p_paddr = 0; /* match other core phdrs */ + *size += sizeof(phdr); + if (*size > limit || !dump_write(file, &phdr, sizeof(phdr))) + return 0; + } + return 1; +} + +int elf_core_write_extra_data(struct file *file, size_t *size, + unsigned long limit) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + if (gate_phdrs[i].p_type == PT_LOAD) { + void *addr = (void *)gate_phdrs[i].p_vaddr; + size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz); + + *size += memsz; + if (*size > limit || !dump_write(file, addr, memsz)) + return 0; + break; + } + } + return 1; +} diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 1b549bca4645..804b28dd0328 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -6,6 +6,8 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \ sys_call_table.o tls.o 
+obj-$(CONFIG_BINFMT_ELF) += elfcore.o + subarch-obj-y = lib/semaphore_32.o lib/string_32.o subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o subarch-obj-$(CONFIG_MODULES) += kernel/module.o diff --git a/arch/um/sys-i386/asm/elf.h b/arch/um/sys-i386/asm/elf.h index 770885472ed4..e64cd41d7bab 100644 --- a/arch/um/sys-i386/asm/elf.h +++ b/arch/um/sys-i386/asm/elf.h @@ -116,47 +116,4 @@ do { \ } \ } while (0) -/* - * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out - * extra segments containing the vsyscall DSO contents. Dumping its - * contents makes post-mortem fully interpretable later without matching up - * the same kernel and hardware config to see what PC values meant. - * Dumping its extra ELF program headers includes all the other information - * a debugger needs to easily find how the vsyscall DSO was being used. - */ -#define ELF_CORE_EXTRA_PHDRS \ - (vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0 ) - -#define ELF_CORE_WRITE_EXTRA_PHDRS \ -if ( vsyscall_ehdr ) { \ - const struct elfhdr *const ehdrp = (struct elfhdr *)vsyscall_ehdr; \ - const struct elf_phdr *const phdrp = \ - (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); \ - int i; \ - Elf32_Off ofs = 0; \ - for (i = 0; i < ehdrp->e_phnum; ++i) { \ - struct elf_phdr phdr = phdrp[i]; \ - if (phdr.p_type == PT_LOAD) { \ - ofs = phdr.p_offset = offset; \ - offset += phdr.p_filesz; \ - } \ - else \ - phdr.p_offset += ofs; \ - phdr.p_paddr = 0; /* match other core phdrs */ \ - DUMP_WRITE(&phdr, sizeof(phdr)); \ - } \ -} -#define ELF_CORE_WRITE_EXTRA_DATA \ -if ( vsyscall_ehdr ) { \ - const struct elfhdr *const ehdrp = (struct elfhdr *)vsyscall_ehdr; \ - const struct elf_phdr *const phdrp = \ - (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); \ - int i; \ - for (i = 0; i < ehdrp->e_phnum; ++i) { \ - if (phdrp[i].p_type == PT_LOAD) \ - DUMP_WRITE((void *) phdrp[i].p_vaddr, \ - phdrp[i].p_filesz); \ - } \ -} - #endif diff --git 
a/arch/um/sys-i386/elfcore.c b/arch/um/sys-i386/elfcore.c new file mode 100644 index 000000000000..30cac52a04b4 --- /dev/null +++ b/arch/um/sys-i386/elfcore.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include + +#include + + +Elf32_Half elf_core_extra_phdrs(void) +{ + return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0; +} + +int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *) vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + Elf32_Off ofs = 0; + + for (i = 0; i < ehdrp->e_phnum; ++i) { + struct elf_phdr phdr = phdrp[i]; + + if (phdr.p_type == PT_LOAD) { + ofs = phdr.p_offset = offset; + offset += phdr.p_filesz; + } else { + phdr.p_offset += ofs; + } + phdr.p_paddr = 0; /* match other core phdrs */ + *size += sizeof(phdr); + if (*size > limit + || !dump_write(file, &phdr, sizeof(phdr))) + return 0; + } + } + return 1; +} + +int elf_core_write_extra_data(struct file *file, size_t *size, + unsigned long limit) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *) vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + + for (i = 0; i < ehdrp->e_phnum; ++i) { + if (phdrp[i].p_type == PT_LOAD) { + void *addr = (void *) phdrp[i].p_vaddr; + size_t filesz = phdrp[i].p_filesz; + + *size += filesz; + if (*size > limit + || !dump_write(file, addr, filesz)) + return 0; + } + } + } + return 1; +} diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0bcfbb05c32d..c1a499599b7d 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1878,9 +1878,7 @@ static int elf_core_dump(struct coredump_params *cprm) * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. 
*/ segs = current->mm->map_count; -#ifdef ELF_CORE_EXTRA_PHDRS - segs += ELF_CORE_EXTRA_PHDRS; -#endif + segs += elf_core_extra_phdrs(); gate_vma = get_gate_vma(current); if (gate_vma != NULL) @@ -1958,9 +1956,8 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; } -#ifdef ELF_CORE_WRITE_EXTRA_PHDRS - ELF_CORE_WRITE_EXTRA_PHDRS; -#endif + if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) + goto end_coredump; /* write out the notes section */ if (!write_note_info(&info, cprm->file, &foffset)) @@ -1999,9 +1996,8 @@ static int elf_core_dump(struct coredump_params *cprm) } } -#ifdef ELF_CORE_WRITE_EXTRA_DATA - ELF_CORE_WRITE_EXTRA_DATA; -#endif + if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) + goto end_coredump; end_coredump: set_fs(fs); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 63edf40b569b..952699a86ec3 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1664,9 +1664,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) elf_core_copy_regs(&prstatus->pr_reg, cprm->regs); segs = current->mm->map_count; -#ifdef ELF_CORE_EXTRA_PHDRS - segs += ELF_CORE_EXTRA_PHDRS; -#endif + segs += elf_core_extra_phdrs(); /* Set up header */ fill_elf_fdpic_header(elf, segs + 1); /* including notes section */ @@ -1773,9 +1771,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; } -#ifdef ELF_CORE_WRITE_EXTRA_PHDRS - ELF_CORE_WRITE_EXTRA_PHDRS; -#endif + if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) + goto end_coredump; /* write out the notes section */ for (i = 0; i < numnote; i++) @@ -1799,9 +1796,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) mm_flags) < 0) goto end_coredump; -#ifdef ELF_CORE_WRITE_EXTRA_DATA - ELF_CORE_WRITE_EXTRA_DATA; -#endif + if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) + goto end_coredump; if (cprm->file->f_pos != offset) { /* Sanity check */ diff --git 
a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index 0adced2f296f..112e45a17e99 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -28,10 +28,12 @@ #undef elfhdr #undef elf_phdr +#undef elf_shdr #undef elf_note #undef elf_addr_t #define elfhdr elf32_hdr #define elf_phdr elf32_phdr +#define elf_shdr elf32_shdr #define elf_note elf32_note #define elf_addr_t Elf32_Addr diff --git a/include/linux/elf.h b/include/linux/elf.h index ad990c5f63f6..ccde3fd45f36 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -396,6 +396,7 @@ extern Elf32_Dyn _DYNAMIC []; #define elf_phdr elf32_phdr #define elf_note elf32_note #define elf_addr_t Elf32_Off +#define Elf_Half Elf32_Half #else @@ -404,6 +405,7 @@ extern Elf64_Dyn _DYNAMIC []; #define elf_phdr elf64_phdr #define elf_note elf64_note #define elf_addr_t Elf64_Off +#define Elf_Half Elf64_Half #endif diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 00d6a68d0421..cfda74f521b5 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -8,6 +8,8 @@ #include #endif #include +#include +#include struct elf_siginfo { @@ -150,5 +152,19 @@ static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregse #endif /* __KERNEL__ */ +/* + * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out + * extra segments containing the gate DSO contents. Dumping its + * contents makes post-mortem fully interpretable later without matching up + * the same kernel and hardware config to see what PC values meant. + * Dumping its extra ELF program headers includes all the other information + * a debugger needs to easily find how the gate DSO was being used. 
+ */ +extern Elf_Half elf_core_extra_phdrs(void); +extern int +elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit); +extern int +elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit); #endif /* _LINUX_ELFCORE_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 7b974699f8c2..a987aa1676b5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -91,6 +91,9 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o +obj-$(CONFIG_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_COMPAT_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_X86_DS) += trace/ diff --git a/kernel/elfcore.c b/kernel/elfcore.c new file mode 100644 index 000000000000..5445741f4b4c --- /dev/null +++ b/kernel/elfcore.c @@ -0,0 +1,23 @@ +#include +#include +#include + +#include + + +Elf_Half __weak elf_core_extra_phdrs(void) +{ + return 0; +} + +int __weak elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit) +{ + return 1; +} + +int __weak elf_core_write_extra_data(struct file *file, size_t *size, + unsigned long limit) +{ + return 1; +} -- cgit v1.2.3 From 8d9032bbe4671dc481261ccd4e161cd96e54b118 Mon Sep 17 00:00:00 2001 From: Daisuke HATAYAMA Date: Fri, 5 Mar 2010 13:44:10 -0800 Subject: elf coredump: add extended numbering support The current ELF dumper implementation can produce broken corefiles if program headers exceed 65535. This number is determined by the number of vmas which the process have. In particular, some extreme programs may use more than 65535 vmas. (If you google max_map_count, you can find some users facing this problem.) This kind of program never be able to generate correct coredumps. 
This patch implements ``extended numbering'' that uses sh_info field of the first section header instead of e_phnum field in order to represent up to 4294967295 vmas. This is supported by AMD64-ABI (http://www.x86-64.org/documentation.html) and Solaris (http://docs.sun.com/app/docs/doc/817-1984/). Of course, we are preparing patches for gdb and binutils. Signed-off-by: Daisuke HATAYAMA Cc: "Luck, Tony" Cc: Jeff Dike Cc: David Howells Cc: Greg Ungerer Cc: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Alexander Viro Cc: Andi Kleen Cc: Alan Cox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/elfcore.c | 16 +++++++++++ arch/um/sys-i386/elfcore.c | 16 +++++++++++ fs/binfmt_elf.c | 66 +++++++++++++++++++++++++++++++++++++++++++--- fs/binfmt_elf_fdpic.c | 63 +++++++++++++++++++++++++++++++++++++++++-- include/linux/elf.h | 26 +++++++++++++++++- include/linux/elfcore.h | 1 + kernel/elfcore.c | 5 ++++ 7 files changed, 187 insertions(+), 6 deletions(-) (limited to 'include/linux/elf.h') diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c index 57a2298a8581..bac1639bc320 100644 --- a/arch/ia64/kernel/elfcore.c +++ b/arch/ia64/kernel/elfcore.c @@ -62,3 +62,19 @@ int elf_core_write_extra_data(struct file *file, size_t *size, } return 1; } + +size_t elf_core_extra_data_size(void) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + size_t size = 0; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + if (gate_phdrs[i].p_type == PT_LOAD) { + size += PAGE_ALIGN(gate_phdrs[i].p_memsz); + break; + } + } + return size; +} diff --git a/arch/um/sys-i386/elfcore.c b/arch/um/sys-i386/elfcore.c index 30cac52a04b4..6bb49b687c97 100644 --- a/arch/um/sys-i386/elfcore.c +++ b/arch/um/sys-i386/elfcore.c @@ -65,3 +65,19 @@ int elf_core_write_extra_data(struct file *file, size_t *size, } return 1; } + +size_t elf_core_extra_data_size(void) +{ + if ( vsyscall_ehdr ) { + const 
struct elfhdr *const ehdrp = + (struct elfhdr *)vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + + for (i = 0; i < ehdrp->e_phnum; ++i) + if (phdrp[i].p_type == PT_LOAD) + return (size_t) phdrp[i].p_filesz; + } + return 0; +} diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6fc49b6ed936..78de530cfb02 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1838,6 +1838,34 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, return gate_vma; } +static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, + elf_addr_t e_shoff, int segs) +{ + elf->e_shoff = e_shoff; + elf->e_shentsize = sizeof(*shdr4extnum); + elf->e_shnum = 1; + elf->e_shstrndx = SHN_UNDEF; + + memset(shdr4extnum, 0, sizeof(*shdr4extnum)); + + shdr4extnum->sh_type = SHT_NULL; + shdr4extnum->sh_size = elf->e_shnum; + shdr4extnum->sh_link = elf->e_shstrndx; + shdr4extnum->sh_info = segs; +} + +static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma, + unsigned long mm_flags) +{ + struct vm_area_struct *vma; + size_t size = 0; + + for (vma = first_vma(current, gate_vma); vma != NULL; + vma = next_vma(vma, gate_vma)) + size += vma_dump_size(vma, mm_flags); + return size; +} + /* * Actual dumper * @@ -1857,6 +1885,9 @@ static int elf_core_dump(struct coredump_params *cprm) unsigned long mm_flags; struct elf_note_info info; struct elf_phdr *phdr4note = NULL; + struct elf_shdr *shdr4extnum = NULL; + Elf_Half e_phnum; + elf_addr_t e_shoff; /* * We no longer stop all VM operations. @@ -1885,12 +1916,19 @@ static int elf_core_dump(struct coredump_params *cprm) if (gate_vma != NULL) segs++; + /* for notes section */ + segs++; + + /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid + * this, kernel supports extended numbering. Have a look at + * include/linux/elf.h for further information. */ + e_phnum = segs > PN_XNUM ? 
PN_XNUM : segs; + /* * Collect all the non-memory information about the process for the * notes. This also sets up the file header. */ - if (!fill_note_info(elf, segs + 1, /* including notes section */ - &info, cprm->signr, cprm->regs)) + if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs)) goto cleanup; has_dumped = 1; @@ -1900,7 +1938,7 @@ static int elf_core_dump(struct coredump_params *cprm) set_fs(KERNEL_DS); offset += sizeof(*elf); /* Elf header */ - offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */ + offset += segs * sizeof(struct elf_phdr); /* Program headers */ foffset = offset; /* Write notes phdr entry */ @@ -1926,6 +1964,19 @@ static int elf_core_dump(struct coredump_params *cprm) */ mm_flags = current->mm->flags; + offset += elf_core_vma_data_size(gate_vma, mm_flags); + offset += elf_core_extra_data_size(); + e_shoff = offset; + + if (e_phnum == PN_XNUM) { + shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL); + if (!shdr4extnum) + goto end_coredump; + fill_extnum_info(elf, shdr4extnum, e_shoff, segs); + } + + offset = dataoff; + size += sizeof(*elf); if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) goto end_coredump; @@ -2003,11 +2054,20 @@ static int elf_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) goto end_coredump; + if (e_phnum == PN_XNUM) { + size += sizeof(*shdr4extnum); + if (size > cprm->limit + || !dump_write(cprm->file, shdr4extnum, + sizeof(*shdr4extnum))) + goto end_coredump; + } + end_coredump: set_fs(fs); cleanup: free_note_info(&info); + kfree(shdr4extnum); kfree(phdr4note); kfree(elf); out: diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 112da491d75d..e49d9c06a4b6 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1505,6 +1505,22 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) return sz; } +static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr 
*shdr4extnum, + elf_addr_t e_shoff, int segs) +{ + elf->e_shoff = e_shoff; + elf->e_shentsize = sizeof(*shdr4extnum); + elf->e_shnum = 1; + elf->e_shstrndx = SHN_UNDEF; + + memset(shdr4extnum, 0, sizeof(*shdr4extnum)); + + shdr4extnum->sh_type = SHT_NULL; + shdr4extnum->sh_size = elf->e_shnum; + shdr4extnum->sh_link = elf->e_shstrndx; + shdr4extnum->sh_info = segs; +} + /* * dump the segments for an MMU process */ @@ -1569,6 +1585,17 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size, } #endif +static size_t elf_core_vma_data_size(unsigned long mm_flags) +{ + struct vm_area_struct *vma; + size_t size = 0; + + for (vma = current->mm->mmap; vma; vma->vm_next) + if (maydump(vma, mm_flags)) + size += vma->vm_end - vma->vm_start; + return size; +} + /* * Actual dumper * @@ -1601,6 +1628,9 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) elf_addr_t *auxv; unsigned long mm_flags; struct elf_phdr *phdr4note = NULL; + struct elf_shdr *shdr4extnum = NULL; + Elf_Half e_phnum; + elf_addr_t e_shoff; /* * We no longer stop all VM operations. @@ -1667,8 +1697,16 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) segs = current->mm->map_count; segs += elf_core_extra_phdrs(); + /* for notes section */ + segs++; + + /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid + * this, kernel supports extended numbering. Have a look at + * include/linux/elf.h for further information. */ + e_phnum = segs > PN_XNUM ? 
PN_XNUM : segs; + /* Set up header */ - fill_elf_fdpic_header(elf, segs + 1); /* including notes section */ + fill_elf_fdpic_header(elf, e_phnum); has_dumped = 1; current->flags |= PF_DUMPCORE; @@ -1708,7 +1746,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) set_fs(KERNEL_DS); offset += sizeof(*elf); /* Elf header */ - offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */ + offset += segs * sizeof(struct elf_phdr); /* Program headers */ foffset = offset; /* Write notes phdr entry */ @@ -1738,6 +1776,19 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) */ mm_flags = current->mm->flags; + offset += elf_core_vma_data_size(mm_flags); + offset += elf_core_extra_data_size(); + e_shoff = offset; + + if (e_phnum == PN_XNUM) { + shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL); + if (!shdr4extnum) + goto end_coredump; + fill_extnum_info(elf, shdr4extnum, e_shoff, segs); + } + + offset = dataoff; + size += sizeof(*elf); if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) goto end_coredump; @@ -1802,6 +1853,14 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) goto end_coredump; + if (e_phnum == PN_XNUM) { + size += sizeof(*shdr4extnum); + if (size > cprm->limit + || !dump_write(cprm->file, shdr4extnum, + sizeof(*shdr4extnum))) + goto end_coredump; + } + if (cprm->file->f_pos != offset) { /* Sanity check */ printk(KERN_WARNING diff --git a/include/linux/elf.h b/include/linux/elf.h index ccde3fd45f36..597858418051 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -50,6 +50,28 @@ typedef __s64 Elf64_Sxword; #define PT_GNU_STACK (PT_LOOS + 0x474e551) +/* + * Extended Numbering + * + * If the real number of program header table entries is larger than + * or equal to PN_XNUM(0xffff), it is set to sh_info field of the + * section header at index 0, and PN_XNUM is set to e_phnum + * field. 
Otherwise, the section header at index 0 is zero + * initialized, if it exists. + * + * Specifications are available in: + * + * - Sun microsystems: Linker and Libraries. + * Part No: 817-1984-17, September 2008. + * URL: http://docs.sun.com/app/docs/doc/817-1984 + * + * - System V ABI AMD64 Architecture Processor Supplement + * Draft Version 0.99., + * May 11, 2009. + * URL: http://www.x86-64.org/ + */ +#define PN_XNUM 0xffff + /* These constants define the different elf file types */ #define ET_NONE 0 #define ET_REL 1 @@ -286,7 +308,7 @@ typedef struct elf64_phdr { #define SHN_COMMON 0xfff2 #define SHN_HIRESERVE 0xffff -typedef struct { +typedef struct elf32_shdr { Elf32_Word sh_name; Elf32_Word sh_type; Elf32_Word sh_flags; @@ -394,6 +416,7 @@ typedef struct elf64_note { extern Elf32_Dyn _DYNAMIC []; #define elfhdr elf32_hdr #define elf_phdr elf32_phdr +#define elf_shdr elf32_shdr #define elf_note elf32_note #define elf_addr_t Elf32_Off #define Elf_Half Elf32_Half @@ -403,6 +426,7 @@ extern Elf32_Dyn _DYNAMIC []; extern Elf64_Dyn _DYNAMIC []; #define elfhdr elf64_hdr #define elf_phdr elf64_phdr +#define elf_shdr elf64_shdr #define elf_note elf64_note #define elf_addr_t Elf64_Off #define Elf_Half Elf64_Half diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index cfda74f521b5..e687bc3ba4da 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -166,5 +166,6 @@ elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, unsigned long limit); extern int elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit); +extern size_t elf_core_extra_data_size(void); #endif /* _LINUX_ELFCORE_H */ diff --git a/kernel/elfcore.c b/kernel/elfcore.c index 5445741f4b4c..ff915efef66d 100644 --- a/kernel/elfcore.c +++ b/kernel/elfcore.c @@ -21,3 +21,8 @@ int __weak elf_core_write_extra_data(struct file *file, size_t *size, { return 1; } + +size_t __weak elf_core_extra_data_size(void) +{ + return 0; +} -- cgit v1.2.3