From d83212d5dd6761625fe87cc23016bbaa47303271 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin
Date: Wed, 6 Jun 2018 15:54:10 +0300
Subject: kallsyms, x86: Export addresses of PTI entry trampolines

Currently, the addresses of PTI entry trampolines are not exported to
user space. Kernel profiling tools need these addresses to identify the
kernel code, so add a symbol and address for each CPU's PTI entry
trampoline.

Signed-off-by: Alexander Shishkin
Acked-by: Andi Kleen
Acked-by: Peter Zijlstra (Intel)
Cc: Andy Lutomirski
Cc: Dave Hansen
Cc: H. Peter Anvin
Cc: Jiri Olsa
Cc: Joerg Roedel
Cc: Thomas Gleixner
Cc: x86@kernel.org
Link: http://lkml.kernel.org/r/1528289651-4113-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 arch/x86/mm/cpu_entry_area.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index b45f5aaefd74..fab49fd5190f 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -2,6 +2,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/percpu.h>
+#include <linux/kallsyms.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/pgtable.h>
@@ -150,6 +151,28 @@ static void __init setup_cpu_entry_area(int cpu)
 	percpu_setup_debug_store(cpu);
 }
 
+#ifdef CONFIG_X86_64
+int arch_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
+		     char *name)
+{
+	unsigned int cpu, ncpu = 0;
+
+	if (symnum >= num_possible_cpus())
+		return -EINVAL;
+
+	for_each_possible_cpu(cpu) {
+		if (ncpu++ >= symnum)
+			break;
+	}
+
+	*value = (unsigned long)&get_cpu_entry_area(cpu)->entry_trampoline;
+	*type = 't';
+	strlcpy(name, "__entry_SYSCALL_64_trampoline", KSYM_NAME_LEN);
+
+	return 0;
+}
+#endif
+
 static __init void setup_cpu_entry_area_ptes(void)
 {
 #ifdef CONFIG_X86_32
--
cgit v1.2.3
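For illustration (not part of the patch): on a kernel with the above
change applied, each possible CPU contributes one 't' symbol named
__entry_SYSCALL_64_trampoline to /proc/kallsyms. A minimal user-space
sketch to confirm that, assuming nothing beyond the symbol name used
above:

	/* Count the per-CPU trampoline symbols exported by the patch.
	 * Expect one match per possible CPU on a patched kernel. */
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		FILE *f = fopen("/proc/kallsyms", "r");
		char line[512];
		int n = 0;

		if (!f) {
			perror("/proc/kallsyms");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			if (strstr(line, "__entry_SYSCALL_64_trampoline"))
				n++;
		fclose(f);
		printf("%d trampoline symbol(s)\n", n);
		return 0;
	}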
From 6855dc41b24619c3d1de3dbd27dd0546b0e45272 Mon Sep 17 00:00:00 2001
From: Adrian Hunter
Date: Wed, 6 Jun 2018 15:54:11 +0300
Subject: x86: Add entry trampolines to kcore

Without program headers for PTI entry trampoline pages, the trampoline
virtual addresses do not map to anything.

Example before:

 sudo gdb --quiet vmlinux /proc/kcore
 Reading symbols from vmlinux...done.
 [New process 1]
 Core was generated by `BOOT_IMAGE=/boot/vmlinuz-4.16.0 root=UUID=a6096b83-b763-4101-807e-f33daff63233'.
 #0  0x0000000000000000 in irq_stack_union ()
 (gdb) x /21ib 0xfffffe0000006000
 0xfffffe0000006000:	Cannot access memory at address 0xfffffe0000006000
 (gdb) quit

After:

 sudo gdb --quiet vmlinux /proc/kcore
 [sudo] password for ahunter:
 Reading symbols from vmlinux...done.
 [New process 1]
 Core was generated by `BOOT_IMAGE=/boot/vmlinuz-4.16.0-fix-4-00005-gd6e65a8b4072 root=UUID=a6096b83-b7'.
 #0  0x0000000000000000 in irq_stack_union ()
 (gdb) x /21ib 0xfffffe0000006000
 0xfffffe0000006000:	swapgs
 0xfffffe0000006003:	mov    %rsp,-0x3e12(%rip)        # 0xfffffe00000021f8
 0xfffffe000000600a:	xchg   %ax,%ax
 0xfffffe000000600c:	mov    %cr3,%rsp
 0xfffffe000000600f:	bts    $0x3f,%rsp
 0xfffffe0000006014:	and    $0xffffffffffffe7ff,%rsp
 0xfffffe000000601b:	mov    %rsp,%cr3
 0xfffffe000000601e:	mov    -0x3019(%rip),%rsp        # 0xfffffe000000300c
 0xfffffe0000006025:	pushq  $0x2b
 0xfffffe0000006027:	pushq  -0x3e35(%rip)        # 0xfffffe00000021f8
 0xfffffe000000602d:	push   %r11
 0xfffffe000000602f:	pushq  $0x33
 0xfffffe0000006031:	push   %rcx
 0xfffffe0000006032:	push   %rdi
 0xfffffe0000006033:	mov    $0xffffffff91a00010,%rdi
 0xfffffe000000603a:	callq  0xfffffe0000006046
 0xfffffe000000603f:	pause
 0xfffffe0000006041:	lfence
 0xfffffe0000006044:	jmp    0xfffffe000000603f
 0xfffffe0000006046:	mov    %rdi,(%rsp)
 0xfffffe000000604a:	retq
 (gdb) quit

In addition, entry trampolines all map to the same page. Represent that
by giving the corresponding program headers in kcore the same offset.

This has the benefit that, when the perf tools use /proc/kcore as a
source for kernel object code, samples from different CPU trampolines
are aggregated together. Note that such aggregation is normal for
profiling, i.e. people want to profile the object code, not every
different virtual address the object code might be mapped to (across
different processes, for example).

Notes by PeterZ: This also adds the KCORE_REMAP functionality.

Signed-off-by: Adrian Hunter
Acked-by: Andi Kleen
Acked-by: Peter Zijlstra (Intel)
Cc: Alexander Shishkin
Cc: Andy Lutomirski
Cc: Dave Hansen
Cc: H. Peter Anvin
Cc: Jiri Olsa
Cc: Joerg Roedel
Cc: Thomas Gleixner
Cc: x86@kernel.org
Link: http://lkml.kernel.org/r/1528289651-4113-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 arch/x86/mm/cpu_entry_area.c | 10 ++++++++++
 fs/proc/kcore.c              |  7 +++++--
 include/linux/kcore.h        | 13 +++++++++++++
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index fab49fd5190f..076ebdce9bd4 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -3,6 +3,7 @@
 #include <linux/spinlock.h>
 #include <linux/percpu.h>
 #include <linux/kallsyms.h>
+#include <linux/kcore.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/pgtable.h>
@@ -14,6 +15,7 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
 	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+static DEFINE_PER_CPU(struct kcore_list, kcore_entry_trampoline);
 #endif
 
 struct cpu_entry_area *get_cpu_entry_area(int cpu)
@@ -147,6 +149,14 @@ static void __init setup_cpu_entry_area(int cpu)
 	cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
 		    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+	/*
+	 * The cpu_entry_area alias addresses are not in the kernel binary
+	 * so they do not show up in /proc/kcore normally. This adds entries
+	 * for them manually.
+	 */
+	kclist_add_remap(&per_cpu(kcore_entry_trampoline, cpu),
+			 _entry_trampoline,
+			 &get_cpu_entry_area(cpu)->entry_trampoline, PAGE_SIZE);
 #endif
 	percpu_setup_debug_store(cpu);
 }
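For illustration (not from the patch): the kclist_add_remap() helper
called above is introduced in the include/linux/kcore.h hunk below; it
registers the real text address, from which the kcore file offset is
derived, plus a separate alias address used for p_vaddr. A hypothetical
caller, with invented names, would follow the same pattern as
setup_cpu_entry_area():

	/* Hypothetical sketch: expose one page mapped at an alias address
	 * in /proc/kcore. "my_kcore_entry", "real_text" and "alias_addr"
	 * are invented names, not from the patch. */
	#include <linux/kcore.h>
	#include <asm/page.h>

	static struct kcore_list my_kcore_entry;

	static void __init my_expose_alias(void *real_text, void *alias_addr)
	{
		/* KCORE_REMAP: p_offset derives from real_text, while
		 * p_vaddr is taken from alias_addr. */
		kclist_add_remap(&my_kcore_entry, real_text, alias_addr,
				 PAGE_SIZE);
	}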
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e64ecb9f2720..00282f134336 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -383,8 +383,11 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 		phdr->p_type = PT_LOAD;
 		phdr->p_flags = PF_R|PF_W|PF_X;
 		phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
-		phdr->p_vaddr = (size_t)m->addr;
-		if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
+		if (m->type == KCORE_REMAP)
+			phdr->p_vaddr = (size_t)m->vaddr;
+		else
+			phdr->p_vaddr = (size_t)m->addr;
+		if (m->type == KCORE_RAM || m->type == KCORE_TEXT || m->type == KCORE_REMAP)
 			phdr->p_paddr = __pa(m->addr);
 		else
 			phdr->p_paddr = (elf_addr_t)-1;
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8de55e4b5ee9..bc088ef96358 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -12,11 +12,13 @@ enum kcore_type {
 	KCORE_VMEMMAP,
 	KCORE_USER,
 	KCORE_OTHER,
+	KCORE_REMAP,
 };
 
 struct kcore_list {
 	struct list_head list;
 	unsigned long addr;
+	unsigned long vaddr;
 	size_t size;
 	int type;
 };
@@ -36,11 +38,22 @@ struct vmcoredd_node {
 
 #ifdef CONFIG_PROC_KCORE
 extern void kclist_add(struct kcore_list *, void *, size_t, int type);
+static inline
+void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
+{
+	m->vaddr = (unsigned long)vaddr;
+	kclist_add(m, addr, sz, KCORE_REMAP);
+}
 #else
 static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
 {
 }
+
+static inline
+void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
+{
+}
 #endif
 
 #endif /* _LINUX_KCORE_H */
--
cgit v1.2.3
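For illustration (not from the patch): the net effect of KCORE_REMAP
can be observed by walking the ELF program headers of /proc/kcore
directly, much as gdb does in the example above. The address-range
filter below (0xfffffe0000000000-0xfffffe7fffffffff, the x86_64
cpu_entry_area mapping at the time of these patches) is an assumption,
not something the patch defines. On a patched kernel, the per-CPU
trampoline segments should print distinct p_vaddr values sharing one
p_offset, since they alias the same physical page:

	/* Print PT_LOAD segments of /proc/kcore within the (assumed)
	 * cpu_entry_area range. Must be run as root. */
	#include <elf.h>
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/kcore", "r");
		Elf64_Ehdr eh;
		Elf64_Phdr ph;
		int i;

		if (!f || fread(&eh, sizeof(eh), 1, f) != 1) {
			perror("/proc/kcore");
			return 1;
		}
		for (i = 0; i < eh.e_phnum; i++) {
			if (fseek(f, eh.e_phoff + i * eh.e_phentsize, SEEK_SET) ||
			    fread(&ph, sizeof(ph), 1, f) != 1)
				break;
			if (ph.p_type == PT_LOAD &&
			    ph.p_vaddr >= 0xfffffe0000000000ULL &&
			    ph.p_vaddr < 0xfffffe8000000000ULL)
				printf("vaddr %#llx -> offset %#llx\n",
				       (unsigned long long)ph.p_vaddr,
				       (unsigned long long)ph.p_offset);
		}
		fclose(f);
		return 0;
	}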