From 5cec93c216db77c45f7ce970d46283bcb1933884 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 5 Jun 2011 13:50:24 -0400 Subject: x86-64: Emulate legacy vsyscalls There's a fair amount of code in the vsyscall page. It contains a syscall instruction (in the gettimeofday fallback) and who knows what will happen if an exploit jumps into the middle of some other code. Reduce the risk by replacing the vsyscalls with short magic incantations that cause the kernel to emulate the real vsyscalls. These incantations are useless if entered in the middle. This causes vsyscalls to be a little more expensive than real syscalls. Fortunately sensible programs don't use them. The only exception is time() which is still called by glibc through the vsyscall - but calling time() millions of times per second is not sensible. glibc has this fixed in the development tree. This patch is not perfect: the vread_tsc and vread_hpet functions are still at a fixed address. Fixing that might involve making alternative patching work in the vDSO. Signed-off-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Jesper Juhl Cc: Borislav Petkov Cc: Arjan van de Ven Cc: Jan Beulich Cc: richard -rw- weinberger Cc: Mikael Pettersson Cc: Andi Kleen Cc: Brian Gerst Cc: Louis Rilling Cc: Valdis.Kletnieks@vt.edu Cc: pageexec@freemail.hu Link: http://lkml.kernel.org/r/e64e1b3c64858820d12c48fa739efbd1485e79d5.1307292171.git.luto@mit.edu [ Removed the CONFIG option - it's simpler to just do it unconditionally. Tidied up the code as well. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm/irq_vectors.h') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 6e976ee3b3ef..a563c509edcb 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -17,7 +17,8 @@ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events * Vectors 32 ... 127 : device interrupts * Vector 128 : legacy int80 syscall interface - * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts + * Vector 204 : legacy x86_64 vsyscall emulation + * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts * * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. @@ -50,6 +51,9 @@ #ifdef CONFIG_X86_32 # define SYSCALL_VECTOR 0x80 #endif +#ifdef CONFIG_X86_64 +# define VSYSCALL_EMU_VECTOR 0xcc +#endif /* * Vectors 0x30-0x3f are used for ISA interrupts. -- cgit v1.2.3 From b77e70bf3535e0bd5472e0681f41cce4ae0598bb Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 8 Jun 2011 10:56:02 +0900 Subject: x86, mce: Replace MCE_SELF_VECTOR by irq_work The MCE handler uses a special vector for self IPI to invoke post-emergency processing in an interrupt context, e.g. call an NMI-unsafe function, wakeup loggers, schedule time-consuming work for recovery, etc. This mechanism is now generalized by the following commit: > e360adbe29241a0194e10e20595360dd7b98a2b3 > Author: Peter Zijlstra > Date: Thu Oct 14 14:01:34 2010 +0800 > > irq_work: Add generic hardirq context callbacks > > Provide a mechanism that allows running code in IRQ context. It is > most useful for NMI code that needs to interact with the rest of the > system -- like wakeup a task to drain buffers. : So change to use provided generic mechanism. Signed-off-by: Hidetoshi Seto Acked-by: Tony Luck Link: http://lkml.kernel.org/r/4DEED6B2.6080005@jp.fujitsu.com Signed-off-by: Borislav Petkov --- arch/x86/include/asm/entry_arch.h | 4 ---- arch/x86/include/asm/hw_irq.h | 1 - arch/x86/include/asm/irq_vectors.h | 5 ---- arch/x86/kernel/cpu/mcheck/mce.c | 47 +++++--------------------------------- arch/x86/kernel/entry_64.S | 5 ---- arch/x86/kernel/irqinit.c | 3 --- 6 files changed, 6 insertions(+), 59 deletions(-) (limited to 'arch/x86/include/asm/irq_vectors.h') diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 1cd6d26a0a8d..0baa628e330c 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -53,8 +53,4 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) #endif -#ifdef CONFIG_X86_MCE -BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR) -#endif - #endif diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index bb9efe8706e2..13f5504c76c0 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -34,7 +34,6 @@ extern void irq_work_interrupt(void); extern void spurious_interrupt(void); extern void thermal_interrupt(void); extern void reschedule_interrupt(void); -extern void mce_self_interrupt(void); extern void invalidate_interrupt(void); extern void invalidate_interrupt0(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 6e976ee3b3ef..6665026ea3ea 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -109,11 +109,6 @@ #define UV_BAU_MESSAGE 0xf5 -/* - * Self IPI vector for machine checks - */ -#define MCE_SELF_VECTOR 0xf4 - /* Xen vector callback to receive events in a HVM domain */ #define XEN_HVM_EVTCHN_CALLBACK 0xf3 diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ff1ae9b6464d..e81d48b05618 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -38,12 +37,9 @@ #include #include #include +#include #include -#include -#include -#include -#include #include #include @@ -461,22 +457,13 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) m->ip = mce_rdmsrl(rip_msr); } -#ifdef CONFIG_X86_LOCAL_APIC -/* - * Called after interrupts have been reenabled again - * when a MCE happened during an interrupts off region - * in the kernel. - */ -asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs) +DEFINE_PER_CPU(struct irq_work, mce_irq_work); + +static void mce_irq_work_cb(struct irq_work *entry) { - ack_APIC_irq(); - exit_idle(); - irq_enter(); mce_notify_irq(); mce_schedule_work(); - irq_exit(); } -#endif static void mce_report_event(struct pt_regs *regs) { @@ -492,29 +479,7 @@ static void mce_report_event(struct pt_regs *regs) return; } -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Without APIC do not notify. The event will be picked - * up eventually. - */ - if (!cpu_has_apic) - return; - - /* - * When interrupts are disabled we cannot use - * kernel services safely. Trigger an self interrupt - * through the APIC to instead do the notification - * after interrupts are reenabled again. - */ - apic->send_IPI_self(MCE_SELF_VECTOR); - - /* - * Wait for idle afterwards again so that we don't leave the - * APIC in a non idle state because the normal APIC writes - * cannot exclude us. - */ - apic_wait_icr_idle(); -#endif + irq_work_queue(&__get_cpu_var(mce_irq_work)); } DEFINE_PER_CPU(unsigned, mce_poll_count); @@ -1444,7 +1409,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_timer(); INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); - + init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb); } /* diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 8a445a0c989e..9fa65460d33a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -991,11 +991,6 @@ apicinterrupt THRESHOLD_APIC_VECTOR \ apicinterrupt THERMAL_APIC_VECTOR \ thermal_interrupt smp_thermal_interrupt -#ifdef CONFIG_X86_MCE -apicinterrupt MCE_SELF_VECTOR \ - mce_self_interrupt smp_mce_self_interrupt -#endif - #ifdef CONFIG_SMP apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ call_function_single_interrupt smp_call_function_single_interrupt diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index f470e4ef993e..f09d4bbe2d2d 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -272,9 +272,6 @@ static void __init apic_intr_init(void) #ifdef CONFIG_X86_MCE_THRESHOLD alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); #endif -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC) - alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); -#endif #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* self generated IPI for local APIC timer */ -- cgit v1.2.3 From 3ae36655b97a03fa1decf72f04078ef945647c1a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 10 Aug 2011 11:15:32 -0400 Subject: x86-64: Rework vsyscall emulation and add vsyscall= parameter There are three choices: vsyscall=native: Vsyscalls are native code that issues the corresponding syscalls. vsyscall=emulate (default): Vsyscalls are emulated by instruction fault traps, tested in the bad_area path. The actual contents of the vsyscall page is the same as the vsyscall=native case except that it's marked NX. This way programs that make assumptions about what the code in the page does will not be confused when they read that code. vsyscall=none: Trying to execute a vsyscall will segfault. Signed-off-by: Andy Lutomirski Link: http://lkml.kernel.org/r/8449fb3abf89851fd6b2260972666a6f82542284.1312988155.git.luto@mit.edu Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 21 ++++++++++ arch/x86/include/asm/irq_vectors.h | 4 -- arch/x86/include/asm/traps.h | 2 - arch/x86/include/asm/vsyscall.h | 6 +++ arch/x86/kernel/entry_64.S | 1 - arch/x86/kernel/traps.c | 6 --- arch/x86/kernel/vmlinux.lds.S | 33 ---------------- arch/x86/kernel/vsyscall_64.c | 79 +++++++++++++++++++++++-------------- arch/x86/kernel/vsyscall_emu_64.S | 36 +++++++++++------ arch/x86/mm/fault.c | 12 ++++++ 10 files changed, 111 insertions(+), 89 deletions(-) (limited to 'arch/x86/include/asm/irq_vectors.h') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index aa47be71df4c..9cfd6bb9198e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2657,6 +2657,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted. vmpoff= [KNL,S390] Perform z/VM CP command after power off. Format: + vsyscall= [X86-64] + Controls the behavior of vsyscalls (i.e. calls to + fixed addresses of 0xffffffffff600x00 from legacy + code). Most statically-linked binaries and older + versions of glibc use these calls. Because these + functions are at fixed addresses, they make nice + targets for exploits that can control RIP. + + emulate [default] Vsyscalls turn into traps and are + emulated reasonably safely. + + native Vsyscalls are native syscall instructions. + This is a little bit faster than trapping + and makes a few dynamic recompilers work + better than they would in emulation mode. + It also makes exploits much easier to write. + + none Vsyscalls don't work at all. This makes + them quite hard to use for exploits but + might break your system. + vt.cur_default= [VT] Default cursor shape. Format: 0xCCBBAA, where AA, BB, and CC are the same as the parameters of the [?A;B;Cc escape sequence; diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index a563c509edcb..2c224e183b52 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -17,7 +17,6 @@ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events * Vectors 32 ... 127 : device interrupts * Vector 128 : legacy int80 syscall interface - * Vector 204 : legacy x86_64 vsyscall emulation * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts * @@ -51,9 +50,6 @@ #ifdef CONFIG_X86_32 # define SYSCALL_VECTOR 0x80 #endif -#ifdef CONFIG_X86_64 -# define VSYSCALL_EMU_VECTOR 0xcc -#endif /* * Vectors 0x30-0x3f are used for ISA interrupts. diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 2bae0a513b40..0012d0902c5f 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -40,7 +40,6 @@ asmlinkage void alignment_check(void); asmlinkage void machine_check(void); #endif /* CONFIG_X86_MCE */ asmlinkage void simd_coprocessor_error(void); -asmlinkage void emulate_vsyscall(void); dotraplinkage void do_divide_error(struct pt_regs *, long); dotraplinkage void do_debug(struct pt_regs *, long); @@ -67,7 +66,6 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long); dotraplinkage void do_machine_check(struct pt_regs *, long); #endif dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); -dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *, long); #endif diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index 60107072c28b..eaea1d31f753 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -27,6 +27,12 @@ extern struct timezone sys_tz; extern void map_vsyscall(void); +/* + * Called on instruction fetch fault in vsyscall page. + * Returns true if handled. + */ +extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); + #endif /* __KERNEL__ */ #endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e949793d6b93..46792d900018 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1123,7 +1123,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug zeroentry coprocessor_error do_coprocessor_error errorentry alignment_check do_alignment_check zeroentry simd_coprocessor_error do_simd_coprocessor_error -zeroentry emulate_vsyscall do_emulate_vsyscall /* Reload gs selector with exception handling */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index fbc097a085ca..b9b67166f9de 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -872,12 +872,6 @@ void __init trap_init(void) set_bit(SYSCALL_VECTOR, used_vectors); #endif -#ifdef CONFIG_X86_64 - BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors)); - set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall); - set_bit(VSYSCALL_EMU_VECTOR, used_vectors); -#endif - /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8f3a265476d7..0f703f10901a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -71,7 +71,6 @@ PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(6); /* RW_ */ #ifdef CONFIG_X86_64 - user PT_LOAD FLAGS(5); /* R_E */ #ifdef CONFIG_SMP percpu PT_LOAD FLAGS(6); /* RW_ */ #endif @@ -174,38 +173,6 @@ SECTIONS . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); -#define VSYSCALL_ADDR (-10*1024*1024) - -#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET) -#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) - -#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) -#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) - - __vsyscall_0 = .; - - . = VSYSCALL_ADDR; - .vsyscall : AT(VLOAD(.vsyscall)) { - /* work around gold bug 13023 */ - __vsyscall_beginning_hack = .; - *(.vsyscall_0) - - . = __vsyscall_beginning_hack + 1024; - *(.vsyscall_1) - - . = __vsyscall_beginning_hack + 2048; - *(.vsyscall_2) - - . = __vsyscall_beginning_hack + 4096; /* Pad the whole page. */ - } :user =0xcc - . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE); - -#undef VSYSCALL_ADDR -#undef VLOAD_OFFSET -#undef VLOAD -#undef VVIRT_OFFSET -#undef VVIRT - #endif /* CONFIG_X86_64 */ /* Init code and data - will be freed after init */ diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index bf8e9ffee6e9..18ae83dd1cd7 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -56,6 +56,27 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), }; +static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; + +static int __init vsyscall_setup(char *str) +{ + if (str) { + if (!strcmp("emulate", str)) + vsyscall_mode = EMULATE; + else if (!strcmp("native", str)) + vsyscall_mode = NATIVE; + else if (!strcmp("none", str)) + vsyscall_mode = NONE; + else + return -EINVAL; + + return 0; + } + + return -EINVAL; +} +early_param("vsyscall", vsyscall_setup); + void update_vsyscall_tz(void) { unsigned long flags; @@ -100,7 +121,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", level, tsk->comm, task_pid_nr(tsk), - message, regs->ip - 2, regs->cs, + message, regs->ip, regs->cs, regs->sp, regs->ax, regs->si, regs->di); } @@ -118,45 +139,39 @@ static int addr_to_vsyscall_nr(unsigned long addr) return nr; } -void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) +bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) { struct task_struct *tsk; unsigned long caller; int vsyscall_nr; long ret; - local_irq_enable(); + /* + * No point in checking CS -- the only way to get here is a user mode + * trap to a high address, which means that we're in 64-bit user code. + */ - if (!user_64bit_mode(regs)) { - /* - * If we trapped from kernel mode, we might as well OOPS now - * instead of returning to some random address and OOPSing - * then. - */ - BUG_ON(!user_mode(regs)); + WARN_ON_ONCE(address != regs->ip); - /* Compat mode and non-compat 32-bit CS should both segfault. */ - warn_bad_vsyscall(KERN_WARNING, regs, - "illegal int 0xcc from 32-bit mode"); - goto sigsegv; + if (vsyscall_mode == NONE) { + warn_bad_vsyscall(KERN_INFO, regs, + "vsyscall attempted with vsyscall=none"); + return false; } - /* - * x86-ism here: regs->ip points to the instruction after the int 0xcc, - * and int 0xcc is two bytes long. - */ - vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2); + vsyscall_nr = addr_to_vsyscall_nr(address); trace_emulate_vsyscall(vsyscall_nr); if (vsyscall_nr < 0) { warn_bad_vsyscall(KERN_WARNING, regs, - "illegal int 0xcc (exploit attempt?)"); + "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround"); goto sigsegv; } if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { - warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)"); + warn_bad_vsyscall(KERN_WARNING, regs, + "vsyscall with bad stack (exploit attempt?)"); goto sigsegv; } @@ -201,13 +216,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) regs->ip = caller; regs->sp += 8; - local_irq_disable(); - return; + return true; sigsegv: - regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */ force_sig(SIGSEGV, current); - local_irq_disable(); + return true; } /* @@ -255,15 +268,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) void __init map_vsyscall(void) { - extern char __vsyscall_0; - unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); + extern char __vsyscall_page; + unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); extern char __vvar_page; unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); - /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ - __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); + __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, + vsyscall_mode == NATIVE + ? PAGE_KERNEL_VSYSCALL + : PAGE_KERNEL_VVAR); + BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) != + (unsigned long)VSYSCALL_START); + __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); - BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS); + BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != + (unsigned long)VVAR_ADDRESS); } static int __init vsyscall_init(void) diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S index ffa845eae5ca..c9596a9af159 100644 --- a/arch/x86/kernel/vsyscall_emu_64.S +++ b/arch/x86/kernel/vsyscall_emu_64.S @@ -7,21 +7,31 @@ */ #include + #include +#include +#include + +__PAGE_ALIGNED_DATA + .globl __vsyscall_page + .balign PAGE_SIZE, 0xcc + .type __vsyscall_page, @object +__vsyscall_page: + + mov $__NR_gettimeofday, %rax + syscall + ret -/* The unused parts of the page are filled with 0xcc by the linker script. */ + .balign 1024, 0xcc + mov $__NR_time, %rax + syscall + ret -.section .vsyscall_0, "a" -ENTRY(vsyscall_0) - int $VSYSCALL_EMU_VECTOR -END(vsyscall_0) + .balign 1024, 0xcc + mov $__NR_getcpu, %rax + syscall + ret -.section .vsyscall_1, "a" -ENTRY(vsyscall_1) - int $VSYSCALL_EMU_VECTOR -END(vsyscall_1) + .balign 4096, 0xcc -.section .vsyscall_2, "a" -ENTRY(vsyscall_2) - int $VSYSCALL_EMU_VECTOR -END(vsyscall_2) + .size __vsyscall_page, 4096 diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c1d018238f32..e58935c25b94 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -720,6 +720,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, if (is_errata100(regs, address)) return; +#ifdef CONFIG_X86_64 + /* + * Instruction fetch faults in the vsyscall page might need + * emulation. + */ + if (unlikely((error_code & PF_INSTR) && + ((address & ~0xfff) == VSYSCALL_START))) { + if (emulate_vsyscall(regs, address)) + return; + } +#endif + if (unlikely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); -- cgit v1.2.3 From 141d55e6cc590293ea1378f55b9ebd38f5024bf0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 12 Oct 2011 11:53:17 -0700 Subject: x86/irq: Standardize on CONFIG_SPARSE_IRQ=y Sparseirq got introduced in v2.6.28 and Thomas did a huge cleanup around v2.6.38 that eliminated basically all disadvantages of it. So we can remove non-sparseirq support now and simplify our IRQ degrees of freedom a bit. Suggested-and-acked-by: Thomas Gleixner Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/4E95E21D.6090200@oracle.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/include/asm/irq_vectors.h | 12 ++---------- arch/x86/kernel/apic/io_apic.c | 23 ----------------------- 3 files changed, 3 insertions(+), 33 deletions(-) (limited to 'arch/x86/include/asm/irq_vectors.h') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6a47bb22657f..fcd34c30debd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -64,6 +64,7 @@ config X86 select HAVE_TEXT_POKE_SMP select HAVE_GENERIC_HARDIRQS select HAVE_SPARSE_IRQ + select SPARSE_IRQ select GENERIC_FIND_FIRST_BIT select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 7e50f06393aa..4b4448761e88 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -160,19 +160,11 @@ static inline int invalid_vm86_irq(int irq) #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) #ifdef CONFIG_X86_IO_APIC -# ifdef CONFIG_SPARSE_IRQ -# define CPU_VECTOR_LIMIT (64 * NR_CPUS) -# define NR_IRQS \ +# define CPU_VECTOR_LIMIT (64 * NR_CPUS) +# define NR_IRQS \ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ (NR_VECTORS + CPU_VECTOR_LIMIT) : \ (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) -# else -# define CPU_VECTOR_LIMIT (32 * NR_CPUS) -# define NR_IRQS \ - (CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \ - (NR_VECTORS + CPU_VECTOR_LIMIT) : \ - (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) -# endif #else /* !CONFIG_X86_IO_APIC: */ # define NR_IRQS NR_IRQS_LEGACY #endif diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 08f0d165e139..05a30c122b1b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -186,11 +186,7 @@ static struct irq_pin_list *alloc_irq_pin_list(int node) /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -#ifdef CONFIG_SPARSE_IRQ static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; -#else -static struct irq_cfg irq_cfgx[NR_IRQS]; -#endif int __init arch_early_irq_init(void) { @@ -234,7 +230,6 @@ int __init arch_early_irq_init(void) return 0; } -#ifdef CONFIG_SPARSE_IRQ static struct irq_cfg *irq_cfg(unsigned int irq) { return irq_get_chip_data(irq); @@ -269,22 +264,6 @@ static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) kfree(cfg); } -#else - -struct irq_cfg *irq_cfg(unsigned int irq) -{ - return irq < nr_irqs ? irq_cfgx + irq : NULL; -} - -static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) -{ - return irq_cfgx + irq; -} - -static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { } - -#endif - static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) { int res = irq_alloc_desc_at(at, node); @@ -3644,7 +3623,6 @@ int get_nr_irqs_gsi(void) return nr_irqs_gsi; } -#ifdef CONFIG_SPARSE_IRQ int __init arch_probe_nr_irqs(void) { int nr; @@ -3664,7 +3642,6 @@ int __init arch_probe_nr_irqs(void) return NR_IRQS_LEGACY; } -#endif int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr) -- cgit v1.2.3