diff options
Diffstat (limited to 'arch')
203 files changed, 2295 insertions, 1366 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 72f2fa189cc5..a62965d057f6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -222,6 +222,19 @@ config HAVE_PERF_EVENTS_NMI subsystem. Also has support for calculating CPU cycle events to determine how many clock cycles in a given period. +config HAVE_PERF_REGS + bool + help + Support selective register dumps for perf events. This includes + bit-mapping of each registers and a unique architecture id. + +config HAVE_PERF_USER_STACK_DUMP + bool + help + Support user stack dumps for perf event samples. This needs + access to the user stack pointer which is not unified across + architectures. + config HAVE_ARCH_JUMP_LABEL bool @@ -281,4 +294,23 @@ config SECCOMP_FILTER See Documentation/prctl/seccomp_filter.txt for details. +config HAVE_RCU_USER_QS + bool + help + Provide kernel entry/exit hooks necessary for userspace + RCU extended quiescent state. Syscalls need to be wrapped inside + rcu_user_exit()-rcu_user_enter() through the slow path using + TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs + are already protected inside rcu_irq_enter/rcu_irq_exit() but + preemption or signal handling on irq exit still need to be protected. + +config HAVE_VIRT_CPU_ACCOUNTING + bool + +config HAVE_IRQ_TIME_ACCOUNTING + bool + help + Archs need to ensure they use a high enough resolution clock to + support irq time accounting and then call enable_sched_clock_irqtime(). + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index d6fde98b74b3..83638aa096d5 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -28,6 +28,7 @@ #include <linux/tty.h> #include <linux/console.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/reg.h> #include <asm/uaccess.h> @@ -54,9 +55,12 @@ cpu_idle(void) /* FIXME -- EV6 and LCA45 know how to power down the CPU. */ + rcu_idle_enter(); while (!need_resched()) cpu_relax(); - schedule(); + + rcu_idle_exit(); + schedule_preempt_disabled(); } } diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 35ddc02bfa4a..a41ad90a97a6 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -166,6 +166,7 @@ smp_callin(void) DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n", cpuid, current, current->active_mm)); + preempt_disable(); /* Do nothing. */ cpu_idle(); } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c5f9ae5dbd1a..2f88d8d97701 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -6,7 +6,7 @@ config ARM select HAVE_DMA_API_DEBUG select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_DMA_ATTRS - select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7) + select HAVE_DMA_CONTIGUOUS if MMU select HAVE_MEMBLOCK select RTC_LIB select SYS_SUPPORTS_APM_EMULATION diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index f15f82bf3a50..e968a52e4881 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -356,15 +356,15 @@ choice is nothing connected to read from the DCC. config DEBUG_SEMIHOSTING - bool "Kernel low-level debug output via semihosting I" + bool "Kernel low-level debug output via semihosting I/O" help Semihosting enables code running on an ARM target to use the I/O facilities on a host debugger/emulator through a - simple SVC calls. The host debugger or emulator must have + simple SVC call. The host debugger or emulator must have semihosting enabled for the special svc call to be trapped otherwise the kernel will crash. - This is known to work with OpenOCD, as wellas + This is known to work with OpenOCD, as well as ARM's Fast Models, or any other controlling environment that implements semihosting. diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 30eae87ead6d..a051dfbdd7db 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -284,10 +284,10 @@ zImage Image xipImage bootpImage uImage: vmlinux zinstall uinstall install: vmlinux $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ -%.dtb: +%.dtb: scripts $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ -dtbs: +dtbs: scripts $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ # We use MRPROPER_FILES and CLEAN_FILES now diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index b8c64b80bafc..bc67cbff3944 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -653,16 +653,21 @@ __armv7_mmu_cache_on: mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs #endif mrc p15, 0, r0, c1, c0, 0 @ read control reg + bic r0, r0, #1 << 28 @ clear SCTLR.TRE orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement orr r0, r0, #0x003c @ write buffer #ifdef CONFIG_MMU #ifdef CONFIG_CPU_ENDIAN_BE8 orr r0, r0, #1 << 25 @ big-endian page tables #endif + mrcne p15, 0, r6, c2, c0, 2 @ read ttb control reg orrne r0, r0, #1 @ MMU enabled movne r1, #0xfffffffd @ domain 0 = client + bic r6, r6, #1 << 31 @ 32-bit translation system + bic r6, r6, #3 << 0 @ use only ttbr0 mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer mcrne p15, 0, r1, c3, c0, 0 @ load domain access control + mcrne p15, 0, r6, c2, c0, 2 @ load ttb control #endif mcr p15, 0, r0, c7, c5, 4 @ ISB mcr p15, 0, r0, c1, c0, 0 @ load control register diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi index 66389c1c6f62..7c95f76398de 100644 --- a/arch/arm/boot/dts/at91sam9260.dtsi +++ b/arch/arm/boot/dts/at91sam9260.dtsi @@ -104,6 +104,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff600 { @@ -113,6 +114,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff800 { @@ -122,6 +124,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@fffff200 { diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index b460d6ce9eb5..195019b7ca0e 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -95,6 +95,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff400 { @@ -104,6 +105,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff600 { @@ -113,6 +115,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioD: gpio@fffff800 { @@ -122,6 +125,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioE: gpio@fffffa00 { @@ -131,6 +135,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@ffffee00 { diff --git a/arch/arm/boot/dts/at91sam9g25ek.dts b/arch/arm/boot/dts/at91sam9g25ek.dts index 7829a4d0cb22..96514c134e54 100644 --- a/arch/arm/boot/dts/at91sam9g25ek.dts +++ b/arch/arm/boot/dts/at91sam9g25ek.dts @@ -15,7 +15,7 @@ compatible = "atmel,at91sam9g25ek", "atmel,at91sam9x5ek", "atmel,at91sam9x5", "atmel,at91sam9"; chosen { - bootargs = "128M console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=ubifs ubi.mtd=1 root=ubi0:rootfs"; + bootargs = "console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=ubifs ubi.mtd=1 root=ubi0:rootfs"; }; ahb { diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index bafa8806fc17..63751b1e744b 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -113,6 +113,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff400 { @@ -122,6 +123,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff600 { @@ -131,6 +133,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioD: gpio@fffff800 { @@ -140,6 +143,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioE: gpio@fffffa00 { @@ -149,6 +153,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@ffffee00 { diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi index bfac0dfc332c..ef9336ae9614 100644 --- a/arch/arm/boot/dts/at91sam9n12.dtsi +++ b/arch/arm/boot/dts/at91sam9n12.dtsi @@ -107,6 +107,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff600 { @@ -116,6 +117,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff800 { @@ -125,6 +127,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioD: gpio@fffffa00 { @@ -134,6 +137,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@fffff200 { diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 4a18c393b136..8a387a8d61b7 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -115,6 +115,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioB: gpio@fffff600 { @@ -124,6 +125,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioC: gpio@fffff800 { @@ -133,6 +135,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; pioD: gpio@fffffa00 { @@ -142,6 +145,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; }; dbgu: serial@fffff200 { diff --git a/arch/arm/configs/armadillo800eva_defconfig b/arch/arm/configs/armadillo800eva_defconfig index 7d8718468e0d..90610c7030f7 100644 --- a/arch/arm/configs/armadillo800eva_defconfig +++ b/arch/arm/configs/armadillo800eva_defconfig @@ -33,7 +33,7 @@ CONFIG_AEABI=y CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_CMDLINE="console=tty0 console=ttySC1,115200 earlyprintk=sh-sci.1,115200 ignore_loglevel root=/dev/nfs ip=dhcp nfsroot=,rsize=4096,wsize=4096" +CONFIG_CMDLINE="console=tty0 console=ttySC1,115200 earlyprintk=sh-sci.1,115200 ignore_loglevel root=/dev/nfs ip=dhcp nfsroot=,rsize=4096,wsize=4096 rw" CONFIG_CMDLINE_FORCE=y CONFIG_KEXEC=y CONFIG_VFP=y diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 03fb93621d0d..5c8b3bf4d825 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -320,4 +320,12 @@ .size \name , . - \name .endm + .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req +#ifndef CONFIG_CPU_USE_DOMAINS + adds \tmp, \addr, #\size - 1 + sbcccs \tmp, \tmp, \limit + bcs \bad +#endif + .endm + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 2ae842df4551..5c44dcb0987b 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -203,6 +203,13 @@ static inline void dma_free_writecombine(struct device *dev, size_t size, } /* + * This can be called during early boot to increase the size of the atomic + * coherent DMA pool above the default value of 256KiB. It must be called + * before postcore_initcall. + */ +extern void __init init_dma_coherent_pool_size(unsigned long size); + +/* * This can be called during boot to increase the size of the consistent * DMA region above it's default value of 2MB. It must be called before the * memory allocator is initialised, i.e. before any core_initcall. diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index e965f1b560f1..5f6ddcc56452 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -187,6 +187,7 @@ static inline unsigned long __phys_to_virt(unsigned long x) #define __phys_to_virt(x) ((x) - PHYS_OFFSET + PAGE_OFFSET) #endif #endif +#endif /* __ASSEMBLY__ */ #ifndef PHYS_OFFSET #ifdef PLAT_PHYS_OFFSET @@ -196,6 +197,8 @@ static inline unsigned long __phys_to_virt(unsigned long x) #endif #endif +#ifndef __ASSEMBLY__ + /* * PFNs are used to describe any physical page; this means * PFN 0 == physical address 0. diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 314d4664eae7..99a19512ee26 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -199,6 +199,9 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, { pgtable_page_dtor(pte); +#ifdef CONFIG_ARM_LPAE + tlb_add_flush(tlb, addr); +#else /* * With the classic ARM MMU, a pte page has two corresponding pmd * entries, each covering 1MB. @@ -206,6 +209,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, addr &= PMD_MASK; tlb_add_flush(tlb, addr + SZ_1M - PAGE_SIZE); tlb_add_flush(tlb, addr + SZ_1M); +#endif tlb_remove_page(tlb, pte); } diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 479a6352e0b5..77bd79f2ffdb 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -101,28 +101,39 @@ extern int __get_user_1(void *); extern int __get_user_2(void *); extern int __get_user_4(void *); -#define __get_user_x(__r2,__p,__e,__s,__i...) \ +#define __GUP_CLOBBER_1 "lr", "cc" +#ifdef CONFIG_CPU_USE_DOMAINS +#define __GUP_CLOBBER_2 "ip", "lr", "cc" +#else +#define __GUP_CLOBBER_2 "lr", "cc" +#endif +#define __GUP_CLOBBER_4 "lr", "cc" + +#define __get_user_x(__r2,__p,__e,__l,__s) \ __asm__ __volatile__ ( \ __asmeq("%0", "r0") __asmeq("%1", "r2") \ + __asmeq("%3", "r1") \ "bl __get_user_" #__s \ : "=&r" (__e), "=r" (__r2) \ - : "0" (__p) \ - : __i, "cc") + : "0" (__p), "r" (__l) \ + : __GUP_CLOBBER_##__s) -#define get_user(x,p) \ +#define __get_user_check(x,p) \ ({ \ + unsigned long __limit = current_thread_info()->addr_limit - 1; \ register const typeof(*(p)) __user *__p asm("r0") = (p);\ register unsigned long __r2 asm("r2"); \ + register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ case 1: \ - __get_user_x(__r2, __p, __e, 1, "lr"); \ - break; \ + __get_user_x(__r2, __p, __e, __l, 1); \ + break; \ case 2: \ - __get_user_x(__r2, __p, __e, 2, "r3", "lr"); \ + __get_user_x(__r2, __p, __e, __l, 2); \ break; \ case 4: \ - __get_user_x(__r2, __p, __e, 4, "lr"); \ + __get_user_x(__r2, __p, __e, __l, 4); \ break; \ default: __e = __get_user_bad(); break; \ } \ @@ -130,42 +141,57 @@ extern int __get_user_4(void *); __e; \ }) +#define get_user(x,p) \ + ({ \ + might_fault(); \ + __get_user_check(x,p); \ + }) + extern int __put_user_1(void *, unsigned int); extern int __put_user_2(void *, unsigned int); extern int __put_user_4(void *, unsigned int); extern int __put_user_8(void *, unsigned long long); -#define __put_user_x(__r2,__p,__e,__s) \ +#define __put_user_x(__r2,__p,__e,__l,__s) \ __asm__ __volatile__ ( \ __asmeq("%0", "r0") __asmeq("%2", "r2") \ + __asmeq("%3", "r1") \ "bl __put_user_" #__s \ : "=&r" (__e) \ - : "0" (__p), "r" (__r2) \ + : "0" (__p), "r" (__r2), "r" (__l) \ : "ip", "lr", "cc") -#define put_user(x,p) \ +#define __put_user_check(x,p) \ ({ \ + unsigned long __limit = current_thread_info()->addr_limit - 1; \ register const typeof(*(p)) __r2 asm("r2") = (x); \ register const typeof(*(p)) __user *__p asm("r0") = (p);\ + register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ case 1: \ - __put_user_x(__r2, __p, __e, 1); \ + __put_user_x(__r2, __p, __e, __l, 1); \ break; \ case 2: \ - __put_user_x(__r2, __p, __e, 2); \ + __put_user_x(__r2, __p, __e, __l, 2); \ break; \ case 4: \ - __put_user_x(__r2, __p, __e, 4); \ + __put_user_x(__r2, __p, __e, __l, 4); \ break; \ case 8: \ - __put_user_x(__r2, __p, __e, 8); \ + __put_user_x(__r2, __p, __e, __l, 8); \ break; \ default: __e = __put_user_bad(); break; \ } \ __e; \ }) +#define put_user(x,p) \ + ({ \ + might_fault(); \ + __put_user_check(x,p); \ + }) + #else /* CONFIG_MMU */ /* @@ -219,6 +245,7 @@ do { \ unsigned long __gu_addr = (unsigned long)(ptr); \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ + might_fault(); \ switch (sizeof(*(ptr))) { \ case 1: __get_user_asm_byte(__gu_val,__gu_addr,err); break; \ case 2: __get_user_asm_half(__gu_val,__gu_addr,err); break; \ @@ -300,6 +327,7 @@ do { \ unsigned long __pu_addr = (unsigned long)(ptr); \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ + might_fault(); \ switch (sizeof(*(ptr))) { \ case 1: __put_user_asm_byte(__pu_val,__pu_addr,err); break; \ case 2: __put_user_asm_half(__pu_val,__pu_addr,err); break; \ diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 0cab47d4a83f..2fde5fd1acce 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -404,6 +404,7 @@ #define __NR_setns (__NR_SYSCALL_BASE+375) #define __NR_process_vm_readv (__NR_SYSCALL_BASE+376) #define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) + /* 378 for kcmp */ /* * The following SWIs are ARM private. @@ -483,6 +484,7 @@ */ #define __IGNORE_fadvise64_64 #define __IGNORE_migrate_pages +#define __IGNORE_kcmp #endif /* __KERNEL__ */ #endif /* __ASM_ARM_UNISTD_H */ diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 463ff4a0ec8a..e337879595e5 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -387,6 +387,7 @@ /* 375 */ CALL(sys_setns) CALL(sys_process_vm_readv) CALL(sys_process_vm_writev) + CALL(sys_ni_syscall) /* reserved for sys_kcmp */ #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index ba386bd94107..281bf3301241 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -159,6 +159,12 @@ static int debug_arch_supported(void) arch >= ARM_DEBUG_ARCH_V7_1; } +/* Can we determine the watchpoint access type from the fsr? */ +static int debug_exception_updates_fsr(void) +{ + return 0; +} + /* Determine number of WRP registers available. */ static int get_num_wrp_resources(void) { @@ -604,13 +610,14 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) /* Aligned */ break; case 1: - /* Allow single byte watchpoint. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) - break; case 2: /* Allow halfword watchpoints and breakpoints. */ if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) break; + case 3: + /* Allow single byte watchpoint. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) + break; default: ret = -EINVAL; goto out; @@ -619,18 +626,35 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) info->address &= ~alignment_mask; info->ctrl.len <<= offset; - /* - * Currently we rely on an overflow handler to take - * care of single-stepping the breakpoint when it fires. - * In the case of userspace breakpoints on a core with V7 debug, - * we can use the mismatch feature as a poor-man's hardware - * single-step, but this only works for per-task breakpoints. - */ - if (!bp->overflow_handler && (arch_check_bp_in_kernelspace(bp) || - !core_has_mismatch_brps() || !bp->hw.bp_target)) { - pr_warning("overflow handler required but none found\n"); - ret = -EINVAL; + if (!bp->overflow_handler) { + /* + * Mismatch breakpoints are required for single-stepping + * breakpoints. + */ + if (!core_has_mismatch_brps()) + return -EINVAL; + + /* We don't allow mismatch breakpoints in kernel space. */ + if (arch_check_bp_in_kernelspace(bp)) + return -EPERM; + + /* + * Per-cpu breakpoints are not supported by our stepping + * mechanism. + */ + if (!bp->hw.bp_target) + return -EINVAL; + + /* + * We only support specific access types if the fsr + * reports them. + */ + if (!debug_exception_updates_fsr() && + (info->ctrl.type == ARM_BREAKPOINT_LOAD || + info->ctrl.type == ARM_BREAKPOINT_STORE)) + return -EINVAL; } + out: return ret; } @@ -706,10 +730,12 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, goto unlock; /* Check that the access type matches. */ - access = (fsr & ARM_FSR_ACCESS_MASK) ? HW_BREAKPOINT_W : - HW_BREAKPOINT_R; - if (!(access & hw_breakpoint_type(wp))) - goto unlock; + if (debug_exception_updates_fsr()) { + access = (fsr & ARM_FSR_ACCESS_MASK) ? + HW_BREAKPOINT_W : HW_BREAKPOINT_R; + if (!(access & hw_breakpoint_type(wp))) + goto unlock; + } /* We have a winner. */ info->trigger = addr; diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index fef42b21cecb..e1f906989bb8 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -11,7 +11,6 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/clk.h> -#include <linux/cpufreq.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/err.h> @@ -96,7 +95,52 @@ static void twd_timer_stop(struct clock_event_device *clk) disable_percpu_irq(clk->irq); } -#ifdef CONFIG_CPU_FREQ +#ifdef CONFIG_COMMON_CLK + +/* + * Updates clockevent frequency when the cpu frequency changes. + * Called on the cpu that is changing frequency with interrupts disabled. + */ +static void twd_update_frequency(void *new_rate) +{ + twd_timer_rate = *((unsigned long *) new_rate); + + clockevents_update_freq(*__this_cpu_ptr(twd_evt), twd_timer_rate); +} + +static int twd_rate_change(struct notifier_block *nb, + unsigned long flags, void *data) +{ + struct clk_notifier_data *cnd = data; + + /* + * The twd clock events must be reprogrammed to account for the new + * frequency. The timer is local to a cpu, so cross-call to the + * changing cpu. + */ + if (flags == POST_RATE_CHANGE) + smp_call_function(twd_update_frequency, + (void *)&cnd->new_rate, 1); + + return NOTIFY_OK; +} + +static struct notifier_block twd_clk_nb = { + .notifier_call = twd_rate_change, +}; + +static int twd_clk_init(void) +{ + if (twd_evt && *__this_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) + return clk_notifier_register(twd_clk, &twd_clk_nb); + + return 0; +} +core_initcall(twd_clk_init); + +#elif defined (CONFIG_CPU_FREQ) + +#include <linux/cpufreq.h> /* * Updates clockevent frequency when the cpu frequency changes. diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index f7945218b8c6..b0179b89a04c 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -420,20 +420,23 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) #endif instr = *(u32 *) pc; } else if (thumb_mode(regs)) { - get_user(instr, (u16 __user *)pc); + if (get_user(instr, (u16 __user *)pc)) + goto die_sig; if (is_wide_instruction(instr)) { unsigned int instr2; - get_user(instr2, (u16 __user *)pc+1); + if (get_user(instr2, (u16 __user *)pc+1)) + goto die_sig; instr <<= 16; instr |= instr2; } - } else { - get_user(instr, (u32 __user *)pc); + } else if (get_user(instr, (u32 __user *)pc)) { + goto die_sig; } if (call_undef_hook(regs, instr) == 0) return; +die_sig: #ifdef CONFIG_DEBUG_USER if (user_debug & UDBG_UNDEFINED) { printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n", diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c index d6dacc69254e..395d5fbb8fa2 100644 --- a/arch/arm/lib/delay.c +++ b/arch/arm/lib/delay.c @@ -59,6 +59,7 @@ void __init init_current_timer_delay(unsigned long freq) { pr_info("Switching to timer-based delay loop\n"); lpj_fine = freq / HZ; + loops_per_jiffy = lpj_fine; arm_delay_ops.delay = __timer_delay; arm_delay_ops.const_udelay = __timer_const_udelay; arm_delay_ops.udelay = __timer_udelay; diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 11093a7c3e32..9b06bb41fca6 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -16,8 +16,9 @@ * __get_user_X * * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved * Outputs: r0 is the error code - * r2, r3 contains the zero-extended value + * r2 contains the zero-extended value * lr corrupted * * No other registers must be altered. (see <asm/uaccess.h> @@ -27,33 +28,39 @@ * Note also that it is intended that __get_user_bad is not global. */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/errno.h> #include <asm/domain.h> ENTRY(__get_user_1) + check_uaccess r0, 1, r1, r2, __get_user_bad 1: TUSER(ldrb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__get_user_1) ENTRY(__get_user_2) -#ifdef CONFIG_THUMB2_KERNEL -2: TUSER(ldrb) r2, [r0] -3: TUSER(ldrb) r3, [r0, #1] + check_uaccess r0, 2, r1, r2, __get_user_bad +#ifdef CONFIG_CPU_USE_DOMAINS +rb .req ip +2: ldrbt r2, [r0], #1 +3: ldrbt rb, [r0], #0 #else -2: TUSER(ldrb) r2, [r0], #1 -3: TUSER(ldrb) r3, [r0] +rb .req r0 +2: ldrb r2, [r0] +3: ldrb rb, [r0, #1] #endif #ifndef __ARMEB__ - orr r2, r2, r3, lsl #8 + orr r2, r2, rb, lsl #8 #else - orr r2, r3, r2, lsl #8 + orr r2, rb, r2, lsl #8 #endif mov r0, #0 mov pc, lr ENDPROC(__get_user_2) ENTRY(__get_user_4) + check_uaccess r0, 4, r1, r2, __get_user_bad 4: TUSER(ldr) r2, [r0] mov r0, #0 mov pc, lr diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index 7db25990c589..3d73dcb959b0 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -16,6 +16,7 @@ * __put_user_X * * Inputs: r0 contains the address + * r1 contains the address limit, which must be preserved * r2, r3 contains the value * Outputs: r0 is the error code * lr corrupted @@ -27,16 +28,19 @@ * Note also that it is intended that __put_user_bad is not global. */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/errno.h> #include <asm/domain.h> ENTRY(__put_user_1) + check_uaccess r0, 1, r1, ip, __put_user_bad 1: TUSER(strb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_1) ENTRY(__put_user_2) + check_uaccess r0, 2, r1, ip, __put_user_bad mov ip, r2, lsr #8 #ifdef CONFIG_THUMB2_KERNEL #ifndef __ARMEB__ @@ -60,12 +64,14 @@ ENTRY(__put_user_2) ENDPROC(__put_user_2) ENTRY(__put_user_4) + check_uaccess r0, 4, r1, ip, __put_user_bad 4: TUSER(str) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_4) ENTRY(__put_user_8) + check_uaccess r0, 8, r1, ip, __put_user_bad #ifdef CONFIG_THUMB2_KERNEL 5: TUSER(str) r2, [r0] 6: TUSER(str) r3, [r0, #4] diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c index 104ca40d8d18..aaa443b48c91 100644 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@ -197,7 +197,7 @@ void __init at91rm9200_timer_init(void) at91_st_read(AT91_ST_SR); /* Make IRQs happen for the system timer */ - setup_irq(AT91_ID_SYS, &at91rm9200_timer_irq); + setup_irq(NR_IRQS_LEGACY + AT91_ID_SYS, &at91rm9200_timer_irq); /* The 32KiHz "Slow Clock" (tick every 30517.58 nanoseconds) is used * directly for the clocksource and all clockevents, after adjusting diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c index 7b9c2ba396ed..bce572a530ef 100644 --- a/arch/arm/mach-at91/at91sam9260_devices.c +++ b/arch/arm/mach-at91/at91sam9260_devices.c @@ -726,6 +726,8 @@ static struct resource rtt_resources[] = { .flags = IORESOURCE_MEM, }, { .flags = IORESOURCE_MEM, + }, { + .flags = IORESOURCE_IRQ, }, }; @@ -744,10 +746,12 @@ static void __init at91_add_device_rtt_rtc(void) * The second resource is needed: * GPBR will serve as the storage for RTC time offset */ - at91sam9260_rtt_device.num_resources = 2; + at91sam9260_rtt_device.num_resources = 3; rtt_resources[1].start = AT91SAM9260_BASE_GPBR + 4 * CONFIG_RTC_DRV_AT91SAM9_GPBR; rtt_resources[1].end = rtt_resources[1].start + 3; + rtt_resources[2].start = NR_IRQS_LEGACY + AT91_ID_SYS; + rtt_resources[2].end = NR_IRQS_LEGACY + AT91_ID_SYS; } #else static void __init at91_add_device_rtt_rtc(void) diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c index 8df5c1bdff92..bc2590d712d0 100644 --- a/arch/arm/mach-at91/at91sam9261_devices.c +++ b/arch/arm/mach-at91/at91sam9261_devices.c @@ -609,6 +609,8 @@ static struct resource rtt_resources[] = { .flags = IORESOURCE_MEM, }, { .flags = IORESOURCE_MEM, + }, { + .flags = IORESOURCE_IRQ, } }; @@ -626,10 +628,12 @@ static void __init at91_add_device_rtt_rtc(void) * The second resource is needed: * GPBR will serve as the storage for RTC time offset */ - at91sam9261_rtt_device.num_resources = 2; + at91sam9261_rtt_device.num_resources = 3; rtt_resources[1].start = AT91SAM9261_BASE_GPBR + 4 * CONFIG_RTC_DRV_AT91SAM9_GPBR; rtt_resources[1].end = rtt_resources[1].start + 3; + rtt_resources[2].start = NR_IRQS_LEGACY + AT91_ID_SYS; + rtt_resources[2].end = NR_IRQS_LEGACY + AT91_ID_SYS; } #else static void __init at91_add_device_rtt_rtc(void) diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c index eb6bbf86fb9f..9b6ca734f1a9 100644 --- a/arch/arm/mach-at91/at91sam9263_devices.c +++ b/arch/arm/mach-at91/at91sam9263_devices.c @@ -990,6 +990,8 @@ static struct resource rtt0_resources[] = { .flags = IORESOURCE_MEM, }, { .flags = IORESOURCE_MEM, + }, { + .flags = IORESOURCE_IRQ, } }; @@ -1006,6 +1008,8 @@ static struct resource rtt1_resources[] = { .flags = IORESOURCE_MEM, }, { .flags = IORESOURCE_MEM, + }, { + .flags = IORESOURCE_IRQ, } }; @@ -1027,14 +1031,14 @@ static void __init at91_add_device_rtt_rtc(void) * The second resource is needed only for the chosen RTT: * GPBR will serve as the storage for RTC time offset */ - at91sam9263_rtt0_device.num_resources = 2; + at91sam9263_rtt0_device.num_resources = 3; at91sam9263_rtt1_device.num_resources = 1; pdev = &at91sam9263_rtt0_device; r = rtt0_resources; break; case 1: at91sam9263_rtt0_device.num_resources = 1; - at91sam9263_rtt1_device.num_resources = 2; + at91sam9263_rtt1_device.num_resources = 3; pdev = &at91sam9263_rtt1_device; r = rtt1_resources; break; @@ -1047,6 +1051,8 @@ static void __init at91_add_device_rtt_rtc(void) pdev->name = "rtc-at91sam9"; r[1].start = AT91SAM9263_BASE_GPBR + 4 * CONFIG_RTC_DRV_AT91SAM9_GPBR; r[1].end = r[1].start + 3; + r[2].start = NR_IRQS_LEGACY + AT91_ID_SYS; + r[2].end = NR_IRQS_LEGACY + AT91_ID_SYS; } #else static void __init at91_add_device_rtt_rtc(void) diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c index 06073996a382..1b47319ca00b 100644 --- a/arch/arm/mach-at91/at91sam9g45_devices.c +++ b/arch/arm/mach-at91/at91sam9g45_devices.c @@ -1293,6 +1293,8 @@ static struct resource rtt_resources[] = { .flags = IORESOURCE_MEM, }, { .flags = IORESOURCE_MEM, + }, { + .flags = IORESOURCE_IRQ, } }; @@ -1310,10 +1312,12 @@ static void __init at91_add_device_rtt_rtc(void) * The second resource is needed: * GPBR will serve as the storage for RTC time offset */ - at91sam9g45_rtt_device.num_resources = 2; + at91sam9g45_rtt_device.num_resources = 3; rtt_resources[1].start = AT91SAM9G45_BASE_GPBR + 4 * CONFIG_RTC_DRV_AT91SAM9_GPBR; rtt_resources[1].end = rtt_resources[1].start + 3; + rtt_resources[2].start = NR_IRQS_LEGACY + AT91_ID_SYS; + rtt_resources[2].end = NR_IRQS_LEGACY + AT91_ID_SYS; } #else static void __init at91_add_device_rtt_rtc(void) diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c index f09fff932172..b3d365dadef5 100644 --- a/arch/arm/mach-at91/at91sam9rl_devices.c +++ b/arch/arm/mach-at91/at91sam9rl_devices.c @@ -688,6 +688,8 @@ static struct resource rtt_resources[] = { .flags = IORESOURCE_MEM, }, { .flags = IORESOURCE_MEM, + }, { + .flags = IORESOURCE_IRQ, } }; @@ -705,10 +707,12 @@ static void __init at91_add_device_rtt_rtc(void) * The second resource is needed: * GPBR will serve as the storage for RTC time offset */ - at91sam9rl_rtt_device.num_resources = 2; + at91sam9rl_rtt_device.num_resources = 3; rtt_resources[1].start = AT91SAM9RL_BASE_GPBR + 4 * CONFIG_RTC_DRV_AT91SAM9_GPBR; rtt_resources[1].end = rtt_resources[1].start + 3; + rtt_resources[2].start = NR_IRQS_LEGACY + AT91_ID_SYS; + rtt_resources[2].end = NR_IRQS_LEGACY + AT91_ID_SYS; } #else static void __init at91_add_device_rtt_rtc(void) diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c index de2ec6b8fea7..188c82971ebd 100644 --- a/arch/arm/mach-at91/clock.c +++ b/arch/arm/mach-at91/clock.c @@ -63,6 +63,12 @@ EXPORT_SYMBOL_GPL(at91_pmc_base); #define cpu_has_300M_plla() (cpu_is_at91sam9g10()) +#define cpu_has_240M_plla() (cpu_is_at91sam9261() \ + || cpu_is_at91sam9263() \ + || cpu_is_at91sam9rl()) + +#define cpu_has_210M_plla() (cpu_is_at91sam9260()) + #define cpu_has_pllb() (!(cpu_is_at91sam9rl() \ || cpu_is_at91sam9g45() \ || cpu_is_at91sam9x5() \ @@ -706,6 +712,12 @@ static int __init at91_pmc_init(unsigned long main_clock) } else if (cpu_has_800M_plla()) { if (plla.rate_hz > 800000000) pll_overclock = true; + } else if (cpu_has_240M_plla()) { + if (plla.rate_hz > 240000000) + pll_overclock = true; + } else if (cpu_has_210M_plla()) { + if (plla.rate_hz > 210000000) + pll_overclock = true; } else { if (plla.rate_hz > 209000000) pll_overclock = true; diff --git a/arch/arm/mach-gemini/irq.c b/arch/arm/mach-gemini/irq.c index ca70e5fcc7ac..020852d3bdd8 100644 --- a/arch/arm/mach-gemini/irq.c +++ b/arch/arm/mach-gemini/irq.c @@ -17,6 +17,7 @@ #include <linux/sched.h> #include <asm/irq.h> #include <asm/mach/irq.h> +#include <asm/system_misc.h> #include <mach/hardware.h> #define IRQ_SOURCE(base_addr) (base_addr + 0x00) diff --git a/arch/arm/mach-imx/clk-imx25.c b/arch/arm/mach-imx/clk-imx25.c index fdd8cc87c9fe..d20d4795f4ea 100644 --- a/arch/arm/mach-imx/clk-imx25.c +++ b/arch/arm/mach-imx/clk-imx25.c @@ -222,10 +222,8 @@ int __init mx25_clocks_init(void) clk_register_clkdev(clk[lcdc_ipg], "ipg", "imx-fb.0"); clk_register_clkdev(clk[lcdc_ahb], "ahb", "imx-fb.0"); clk_register_clkdev(clk[wdt_ipg], NULL, "imx2-wdt.0"); - clk_register_clkdev(clk[ssi1_ipg_per], "per", "imx-ssi.0"); - clk_register_clkdev(clk[ssi1_ipg], "ipg", "imx-ssi.0"); - clk_register_clkdev(clk[ssi2_ipg_per], "per", "imx-ssi.1"); - clk_register_clkdev(clk[ssi2_ipg], "ipg", "imx-ssi.1"); + clk_register_clkdev(clk[ssi1_ipg], NULL, "imx-ssi.0"); + clk_register_clkdev(clk[ssi2_ipg], NULL, "imx-ssi.1"); clk_register_clkdev(clk[esdhc1_ipg_per], "per", "sdhci-esdhc-imx25.0"); clk_register_clkdev(clk[esdhc1_ipg], "ipg", "sdhci-esdhc-imx25.0"); clk_register_clkdev(clk[esdhc1_ahb], "ahb", "sdhci-esdhc-imx25.0"); @@ -243,6 +241,6 @@ int __init mx25_clocks_init(void) clk_register_clkdev(clk[sdma_ahb], "ahb", "imx35-sdma"); clk_register_clkdev(clk[iim_ipg], "iim", NULL); - mxc_timer_init(MX25_IO_ADDRESS(MX25_GPT1_BASE_ADDR), 54); + mxc_timer_init(MX25_IO_ADDRESS(MX25_GPT1_BASE_ADDR), MX25_INT_GPT1); return 0; } diff --git a/arch/arm/mach-imx/clk-imx35.c b/arch/arm/mach-imx/clk-imx35.c index c6422fb10bae..65fb8bcd86cb 100644 --- a/arch/arm/mach-imx/clk-imx35.c +++ b/arch/arm/mach-imx/clk-imx35.c @@ -230,10 +230,8 @@ int __init mx35_clocks_init() clk_register_clkdev(clk[ipu_gate], NULL, "mx3_sdc_fb"); clk_register_clkdev(clk[owire_gate], NULL, "mxc_w1"); clk_register_clkdev(clk[sdma_gate], NULL, "imx35-sdma"); - clk_register_clkdev(clk[ipg], "ipg", "imx-ssi.0"); - clk_register_clkdev(clk[ssi1_div_post], "per", "imx-ssi.0"); - clk_register_clkdev(clk[ipg], "ipg", "imx-ssi.1"); - clk_register_clkdev(clk[ssi2_div_post], "per", "imx-ssi.1"); + clk_register_clkdev(clk[ssi1_gate], NULL, "imx-ssi.0"); + clk_register_clkdev(clk[ssi2_gate], NULL, "imx-ssi.1"); /* i.mx35 has the i.mx21 type uart */ clk_register_clkdev(clk[uart1_gate], "per", "imx21-uart.0"); clk_register_clkdev(clk[ipg], "ipg", "imx21-uart.0"); diff --git a/arch/arm/mach-imx/mach-armadillo5x0.c b/arch/arm/mach-imx/mach-armadillo5x0.c index 2c6ab3273f9e..5985ed1b8c98 100644 --- a/arch/arm/mach-imx/mach-armadillo5x0.c +++ b/arch/arm/mach-imx/mach-armadillo5x0.c @@ -526,7 +526,8 @@ static void __init armadillo5x0_init(void) imx31_add_mxc_nand(&armadillo5x0_nand_board_info); /* set NAND page size to 2k if not configured via boot mode pins */ - __raw_writel(__raw_readl(MXC_CCM_RCSR) | (1 << 30), MXC_CCM_RCSR); + __raw_writel(__raw_readl(mx3_ccm_base + MXC_CCM_RCSR) | + (1 << 30), mx3_ccm_base + MXC_CCM_RCSR); /* RTC */ /* Get RTC IRQ and register the chip */ diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index 3226077735b1..1201191d7f1b 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -517,6 +517,13 @@ void __init kirkwood_wdt_init(void) void __init kirkwood_init_early(void) { orion_time_set_base(TIMER_VIRT_BASE); + + /* + * Some Kirkwood devices allocate their coherent buffers from atomic + * context. Increase size of atomic coherent pool to make sure such + * the allocations won't fail. + */ + init_dma_coherent_pool_size(SZ_1M); } int kirkwood_tclk; diff --git a/arch/arm/mach-kirkwood/db88f6281-bp-setup.c b/arch/arm/mach-kirkwood/db88f6281-bp-setup.c index d93359379598..be90b7d0e10b 100644 --- a/arch/arm/mach-kirkwood/db88f6281-bp-setup.c +++ b/arch/arm/mach-kirkwood/db88f6281-bp-setup.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/init.h> +#include <linux/sizes.h> #include <linux/platform_device.h> #include <linux/mtd/partitions.h> #include <linux/ata_platform.h> diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 8dabfe81d07c..ff886e01a0b0 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -261,7 +261,7 @@ static void __init apx4devkit_init(void) enable_clk_enet_out(); if (IS_BUILTIN(CONFIG_PHYLIB)) - phy_register_fixup_for_uid(PHY_ID_KS8051, MICREL_PHY_ID_MASK, + phy_register_fixup_for_uid(PHY_ID_KSZ8051, MICREL_PHY_ID_MASK, apx4devkit_phy_fixup); mxsfb_pdata.mode_list = apx4devkit_video_modes; diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index fcd4e85c4ddc..346fd26f3aa6 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -232,10 +232,11 @@ config MACH_OMAP3_PANDORA select OMAP_PACKAGE_CBB select REGULATOR_FIXED_VOLTAGE if REGULATOR -config MACH_OMAP3_TOUCHBOOK +config MACH_TOUCHBOOK bool "OMAP3 Touch Book" depends on ARCH_OMAP3 default y + select OMAP_PACKAGE_CBB config MACH_OMAP_3430SDP bool "OMAP 3430 SDP board" diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index f6a24b3f9c4f..34c2c7f59f0a 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -255,7 +255,7 @@ obj-$(CONFIG_MACH_OMAP_3630SDP) += board-zoom-display.o obj-$(CONFIG_MACH_CM_T35) += board-cm-t35.o obj-$(CONFIG_MACH_CM_T3517) += board-cm-t3517.o obj-$(CONFIG_MACH_IGEP0020) += board-igep0020.o -obj-$(CONFIG_MACH_OMAP3_TOUCHBOOK) += board-omap3touchbook.o +obj-$(CONFIG_MACH_TOUCHBOOK) += board-omap3touchbook.o obj-$(CONFIG_MACH_OMAP_4430SDP) += board-4430sdp.o obj-$(CONFIG_MACH_OMAP4_PANDA) += board-omap4panda.o diff --git a/arch/arm/mach-omap2/clock33xx_data.c b/arch/arm/mach-omap2/clock33xx_data.c index 25bbcc7ca4dc..ae27de8899a6 100644 --- a/arch/arm/mach-omap2/clock33xx_data.c +++ b/arch/arm/mach-omap2/clock33xx_data.c @@ -1036,13 +1036,13 @@ static struct omap_clk am33xx_clks[] = { CLK(NULL, "mmu_fck", &mmu_fck, CK_AM33XX), CLK(NULL, "smartreflex0_fck", &smartreflex0_fck, CK_AM33XX), CLK(NULL, "smartreflex1_fck", &smartreflex1_fck, CK_AM33XX), - CLK(NULL, "gpt1_fck", &timer1_fck, CK_AM33XX), - CLK(NULL, "gpt2_fck", &timer2_fck, CK_AM33XX), - CLK(NULL, "gpt3_fck", &timer3_fck, CK_AM33XX), - CLK(NULL, "gpt4_fck", &timer4_fck, CK_AM33XX), - CLK(NULL, "gpt5_fck", &timer5_fck, CK_AM33XX), - CLK(NULL, "gpt6_fck", &timer6_fck, CK_AM33XX), - CLK(NULL, "gpt7_fck", &timer7_fck, CK_AM33XX), + CLK(NULL, "timer1_fck", &timer1_fck, CK_AM33XX), + CLK(NULL, "timer2_fck", &timer2_fck, CK_AM33XX), + CLK(NULL, "timer3_fck", &timer3_fck, CK_AM33XX), + CLK(NULL, "timer4_fck", &timer4_fck, CK_AM33XX), + CLK(NULL, "timer5_fck", &timer5_fck, CK_AM33XX), + CLK(NULL, "timer6_fck", &timer6_fck, CK_AM33XX), + CLK(NULL, "timer7_fck", &timer7_fck, CK_AM33XX), CLK(NULL, "usbotg_fck", &usbotg_fck, CK_AM33XX), CLK(NULL, "ieee5000_fck", &ieee5000_fck, CK_AM33XX), CLK(NULL, "wdt1_fck", &wdt1_fck, CK_AM33XX), diff --git a/arch/arm/mach-omap2/clockdomain2xxx_3xxx.c b/arch/arm/mach-omap2/clockdomain2xxx_3xxx.c index a0d68dbecfa3..f99e65cfb862 100644 --- a/arch/arm/mach-omap2/clockdomain2xxx_3xxx.c +++ b/arch/arm/mach-omap2/clockdomain2xxx_3xxx.c @@ -241,6 +241,52 @@ static void omap3_clkdm_deny_idle(struct clockdomain *clkdm) _clkdm_del_autodeps(clkdm); } +static int omap3xxx_clkdm_clk_enable(struct clockdomain *clkdm) +{ + bool hwsup = false; + + if (!clkdm->clktrctrl_mask) + return 0; + + hwsup = omap2_cm_is_clkdm_in_hwsup(clkdm->pwrdm.ptr->prcm_offs, + clkdm->clktrctrl_mask); + + if (hwsup) { + /* Disable HW transitions when we are changing deps */ + _disable_hwsup(clkdm); + _clkdm_add_autodeps(clkdm); + _enable_hwsup(clkdm); + } else { + if (clkdm->flags & CLKDM_CAN_FORCE_WAKEUP) + omap3_clkdm_wakeup(clkdm); + } + + return 0; +} + +static int omap3xxx_clkdm_clk_disable(struct clockdomain *clkdm) +{ + bool hwsup = false; + + if (!clkdm->clktrctrl_mask) + return 0; + + hwsup = omap2_cm_is_clkdm_in_hwsup(clkdm->pwrdm.ptr->prcm_offs, + clkdm->clktrctrl_mask); + + if (hwsup) { + /* Disable HW transitions when we are changing deps */ + _disable_hwsup(clkdm); + _clkdm_del_autodeps(clkdm); + _enable_hwsup(clkdm); + } else { + if (clkdm->flags & CLKDM_CAN_FORCE_SLEEP) + omap3_clkdm_sleep(clkdm); + } + + return 0; +} + struct clkdm_ops omap2_clkdm_operations = { .clkdm_add_wkdep = omap2_clkdm_add_wkdep, .clkdm_del_wkdep = omap2_clkdm_del_wkdep, @@ -267,6 +313,6 @@ struct clkdm_ops omap3_clkdm_operations = { .clkdm_wakeup = omap3_clkdm_wakeup, .clkdm_allow_idle = omap3_clkdm_allow_idle, .clkdm_deny_idle = omap3_clkdm_deny_idle, - .clkdm_clk_enable = omap2_clkdm_clk_enable, - .clkdm_clk_disable = omap2_clkdm_clk_disable, + .clkdm_clk_enable = omap3xxx_clkdm_clk_enable, + .clkdm_clk_disable = omap3xxx_clkdm_clk_disable, }; diff --git a/arch/arm/mach-omap2/cm-regbits-34xx.h b/arch/arm/mach-omap2/cm-regbits-34xx.h index 766338fe4d34..975f6bda0e0b 100644 --- a/arch/arm/mach-omap2/cm-regbits-34xx.h +++ b/arch/arm/mach-omap2/cm-regbits-34xx.h @@ -67,6 +67,7 @@ #define OMAP3430_EN_IVA2_DPLL_MASK (0x7 << 0) /* CM_IDLEST_IVA2 */ +#define OMAP3430_ST_IVA2_SHIFT 0 #define OMAP3430_ST_IVA2_MASK (1 << 0) /* CM_IDLEST_PLL_IVA2 */ diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index 05fdebfaa195..330d4c6e746b 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -46,7 +46,7 @@ static void __iomem *wakeupgen_base; static void __iomem *sar_base; static DEFINE_SPINLOCK(wakeupgen_lock); -static unsigned int irq_target_cpu[NR_IRQS]; +static unsigned int irq_target_cpu[MAX_IRQS]; static unsigned int irq_banks = MAX_NR_REG_BANKS; static unsigned int max_irqs = MAX_IRQS; static unsigned int omap_secure_apis; diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 6ca8e519968d..37afbd173c2c 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -1889,6 +1889,7 @@ static int _enable(struct omap_hwmod *oh) _enable_sysc(oh); } } else { + _omap4_disable_module(oh); _disable_clocks(oh); pr_debug("omap_hwmod: %s: _wait_target_ready: %d\n", oh->name, r); diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index c9e38200216b..ce7e6068768f 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -100,9 +100,9 @@ static struct omap_hwmod omap3xxx_mpu_hwmod = { /* IVA2 (IVA2) */ static struct omap_hwmod_rst_info omap3xxx_iva_resets[] = { - { .name = "logic", .rst_shift = 0 }, - { .name = "seq0", .rst_shift = 1 }, - { .name = "seq1", .rst_shift = 2 }, + { .name = "logic", .rst_shift = 0, .st_shift = 8 }, + { .name = "seq0", .rst_shift = 1, .st_shift = 9 }, + { .name = "seq1", .rst_shift = 2, .st_shift = 10 }, }; static struct omap_hwmod omap3xxx_iva_hwmod = { @@ -112,6 +112,15 @@ static struct omap_hwmod omap3xxx_iva_hwmod = { .rst_lines = omap3xxx_iva_resets, .rst_lines_cnt = ARRAY_SIZE(omap3xxx_iva_resets), .main_clk = "iva2_ck", + .prcm = { + .omap2 = { + .module_offs = OMAP3430_IVA2_MOD, + .prcm_reg_id = 1, + .module_bit = OMAP3430_CM_FCLKEN_IVA2_EN_IVA2_SHIFT, + .idlest_reg_id = 1, + .idlest_idle_bit = OMAP3430_ST_IVA2_SHIFT, + } + }, }; /* timer class */ diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c index 242aee498ceb..afb60917a948 100644 --- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c @@ -4210,7 +4210,7 @@ static struct omap_hwmod_ocp_if omap44xx_dsp__iva = { }; /* dsp -> sl2if */ -static struct omap_hwmod_ocp_if omap44xx_dsp__sl2if = { +static struct omap_hwmod_ocp_if __maybe_unused omap44xx_dsp__sl2if = { .master = &omap44xx_dsp_hwmod, .slave = &omap44xx_sl2if_hwmod, .clk = "dpll_iva_m5x2_ck", @@ -4828,7 +4828,7 @@ static struct omap_hwmod_ocp_if omap44xx_l3_main_2__iss = { }; /* iva -> sl2if */ -static struct omap_hwmod_ocp_if omap44xx_iva__sl2if = { +static struct omap_hwmod_ocp_if __maybe_unused omap44xx_iva__sl2if = { .master = &omap44xx_iva_hwmod, .slave = &omap44xx_sl2if_hwmod, .clk = "dpll_iva_m5x2_ck", @@ -5362,7 +5362,7 @@ static struct omap_hwmod_ocp_if omap44xx_l4_wkup__scrm = { }; /* l3_main_2 -> sl2if */ -static struct omap_hwmod_ocp_if omap44xx_l3_main_2__sl2if = { +static struct omap_hwmod_ocp_if __maybe_unused omap44xx_l3_main_2__sl2if = { .master = &omap44xx_l3_main_2_hwmod, .slave = &omap44xx_sl2if_hwmod, .clk = "l3_div_ck", @@ -6032,7 +6032,7 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = { &omap44xx_l4_abe__dmic, &omap44xx_l4_abe__dmic_dma, &omap44xx_dsp__iva, - &omap44xx_dsp__sl2if, + /* &omap44xx_dsp__sl2if, */ &omap44xx_l4_cfg__dsp, &omap44xx_l3_main_2__dss, &omap44xx_l4_per__dss, @@ -6068,7 +6068,7 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = { &omap44xx_l4_per__i2c4, &omap44xx_l3_main_2__ipu, &omap44xx_l3_main_2__iss, - &omap44xx_iva__sl2if, + /* &omap44xx_iva__sl2if, */ &omap44xx_l3_main_2__iva, &omap44xx_l4_wkup__kbd, &omap44xx_l4_cfg__mailbox, @@ -6099,7 +6099,7 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = { &omap44xx_l4_cfg__cm_core, &omap44xx_l4_wkup__prm, &omap44xx_l4_wkup__scrm, - &omap44xx_l3_main_2__sl2if, + /* &omap44xx_l3_main_2__sl2if, */ &omap44xx_l4_abe__slimbus1, &omap44xx_l4_abe__slimbus1_dma, &omap44xx_l4_per__slimbus2, diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 2ff6d41ec6c6..2ba4f57dda86 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -260,6 +260,7 @@ static u32 notrace dmtimer_read_sched_clock(void) return 0; } +#ifdef CONFIG_OMAP_32K_TIMER /* Setup free-running counter for clocksource */ static int __init omap2_sync32k_clocksource_init(void) { @@ -299,6 +300,12 @@ static int __init omap2_sync32k_clocksource_init(void) return ret; } +#else +static inline int omap2_sync32k_clocksource_init(void) +{ + return -ENODEV; +} +#endif static void __init omap2_gptimer_clocksource_init(int gptimer_id, const char *fck_source) diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 410291c67666..a6cd14ab1e4e 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -204,6 +204,13 @@ void __init orion5x_wdt_init(void) void __init orion5x_init_early(void) { orion_time_set_base(TIMER_VIRT_BASE); + + /* + * Some Orion5x devices allocate their coherent buffers from atomic + * context. Increase size of atomic coherent pool to make sure such + * the allocations won't fail. + */ + init_dma_coherent_pool_size(SZ_1M); } int orion5x_tclk; diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c index cf10f92856dc..453a6e50db8b 100644 --- a/arch/arm/mach-shmobile/board-armadillo800eva.c +++ b/arch/arm/mach-shmobile/board-armadillo800eva.c @@ -520,13 +520,14 @@ static struct platform_device hdmi_lcdc_device = { }; /* GPIO KEY */ -#define GPIO_KEY(c, g, d) { .code = c, .gpio = g, .desc = d, .active_low = 1 } +#define GPIO_KEY(c, g, d, ...) \ + { .code = c, .gpio = g, .desc = d, .active_low = 1, __VA_ARGS__ } static struct gpio_keys_button gpio_buttons[] = { - GPIO_KEY(KEY_POWER, GPIO_PORT99, "SW1"), - GPIO_KEY(KEY_BACK, GPIO_PORT100, "SW2"), - GPIO_KEY(KEY_MENU, GPIO_PORT97, "SW3"), - GPIO_KEY(KEY_HOME, GPIO_PORT98, "SW4"), + GPIO_KEY(KEY_POWER, GPIO_PORT99, "SW3", .wakeup = 1), + GPIO_KEY(KEY_BACK, GPIO_PORT100, "SW4"), + GPIO_KEY(KEY_MENU, GPIO_PORT97, "SW5"), + GPIO_KEY(KEY_HOME, GPIO_PORT98, "SW6"), }; static struct gpio_keys_platform_data gpio_key_info = { @@ -901,8 +902,8 @@ static struct platform_device *eva_devices[] __initdata = { &camera_device, &ceu0_device, &fsi_device, - &fsi_hdmi_device, &fsi_wm8978_device, + &fsi_hdmi_device, }; static void __init eva_clock_init(void) diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c index 53b7ea92c32c..3b8a0171c3cb 100644 --- a/arch/arm/mach-shmobile/board-kzm9g.c +++ b/arch/arm/mach-shmobile/board-kzm9g.c @@ -346,11 +346,11 @@ static struct resource sh_mmcif_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .start = gic_spi(141), + .start = gic_spi(140), .flags = IORESOURCE_IRQ, }, [2] = { - .start = gic_spi(140), + .start = gic_spi(141), .flags = IORESOURCE_IRQ, }, }; diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index 7ea2b31e3199..c129542f6aed 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -695,6 +695,7 @@ static struct platform_device usbhs0_device = { * - J30 "open" * - modify usbhs1_get_id() USBHS_HOST -> USBHS_GADGET * - add .get_vbus = usbhs_get_vbus in usbhs1_private + * - check usbhs0_device(pio)/usbhs1_device(irq) order in mackerel_devices. */ #define IRQ8 evt2irq(0x0300) #define USB_PHY_MODE (1 << 4) @@ -1325,8 +1326,8 @@ static struct platform_device *mackerel_devices[] __initdata = { &nor_flash_device, &smc911x_device, &lcdc_device, - &usbhs1_device, &usbhs0_device, + &usbhs1_device, &leds_device, &fsi_device, &fsi_ak4643_device, diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c index 3a528cf4366c..fcf5a47f4772 100644 --- a/arch/arm/mach-shmobile/board-marzen.c +++ b/arch/arm/mach-shmobile/board-marzen.c @@ -67,7 +67,7 @@ static struct smsc911x_platform_config smsc911x_platdata = { static struct platform_device eth_device = { .name = "smsc911x", - .id = 0, + .id = -1, .dev = { .platform_data = &smsc911x_platdata, }, diff --git a/arch/arm/mach-shmobile/intc-sh73a0.c b/arch/arm/mach-shmobile/intc-sh73a0.c index ee447404c857..588555a67d9c 100644 --- a/arch/arm/mach-shmobile/intc-sh73a0.c +++ b/arch/arm/mach-shmobile/intc-sh73a0.c @@ -259,9 +259,9 @@ static int sh73a0_set_wake(struct irq_data *data, unsigned int on) return 0; /* always allow wakeup */ } -#define RELOC_BASE 0x1000 +#define RELOC_BASE 0x1200 -/* INTCA IRQ pins at INTCS + 0x1000 to make space for GIC+INTC handling */ +/* INTCA IRQ pins at INTCS + RELOC_BASE to make space for GIC+INTC handling */ #define INTCS_VECT_RELOC(n, vect) INTCS_VECT((n), (vect) + RELOC_BASE) INTC_IRQ_PINS_32(intca_irq_pins, 0xe6900000, diff --git a/arch/arm/mach-tegra/board-harmony-power.c b/arch/arm/mach-tegra/board-harmony-power.c index b7344beec102..94486e7e9dfd 100644 --- a/arch/arm/mach-tegra/board-harmony-power.c +++ b/arch/arm/mach-tegra/board-harmony-power.c @@ -67,6 +67,13 @@ static struct regulator_init_data ldo0_data = { }, \ } +static struct regulator_init_data sys_data = { + .supply_regulator = "vdd_5v0", + .constraints = { + .name = "vdd_sys", + }, +}; + HARMONY_REGULATOR_INIT(sm0, "vdd_sm0", "vdd_sys", 725, 1500, 1); HARMONY_REGULATOR_INIT(sm1, "vdd_sm1", "vdd_sys", 725, 1500, 1); HARMONY_REGULATOR_INIT(sm2, "vdd_sm2", "vdd_sys", 3000, 4550, 1); @@ -74,7 +81,7 @@ HARMONY_REGULATOR_INIT(ldo1, "vdd_ldo1", "vdd_sm2", 725, 1500, 1); HARMONY_REGULATOR_INIT(ldo2, "vdd_ldo2", "vdd_sm2", 725, 1500, 0); HARMONY_REGULATOR_INIT(ldo3, "vdd_ldo3", "vdd_sm2", 1250, 3300, 1); HARMONY_REGULATOR_INIT(ldo4, "vdd_ldo4", "vdd_sm2", 1700, 2475, 1); -HARMONY_REGULATOR_INIT(ldo5, "vdd_ldo5", NULL, 1250, 3300, 1); +HARMONY_REGULATOR_INIT(ldo5, "vdd_ldo5", "vdd_sys", 1250, 3300, 1); HARMONY_REGULATOR_INIT(ldo6, "vdd_ldo6", "vdd_sm2", 1250, 3300, 0); HARMONY_REGULATOR_INIT(ldo7, "vdd_ldo7", "vdd_sm2", 1250, 3300, 0); HARMONY_REGULATOR_INIT(ldo8, "vdd_ldo8", "vdd_sm2", 1250, 3300, 0); @@ -88,6 +95,7 @@ HARMONY_REGULATOR_INIT(ldo9, "vdd_ldo9", "vdd_sm2", 1250, 3300, 1); } static struct tps6586x_subdev_info tps_devs[] = { + TPS_REG(SYS, &sys_data), TPS_REG(SM_0, &sm0_data), TPS_REG(SM_1, &sm1_data), TPS_REG(SM_2, &sm2_data), @@ -120,7 +128,7 @@ static struct i2c_board_info __initdata harmony_regulators[] = { int __init harmony_regulator_init(void) { - regulator_register_always_on(0, "vdd_sys", + regulator_register_always_on(0, "vdd_5v0", NULL, 0, 5000000); if (machine_is_harmony()) { diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 119bc52ab93e..4e07eec1270d 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -63,10 +63,11 @@ static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd, pid = task_pid_nr(thread->task) << ASID_BITS; asm volatile( " mrc p15, 0, %0, c13, c0, 1\n" - " bfi %1, %0, #0, %2\n" - " mcr p15, 0, %1, c13, c0, 1\n" + " and %0, %0, %2\n" + " orr %0, %0, %1\n" + " mcr p15, 0, %0, c13, c0, 1\n" : "=r" (contextidr), "+r" (pid) - : "I" (ASID_BITS)); + : "I" (~ASID_MASK)); isb(); return NOTIFY_OK; diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 4e7d1182e8a3..13f555d62491 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -267,17 +267,19 @@ static void __dma_free_remap(void *cpu_addr, size_t size) vunmap(cpu_addr); } +#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K + struct dma_pool { size_t size; spinlock_t lock; unsigned long *bitmap; unsigned long nr_pages; void *vaddr; - struct page *page; + struct page **pages; }; static struct dma_pool atomic_pool = { - .size = SZ_256K, + .size = DEFAULT_DMA_COHERENT_POOL_SIZE, }; static int __init early_coherent_pool(char *p) @@ -287,6 +289,21 @@ static int __init early_coherent_pool(char *p) } early_param("coherent_pool", early_coherent_pool); +void __init init_dma_coherent_pool_size(unsigned long size) +{ + /* + * Catch any attempt to set the pool size too late. + */ + BUG_ON(atomic_pool.vaddr); + + /* + * Set architecture specific coherent pool size only if + * it has not been changed by kernel command line parameter. + */ + if (atomic_pool.size == DEFAULT_DMA_COHERENT_POOL_SIZE) + atomic_pool.size = size; +} + /* * Initialise the coherent pool for atomic allocations. */ @@ -297,6 +314,7 @@ static int __init atomic_pool_init(void) unsigned long nr_pages = pool->size >> PAGE_SHIFT; unsigned long *bitmap; struct page *page; + struct page **pages; void *ptr; int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long); @@ -304,21 +322,33 @@ static int __init atomic_pool_init(void) if (!bitmap) goto no_bitmap; + pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto no_pages; + if (IS_ENABLED(CONFIG_CMA)) ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page); else ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot, &page, NULL); if (ptr) { + int i; + + for (i = 0; i < nr_pages; i++) + pages[i] = page + i; + spin_lock_init(&pool->lock); pool->vaddr = ptr; - pool->page = page; + pool->pages = pages; pool->bitmap = bitmap; pool->nr_pages = nr_pages; pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n", (unsigned)pool->size / 1024); return 0; } + + kfree(pages); +no_pages: kfree(bitmap); no_bitmap: pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", @@ -443,27 +473,45 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page) if (pageno < pool->nr_pages) { bitmap_set(pool->bitmap, pageno, count); ptr = pool->vaddr + PAGE_SIZE * pageno; - *ret_page = pool->page + pageno; + *ret_page = pool->pages[pageno]; + } else { + pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n" + "Please increase it with coherent_pool= kernel parameter!\n", + (unsigned)pool->size / 1024); } spin_unlock_irqrestore(&pool->lock, flags); return ptr; } +static bool __in_atomic_pool(void *start, size_t size) +{ + struct dma_pool *pool = &atomic_pool; + void *end = start + size; + void *pool_start = pool->vaddr; + void *pool_end = pool->vaddr + pool->size; + + if (start < pool_start || start >= pool_end) + return false; + + if (end <= pool_end) + return true; + + WARN(1, "Wrong coherent size(%p-%p) from atomic pool(%p-%p)\n", + start, end - 1, pool_start, pool_end - 1); + + return false; +} + static int __free_from_pool(void *start, size_t size) { struct dma_pool *pool = &atomic_pool; unsigned long pageno, count; unsigned long flags; - if (start < pool->vaddr || start > pool->vaddr + pool->size) + if (!__in_atomic_pool(start, size)) return 0; - if (start + size > pool->vaddr + pool->size) { - WARN(1, "freeing wrong coherent size from pool\n"); - return 0; - } - pageno = (start - pool->vaddr) >> PAGE_SHIFT; count = size >> PAGE_SHIFT; @@ -1090,10 +1138,22 @@ static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t si return 0; } +static struct page **__atomic_get_pages(void *addr) +{ + struct dma_pool *pool = &atomic_pool; + struct page **pages = pool->pages; + int offs = (addr - pool->vaddr) >> PAGE_SHIFT; + + return pages + offs; +} + static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs) { struct vm_struct *area; + if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) + return __atomic_get_pages(cpu_addr); + if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) return cpu_addr; @@ -1103,6 +1163,34 @@ static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs) return NULL; } +static void *__iommu_alloc_atomic(struct device *dev, size_t size, + dma_addr_t *handle) +{ + struct page *page; + void *addr; + + addr = __alloc_from_pool(size, &page); + if (!addr) + return NULL; + + *handle = __iommu_create_mapping(dev, &page, size); + if (*handle == DMA_ERROR_CODE) + goto err_mapping; + + return addr; + +err_mapping: + __free_from_pool(addr, size); + return NULL; +} + +static void __iommu_free_atomic(struct device *dev, struct page **pages, + dma_addr_t handle, size_t size) +{ + __iommu_remove_mapping(dev, handle, size); + __free_from_pool(page_address(pages[0]), size); +} + static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { @@ -1113,6 +1201,9 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); + if (gfp & GFP_ATOMIC) + return __iommu_alloc_atomic(dev, size, handle); + pages = __iommu_alloc_buffer(dev, size, gfp); if (!pages) return NULL; @@ -1179,6 +1270,11 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, return; } + if (__in_atomic_pool(cpu_addr, size)) { + __iommu_free_atomic(dev, pages, handle, size); + return; + } + if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) { unmap_kernel_range((unsigned long)cpu_addr, size); vunmap(cpu_addr); diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 6776160618ef..a8ee92da3544 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -55,6 +55,9 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page /* permanent static mappings from iotable_init() */ #define VM_ARM_STATIC_MAPPING 0x40000000 +/* empty mapping */ +#define VM_ARM_EMPTY_MAPPING 0x20000000 + /* mapping type (attributes) for permanent static mappings */ #define VM_ARM_MTYPE(mt) ((mt) << 20) #define VM_ARM_MTYPE_MASK (0x1f << 20) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4c2d0451e84a..c2fa21d0103e 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -807,7 +807,7 @@ static void __init pmd_empty_section_gap(unsigned long addr) vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm)); vm->addr = (void *)addr; vm->size = SECTION_SIZE; - vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; + vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING; vm->caller = pmd_empty_section_gap; vm_area_add_early(vm); } @@ -820,7 +820,7 @@ static void __init fill_pmd_gaps(void) /* we're still single threaded hence no lock needed here */ for (vm = vmlist; vm; vm = vm->next) { - if (!(vm->flags & VM_ARM_STATIC_MAPPING)) + if (!(vm->flags & (VM_ARM_STATIC_MAPPING | VM_ARM_EMPTY_MAPPING))) continue; addr = (unsigned long)vm->addr; if (addr < next) @@ -961,8 +961,8 @@ void __init sanity_check_meminfo(void) * Check whether this memory bank would partially overlap * the vmalloc area. */ - if (__va(bank->start + bank->size) > vmalloc_min || - __va(bank->start + bank->size) < __va(bank->start)) { + if (__va(bank->start + bank->size - 1) >= vmalloc_min || + __va(bank->start + bank->size - 1) <= __va(bank->start)) { unsigned long newsize = vmalloc_min - __va(bank->start); printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx " "to -%.8llx (vmalloc region overlap).\n", diff --git a/arch/arm/plat-mxc/include/mach/mx25.h b/arch/arm/plat-mxc/include/mach/mx25.h index 627d94f1b010..ec466400a200 100644 --- a/arch/arm/plat-mxc/include/mach/mx25.h +++ b/arch/arm/plat-mxc/include/mach/mx25.h @@ -98,6 +98,7 @@ #define MX25_INT_UART1 (NR_IRQS_LEGACY + 45) #define MX25_INT_GPIO2 (NR_IRQS_LEGACY + 51) #define MX25_INT_GPIO1 (NR_IRQS_LEGACY + 52) +#define MX25_INT_GPT1 (NR_IRQS_LEGACY + 54) #define MX25_INT_FEC (NR_IRQS_LEGACY + 57) #define MX25_DMA_REQ_SSI2_RX1 22 diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c index 766181cb5c95..024f3b08db29 100644 --- a/arch/arm/plat-omap/sram.c +++ b/arch/arm/plat-omap/sram.c @@ -68,6 +68,7 @@ static unsigned long omap_sram_start; static void __iomem *omap_sram_base; +static unsigned long omap_sram_skip; static unsigned long omap_sram_size; static void __iomem *omap_sram_ceil; @@ -106,6 +107,7 @@ static int is_sram_locked(void) */ static void __init omap_detect_sram(void) { + omap_sram_skip = SRAM_BOOTLOADER_SZ; if (cpu_class_is_omap2()) { if (is_sram_locked()) { if (cpu_is_omap34xx()) { @@ -113,6 +115,7 @@ static void __init omap_detect_sram(void) if ((omap_type() == OMAP2_DEVICE_TYPE_EMU) || (omap_type() == OMAP2_DEVICE_TYPE_SEC)) { omap_sram_size = 0x7000; /* 28K */ + omap_sram_skip += SZ_16K; } else { omap_sram_size = 0x8000; /* 32K */ } @@ -175,8 +178,10 @@ static void __init omap_map_sram(void) return; #ifdef CONFIG_OMAP4_ERRATA_I688 + if (cpu_is_omap44xx()) { omap_sram_start += PAGE_SIZE; omap_sram_size -= SZ_16K; + } #endif if (cpu_is_omap34xx()) { /* @@ -203,8 +208,8 @@ static void __init omap_map_sram(void) * Looks like we need to preserve some bootloader code at the * beginning of SRAM for jumping to flash for reboot to work... */ - memset_io(omap_sram_base + SRAM_BOOTLOADER_SZ, 0, - omap_sram_size - SRAM_BOOTLOADER_SZ); + memset_io(omap_sram_base + omap_sram_skip, 0, + omap_sram_size - omap_sram_skip); } /* @@ -218,7 +223,7 @@ void *omap_sram_push_address(unsigned long size) { unsigned long available, new_ceil = (unsigned long)omap_sram_ceil; - available = omap_sram_ceil - (omap_sram_base + SRAM_BOOTLOADER_SZ); + available = omap_sram_ceil - (omap_sram_base + omap_sram_skip); if (size > available) { pr_err("Not enough space in SRAM\n"); diff --git a/arch/arm/plat-samsung/clock.c b/arch/arm/plat-samsung/clock.c index 65c5eca475e7..d1116e2dfbea 100644 --- a/arch/arm/plat-samsung/clock.c +++ b/arch/arm/plat-samsung/clock.c @@ -144,6 +144,7 @@ long clk_round_rate(struct clk *clk, unsigned long rate) int clk_set_rate(struct clk *clk, unsigned long rate) { + unsigned long flags; int ret; if (IS_ERR(clk)) @@ -159,9 +160,9 @@ int clk_set_rate(struct clk *clk, unsigned long rate) if (clk->ops == NULL || clk->ops->set_rate == NULL) return -EINVAL; - spin_lock(&clocks_lock); + spin_lock_irqsave(&clocks_lock, flags); ret = (clk->ops->set_rate)(clk, rate); - spin_unlock(&clocks_lock); + spin_unlock_irqrestore(&clocks_lock, flags); return ret; } @@ -173,17 +174,18 @@ struct clk *clk_get_parent(struct clk *clk) int clk_set_parent(struct clk *clk, struct clk *parent) { + unsigned long flags; int ret = 0; if (IS_ERR(clk)) return -EINVAL; - spin_lock(&clocks_lock); + spin_lock_irqsave(&clocks_lock, flags); if (clk->ops && clk->ops->set_parent) ret = (clk->ops->set_parent)(clk, parent); - spin_unlock(&clocks_lock); + spin_unlock_irqrestore(&clocks_lock, flags); return ret; } diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index f34861920634..c7092e6057c5 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -38,6 +38,7 @@ config BLACKFIN select GENERIC_ATOMIC64 select GENERIC_IRQ_PROBE select IRQ_PER_CPU if SMP + select USE_GENERIC_SMP_HELPERS if SMP select HAVE_NMI_WATCHDOG if NMI_WATCHDOG select GENERIC_SMP_IDLE_THREAD select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS diff --git a/arch/blackfin/Makefile b/arch/blackfin/Makefile index d3d7e64ca96d..66cf00095b84 100644 --- a/arch/blackfin/Makefile +++ b/arch/blackfin/Makefile @@ -20,7 +20,6 @@ endif KBUILD_AFLAGS += $(call cc-option,-mno-fdpic) KBUILD_CFLAGS_MODULE += -mlong-calls LDFLAGS += -m elf32bfin -KALLSYMS += --symbol-prefix=_ KBUILD_DEFCONFIG := BF537-STAMP_defconfig diff --git a/arch/blackfin/include/asm/smp.h b/arch/blackfin/include/asm/smp.h index dc3d144b4bb5..9631598dcc5d 100644 --- a/arch/blackfin/include/asm/smp.h +++ b/arch/blackfin/include/asm/smp.h @@ -18,6 +18,8 @@ #define raw_smp_processor_id() blackfin_core_id() extern void bfin_relocate_coreb_l1_mem(void); +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); #if defined(CONFIG_SMP) && defined(CONFIG_ICACHE_FLUSH_L1) asmlinkage void blackfin_icache_flush_range_l1(unsigned long *ptr); diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 00bbe672b3b3..a40151306b77 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -48,10 +48,13 @@ unsigned long blackfin_iflush_l1_entry[NR_CPUS]; struct blackfin_initial_pda __cpuinitdata initial_pda_coreb; -#define BFIN_IPI_TIMER 0 -#define BFIN_IPI_RESCHEDULE 1 -#define BFIN_IPI_CALL_FUNC 2 -#define BFIN_IPI_CPU_STOP 3 +enum ipi_message_type { + BFIN_IPI_TIMER, + BFIN_IPI_RESCHEDULE, + BFIN_IPI_CALL_FUNC, + BFIN_IPI_CALL_FUNC_SINGLE, + BFIN_IPI_CPU_STOP, +}; struct blackfin_flush_data { unsigned long start; @@ -60,35 +63,20 @@ struct blackfin_flush_data { void *secondary_stack; - -struct smp_call_struct { - void (*func)(void *info); - void *info; - int wait; - cpumask_t *waitmask; -}; - static struct blackfin_flush_data smp_flush_data; static DEFINE_SPINLOCK(stop_lock); -struct ipi_message { - unsigned long type; - struct smp_call_struct call_struct; -}; - /* A magic number - stress test shows this is safe for common cases */ #define BFIN_IPI_MSGQ_LEN 5 /* Simple FIFO buffer, overflow leads to panic */ -struct ipi_message_queue { - spinlock_t lock; +struct ipi_data { unsigned long count; - unsigned long head; /* head of the queue */ - struct ipi_message ipi_message[BFIN_IPI_MSGQ_LEN]; + unsigned long bits; }; -static DEFINE_PER_CPU(struct ipi_message_queue, ipi_msg_queue); +static DEFINE_PER_CPU(struct ipi_data, bfin_ipi); static void ipi_cpu_stop(unsigned int cpu) { @@ -129,28 +117,6 @@ static void ipi_flush_icache(void *info) blackfin_icache_flush_range(fdata->start, fdata->end); } -static void ipi_call_function(unsigned int cpu, struct ipi_message *msg) -{ - int wait; - void (*func)(void *info); - void *info; - func = msg->call_struct.func; - info = msg->call_struct.info; - wait = msg->call_struct.wait; - func(info); - if (wait) { -#ifdef __ARCH_SYNC_CORE_DCACHE - /* - * 'wait' usually means synchronization between CPUs. - * Invalidate D cache in case shared data was changed - * by func() to ensure cache coherence. - */ - resync_core_dcache(); -#endif - cpumask_clear_cpu(cpu, msg->call_struct.waitmask); - } -} - /* Use IRQ_SUPPLE_0 to request reschedule. * When returning from interrupt to user space, * there is chance to reschedule */ @@ -172,152 +138,95 @@ void ipi_timer(void) static irqreturn_t ipi_handler_int1(int irq, void *dev_instance) { - struct ipi_message *msg; - struct ipi_message_queue *msg_queue; + struct ipi_data *bfin_ipi_data; unsigned int cpu = smp_processor_id(); - unsigned long flags; + unsigned long pending; + unsigned long msg; platform_clear_ipi(cpu, IRQ_SUPPLE_1); - msg_queue = &__get_cpu_var(ipi_msg_queue); - - spin_lock_irqsave(&msg_queue->lock, flags); - - while (msg_queue->count) { - msg = &msg_queue->ipi_message[msg_queue->head]; - switch (msg->type) { - case BFIN_IPI_TIMER: - ipi_timer(); - break; - case BFIN_IPI_RESCHEDULE: - scheduler_ipi(); - break; - case BFIN_IPI_CALL_FUNC: - ipi_call_function(cpu, msg); - break; - case BFIN_IPI_CPU_STOP: - ipi_cpu_stop(cpu); - break; - default: - printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%lx\n", - cpu, msg->type); - break; - } - msg_queue->head++; - msg_queue->head %= BFIN_IPI_MSGQ_LEN; - msg_queue->count--; + bfin_ipi_data = &__get_cpu_var(bfin_ipi); + + while ((pending = xchg(&bfin_ipi_data->bits, 0)) != 0) { + msg = 0; + do { + msg = find_next_bit(&pending, BITS_PER_LONG, msg + 1); + switch (msg) { + case BFIN_IPI_TIMER: + ipi_timer(); + break; + case BFIN_IPI_RESCHEDULE: + scheduler_ipi(); + break; + case BFIN_IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; + + case BFIN_IPI_CALL_FUNC_SINGLE: + generic_smp_call_function_single_interrupt(); + break; + + case BFIN_IPI_CPU_STOP: + ipi_cpu_stop(cpu); + break; + } + } while (msg < BITS_PER_LONG); + + smp_mb(); } - spin_unlock_irqrestore(&msg_queue->lock, flags); return IRQ_HANDLED; } -static void ipi_queue_init(void) +static void bfin_ipi_init(void) { unsigned int cpu; - struct ipi_message_queue *msg_queue; + struct ipi_data *bfin_ipi_data; for_each_possible_cpu(cpu) { - msg_queue = &per_cpu(ipi_msg_queue, cpu); - spin_lock_init(&msg_queue->lock); - msg_queue->count = 0; - msg_queue->head = 0; + bfin_ipi_data = &per_cpu(bfin_ipi, cpu); + bfin_ipi_data->bits = 0; + bfin_ipi_data->count = 0; } } -static inline void smp_send_message(cpumask_t callmap, unsigned long type, - void (*func) (void *info), void *info, int wait) +void send_ipi(const struct cpumask *cpumask, enum ipi_message_type msg) { unsigned int cpu; - struct ipi_message_queue *msg_queue; - struct ipi_message *msg; - unsigned long flags, next_msg; - cpumask_t waitmask; /* waitmask is shared by all cpus */ - - cpumask_copy(&waitmask, &callmap); - for_each_cpu(cpu, &callmap) { - msg_queue = &per_cpu(ipi_msg_queue, cpu); - spin_lock_irqsave(&msg_queue->lock, flags); - if (msg_queue->count < BFIN_IPI_MSGQ_LEN) { - next_msg = (msg_queue->head + msg_queue->count) - % BFIN_IPI_MSGQ_LEN; - msg = &msg_queue->ipi_message[next_msg]; - msg->type = type; - if (type == BFIN_IPI_CALL_FUNC) { - msg->call_struct.func = func; - msg->call_struct.info = info; - msg->call_struct.wait = wait; - msg->call_struct.waitmask = &waitmask; - } - msg_queue->count++; - } else - panic("IPI message queue overflow\n"); - spin_unlock_irqrestore(&msg_queue->lock, flags); + struct ipi_data *bfin_ipi_data; + unsigned long flags; + + local_irq_save(flags); + + for_each_cpu(cpu, cpumask) { + bfin_ipi_data = &per_cpu(bfin_ipi, cpu); + smp_mb(); + set_bit(msg, &bfin_ipi_data->bits); + bfin_ipi_data->count++; platform_send_ipi_cpu(cpu, IRQ_SUPPLE_1); } - if (wait) { - while (!cpumask_empty(&waitmask)) - blackfin_dcache_invalidate_range( - (unsigned long)(&waitmask), - (unsigned long)(&waitmask)); -#ifdef __ARCH_SYNC_CORE_DCACHE - /* - * Invalidate D cache in case shared data was changed by - * other processors to ensure cache coherence. - */ - resync_core_dcache(); -#endif - } + local_irq_restore(flags); } -int smp_call_function(void (*func)(void *info), void *info, int wait) +void arch_send_call_function_single_ipi(int cpu) { - cpumask_t callmap; - - preempt_disable(); - cpumask_copy(&callmap, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), &callmap); - if (!cpumask_empty(&callmap)) - smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait); - - preempt_enable(); - - return 0; + send_ipi(cpumask_of(cpu), BFIN_IPI_CALL_FUNC_SINGLE); } -EXPORT_SYMBOL_GPL(smp_call_function); -int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, - int wait) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { - unsigned int cpu = cpuid; - cpumask_t callmap; - - if (cpu_is_offline(cpu)) - return 0; - cpumask_clear(&callmap); - cpumask_set_cpu(cpu, &callmap); - - smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait); - - return 0; + send_ipi(mask, BFIN_IPI_CALL_FUNC); } -EXPORT_SYMBOL_GPL(smp_call_function_single); void smp_send_reschedule(int cpu) { - cpumask_t callmap; - /* simply trigger an ipi */ - - cpumask_clear(&callmap); - cpumask_set_cpu(cpu, &callmap); - - smp_send_message(callmap, BFIN_IPI_RESCHEDULE, NULL, NULL, 0); + send_ipi(cpumask_of(cpu), BFIN_IPI_RESCHEDULE); return; } void smp_send_msg(const struct cpumask *mask, unsigned long type) { - smp_send_message(*mask, type, NULL, NULL, 0); + send_ipi(mask, type); } void smp_timer_broadcast(const struct cpumask *mask) @@ -333,7 +242,7 @@ void smp_send_stop(void) cpumask_copy(&callmap, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &callmap); if (!cpumask_empty(&callmap)) - smp_send_message(callmap, BFIN_IPI_CPU_STOP, NULL, NULL, 0); + send_ipi(&callmap, BFIN_IPI_CPU_STOP); preempt_enable(); @@ -436,7 +345,7 @@ void __init smp_prepare_boot_cpu(void) void __init smp_prepare_cpus(unsigned int max_cpus) { platform_prepare_cpus(max_cpus); - ipi_queue_init(); + bfin_ipi_init(); platform_request_ipi(IRQ_SUPPLE_0, ipi_handler_int0); platform_request_ipi(IRQ_SUPPLE_1, ipi_handler_int1); } diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 3af601e31e66..f08e89183cda 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -2,6 +2,7 @@ include include/asm-generic/Kbuild.asm generic-y += atomic.h generic-y += auxvec.h +generic-y += barrier.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += cputime.h diff --git a/arch/c6x/include/asm/barrier.h b/arch/c6x/include/asm/barrier.h deleted file mode 100644 index 538240e85909..000000000000 --- a/arch/c6x/include/asm/barrier.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Port on Texas Instruments TMS320C6x architecture - * - * Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated - * Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef _ASM_C6X_BARRIER_H -#define _ASM_C6X_BARRIER_H - -#define nop() asm("NOP\n"); - -#define mb() barrier() -#define rmb() barrier() -#define wmb() barrier() -#define set_mb(var, value) do { var = value; mb(); } while (0) -#define set_wmb(var, value) do { var = value; wmb(); } while (0) - -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) - -#endif /* _ASM_C6X_BARRIER_H */ diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 66fd01728790..7f65be6f7f17 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/elfcore.h> #include <linux/mqueue.h> #include <linux/reboot.h> +#include <linux/rcupdate.h> //#define DEBUG @@ -74,6 +75,7 @@ void cpu_idle (void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) { void (*idle)(void); /* @@ -86,6 +88,7 @@ void cpu_idle (void) idle = default_idle; idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index ff95f50efea5..2eb7fa5bf9d8 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/reboot.h> #include <linux/interrupt.h> #include <linux/pagemap.h> +#include <linux/rcupdate.h> #include <asm/asm-offsets.h> #include <asm/uaccess.h> @@ -69,12 +70,14 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) { check_pgt_cache(); if (!frv_dma_inprogress && idle) idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 0e9c315be104..f153ed1a4c08 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -36,6 +36,7 @@ #include <linux/reboot.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/traps.h> @@ -78,8 +79,10 @@ void (*idle)(void) = default_idle; void cpu_idle(void) { while (1) { + rcu_idle_enter(); while (!need_resched()) idle(); + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 310cf5781fad..3c720ef6c32d 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -25,6 +25,7 @@ config IA64 select HAVE_GENERIC_HARDIRQS select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP + select HAVE_VIRT_CPU_ACCOUNTING select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP @@ -340,17 +341,6 @@ config FORCE_MAX_ZONEORDER default "17" if HUGETLB_PAGE default "11" -config VIRT_CPU_ACCOUNTING - bool "Deterministic task and CPU time accounting" - default n - help - Select this option to enable more accurate task and CPU time - accounting. This is done by reading a CPU counter on each - kernel entry and exit and on transitions within the kernel - between system, softirq and hardirq state, so there is a - small performance impact. - If in doubt, say N here. - config SMP bool "Symmetric multi-processing support" select USE_GENERIC_SMP_HELPERS diff --git a/arch/ia64/include/asm/switch_to.h b/arch/ia64/include/asm/switch_to.h index cb2412fcd17f..d38c7ea5eea5 100644 --- a/arch/ia64/include/asm/switch_to.h +++ b/arch/ia64/include/asm/switch_to.h @@ -30,13 +30,6 @@ extern struct task_struct *ia64_switch_to (void *next_task); extern void ia64_save_extra (struct task_struct *task); extern void ia64_load_extra (struct task_struct *task); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next); -# define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n) -#else -# define IA64_ACCOUNT_ON_SWITCH(p,n) -#endif - #ifdef CONFIG_PERFMON DECLARE_PER_CPU(unsigned long, pfm_syst_info); # define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1) @@ -49,7 +42,6 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct || PERFMON_IS_SYSWIDE()) #define __switch_to(prev,next,last) do { \ - IA64_ACCOUNT_ON_SWITCH(prev, next); \ if (IA64_HAS_EXTRA_STATE(prev)) \ ia64_save_extra(prev); \ if (IA64_HAS_EXTRA_STATE(next)) \ diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index dd6fc1449741..3e316ec0b835 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -29,6 +29,7 @@ #include <linux/kdebug.h> #include <linux/utsname.h> #include <linux/tracehook.h> +#include <linux/rcupdate.h> #include <asm/cpu.h> #include <asm/delay.h> @@ -279,6 +280,7 @@ cpu_idle (void) /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); if (can_do_pal_halt) { current_thread_info()->status &= ~TS_POLLING; /* @@ -309,6 +311,7 @@ cpu_idle (void) normal_xtp(); #endif } + rcu_idle_exit(); schedule_preempt_disabled(); check_pgt_cache(); if (cpu_is_offline(cpu)) diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index ecc904b33c5f..80ff9acc5edf 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -83,32 +83,36 @@ static struct clocksource *itc_clocksource; extern cputime_t cycle_to_cputime(u64 cyc); +static void vtime_account_user(struct task_struct *tsk) +{ + cputime_t delta_utime; + struct thread_info *ti = task_thread_info(tsk); + + if (ti->ac_utime) { + delta_utime = cycle_to_cputime(ti->ac_utime); + account_user_time(tsk, delta_utime, delta_utime); + ti->ac_utime = 0; + } +} + /* * Called from the context switch with interrupts disabled, to charge all * accumulated times to the current process, and to prepare accounting on * the next process. */ -void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) +void vtime_task_switch(struct task_struct *prev) { struct thread_info *pi = task_thread_info(prev); - struct thread_info *ni = task_thread_info(next); - cputime_t delta_stime, delta_utime; - __u64 now; + struct thread_info *ni = task_thread_info(current); - now = ia64_get_itc(); - - delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); if (idle_task(smp_processor_id()) != prev) - account_system_time(prev, 0, delta_stime, delta_stime); + vtime_account_system(prev); else - account_idle_time(delta_stime); + vtime_account_idle(prev); - if (pi->ac_utime) { - delta_utime = cycle_to_cputime(pi->ac_utime); - account_user_time(prev, delta_utime, delta_utime); - } + vtime_account_user(prev); - pi->ac_stamp = ni->ac_stamp = now; + pi->ac_stamp = ni->ac_stamp; ni->ac_stime = ni->ac_utime = 0; } @@ -116,29 +120,32 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) * Account time for a transition between system, hard irq or soft irq state. * Note that this function is called with interrupts enabled. */ -void account_system_vtime(struct task_struct *tsk) +static cputime_t vtime_delta(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); - unsigned long flags; cputime_t delta_stime; __u64 now; - local_irq_save(flags); - now = ia64_get_itc(); delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); - if (irq_count() || idle_task(smp_processor_id()) != tsk) - account_system_time(tsk, 0, delta_stime, delta_stime); - else - account_idle_time(delta_stime); ti->ac_stime = 0; - ti->ac_stamp = now; - local_irq_restore(flags); + return delta_stime; +} + +void vtime_account_system(struct task_struct *tsk) +{ + cputime_t delta = vtime_delta(tsk); + + account_system_time(tsk, 0, delta, delta); +} + +void vtime_account_idle(struct task_struct *tsk) +{ + account_idle_time(vtime_delta(tsk)); } -EXPORT_SYMBOL_GPL(account_system_vtime); /* * Called from the timer interrupt handler to charge accumulated user time @@ -146,14 +153,7 @@ EXPORT_SYMBOL_GPL(account_system_vtime); */ void account_process_tick(struct task_struct *p, int user_tick) { - struct thread_info *ti = task_thread_info(p); - cputime_t delta_utime; - - if (ti->ac_utime) { - delta_utime = cycle_to_cputime(ti->ac_utime); - account_user_time(p, delta_utime, delta_utime); - ti->ac_utime = 0; - } + vtime_account_user(p); } #endif /* CONFIG_VIRT_CPU_ACCOUNTING */ diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 3a4a32b27208..384e63f3a4c4 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -26,6 +26,7 @@ #include <linux/ptrace.h> #include <linux/unistd.h> #include <linux/hardirq.h> +#include <linux/rcupdate.h> #include <asm/io.h> #include <asm/uaccess.h> @@ -82,6 +83,7 @@ void cpu_idle (void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) { void (*idle)(void) = pm_idle; @@ -90,6 +92,7 @@ void cpu_idle (void) idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index c488e3cfab53..ac2892e49c7c 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/reboot.h> #include <linux/init_task.h> #include <linux/mqueue.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/traps.h> @@ -75,8 +76,10 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) idle(); + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/m68k/platform/coldfire/clk.c b/arch/m68k/platform/coldfire/clk.c index 75f9ee967ea7..9cd13b4ce42b 100644 --- a/arch/m68k/platform/coldfire/clk.c +++ b/arch/m68k/platform/coldfire/clk.c @@ -146,9 +146,3 @@ struct clk_ops clk_ops1 = { }; #endif /* MCFPM_PPMCR1 */ #endif /* MCFPM_PPMCR0 */ - -struct clk *devm_clk_get(struct device *dev, const char *id) -{ - return NULL; -} -EXPORT_SYMBOL(devm_clk_get); diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index e7e03ecf5495..afc379ca3753 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -102,7 +102,7 @@ static void cmp_init_secondary(void) c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE; #endif #ifdef CONFIG_MIPS_MT_SMTC - c->tc_id = (read_c0_tcbind() >> TCBIND_CURTC_SHIFT) & TCBIND_CURTC; + c->tc_id = (read_c0_tcbind() & TCBIND_CURTC) >> TCBIND_CURTC_SHIFT; #endif } diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 33aadbcf170b..dcfd573871c1 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -152,6 +152,8 @@ static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; + if (PageTail(page)) + get_huge_page_tail(page); (*nr)++; page++; refs++; diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index 7b13a4caeea4..fea823f18479 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -273,16 +273,19 @@ asmlinkage void plat_irq_dispatch(void) unsigned int pending = read_c0_cause() & read_c0_status() & ST0_IM; int irq; + if (unlikely(!pending)) { + spurious_interrupt(); + return; + } + irq = irq_ffs(pending); if (irq == MIPSCPU_INT_I8259A) malta_hw0_irqdispatch(); else if (gic_present && ((1 << irq) & ipi_map[smp_processor_id()])) malta_ipi_irqdispatch(); - else if (irq >= 0) - do_IRQ(MIPS_CPU_IRQ_BASE + irq); else - spurious_interrupt(); + do_IRQ(MIPS_CPU_IRQ_BASE + irq); } #ifdef CONFIG_MIPS_MT_SMP diff --git a/arch/mips/mti-malta/malta-platform.c b/arch/mips/mti-malta/malta-platform.c index 4c35301720e7..80562b81f0f2 100644 --- a/arch/mips/mti-malta/malta-platform.c +++ b/arch/mips/mti-malta/malta-platform.c @@ -138,11 +138,6 @@ static int __init malta_add_devices(void) if (err) return err; - /* - * Set RTC to BCD mode to support current alarm code. - */ - CMOS_WRITE(CMOS_READ(RTC_CONTROL) & ~RTC_DM_BINARY, RTC_CONTROL); - return 0; } diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 7dab0cd36466..e9cceba193b6 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -25,6 +25,7 @@ #include <linux/err.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/io.h> @@ -107,6 +108,7 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ for (;;) { + rcu_idle_enter(); while (!need_resched()) { void (*idle)(void); @@ -121,6 +123,7 @@ void cpu_idle(void) } idle(); } + rcu_idle_exit(); schedule_preempt_disabled(); } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 2c05a9292a81..8c6b6b6561f0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -48,6 +48,7 @@ #include <linux/unistd.h> #include <linux/kallsyms.h> #include <linux/uaccess.h> +#include <linux/rcupdate.h> #include <asm/io.h> #include <asm/asm-offsets.h> @@ -69,8 +70,10 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) barrier(); + rcu_idle_exit(); schedule_preempt_disabled(); check_pgt_cache(); } diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index 1c1aadc8c48f..c32ae5ce9fff 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -1,10 +1,6 @@ addnote empty.c hack-coff -infblock.c -infblock.h -infcodes.c -infcodes.h inffast.c inffast.h inffixed.h diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 53b6dfa83344..54b73a28c205 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -386,6 +386,7 @@ extern unsigned long cpuidle_disable; enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ +extern void power7_nap(void); #ifdef CONFIG_PSERIES_IDLE extern void update_smt_snooze_delay(int snooze); diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 3b4b4a8da922..c1f267694acb 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -197,12 +197,6 @@ struct cpu_usage { DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); -#if defined(CONFIG_VIRT_CPU_ACCOUNTING) -#define account_process_vtime(tsk) account_process_tick(tsk, 0) -#else -#define account_process_vtime(tsk) do { } while (0) -#endif - extern void secondary_cpu_time_init(void); DECLARE_PER_CPU(u64, decrementers_next_tb); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 85b05c463fae..e8995727b1c1 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -76,6 +76,7 @@ int main(void) DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); + DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, dscr_inherit)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 5b25c8060fd6..a892680668d8 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -28,6 +28,8 @@ void doorbell_setup_this_cpu(void) void doorbell_cause_ipi(int cpu, unsigned long data) { + /* Order previous accesses vs. msgsnd, which is treated as a store */ + mb(); ppc_msgsnd(PPC_DBELL, 0, data); } diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 4b01a25e29ef..b40e0b4815b3 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -370,6 +370,12 @@ _GLOBAL(ret_from_fork) li r3,0 b syscall_exit + .section ".toc","aw" +DSCR_DEFAULT: + .tc dscr_default[TC],dscr_default + + .section ".text" + /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state @@ -509,9 +515,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) mr r1,r8 /* start using new stack pointer */ std r7,PACAKSAVE(r13) - ld r6,_CCR(r1) - mtcrf 0xFF,r6 - #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION ld r0,THREAD_VRSAVE(r4) @@ -520,14 +523,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_PPC64 BEGIN_FTR_SECTION + lwz r6,THREAD_DSCR_INHERIT(r4) + ld r7,DSCR_DEFAULT@toc(2) ld r0,THREAD_DSCR(r4) - cmpd r0,r25 - beq 1f + cmpwi r6,0 + bne 1f + ld r0,0(r7) +1: cmpd r0,r25 + beq 2f mtspr SPRN_DSCR,r0 -1: +2: END_FTR_SECTION_IFSET(CPU_FTR_DSCR) #endif + ld r6,_CCR(r1) + mtcrf 0xFF,r6 + /* r3-r13 are destroyed -- Cort */ REST_8GPRS(14, r1) REST_10GPRS(22, r1) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e894515e77bb..39aa97d3ff88 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -186,7 +186,7 @@ hardware_interrupt_hv: KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800) MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer) - MASKABLE_EXCEPTION_HV(0x980, 0x982, decrementer) + STD_EXCEPTION_HV(0x980, 0x982, hdecrementer) STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00) @@ -486,6 +486,7 @@ machine_check_common: STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt) + STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt) STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 7140d838339e..e11863f4e595 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -28,7 +28,9 @@ _GLOBAL(power7_idle) lwz r4,ADDROFF(powersave_nap)(r3) cmpwi 0,r4,0 beqlr + /* fall through */ +_GLOBAL(power7_nap) /* NAP is a state loss, we create a regs frame on the * stack, fill it up with the state we care about and * stick a pointer to it in PACAR1. We really only diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 710f400476de..e9cb51f5f801 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -514,9 +514,6 @@ struct task_struct *__switch_to(struct task_struct *prev, local_irq_save(flags); - account_system_vtime(current); - account_process_vtime(current); - /* * We can't take a PMU exception inside _switch() since there is a * window where the kernel stack SLB and the kernel stack are out @@ -802,16 +799,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #endif /* CONFIG_PPC_STD_MMU_64 */ #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_DSCR)) { - if (current->thread.dscr_inherit) { - p->thread.dscr_inherit = 1; - p->thread.dscr = current->thread.dscr; - } else if (0 != dscr_default) { - p->thread.dscr_inherit = 1; - p->thread.dscr = dscr_default; - } else { - p->thread.dscr_inherit = 0; - p->thread.dscr = 0; - } + p->thread.dscr_inherit = current->thread.dscr_inherit; + p->thread.dscr = current->thread.dscr; } #endif diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 0321007086f7..8d4214afc21d 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -198,8 +198,15 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) struct cpu_messages *info = &per_cpu(ipi_message, cpu); char *message = (char *)&info->messages; + /* + * Order previous accesses before accesses in the IPI handler. + */ + smp_mb(); message[msg] = 1; - mb(); + /* + * cause_ipi functions are required to include a full barrier + * before doing whatever causes the IPI. + */ smp_ops->cause_ipi(cpu, info->data); } @@ -211,7 +218,7 @@ irqreturn_t smp_ipi_demux(void) mb(); /* order any irq clear */ do { - all = xchg_local(&info->messages, 0); + all = xchg(&info->messages, 0); #ifdef __BIG_ENDIAN if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION))) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 3529446c2abd..8302af649219 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -194,6 +194,14 @@ static ssize_t show_dscr_default(struct device *dev, return sprintf(buf, "%lx\n", dscr_default); } +static void update_dscr(void *dummy) +{ + if (!current->thread.dscr_inherit) { + current->thread.dscr = dscr_default; + mtspr(SPRN_DSCR, dscr_default); + } +} + static ssize_t __used store_dscr_default(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -206,6 +214,8 @@ static ssize_t __used store_dscr_default(struct device *dev, return -EINVAL; dscr_default = val; + on_each_cpu(update_dscr, NULL, 1); + return count; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index be171ee73bf8..eaa9d0e6abca 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -291,13 +291,12 @@ static inline u64 calculate_stolen_time(u64 stop_tb) * Account time for a transition between system, hard irq * or soft irq state. */ -void account_system_vtime(struct task_struct *tsk) +static u64 vtime_delta(struct task_struct *tsk, + u64 *sys_scaled, u64 *stolen) { - u64 now, nowscaled, delta, deltascaled; - unsigned long flags; - u64 stolen, udelta, sys_scaled, user_scaled; + u64 now, nowscaled, deltascaled; + u64 udelta, delta, user_scaled; - local_irq_save(flags); now = mftb(); nowscaled = read_spurr(now); get_paca()->system_time += now - get_paca()->starttime; @@ -305,7 +304,7 @@ void account_system_vtime(struct task_struct *tsk) deltascaled = nowscaled - get_paca()->startspurr; get_paca()->startspurr = nowscaled; - stolen = calculate_stolen_time(now); + *stolen = calculate_stolen_time(now); delta = get_paca()->system_time; get_paca()->system_time = 0; @@ -322,35 +321,45 @@ void account_system_vtime(struct task_struct *tsk) * the user ticks get saved up in paca->user_time_scaled to be * used by account_process_tick. */ - sys_scaled = delta; + *sys_scaled = delta; user_scaled = udelta; if (deltascaled != delta + udelta) { if (udelta) { - sys_scaled = deltascaled * delta / (delta + udelta); - user_scaled = deltascaled - sys_scaled; + *sys_scaled = deltascaled * delta / (delta + udelta); + user_scaled = deltascaled - *sys_scaled; } else { - sys_scaled = deltascaled; + *sys_scaled = deltascaled; } } get_paca()->user_time_scaled += user_scaled; - if (in_interrupt() || idle_task(smp_processor_id()) != tsk) { - account_system_time(tsk, 0, delta, sys_scaled); - if (stolen) - account_steal_time(stolen); - } else { - account_idle_time(delta + stolen); - } - local_irq_restore(flags); + return delta; +} + +void vtime_account_system(struct task_struct *tsk) +{ + u64 delta, sys_scaled, stolen; + + delta = vtime_delta(tsk, &sys_scaled, &stolen); + account_system_time(tsk, 0, delta, sys_scaled); + if (stolen) + account_steal_time(stolen); +} + +void vtime_account_idle(struct task_struct *tsk) +{ + u64 delta, sys_scaled, stolen; + + delta = vtime_delta(tsk, &sys_scaled, &stolen); + account_idle_time(delta + stolen); } -EXPORT_SYMBOL_GPL(account_system_vtime); /* * Transfer the user and system times accumulated in the paca * by the exception entry and exit code to the generic process * user and system time records. * Must be called with interrupts disabled. - * Assumes that account_system_vtime() has been called recently + * Assumes that vtime_account() has been called recently * (i.e. since the last entry from usermode) so that * get_paca()->user_time_scaled is up to date. */ @@ -366,6 +375,12 @@ void account_process_tick(struct task_struct *tsk, int user_tick) account_user_time(tsk, utime, utimescaled); } +void vtime_task_switch(struct task_struct *prev) +{ + vtime_account(prev); + account_process_tick(prev, 0); +} + #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ #define calc_cputime_factors() #endif @@ -535,6 +550,15 @@ void timer_interrupt(struct pt_regs * regs) trace_timer_interrupt_exit(regs); } +/* + * Hypervisor decrementer interrupts shouldn't occur but are sometimes + * left pending on exit from a KVM guest. We don't need to do anything + * to clear them, as they are edge-triggered. + */ +void hdec_interrupt(struct pt_regs *regs) +{ +} + #ifdef CONFIG_SUSPEND static void generic_suspend_disable_irqs(void) { diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 158972341a2d..ae0843fa7a61 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -972,8 +972,9 @@ static int emulate_instruction(struct pt_regs *regs) cpu_has_feature(CPU_FTR_DSCR)) { PPC_WARN_EMULATED(mtdscr, regs); rd = (instword >> 21) & 0x1f; - mtspr(SPRN_DSCR, regs->gpr[rd]); + current->thread.dscr = regs->gpr[rd]; current->thread.dscr_inherit = 1; + mtspr(SPRN_DSCR, current->thread.dscr); return 0; } #endif diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index dd223b3eb333..17e5b2364312 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -20,7 +20,7 @@ int patch_instruction(unsigned int *addr, unsigned int instr) { int err; - err = __put_user(instr, addr); + __put_user_size(instr, addr, 4, err); if (err) return err; asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr)); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 39b159751c35..59213cfaeca9 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1436,11 +1436,11 @@ static long vphn_get_associativity(unsigned long cpu, /* * Update the node maps and sysfs entries for each cpu whose home node - * has changed. + * has changed. Returns 1 when the topology has changed, and 0 otherwise. */ int arch_update_cpu_topology(void) { - int cpu, nid, old_nid; + int cpu, nid, old_nid, changed = 0; unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; struct device *dev; @@ -1466,9 +1466,10 @@ int arch_update_cpu_topology(void) dev = get_cpu_device(cpu); if (dev) kobject_uevent(&dev->kobj, KOBJ_CHANGE); + changed = 1; } - return 1; + return changed; } static void topology_work_fn(struct work_struct *work) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 30fd01de6bed..72afd2888cad 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -1,6 +1,7 @@ config PPC64 bool "64-bit kernel" default n + select HAVE_VIRT_CPU_ACCOUNTING help This option selects whether a 32-bit or a 64-bit kernel will be built. @@ -337,21 +338,6 @@ config PPC_MM_SLICES default y if (!PPC_FSL_BOOK3E && PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES) default n -config VIRT_CPU_ACCOUNTING - bool "Deterministic task and CPU time accounting" - depends on PPC64 - default y - help - Select this option to enable more accurate task and CPU time - accounting. This is done by reading a CPU counter on each - kernel entry and exit and on transitions within the kernel - between system, softirq and hardirq state, so there is a - small performance impact. This also enables accounting of - stolen time on logically-partitioned systems running on - IBM POWER5-based machines. - - If in doubt, say Y here. - config PPC_HAVE_PMU_SUPPORT bool diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 3ef46254c35b..7698b6e13c57 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -106,14 +106,6 @@ static void pnv_smp_cpu_kill_self(void) { unsigned int cpu; - /* If powersave_nap is enabled, use NAP mode, else just - * spin aimlessly - */ - if (!powersave_nap) { - generic_mach_cpu_die(); - return; - } - /* Standard hot unplug procedure */ local_irq_disable(); idle_task_exit(); @@ -128,7 +120,7 @@ static void pnv_smp_cpu_kill_self(void) */ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); while (!generic_check_cpu_restart(cpu)) { - power7_idle(); + power7_nap(); if (!generic_check_cpu_restart(cpu)) { DBG("CPU%d Unexpected exit while offline !\n", cpu); /* We may be getting an IPI, so we re-enable diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c index 14469cf9df68..df0fc5821469 100644 --- a/arch/powerpc/sysdev/xics/icp-hv.c +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -65,7 +65,11 @@ static inline void icp_hv_set_xirr(unsigned int value) static inline void icp_hv_set_qirr(int n_cpu , u8 value) { int hw_cpu = get_hard_smp_processor_id(n_cpu); - long rc = plpar_hcall_norets(H_IPI, hw_cpu, value); + long rc; + + /* Make sure all previous accesses are ordered before IPI sending */ + mb(); + rc = plpar_hcall_norets(H_IPI, hw_cpu, value); if (rc != H_SUCCESS) { pr_err("%s: bad return code qirr cpu=%d hw_cpu=%d mfrr=0x%x " "returned %ld\n", __func__, n_cpu, hw_cpu, value, rc); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 107610e01a29..f5ab543396da 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -49,9 +49,6 @@ config GENERIC_LOCKBREAK config PGSTE def_bool y if KVM -config VIRT_CPU_ACCOUNTING - def_bool y - config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y @@ -89,6 +86,8 @@ config S390 select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_CMPXCHG_LOCAL + select HAVE_VIRT_CPU_ACCOUNTING + select VIRT_CPU_ACCOUNTING select ARCH_DISCARD_MEMBLOCK select BUILDTIME_EXTABLE_SORT select ARCH_INLINE_SPIN_TRYLOCK diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 8709bdef233c..023d5ae24482 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -12,6 +12,9 @@ #include <linux/spinlock.h> #include <asm/div64.h> + +#define __ARCH_HAS_VTIME_ACCOUNT + /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ typedef unsigned long long __nocast cputime_t; diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 799ed0f1643d..2d6e6e380564 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -66,16 +66,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return pte; } -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - pte_t pte = huge_ptep_get(ptep); - - mm->context.flush_mm = 1; - pmd_clear((pmd_t *) ptep); - return pte; -} - static inline void __pmd_csp(pmd_t *pmdp) { register unsigned long reg2 asm("2") = pmd_val(*pmdp); @@ -117,6 +107,15 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, __pmd_csp(pmdp); } +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte = huge_ptep_get(ptep); + + huge_ptep_invalidate(mm, addr, ptep); + return pte; +} + #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ ({ \ int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \ @@ -131,10 +130,7 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, ({ \ pte_t __pte = huge_ptep_get(__ptep); \ if (pte_write(__pte)) { \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - huge_ptep_invalidate(__mm, __addr, __ptep); \ + huge_ptep_invalidate(__mm, __addr, __ptep); \ set_huge_pte_at(__mm, __addr, __ptep, \ huge_pte_wrprotect(__pte)); \ } \ diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index f223068b7822..314cc9426fc4 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -89,12 +89,8 @@ static inline void restore_access_regs(unsigned int *acrs) prev = __switch_to(prev,next); \ } while (0) -extern void account_vtime(struct task_struct *, struct task_struct *); -extern void account_tick_vtime(struct task_struct *); - #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ - account_vtime(prev, current); \ } while (0) #endif /* __ASM_SWITCH_TO_H */ diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 9fde315f3a7c..1d8fe2b17ef6 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -90,12 +90,10 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) static inline void __tlb_flush_mm_cond(struct mm_struct * mm) { - spin_lock(&mm->page_table_lock); if (mm->context.flush_mm) { __tlb_flush_mm(mm); mm->context.flush_mm = 0; } - spin_unlock(&mm->page_table_lock); } /* diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f86c81e13c37..40b57693de38 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -974,11 +974,13 @@ static void __init setup_hwcaps(void) if (MACHINE_HAS_HPAGE) elf_hwcap |= HWCAP_S390_HPAGE; +#if defined(CONFIG_64BIT) /* * 64-bit register support for 31-bit processes * HWCAP_S390_HIGH_GPRS is bit 9. */ elf_hwcap |= HWCAP_S390_HIGH_GPRS; +#endif get_cpu_id(&cpu_id); switch (cpu_id.machine) { diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 4fc97b40a6e1..cb5093c26d16 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -99,7 +99,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) return virt_timer_forward(user + system); } -void account_vtime(struct task_struct *prev, struct task_struct *next) +void vtime_task_switch(struct task_struct *prev) { struct thread_info *ti; @@ -107,7 +107,7 @@ void account_vtime(struct task_struct *prev, struct task_struct *next) ti = task_thread_info(prev); ti->user_timer = S390_lowcore.user_timer; ti->system_timer = S390_lowcore.system_timer; - ti = task_thread_info(next); + ti = task_thread_info(current); S390_lowcore.user_timer = ti->user_timer; S390_lowcore.system_timer = ti->system_timer; } @@ -122,7 +122,7 @@ void account_process_tick(struct task_struct *tsk, int user_tick) * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void account_system_vtime(struct task_struct *tsk) +void vtime_account(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); u64 timer, system; @@ -138,7 +138,7 @@ void account_system_vtime(struct task_struct *tsk) virt_timer_forward(system); } -EXPORT_SYMBOL_GPL(account_system_vtime); +EXPORT_SYMBOL_GPL(vtime_account); void __kprobes vtime_stop_cpu(void) { diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 60ee2b883797..2d37bb861faf 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -2,69 +2,82 @@ * User access functions based on page table walks for enhanced * system layout without hardware support. * - * Copyright IBM Corp. 2006 + * Copyright IBM Corp. 2006, 2012 * Author(s): Gerald Schaefer (gerald.schaefer@de.ibm.com) */ #include <linux/errno.h> #include <linux/hardirq.h> #include <linux/mm.h> +#include <linux/hugetlb.h> #include <asm/uaccess.h> #include <asm/futex.h> #include "uaccess.h" -static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr) + +/* + * Returns kernel address for user virtual address. If the returned address is + * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address + * contains the (negative) exception code. + */ +static __always_inline unsigned long follow_table(struct mm_struct *mm, + unsigned long addr, int write) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; + pte_t *ptep; pgd = pgd_offset(mm, addr); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - return (pte_t *) 0x3a; + return -0x3aUL; pud = pud_offset(pgd, addr); if (pud_none(*pud) || unlikely(pud_bad(*pud))) - return (pte_t *) 0x3b; + return -0x3bUL; pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - return (pte_t *) 0x10; + if (pmd_none(*pmd)) + return -0x10UL; + if (pmd_huge(*pmd)) { + if (write && (pmd_val(*pmd) & _SEGMENT_ENTRY_RO)) + return -0x04UL; + return (pmd_val(*pmd) & HPAGE_MASK) + (addr & ~HPAGE_MASK); + } + if (unlikely(pmd_bad(*pmd))) + return -0x10UL; + + ptep = pte_offset_map(pmd, addr); + if (!pte_present(*ptep)) + return -0x11UL; + if (write && !pte_write(*ptep)) + return -0x04UL; - return pte_offset_map(pmd, addr); + return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK); } static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, size_t n, int write_user) { struct mm_struct *mm = current->mm; - unsigned long offset, pfn, done, size; - pte_t *pte; + unsigned long offset, done, size, kaddr; void *from, *to; done = 0; retry: spin_lock(&mm->page_table_lock); do { - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) + kaddr = follow_table(mm, uaddr, write_user); + if (IS_ERR_VALUE(kaddr)) goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; - goto fault; - } else if (write_user && !pte_write(*pte)) { - pte = (pte_t *) 0x04; - goto fault; - } - pfn = pte_pfn(*pte); - offset = uaddr & (PAGE_SIZE - 1); + offset = uaddr & ~PAGE_MASK; size = min(n - done, PAGE_SIZE - offset); if (write_user) { - to = (void *)((pfn << PAGE_SHIFT) + offset); + to = (void *) kaddr; from = kptr + done; } else { - from = (void *)((pfn << PAGE_SHIFT) + offset); + from = (void *) kaddr; to = kptr + done; } memcpy(to, from, size); @@ -75,7 +88,7 @@ retry: return n - done; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, (unsigned long) pte, write_user)) + if (__handle_fault(uaddr, -kaddr, write_user)) return n - done; goto retry; } @@ -84,27 +97,22 @@ fault: * Do DAT for user address by page table walk, return kernel address. * This function needs to be called with current->mm->page_table_lock held. */ -static __always_inline unsigned long __dat_user_addr(unsigned long uaddr) +static __always_inline unsigned long __dat_user_addr(unsigned long uaddr, + int write) { struct mm_struct *mm = current->mm; - unsigned long pfn; - pte_t *pte; + unsigned long kaddr; int rc; retry: - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) - goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; + kaddr = follow_table(mm, uaddr, write); + if (IS_ERR_VALUE(kaddr)) goto fault; - } - pfn = pte_pfn(*pte); - return (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1)); + return kaddr; fault: spin_unlock(&mm->page_table_lock); - rc = __handle_fault(uaddr, (unsigned long) pte, 0); + rc = __handle_fault(uaddr, -kaddr, write); spin_lock(&mm->page_table_lock); if (!rc) goto retry; @@ -159,11 +167,9 @@ static size_t clear_user_pt(size_t n, void __user *to) static size_t strnlen_user_pt(size_t count, const char __user *src) { - char *addr; unsigned long uaddr = (unsigned long) src; struct mm_struct *mm = current->mm; - unsigned long offset, pfn, done, len; - pte_t *pte; + unsigned long offset, done, len, kaddr; size_t len_str; if (segment_eq(get_fs(), KERNEL_DS)) @@ -172,19 +178,13 @@ static size_t strnlen_user_pt(size_t count, const char __user *src) retry: spin_lock(&mm->page_table_lock); do { - pte = follow_table(mm, uaddr); - if ((unsigned long) pte < 0x1000) - goto fault; - if (!pte_present(*pte)) { - pte = (pte_t *) 0x11; + kaddr = follow_table(mm, uaddr, 0); + if (IS_ERR_VALUE(kaddr)) goto fault; - } - pfn = pte_pfn(*pte); - offset = uaddr & (PAGE_SIZE-1); - addr = (char *)(pfn << PAGE_SHIFT) + offset; + offset = uaddr & ~PAGE_MASK; len = min(count - done, PAGE_SIZE - offset); - len_str = strnlen(addr, len); + len_str = strnlen((char *) kaddr, len); done += len_str; uaddr += len_str; } while ((len_str == len) && (done < count)); @@ -192,7 +192,7 @@ retry: return done + 1; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, (unsigned long) pte, 0)) + if (__handle_fault(uaddr, -kaddr, 0)) return 0; goto retry; } @@ -225,11 +225,10 @@ static size_t copy_in_user_pt(size_t n, void __user *to, const void __user *from) { struct mm_struct *mm = current->mm; - unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to, - uaddr, done, size, error_code; + unsigned long offset_max, uaddr, done, size, error_code; unsigned long uaddr_from = (unsigned long) from; unsigned long uaddr_to = (unsigned long) to; - pte_t *pte_from, *pte_to; + unsigned long kaddr_to, kaddr_from; int write_user; if (segment_eq(get_fs(), KERNEL_DS)) { @@ -242,38 +241,23 @@ retry: do { write_user = 0; uaddr = uaddr_from; - pte_from = follow_table(mm, uaddr_from); - error_code = (unsigned long) pte_from; - if (error_code < 0x1000) - goto fault; - if (!pte_present(*pte_from)) { - error_code = 0x11; + kaddr_from = follow_table(mm, uaddr_from, 0); + error_code = kaddr_from; + if (IS_ERR_VALUE(error_code)) goto fault; - } write_user = 1; uaddr = uaddr_to; - pte_to = follow_table(mm, uaddr_to); - error_code = (unsigned long) pte_to; - if (error_code < 0x1000) - goto fault; - if (!pte_present(*pte_to)) { - error_code = 0x11; + kaddr_to = follow_table(mm, uaddr_to, 1); + error_code = (unsigned long) kaddr_to; + if (IS_ERR_VALUE(error_code)) goto fault; - } else if (!pte_write(*pte_to)) { - error_code = 0x04; - goto fault; - } - pfn_from = pte_pfn(*pte_from); - pfn_to = pte_pfn(*pte_to); - offset_from = uaddr_from & (PAGE_SIZE-1); - offset_to = uaddr_from & (PAGE_SIZE-1); - offset_max = max(offset_from, offset_to); + offset_max = max(uaddr_from & ~PAGE_MASK, + uaddr_to & ~PAGE_MASK); size = min(n - done, PAGE_SIZE - offset_max); - memcpy((void *)(pfn_to << PAGE_SHIFT) + offset_to, - (void *)(pfn_from << PAGE_SHIFT) + offset_from, size); + memcpy((void *) kaddr_to, (void *) kaddr_from, size); done += size; uaddr_from += size; uaddr_to += size; @@ -282,7 +266,7 @@ retry: return n - done; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(uaddr, error_code, write_user)) + if (__handle_fault(uaddr, -error_code, write_user)) return n - done; goto retry; } @@ -341,7 +325,7 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) return __futex_atomic_op_pt(op, uaddr, oparg, old); spin_lock(¤t->mm->page_table_lock); uaddr = (u32 __force __user *) - __dat_user_addr((__force unsigned long) uaddr); + __dat_user_addr((__force unsigned long) uaddr, 1); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; @@ -378,7 +362,7 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); spin_lock(¤t->mm->page_table_lock); uaddr = (u32 __force __user *) - __dat_user_addr((__force unsigned long) uaddr); + __dat_user_addr((__force unsigned long) uaddr, 1); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index a1e9d69a9c90..584b93674ea4 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -169,7 +169,7 @@ static ssize_t hw_interval_write(struct file *file, char const __user *buf, if (*offset) return -EINVAL; retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval) + if (retval <= 0) return retval; if (val < oprofile_min_interval) oprofile_hw_interval = oprofile_min_interval; @@ -212,7 +212,7 @@ static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, return -EINVAL; retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval) + if (retval <= 0) return retval; if (val != 0) return -EINVAL; @@ -243,7 +243,7 @@ static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, return -EINVAL; retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval) + if (retval <= 0) return retval; if (val != 0 && val != 1) @@ -278,7 +278,7 @@ static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, return -EINVAL; retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval) + if (retval <= 0) return retval; if (val != 0 && val != 1) @@ -317,7 +317,7 @@ static ssize_t timer_enabled_write(struct file *file, char const __user *buf, return -EINVAL; retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval) + if (retval <= 0) return retval; if (val != 0 && val != 1) diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index 2707023c7563..637970cfd3f4 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -27,6 +27,7 @@ #include <linux/reboot.h> #include <linux/elfcore.h> #include <linux/pm.h> +#include <linux/rcupdate.h> void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); @@ -50,9 +51,10 @@ void __noreturn cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) barrier(); - + rcu_idle_exit(); schedule_preempt_disabled(); } } diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S index b7cf6a547f11..7e605b95592a 100644 --- a/arch/sh/kernel/cpu/sh5/entry.S +++ b/arch/sh/kernel/cpu/sh5/entry.S @@ -933,7 +933,7 @@ ret_with_reschedule: pta restore_all, tr1 - movi _TIF_SIGPENDING, r8 + movi (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME), r8 and r8, r7, r8 pta work_notifysig, tr0 bne r8, ZERO, tr0 diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index f67601cb3f1f..b96489d8b27d 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -139,7 +139,7 @@ work_pending: ! r8: current_thread_info ! t: result of "tst #_TIF_NEED_RESCHED, r0" bf/s work_resched - tst #_TIF_SIGPENDING, r0 + tst #(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME), r0 work_notifysig: bt/s __restore_all mov r15, r4 diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 15e0a1693976..f1ddc0d23679 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -48,9 +48,7 @@ void *module_alloc(unsigned long size) return NULL; ret = module_map(size); - if (!ret) - ret = ERR_PTR(-ENOMEM); - else + if (ret) memset(ret, 0, size); return ret; @@ -116,6 +114,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, v = sym->st_value + rel[i].r_addend; switch (ELF_R_TYPE(rel[i].r_info) & 0xff) { + case R_SPARC_DISP32: + v -= (Elf_Addr) location; + *loc32 = v; + break; #ifdef CONFIG_SPARC64 case R_SPARC_64: location[0] = v >> 56; @@ -128,11 +130,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, location[7] = v >> 0; break; - case R_SPARC_DISP32: - v -= (Elf_Addr) location; - *loc32 = v; - break; - case R_SPARC_WDISP19: v -= (Elf_Addr) location; *loc32 = (*loc32 & ~0x7ffff) | diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index 7a7ce390534f..d5e86c9f74fd 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h @@ -69,7 +69,6 @@ static inline const struct cpumask *cpumask_of_node(int node) | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ | 0*SD_WAKE_AFFINE \ - | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h index 15fb77992083..58105c31228b 100644 --- a/arch/tile/include/gxio/iorpc_trio.h +++ b/arch/tile/include/gxio/iorpc_trio.h @@ -25,21 +25,23 @@ #include <linux/module.h> #include <asm/pgtable.h> -#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400) +#define GXIO_TRIO_OP_DEALLOC_ASID IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1400) +#define GXIO_TRIO_OP_ALLOC_ASIDS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1401) -#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1402) +#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1404) -#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140e) -#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140f) +#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1412) -#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1417) -#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1418) -#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1419) -#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x141a) +#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1414) -#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141c) -#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141d) -#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e) +#define GXIO_TRIO_OP_INIT_MEMORY_MAP_MMU_AUX IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141e) +#define GXIO_TRIO_OP_GET_PORT_PROPERTY IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x141f) +#define GXIO_TRIO_OP_CONFIG_LEGACY_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1420) +#define GXIO_TRIO_OP_CONFIG_MSI_INTR IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1421) + +#define GXIO_TRIO_OP_SET_MPS_MRS IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1423) +#define GXIO_TRIO_OP_FORCE_RC_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1424) +#define GXIO_TRIO_OP_FORCE_EP_LINK_UP IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x1425) #define GXIO_TRIO_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) #define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 664a60e8dfb4..c17de0db6736 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -705,6 +705,7 @@ static void stack_proc(void *arg) struct task_struct *from = current, *to = arg; to->thread.saved_task = from; + rcu_switch(from, to); switch_to(from, to, from); } diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 69f1c57a8d0d..33a6a2423bd2 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -20,14 +20,6 @@ struct mm_struct; struct thread_struct { struct task_struct *saved_task; - /* - * This flag is set to 1 before calling do_fork (and analyzed in - * copy_thread) to mark that we are begin called from userspace (fork / - * vfork / clone), and reset to 0 after. It is left to 0 when called - * from kernelspace (i.e. kernel_thread() or fork_idle(), - * as of 2.6.11). - */ - int forking; struct pt_regs regs; int singlestep_syscall; void *fault_addr; @@ -58,7 +50,6 @@ struct thread_struct { #define INIT_THREAD \ { \ - .forking = 0, \ .regs = EMPTY_REGS, \ .fault_addr = NULL, \ .prev_sched = NULL, \ diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 40db8f71deae..2df313b6a586 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -7,16 +7,6 @@ DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); -DEFINE_STR(UM_KERN_EMERG, KERN_EMERG); -DEFINE_STR(UM_KERN_ALERT, KERN_ALERT); -DEFINE_STR(UM_KERN_CRIT, KERN_CRIT); -DEFINE_STR(UM_KERN_ERR, KERN_ERR); -DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); -DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); -DEFINE_STR(UM_KERN_INFO, KERN_INFO); -DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); -DEFINE_STR(UM_KERN_CONT, KERN_CONT); - DEFINE(UM_ELF_CLASS, ELF_CLASS); DEFINE(UM_ELFCLASS32, ELFCLASS32); DEFINE(UM_ELFCLASS64, ELFCLASS64); diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index 4fa82c055aab..cef068563336 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -26,6 +26,17 @@ extern void panic(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +/* Requires preincluding include/linux/kern_levels.h */ +#define UM_KERN_EMERG KERN_EMERG +#define UM_KERN_ALERT KERN_ALERT +#define UM_KERN_CRIT KERN_CRIT +#define UM_KERN_ERR KERN_ERR +#define UM_KERN_WARNING KERN_WARNING +#define UM_KERN_NOTICE KERN_NOTICE +#define UM_KERN_INFO KERN_INFO +#define UM_KERN_DEBUG KERN_DEBUG +#define UM_KERN_CONT KERN_CONT + #ifdef UML_CONFIG_PRINTK extern int printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 6cade9366364..8c82786da823 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -39,34 +39,21 @@ void flush_thread(void) void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { + get_safe_registers(regs->regs.gp, regs->regs.fp); PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; -} -EXPORT_SYMBOL(start_thread); - -static long execve1(const char *file, - const char __user *const __user *argv, - const char __user *const __user *env) -{ - long error; - - error = do_execve(file, argv, env, ¤t->thread.regs); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; + current->ptrace &= ~PT_DTRACE; #ifdef SUBARCH_EXECVE1 - SUBARCH_EXECVE1(¤t->thread.regs.regs); + SUBARCH_EXECVE1(regs->regs); #endif - task_unlock(current); - } - return error; } +EXPORT_SYMBOL(start_thread); long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env) { long err; - err = execve1(file, argv, env); + err = do_execve(file, argv, env, ¤t->thread.regs); if (!err) UML_LONGJMP(current->thread.exec_buf, 1); return err; @@ -81,7 +68,7 @@ long sys_execve(const char __user *file, const char __user *const __user *argv, filename = getname(file); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = execve1(filename, argv, env); + error = do_execve(filename, argv, env, ¤t->thread.regs); putname(filename); out: return error; diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 57fc7028714a..c5f5afa50745 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -181,11 +181,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, struct pt_regs *regs) { void (*handler)(void); + int kthread = current->flags & PF_KTHREAD; int ret = 0; p->thread = (struct thread_struct) INIT_THREAD; - if (current->thread.forking) { + if (!kthread) { memcpy(&p->thread.regs.regs, ®s->regs, sizeof(p->thread.regs.regs)); PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0); @@ -195,8 +196,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, handler = fork_handler; arch_copy_thread(¤t->thread.arch, &p->thread.arch); - } - else { + } else { get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); p->thread.request.u.thread = current->thread.request.u.thread; handler = new_thread_handler; @@ -204,7 +204,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, new_thread(task_stack_page(p), &p->thread.switch_buf, handler); - if (current->thread.forking) { + if (!kthread) { clear_flushed_tls(p); /* diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 7362d58efc29..cc9c2350e417 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -22,9 +22,13 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, struct k_sigaction *ka, siginfo_t *info) { sigset_t *oldset = sigmask_to_save(); + int singlestep = 0; unsigned long sp; int err; + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + singlestep = 1; + /* Did we come from a system call? */ if (PT_REGS_SYSCALL_NR(regs) >= 0) { /* If so, check system call restarting.. */ @@ -61,7 +65,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, if (err) force_sigsegv(signr, current); else - signal_delivered(signr, info, ka, regs, 0); + signal_delivered(signr, info, ka, regs, singlestep); } static int kern_do_signal(struct pt_regs *regs) diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index f958cb876ee3..a4c6d8eee74c 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -17,25 +17,25 @@ long sys_fork(void) { - long ret; - - current->thread.forking = 1; - ret = do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), + return do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; } long sys_vfork(void) { - long ret; - - current->thread.forking = 1; - ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, UPT_SP(¤t->thread.regs.regs), ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; +} + +long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid) +{ + if (!newsp) + newsp = UPT_SP(¤t->thread.regs.regs); + + return do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, + child_tid); } long old_mmap(unsigned long addr, unsigned long len, diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index f60238559af3..0748fe0c8a73 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -114,7 +114,7 @@ static void deliver_alarm(void) skew += this_tick - last_tick; while (skew >= one_tick) { - alarm_handler(SIGVTALRM, NULL); + alarm_handler(SIGVTALRM, NULL, NULL); skew -= one_tick; } diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules index d50270d26b42..15889df9b466 100644 --- a/arch/um/scripts/Makefile.rules +++ b/arch/um/scripts/Makefile.rules @@ -8,7 +8,7 @@ USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) $(USER_OBJS:.o=.%): \ - c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include user.h $(CFLAGS_$(basetarget).o) + c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include $(srctree)/include/linux/kern_levels.h -include user.h $(CFLAGS_$(basetarget).o) # These are like USER_OBJS but filter USER_CFLAGS through unprofile instead of # using it directly. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ec3a1aa4abd..57fecc1db94d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -7,11 +7,13 @@ config 64BIT Say no to build a 32-bit kernel - formerly known as i386 config X86_32 - def_bool !64BIT + def_bool y + depends on !64BIT select CLKSRC_I8253 config X86_64 - def_bool 64BIT + def_bool y + depends on 64BIT select X86_DEV_DMA_OPS ### Arch settings @@ -36,6 +38,7 @@ config X86 select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FENTRY if X86_64 select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER @@ -60,6 +63,8 @@ config X86 select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS select HAVE_PERF_EVENTS_NMI + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select ANON_INODES select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 select HAVE_CMPXCHG_LOCAL if !M386 @@ -97,9 +102,12 @@ config X86 select KTIME_SCALAR if X86_32 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER + select HAVE_RCU_USER_QS if X86_64 + select HAVE_IRQ_TIME_ACCOUNTING config INSTRUCTION_DECODER - def_bool (KPROBES || PERF_EVENTS || UPROBES) + def_bool y + depends on KPROBES || PERF_EVENTS || UPROBES config OUTPUT_FORMAT string @@ -127,13 +135,15 @@ config SBUS bool config NEED_DMA_MAP_STATE - def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG) + def_bool y + depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG config NEED_SG_DMA_LENGTH def_bool y config GENERIC_ISA_DMA - def_bool ISA_DMA_API + def_bool y + depends on ISA_DMA_API config GENERIC_BUG def_bool y @@ -150,13 +160,16 @@ config GENERIC_GPIO bool config ARCH_MAY_HAVE_PC_FDC - def_bool ISA_DMA_API + def_bool y + depends on ISA_DMA_API config RWSEM_GENERIC_SPINLOCK - def_bool !X86_XADD + def_bool y + depends on !X86_XADD config RWSEM_XCHGADD_ALGORITHM - def_bool X86_XADD + def_bool y + depends on X86_XADD config GENERIC_CALIBRATE_DELAY def_bool y @@ -746,13 +759,14 @@ config SWIOTLB def_bool y if X86_64 ---help--- Support for software bounce buffers used on x86-64 systems - which don't have a hardware IOMMU (e.g. the current generation - of Intel's x86-64 CPUs). Using this PCI devices which can only - access 32-bits of memory can be used on systems with more than - 3 GB of memory. If unsure, say Y. + which don't have a hardware IOMMU. Using this PCI devices + which can only access 32-bits of memory can be used on systems + with more than 3 GB of memory. + If unsure, say Y. config IOMMU_HELPER - def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) + def_bool y + depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" @@ -796,17 +810,6 @@ config SCHED_MC making when dealing with multi-core CPU chips at a cost of slightly increased overhead in some places. If unsure say N here. -config IRQ_TIME_ACCOUNTING - bool "Fine granularity task level IRQ time accounting" - default n - ---help--- - Select this option to enable fine granularity task irq time - accounting. This is done by reading a timestamp on each - transitions between softirq and hardirq state, so there can be a - small performance impact. - - If in doubt, say N here. - source "kernel/Kconfig.preempt" config X86_UP_APIC @@ -1159,10 +1162,12 @@ config X86_PAE consumes more pagetable space per process. config ARCH_PHYS_ADDR_T_64BIT - def_bool X86_64 || X86_PAE + def_bool y + depends on X86_64 || X86_PAE config ARCH_DMA_ADDR_T_64BIT - def_bool X86_64 || HIGHMEM64G + def_bool y + depends on X86_64 || HIGHMEM64G config DIRECT_GBPAGES bool "Enable 1GB pages for kernel pagetables" if EXPERT @@ -1285,8 +1290,8 @@ config ARCH_SELECT_MEMORY_MODEL depends on ARCH_SPARSEMEM_ENABLE config ARCH_MEMORY_PROBE - def_bool X86_64 - depends on MEMORY_HOTPLUG + def_bool y + depends on X86_64 && MEMORY_HOTPLUG config ARCH_PROC_KCORE_TEXT def_bool y @@ -1975,7 +1980,6 @@ config PCI_MMCONFIG config PCI_CNB20LE_QUIRK bool "Read CNB20LE Host Bridge Windows" if EXPERT - default n depends on PCI && EXPERIMENTAL help Read the PCI windows out of the CNB20LE host bridge. This allows @@ -2186,18 +2190,18 @@ config COMPAT depends on IA32_EMULATION || X86_X32 select ARCH_WANT_OLD_COMPAT_IPC +if COMPAT config COMPAT_FOR_U64_ALIGNMENT - def_bool COMPAT - depends on X86_64 + def_bool y config SYSVIPC_COMPAT def_bool y - depends on COMPAT && SYSVIPC + depends on SYSVIPC config KEYS_COMPAT - bool - depends on COMPAT && KEYS - default y + def_bool y + depends on KEYS +endif endmenu diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 706e12e9984b..f3b86d0df44e 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -306,7 +306,8 @@ config X86_INTERNODE_CACHE_SHIFT default X86_L1_CACHE_SHIFT config X86_CMPXCHG - def_bool X86_64 || (X86_32 && !M386) + def_bool y + depends on X86_64 || (X86_32 && !M386) config X86_L1_CACHE_SHIFT int @@ -317,7 +318,7 @@ config X86_L1_CACHE_SHIFT config X86_XADD def_bool y - depends on X86_64 || !M386 + depends on !M386 config X86_PPRO_FENCE bool "PentiumPro memory ordering errata workaround" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 682e9c210baa..474ca35b1bce 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -142,7 +142,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) -archscripts: +archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs ### diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b4e15dd6786a..2a017441b8b2 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -32,10 +32,6 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ #define SVGA_MODE ASK_VGA #endif -#ifndef RAMDISK -#define RAMDISK 0 -#endif - #ifndef ROOT_RDONLY #define ROOT_RDONLY 1 #endif diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 119db67dcb03..5598547281a7 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y @@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y CONFIG_SUN_PARTITION=y CONFIG_KARMA_PARTITION=y CONFIG_EFI_PARTITION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_X86_GENERIC=y CONFIG_HPET_TIMER=y @@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y @@ -231,8 +229,6 @@ CONFIG_SND_HRTIMER=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_NTRIG=y @@ -243,11 +239,11 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -262,10 +258,9 @@ CONFIG_RTC_CLASS=y CONFIG_DMADEVICES=y CONFIG_EEEPC_LAPTOP=y CONFIG_EFI_VARS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -299,13 +293,11 @@ CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_STACK_USAGE=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS_DEBUG_PROC_KEYS=y @@ -316,4 +308,3 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_CRYPTO_AES_586=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 76eb2903809f..671524d0f6c0 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y @@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y CONFIG_SUN_PARTITION=y CONFIG_KARMA_PARTITION=y CONFIG_EFI_PARTITION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_CALGARY_IOMMU=y CONFIG_NR_CPUS=64 @@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y @@ -227,8 +225,6 @@ CONFIG_SND_HRTIMER=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_NTRIG=y @@ -239,11 +235,11 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -262,10 +258,9 @@ CONFIG_AMD_IOMMU_STATS=y CONFIG_INTEL_IOMMU=y # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_EFI_VARS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -298,13 +292,11 @@ CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_STACK_USAGE=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS_DEBUG_PROC_KEYS=y @@ -314,4 +306,3 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 70780689599a..444704c8e186 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -60,7 +60,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end); extern void alternatives_smp_module_del(struct module *mod); -extern void alternatives_smp_switch(int smp); +extern void alternatives_enable_smp(void); extern int alternatives_text_reserved(void *start, void *end); extern bool skip_smp_alternatives; #else @@ -68,7 +68,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) {} static inline void alternatives_smp_module_del(struct module *mod) {} -static inline void alternatives_smp_switch(int smp) {} +static inline void alternatives_enable_smp(void) {} static inline int alternatives_text_reserved(void *start, void *end) { return 0; diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 72f5009deb5a..6dfd0195bb55 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -355,7 +355,7 @@ static int test_bit(int nr, const volatile unsigned long *addr); */ static inline unsigned long __ffs(unsigned long word) { - asm("bsf %1,%0" + asm("rep; bsf %1,%0" : "=r" (word) : "rm" (word)); return word; @@ -369,7 +369,7 @@ static inline unsigned long __ffs(unsigned long word) */ static inline unsigned long ffz(unsigned long word) { - asm("bsf %1,%0" + asm("rep; bsf %1,%0" : "=r" (word) : "r" (~word)); return word; @@ -417,10 +417,9 @@ static inline int ffs(int x) * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ - long tmp = -1; asm("bsfl %1,%0" : "=r" (r) - : "rm" (x), "0" (tmp)); + : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" @@ -459,10 +458,9 @@ static inline int fls(int x) * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ - long tmp = -1; asm("bsrl %1,%0" : "=r" (r) - : "rm" (x), "0" (tmp)); + : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" @@ -490,13 +488,13 @@ static inline int fls(int x) #ifdef CONFIG_X86_64 static __always_inline int fls64(__u64 x) { - long bitpos = -1; + int bitpos = -1; /* * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before. */ - asm("bsrq %1,%0" + asm("bsrq %1,%q0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index a9e3a740f697..7f8422a28a46 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -49,38 +49,36 @@ For 32-bit we have the following conventions - kernel is built with #include "dwarf2.h" /* - * 64-bit system call stack frame layout defines and helpers, for - * assembly code (note that the seemingly unnecessary parentheses - * are to prevent cpp from inserting spaces in expressions that get - * passed to macros): + * 64-bit system call stack frame layout defines and helpers, + * for assembly code: */ -#define R15 (0) -#define R14 (8) -#define R13 (16) -#define R12 (24) -#define RBP (32) -#define RBX (40) +#define R15 0 +#define R14 8 +#define R13 16 +#define R12 24 +#define RBP 32 +#define RBX 40 /* arguments: interrupts/non tracing syscalls only save up to here: */ -#define R11 (48) -#define R10 (56) -#define R9 (64) -#define R8 (72) -#define RAX (80) -#define RCX (88) -#define RDX (96) -#define RSI (104) -#define RDI (112) -#define ORIG_RAX (120) /* + error_code */ +#define R11 48 +#define R10 56 +#define R9 64 +#define R8 72 +#define RAX 80 +#define RCX 88 +#define RDX 96 +#define RSI 104 +#define RDI 112 +#define ORIG_RAX 120 /* + error_code */ /* end of arguments */ /* cpu exception frame or undefined in case of fast syscall: */ -#define RIP (128) -#define CS (136) -#define EFLAGS (144) -#define RSP (152) -#define SS (160) +#define RIP 128 +#define CS 136 +#define EFLAGS 144 +#define RSP 152 +#define SS 160 #define ARGOFFSET R11 #define SWFRAME ORIG_RAX diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b0767bc08740..9a25b522d377 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -3,38 +3,54 @@ #ifdef __ASSEMBLY__ - .macro MCOUNT_SAVE_FRAME - /* taken from glibc */ - subq $0x38, %rsp - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) + /* skip is set if the stack was already partially adjusted */ + .macro MCOUNT_SAVE_FRAME skip=0 + /* + * We add enough stack to save all regs. + */ + subq $(SS+8-\skip), %rsp + movq %rax, RAX(%rsp) + movq %rcx, RCX(%rsp) + movq %rdx, RDX(%rsp) + movq %rsi, RSI(%rsp) + movq %rdi, RDI(%rsp) + movq %r8, R8(%rsp) + movq %r9, R9(%rsp) + /* Move RIP to its proper location */ + movq SS+8(%rsp), %rdx + movq %rdx, RIP(%rsp) .endm - .macro MCOUNT_RESTORE_FRAME - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $0x38, %rsp + .macro MCOUNT_RESTORE_FRAME skip=0 + movq R9(%rsp), %r9 + movq R8(%rsp), %r8 + movq RDI(%rsp), %rdi + movq RSI(%rsp), %rsi + movq RDX(%rsp), %rdx + movq RCX(%rsp), %rcx + movq RAX(%rsp), %rax + addq $(SS+8-\skip), %rsp .endm #endif #ifdef CONFIG_FUNCTION_TRACER -#define MCOUNT_ADDR ((long)(mcount)) +#ifdef CC_USING_FENTRY +# define MCOUNT_ADDR ((long)(__fentry__)) +#else +# define MCOUNT_ADDR ((long)(mcount)) +#endif #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ +#ifdef CONFIG_DYNAMIC_FTRACE +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#define ARCH_SUPPORTS_FTRACE_SAVE_REGS +#endif + #ifndef __ASSEMBLY__ extern void mcount(void); extern atomic_t modifying_ftrace_code; +extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 2c392d663dce..434e2106cc87 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -35,8 +35,6 @@ #define HPET_ID_NUMBER_SHIFT 8 #define HPET_ID_VENDOR_SHIFT 16 -#define HPET_ID_VENDOR_8086 0x8086 - #define HPET_CFG_ENABLE 0x001 #define HPET_CFG_LEGACY 0x002 #define HPET_LEGACY_8254 2 diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 547882539157..d3ddd17405d0 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -27,6 +27,7 @@ #include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT +#define ARCH_SUPPORTS_KPROBES_ON_FTRACE struct pt_regs; struct kprobe; diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 246617efd67f..41e08cb6a092 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,22 @@ #include <linux/types.h> #include <linux/ioctl.h> +#define DE_VECTOR 0 +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define OF_VECTOR 4 +#define BR_VECTOR 5 +#define UD_VECTOR 6 +#define NM_VECTOR 7 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 +#define MF_VECTOR 16 +#define MC_VECTOR 18 + /* Select x86 specific features in <linux/kvm.h> */ #define __KVM_HAVE_PIT #define __KVM_HAVE_IOAPIC diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 09155d64cf7e..1eaa6b056670 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -75,22 +75,6 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -#define DE_VECTOR 0 -#define DB_VECTOR 1 -#define BP_VECTOR 3 -#define OF_VECTOR 4 -#define BR_VECTOR 5 -#define UD_VECTOR 6 -#define NM_VECTOR 7 -#define DF_VECTOR 8 -#define TS_VECTOR 10 -#define NP_VECTOR 11 -#define SS_VECTOR 12 -#define GP_VECTOR 13 -#define PF_VECTOR 14 -#define MF_VECTOR 16 -#define MC_VECTOR 18 - #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index cb4e43bce98a..4fabcdf1cfa7 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -262,4 +262,6 @@ static inline void perf_check_microcode(void) { } static inline void amd_pmu_disable_virt(void) { } #endif +#define arch_perf_out_copy_user copy_from_user_nmi + #endif /* _ASM_X86_PERF_EVENT_H */ diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/asm/perf_regs.h new file mode 100644 index 000000000000..3f2207bfd17b --- /dev/null +++ b/arch/x86/include/asm/perf_regs.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_PERF_REGS_H +#define _ASM_X86_PERF_REGS_H + +enum perf_event_x86_regs { + PERF_REG_X86_AX, + PERF_REG_X86_BX, + PERF_REG_X86_CX, + PERF_REG_X86_DX, + PERF_REG_X86_SI, + PERF_REG_X86_DI, + PERF_REG_X86_BP, + PERF_REG_X86_SP, + PERF_REG_X86_IP, + PERF_REG_X86_FLAGS, + PERF_REG_X86_CS, + PERF_REG_X86_SS, + PERF_REG_X86_DS, + PERF_REG_X86_ES, + PERF_REG_X86_FS, + PERF_REG_X86_GS, + PERF_REG_X86_R8, + PERF_REG_X86_R9, + PERF_REG_X86_R10, + PERF_REG_X86_R11, + PERF_REG_X86_R12, + PERF_REG_X86_R13, + PERF_REG_X86_R14, + PERF_REG_X86_R15, + + PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, + PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, +}; +#endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d048cad9bcad..b98c0d958ebb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -423,7 +423,6 @@ DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); -extern unsigned long kernel_eflags; extern asmlinkage void ignore_sysret(void); #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR @@ -759,6 +758,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } +extern void set_task_blockstep(struct task_struct *task, bool on); + /* * from system description table in BIOS. Mostly for MCA use, but * others may find it useful: diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h new file mode 100644 index 000000000000..d1ac07a23979 --- /dev/null +++ b/arch/x86/include/asm/rcu.h @@ -0,0 +1,32 @@ +#ifndef _ASM_X86_RCU_H +#define _ASM_X86_RCU_H + +#ifndef __ASSEMBLY__ + +#include <linux/rcupdate.h> +#include <asm/ptrace.h> + +static inline void exception_enter(struct pt_regs *regs) +{ + rcu_user_exit(); +} + +static inline void exception_exit(struct pt_regs *regs) +{ +#ifdef CONFIG_RCU_USER_QS + if (user_mode(regs)) + rcu_user_enter(); +#endif +} + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_RCU_USER_QS +# define SCHEDULE_USER call schedule_user +#else +# define SCHEDULE_USER call schedule +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f2b83bc7d784..cdf5674dd23a 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -1,6 +1,135 @@ #ifndef __SVM_H #define __SVM_H +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_EXIT_REASONS \ + { SVM_EXIT_READ_CR0, "read_cr0" }, \ + { SVM_EXIT_READ_CR3, "read_cr3" }, \ + { SVM_EXIT_READ_CR4, "read_cr4" }, \ + { SVM_EXIT_READ_CR8, "read_cr8" }, \ + { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ + { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ + { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ + { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ + { SVM_EXIT_READ_DR0, "read_dr0" }, \ + { SVM_EXIT_READ_DR1, "read_dr1" }, \ + { SVM_EXIT_READ_DR2, "read_dr2" }, \ + { SVM_EXIT_READ_DR3, "read_dr3" }, \ + { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ + { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ + { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ + { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ + { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ + { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ + { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ + { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ + { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ + { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ + { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ + { SVM_EXIT_INTR, "interrupt" }, \ + { SVM_EXIT_NMI, "nmi" }, \ + { SVM_EXIT_SMI, "smi" }, \ + { SVM_EXIT_INIT, "init" }, \ + { SVM_EXIT_VINTR, "vintr" }, \ + { SVM_EXIT_CPUID, "cpuid" }, \ + { SVM_EXIT_INVD, "invd" }, \ + { SVM_EXIT_HLT, "hlt" }, \ + { SVM_EXIT_INVLPG, "invlpg" }, \ + { SVM_EXIT_INVLPGA, "invlpga" }, \ + { SVM_EXIT_IOIO, "io" }, \ + { SVM_EXIT_MSR, "msr" }, \ + { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ + { SVM_EXIT_SHUTDOWN, "shutdown" }, \ + { SVM_EXIT_VMRUN, "vmrun" }, \ + { SVM_EXIT_VMMCALL, "hypercall" }, \ + { SVM_EXIT_VMLOAD, "vmload" }, \ + { SVM_EXIT_VMSAVE, "vmsave" }, \ + { SVM_EXIT_STGI, "stgi" }, \ + { SVM_EXIT_CLGI, "clgi" }, \ + { SVM_EXIT_SKINIT, "skinit" }, \ + { SVM_EXIT_WBINVD, "wbinvd" }, \ + { SVM_EXIT_MONITOR, "monitor" }, \ + { SVM_EXIT_MWAIT, "mwait" }, \ + { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_NPF, "npf" } + +#ifdef __KERNEL__ + enum { INTERCEPT_INTR, INTERCEPT_NMI, @@ -264,81 +393,6 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXITINFO_REG_MASK 0x0F -#define SVM_EXIT_READ_CR0 0x000 -#define SVM_EXIT_READ_CR3 0x003 -#define SVM_EXIT_READ_CR4 0x004 -#define SVM_EXIT_READ_CR8 0x008 -#define SVM_EXIT_WRITE_CR0 0x010 -#define SVM_EXIT_WRITE_CR3 0x013 -#define SVM_EXIT_WRITE_CR4 0x014 -#define SVM_EXIT_WRITE_CR8 0x018 -#define SVM_EXIT_READ_DR0 0x020 -#define SVM_EXIT_READ_DR1 0x021 -#define SVM_EXIT_READ_DR2 0x022 -#define SVM_EXIT_READ_DR3 0x023 -#define SVM_EXIT_READ_DR4 0x024 -#define SVM_EXIT_READ_DR5 0x025 -#define SVM_EXIT_READ_DR6 0x026 -#define SVM_EXIT_READ_DR7 0x027 -#define SVM_EXIT_WRITE_DR0 0x030 -#define SVM_EXIT_WRITE_DR1 0x031 -#define SVM_EXIT_WRITE_DR2 0x032 -#define SVM_EXIT_WRITE_DR3 0x033 -#define SVM_EXIT_WRITE_DR4 0x034 -#define SVM_EXIT_WRITE_DR5 0x035 -#define SVM_EXIT_WRITE_DR6 0x036 -#define SVM_EXIT_WRITE_DR7 0x037 -#define SVM_EXIT_EXCP_BASE 0x040 -#define SVM_EXIT_INTR 0x060 -#define SVM_EXIT_NMI 0x061 -#define SVM_EXIT_SMI 0x062 -#define SVM_EXIT_INIT 0x063 -#define SVM_EXIT_VINTR 0x064 -#define SVM_EXIT_CR0_SEL_WRITE 0x065 -#define SVM_EXIT_IDTR_READ 0x066 -#define SVM_EXIT_GDTR_READ 0x067 -#define SVM_EXIT_LDTR_READ 0x068 -#define SVM_EXIT_TR_READ 0x069 -#define SVM_EXIT_IDTR_WRITE 0x06a -#define SVM_EXIT_GDTR_WRITE 0x06b -#define SVM_EXIT_LDTR_WRITE 0x06c -#define SVM_EXIT_TR_WRITE 0x06d -#define SVM_EXIT_RDTSC 0x06e -#define SVM_EXIT_RDPMC 0x06f -#define SVM_EXIT_PUSHF 0x070 -#define SVM_EXIT_POPF 0x071 -#define SVM_EXIT_CPUID 0x072 -#define SVM_EXIT_RSM 0x073 -#define SVM_EXIT_IRET 0x074 -#define SVM_EXIT_SWINT 0x075 -#define SVM_EXIT_INVD 0x076 -#define SVM_EXIT_PAUSE 0x077 -#define SVM_EXIT_HLT 0x078 -#define SVM_EXIT_INVLPG 0x079 -#define SVM_EXIT_INVLPGA 0x07a -#define SVM_EXIT_IOIO 0x07b -#define SVM_EXIT_MSR 0x07c -#define SVM_EXIT_TASK_SWITCH 0x07d -#define SVM_EXIT_FERR_FREEZE 0x07e -#define SVM_EXIT_SHUTDOWN 0x07f -#define SVM_EXIT_VMRUN 0x080 -#define SVM_EXIT_VMMCALL 0x081 -#define SVM_EXIT_VMLOAD 0x082 -#define SVM_EXIT_VMSAVE 0x083 -#define SVM_EXIT_STGI 0x084 -#define SVM_EXIT_CLGI 0x085 -#define SVM_EXIT_SKINIT 0x086 -#define SVM_EXIT_RDTSCP 0x087 -#define SVM_EXIT_ICEBP 0x088 -#define SVM_EXIT_WBINVD 0x089 -#define SVM_EXIT_MONITOR 0x08a -#define SVM_EXIT_MWAIT 0x08b -#define SVM_EXIT_MWAIT_COND 0x08c -#define SVM_EXIT_XSETBV 0x08d -#define SVM_EXIT_NPF 0x400 - -#define SVM_EXIT_ERR -1 - #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" @@ -350,3 +404,4 @@ struct __attribute__ ((__packed__)) vmcb { #endif +#endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 89f794f007ec..c535d847e3b5 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -89,6 +89,7 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ +#define TIF_NOHZ 19 /* in adaptive nohz mode */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_DEBUG 21 /* uses debug registers */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -114,6 +115,7 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) +#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) @@ -126,12 +128,13 @@ struct thread_info { /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -141,7 +144,8 @@ struct thread_info { /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK \ - ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) + ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index f3971bbcd1de..8ff8be7835ab 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -42,10 +42,11 @@ struct arch_uprobe { }; struct arch_uprobe_task { - unsigned long saved_trap_nr; #ifdef CONFIG_X86_64 unsigned long saved_scratch_register; #endif + unsigned int saved_trap_nr; + unsigned int saved_tf; }; extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 74fcb963595b..36ec21c36d68 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -25,6 +25,88 @@ * */ +#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 + +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 + +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_INVPCID 58 + +#define VMX_EXIT_REASONS \ + { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ + { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ + { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ + { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ + { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ + { EXIT_REASON_CPUID, "CPUID" }, \ + { EXIT_REASON_HLT, "HLT" }, \ + { EXIT_REASON_INVLPG, "INVLPG" }, \ + { EXIT_REASON_RDPMC, "RDPMC" }, \ + { EXIT_REASON_RDTSC, "RDTSC" }, \ + { EXIT_REASON_VMCALL, "VMCALL" }, \ + { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ + { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ + { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ + { EXIT_REASON_VMPTRST, "VMPTRST" }, \ + { EXIT_REASON_VMREAD, "VMREAD" }, \ + { EXIT_REASON_VMRESUME, "VMRESUME" }, \ + { EXIT_REASON_VMWRITE, "VMWRITE" }, \ + { EXIT_REASON_VMOFF, "VMOFF" }, \ + { EXIT_REASON_VMON, "VMON" }, \ + { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ + { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ + { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ + { EXIT_REASON_MSR_READ, "MSR_READ" }, \ + { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ + { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ + { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ + { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ + { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ + { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ + { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ + { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ + { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ + { EXIT_REASON_WBINVD, "WBINVD" } + +#ifdef __KERNEL__ + #include <linux/types.h> /* @@ -241,49 +323,6 @@ enum vmcs_field { HOST_RIP = 0x00006c16, }; -#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 - -#define EXIT_REASON_EXCEPTION_NMI 0 -#define EXIT_REASON_EXTERNAL_INTERRUPT 1 -#define EXIT_REASON_TRIPLE_FAULT 2 - -#define EXIT_REASON_PENDING_INTERRUPT 7 -#define EXIT_REASON_NMI_WINDOW 8 -#define EXIT_REASON_TASK_SWITCH 9 -#define EXIT_REASON_CPUID 10 -#define EXIT_REASON_HLT 12 -#define EXIT_REASON_INVD 13 -#define EXIT_REASON_INVLPG 14 -#define EXIT_REASON_RDPMC 15 -#define EXIT_REASON_RDTSC 16 -#define EXIT_REASON_VMCALL 18 -#define EXIT_REASON_VMCLEAR 19 -#define EXIT_REASON_VMLAUNCH 20 -#define EXIT_REASON_VMPTRLD 21 -#define EXIT_REASON_VMPTRST 22 -#define EXIT_REASON_VMREAD 23 -#define EXIT_REASON_VMRESUME 24 -#define EXIT_REASON_VMWRITE 25 -#define EXIT_REASON_VMOFF 26 -#define EXIT_REASON_VMON 27 -#define EXIT_REASON_CR_ACCESS 28 -#define EXIT_REASON_DR_ACCESS 29 -#define EXIT_REASON_IO_INSTRUCTION 30 -#define EXIT_REASON_MSR_READ 31 -#define EXIT_REASON_MSR_WRITE 32 -#define EXIT_REASON_INVALID_STATE 33 -#define EXIT_REASON_MWAIT_INSTRUCTION 36 -#define EXIT_REASON_MONITOR_INSTRUCTION 39 -#define EXIT_REASON_PAUSE_INSTRUCTION 40 -#define EXIT_REASON_MCE_DURING_VMENTRY 41 -#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 -#define EXIT_REASON_APIC_ACCESS 44 -#define EXIT_REASON_EPT_VIOLATION 48 -#define EXIT_REASON_EPT_MISCONFIG 49 -#define EXIT_REASON_WBINVD 54 -#define EXIT_REASON_XSETBV 55 -#define EXIT_REASON_INVPCID 58 - /* * Interruption-information format */ @@ -488,3 +527,5 @@ enum vm_instruction_error_number { }; #endif + +#endif diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 93971e841dd5..472b9b783019 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -51,7 +51,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s, extern int m2p_add_override(unsigned long mfn, struct page *page, struct gnttab_map_grant_ref *kmap_op); -extern int m2p_remove_override(struct page *page, bool clear_pte); +extern int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8215e5652d97..8d7a619718b5 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_PERF_EVENTS) += perf_regs.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b2297e58c6ed..e651f7a589ac 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) acpi_register_lapic(physid, ACPI_MADT_ENABLED); /* - * If mp_register_lapic successfully generates a new logical cpu + * If acpi_register_lapic successfully generates a new logical cpu * number, then the following will get us exactly what was mapped */ cpumask_andnot(new_map, cpu_present_mask, tmp_map); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ced4534baed5..ef5ccca79a6c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -23,19 +23,6 @@ #define MAX_PATCH_LEN (255-1) -#ifdef CONFIG_HOTPLUG_CPU -static int smp_alt_once; - -static int __init bootonly(char *str) -{ - smp_alt_once = 1; - return 1; -} -__setup("smp-alt-boot", bootonly); -#else -#define smp_alt_once 1 -#endif - static int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) @@ -317,7 +304,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end, /* turn DS segment override prefix into lock prefix */ if (*ptr == 0x3e) text_poke(ptr, ((unsigned char []){0xf0}), 1); - }; + } mutex_unlock(&text_mutex); } @@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, { const s32 *poff; - if (noreplace_smp) - return; - mutex_lock(&text_mutex); for (poff = start; poff < end; poff++) { u8 *ptr = (u8 *)poff + *poff; @@ -338,7 +322,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, /* turn lock prefix into DS segment override prefix */ if (*ptr == 0xf0) text_poke(ptr, ((unsigned char []){0x3E}), 1); - }; + } mutex_unlock(&text_mutex); } @@ -359,7 +343,7 @@ struct smp_alt_module { }; static LIST_HEAD(smp_alt_modules); static DEFINE_MUTEX(smp_alt); -static int smp_mode = 1; /* protected by smp_alt */ +static bool uniproc_patched = false; /* protected by smp_alt */ void __init_or_module alternatives_smp_module_add(struct module *mod, char *name, @@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, { struct smp_alt_module *smp; - if (noreplace_smp) - return; + mutex_lock(&smp_alt); + if (!uniproc_patched) + goto unlock; - if (smp_alt_once) { - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(locks, locks_end, - text, text_end); - return; - } + if (num_possible_cpus() == 1) + /* Don't bother remembering, we'll never have to undo it. */ + goto smp_unlock; smp = kzalloc(sizeof(*smp), GFP_KERNEL); if (NULL == smp) - return; /* we'll run the (safe but slow) SMP code then ... */ + /* we'll run the (safe but slow) SMP code then ... */ + goto unlock; smp->mod = mod; smp->name = name; @@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, __func__, smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); - mutex_lock(&smp_alt); list_add_tail(&smp->next, &smp_alt_modules); - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(smp->locks, smp->locks_end, - smp->text, smp->text_end); +smp_unlock: + alternatives_smp_unlock(locks, locks_end, text, text_end); +unlock: mutex_unlock(&smp_alt); } @@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; - if (smp_alt_once || noreplace_smp) - return; - mutex_lock(&smp_alt); list_for_each_entry(item, &smp_alt_modules, next) { if (mod != item->mod) continue; list_del(&item->next); - mutex_unlock(&smp_alt); - DPRINTK("%s: %s\n", __func__, item->name); kfree(item); - return; + break; } mutex_unlock(&smp_alt); } -bool skip_smp_alternatives; -void alternatives_smp_switch(int smp) +void alternatives_enable_smp(void) { struct smp_alt_module *mod; @@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp) pr_info("lockdep: fixing up alternatives\n"); #endif - if (noreplace_smp || smp_alt_once || skip_smp_alternatives) - return; - BUG_ON(!smp && (num_online_cpus() > 1)); + /* Why bother if there are no other CPUs? */ + BUG_ON(num_possible_cpus() == 1); mutex_lock(&smp_alt); - /* - * Avoid unnecessary switches because it forces JIT based VMs to - * throw away all cached translations, which can be quite costly. - */ - if (smp == smp_mode) { - /* nothing */ - } else if (smp) { + if (uniproc_patched) { pr_info("switching to SMP code\n"); + BUG_ON(num_online_cpus() != 1); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); - } else { - pr_info("switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_unlock(mod->locks, mod->locks_end, - mod->text, mod->text_end); + uniproc_patched = false; } - smp_mode = smp; mutex_unlock(&smp_alt); } @@ -540,40 +503,22 @@ void __init alternative_instructions(void) apply_alternatives(__alt_instructions, __alt_instructions_end); - /* switch to patch-once-at-boottime-only mode and free the - * tables in case we know the number of CPUs will never ever - * change */ -#ifdef CONFIG_HOTPLUG_CPU - if (num_possible_cpus() < 2) - smp_alt_once = 1; -#endif - #ifdef CONFIG_SMP - if (smp_alt_once) { - if (1 == num_possible_cpus()) { - pr_info("switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - - alternatives_smp_unlock(__smp_locks, __smp_locks_end, - _text, _etext); - } - } else { + /* Patch to UP if other cpus not imminent. */ + if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { + uniproc_patched = true; alternatives_smp_module_add(NULL, "core kernel", __smp_locks, __smp_locks_end, _text, _etext); - - /* Only switch to UP mode if we don't immediately boot others */ - if (num_present_cpus() == 1 || setup_max_cpus <= 1) - alternatives_smp_switch(0); } -#endif - apply_paravirt(__parainstructions, __parainstructions_end); - if (smp_alt_once) + if (!uniproc_patched || num_possible_cpus() == 1) free_init_pages("SMP alternatives", (unsigned long)__smp_locks, (unsigned long)__smp_locks_end); +#endif + + apply_paravirt(__parainstructions, __parainstructions_end); restart_nmi(); } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 24deb3082328..b17416e72fbd 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs) apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); i++; v1 >>= 1; - }; + } apic_printk(APIC_DEBUG, KERN_CONT "\n"); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5fbc3c5fccc..9961e2e23709 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1116,8 +1116,6 @@ void syscall_init(void) X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); } -unsigned long kernel_eflags; - /* * Copies of the original ist values from the tss are only accessed during * debugging, no special alignment required. @@ -1299,8 +1297,6 @@ void __cpuinit cpu_init(void) fpu_init(); xsave_init(); - raw_local_save_flags(kernel_eflags); - if (is_uv_system()) uv_cpu_init(); } diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6605a81ba339..8b6defe7eefc 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -586,6 +586,8 @@ extern struct event_constraint intel_westmere_pebs_event_constraints[]; extern struct event_constraint intel_snb_pebs_event_constraints[]; +extern struct event_constraint intel_ivb_pebs_event_constraints[]; + struct event_constraint *intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_enable(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 7bfb5bec8630..eebd5ffe1bba 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -209,6 +209,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config) return -EOPNOTSUPP; } +static const struct perf_event_attr ibs_notsupp = { + .exclude_user = 1, + .exclude_kernel = 1, + .exclude_hv = 1, + .exclude_idle = 1, + .exclude_host = 1, + .exclude_guest = 1, +}; + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -229,6 +238,9 @@ static int perf_ibs_init(struct perf_event *event) if (event->pmu != &perf_ibs->pmu) return -ENOENT; + if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp)) + return -EINVAL; + if (config & ~perf_ibs->config_mask) return -EINVAL; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 7f2739e03e79..6bca492b8547 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2008,6 +2008,7 @@ __init int intel_pmu_init(void) break; case 28: /* Atom */ + case 54: /* Cedariew */ memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -2047,7 +2048,6 @@ __init int intel_pmu_init(void) case 42: /* SandyBridge */ case 45: /* SandyBridge, "Romely-EP" */ x86_add_quirk(intel_sandybridge_quirk); - case 58: /* IvyBridge */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, @@ -2072,6 +2072,29 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; + case 58: /* IvyBridge */ + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_snb(); + + x86_pmu.event_constraints = intel_snb_event_constraints; + x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + x86_pmu.extra_regs = intel_snb_extra_regs; + /* all extra regs are per-cpu when HT is on */ + x86_pmu.er_flags |= ERF_HAS_RSP_1; + x86_pmu.er_flags |= ERF_NO_HT_SHARING; + + /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); + + pr_cont("IvyBridge events, "); + break; + default: switch (x86_pmu.version) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index e38d97bf4259..826054a4f2ee 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -407,6 +407,20 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_ivb_pebs_event_constraints[] = { + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *c; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 520b4265fcd2..da02e9cc3754 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -686,7 +686,8 @@ void intel_pmu_lbr_init_atom(void) * to have an operational LBR which can freeze * on PMU interrupt */ - if (boot_cpu_data.x86_mask < 10) { + if (boot_cpu_data.x86_model == 28 + && boot_cpu_data.x86_mask < 10) { pr_cont("LBR disabled due to erratum"); return; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 0a5571080e74..99d96a4978b5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -661,6 +661,11 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box) } } +static struct uncore_event_desc snb_uncore_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"), + { /* end: all zeroes */ }, +}; + static struct attribute *snb_uncore_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -704,6 +709,7 @@ static struct intel_uncore_type snb_uncore_cbox = { .constraints = snb_uncore_cbox_constraints, .ops = &snb_uncore_msr_ops, .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, }; static struct intel_uncore_type *snb_msr_uncores[] = { @@ -1944,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp static struct intel_uncore_box * uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - static struct intel_uncore_box *box; + struct intel_uncore_box *box; box = *per_cpu_ptr(pmu->box, cpu); if (box) @@ -2341,6 +2347,27 @@ int uncore_pmu_event_init(struct perf_event *event) return ret; } +static ssize_t uncore_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask); + + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} + +static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); + +static struct attribute *uncore_pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group uncore_pmu_attr_group = { + .attrs = uncore_pmu_attrs, +}; + static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) { int ret; @@ -2378,8 +2405,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) free_percpu(type->pmus[i].box); kfree(type->pmus); type->pmus = NULL; - kfree(type->attr_groups[1]); - type->attr_groups[1] = NULL; + kfree(type->events_group); + type->events_group = NULL; } static void __init uncore_types_exit(struct intel_uncore_type **types) @@ -2431,9 +2458,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type) for (j = 0; j < i; j++) attrs[j] = &type->event_descs[j].attr.attr; - type->attr_groups[1] = events_group; + type->events_group = events_group; } + type->pmu_group = &uncore_pmu_attr_group; type->pmus = pmus; return 0; fail: diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 5b81c1856aac..e68a4550e952 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -369,10 +369,12 @@ struct intel_uncore_type { struct intel_uncore_pmu *pmus; struct intel_uncore_ops *ops; struct uncore_event_desc *event_descs; - const struct attribute_group *attr_groups[3]; + const struct attribute_group *attr_groups[4]; }; -#define format_group attr_groups[0] +#define pmu_group attr_groups[0] +#define format_group attr_groups[1] +#define events_group attr_groups[2] struct intel_uncore_ops { void (*init_box)(struct intel_uncore_box *); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 8022c6681485..fbd895562292 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - if (*pos == 0) /* just in case, cpu 0 is not the first */ - *pos = cpumask_first(cpu_online_mask); - else - *pos = cpumask_next(*pos - 1, cpu_online_mask); + *pos = cpumask_next(*pos - 1, cpu_online_mask); if ((*pos) < nr_cpu_ids) return &cpu_data(*pos); return NULL; diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 39472dd2323f..60c78917190c 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -199,12 +199,14 @@ static int __init cpuid_init(void) goto out_chrdev; } cpuid_class->devnode = cpuid_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = cpuid_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -214,6 +216,7 @@ out_class: for_each_online_cpu(i) { cpuid_device_destroy(i); } + put_online_cpus(); class_destroy(cpuid_class); out_chrdev: __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); @@ -225,11 +228,13 @@ static void __exit cpuid_exit(void) { int cpu = 0; + get_online_cpus(); for_each_online_cpu(cpu) cpuid_device_destroy(cpu); class_destroy(cpuid_class); __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); } module_init(cpuid_init); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 3ae2ced4a874..b1581527a236 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = { .xlate = ioapic_xlate, }; +static void dt_add_ioapic_domain(unsigned int ioapic_num, + struct device_node *np) +{ + struct irq_domain *id; + struct mp_ioapic_gsi *gsi_cfg; + int ret; + int num; + + gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); + num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; + + id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops, + (void *)ioapic_num); + BUG_ON(!id); + if (gsi_cfg->gsi_base == 0) { + /* + * The first NR_IRQS_LEGACY irq descs are allocated in + * early_irq_init() and need just a mapping. The + * remaining irqs need both. All of them are preallocated + * and assigned so we can keep the 1:1 mapping which the ioapic + * is having. + */ + ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); + if (ret) + pr_err("Error mapping legacy IRQs: %d\n", ret); + + if (num > NR_IRQS_LEGACY) { + ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, + NR_IRQS_LEGACY, num - NR_IRQS_LEGACY); + if (ret) + pr_err("Error creating mapping for the " + "remaining IRQs: %d\n", ret); + } + irq_set_default_host(id); + } else { + ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num); + if (ret) + pr_err("Error creating IRQ mapping: %d\n", ret); + } +} + static void __init ioapic_add_ofnode(struct device_node *np) { struct resource r; @@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np) for (i = 0; i < nr_ioapics; i++) { if (r.start == mpc_ioapic_addr(i)) { - struct irq_domain *id; - struct mp_ioapic_gsi *gsi_cfg; - - gsi_cfg = mp_ioapic_gsi_routing(i); - - id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0, - &ioapic_irq_domain_ops, - (void*)i); - BUG_ON(!id); + dt_add_ioapic_domain(i, np); return; } } diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 623f28837476..f438a44bf8f9 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1109,17 +1109,21 @@ ENTRY(ftrace_caller) pushl %eax pushl %ecx pushl %edx - movl 0xc(%esp), %eax + pushl $0 /* Pass NULL as regs pointer */ + movl 4*4(%esp), %eax movl 0x4(%ebp), %edx + leal function_trace_op, %ecx subl $MCOUNT_INSN_SIZE, %eax .globl ftrace_call ftrace_call: call ftrace_stub + addl $4,%esp /* skip NULL pointer */ popl %edx popl %ecx popl %eax +ftrace_ret: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: @@ -1131,6 +1135,71 @@ ftrace_stub: ret END(ftrace_caller) +ENTRY(ftrace_regs_caller) + pushf /* push flags before compare (in cs location) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* + * i386 does not save SS and ESP when coming from kernel. + * Instead, to get sp, ®s->sp is used (see ptrace.h). + * Unfortunately, that means eflags must be at the same location + * as the current return ip is. We move the return ip into the + * ip location, and move flags into the return ip location. + */ + pushl 4(%esp) /* save return ip into ip slot */ + + pushl $0 /* Load 0 into orig_ax */ + pushl %gs + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + + movl 13*4(%esp), %eax /* Get the saved flags */ + movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ + /* clobbering return ip */ + movl $__KERNEL_CS,13*4(%esp) + + movl 12*4(%esp), %eax /* Load ip (1st parameter) */ + subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ + movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ + leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ + pushl %esp /* Save pt_regs as 4th parameter */ + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + addl $4, %esp /* Skip pt_regs */ + movl 14*4(%esp), %eax /* Move flags back into cs */ + movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ + movl 12*4(%esp), %eax /* Get return ip from regs->ip */ + movl %eax, 14*4(%esp) /* Put return ip back for ret */ + + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + popl %ds + popl %es + popl %fs + popl %gs + addl $8, %esp /* Skip orig_ax and ip */ + popf /* Pop flags at end (no addl to corrupt flags) */ + jmp ftrace_ret + +ftrace_restore_flags: + popf + jmp ftrace_stub #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) @@ -1171,9 +1240,6 @@ END(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - pushl %eax pushl %ecx pushl %edx diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 69babd8c834f..066334be7b74 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -56,6 +56,7 @@ #include <asm/ftrace.h> #include <asm/percpu.h> #include <asm/asm.h> +#include <asm/rcu.h> #include <linux/err.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ @@ -68,25 +69,51 @@ .section .entry.text, "ax" #ifdef CONFIG_FUNCTION_TRACER + +#ifdef CC_USING_FENTRY +# define function_hook __fentry__ +#else +# define function_hook mcount +#endif + #ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(mcount) + +ENTRY(function_hook) retq -END(mcount) +END(function_hook) + +/* skip is set if stack has been adjusted */ +.macro ftrace_caller_setup skip=0 + MCOUNT_SAVE_FRAME \skip + + /* Load the ftrace_ops into the 3rd parameter */ + leaq function_trace_op, %rdx + + /* Load ip into the first parameter */ + movq RIP(%rsp), %rdi + subq $MCOUNT_INSN_SIZE, %rdi + /* Load the parent_ip into the second parameter */ +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else + movq 8(%rbp), %rsi +#endif +.endm ENTRY(ftrace_caller) + /* Check if tracing was disabled (quick check) */ cmpl $0, function_trace_stop jne ftrace_stub - MCOUNT_SAVE_FRAME - - movq 0x38(%rsp), %rdi - movq 8(%rbp), %rsi - subq $MCOUNT_INSN_SIZE, %rdi + ftrace_caller_setup + /* regs go into 4th parameter (but make it NULL) */ + movq $0, %rcx GLOBAL(ftrace_call) call ftrace_stub MCOUNT_RESTORE_FRAME +ftrace_return: #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -97,8 +124,78 @@ GLOBAL(ftrace_stub) retq END(ftrace_caller) +ENTRY(ftrace_regs_caller) + /* Save the current flags before compare (in SS location)*/ + pushfq + + /* Check if tracing was disabled (quick check) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* skip=8 to skip flags saved in SS */ + ftrace_caller_setup 8 + + /* Save the rest of pt_regs */ + movq %r15, R15(%rsp) + movq %r14, R14(%rsp) + movq %r13, R13(%rsp) + movq %r12, R12(%rsp) + movq %r11, R11(%rsp) + movq %r10, R10(%rsp) + movq %rbp, RBP(%rsp) + movq %rbx, RBX(%rsp) + /* Copy saved flags */ + movq SS(%rsp), %rcx + movq %rcx, EFLAGS(%rsp) + /* Kernel segments */ + movq $__KERNEL_DS, %rcx + movq %rcx, SS(%rsp) + movq $__KERNEL_CS, %rcx + movq %rcx, CS(%rsp) + /* Stack - skipping return address */ + leaq SS+16(%rsp), %rcx + movq %rcx, RSP(%rsp) + + /* regs go into 4th parameter */ + leaq (%rsp), %rcx + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + /* Copy flags back to SS, to restore them */ + movq EFLAGS(%rsp), %rax + movq %rax, SS(%rsp) + + /* Handlers can change the RIP */ + movq RIP(%rsp), %rax + movq %rax, SS+8(%rsp) + + /* restore the rest of pt_regs */ + movq R15(%rsp), %r15 + movq R14(%rsp), %r14 + movq R13(%rsp), %r13 + movq R12(%rsp), %r12 + movq R10(%rsp), %r10 + movq RBP(%rsp), %rbp + movq RBX(%rsp), %rbx + + /* skip=8 to skip flags saved in SS */ + MCOUNT_RESTORE_FRAME 8 + + /* Restore flags */ + popfq + + jmp ftrace_return +ftrace_restore_flags: + popfq + jmp ftrace_stub + +END(ftrace_regs_caller) + + #else /* ! CONFIG_DYNAMIC_FTRACE */ -ENTRY(mcount) + +ENTRY(function_hook) cmpl $0, function_trace_stop jne ftrace_stub @@ -119,8 +216,12 @@ GLOBAL(ftrace_stub) trace: MCOUNT_SAVE_FRAME - movq 0x38(%rsp), %rdi + movq RIP(%rsp), %rdi +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else movq 8(%rbp), %rsi +#endif subq $MCOUNT_INSN_SIZE, %rdi call *ftrace_trace_function @@ -128,20 +229,22 @@ trace: MCOUNT_RESTORE_FRAME jmp ftrace_stub -END(mcount) +END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - MCOUNT_SAVE_FRAME +#ifdef CC_USING_FENTRY + leaq SS+16(%rsp), %rdi + movq $0, %rdx /* No framepointers needed */ +#else leaq 8(%rbp), %rdi - movq 0x38(%rsp), %rsi movq (%rbp), %rdx +#endif + movq RIP(%rsp), %rsi subq $MCOUNT_INSN_SIZE, %rsi call prepare_ftrace_return @@ -342,15 +445,15 @@ ENDPROC(native_usergs_sysret64) .macro SAVE_ARGS_IRQ cld /* start from rbp in pt_regs and jump over */ - movq_cfi rdi, RDI-RBP - movq_cfi rsi, RSI-RBP - movq_cfi rdx, RDX-RBP - movq_cfi rcx, RCX-RBP - movq_cfi rax, RAX-RBP - movq_cfi r8, R8-RBP - movq_cfi r9, R9-RBP - movq_cfi r10, R10-RBP - movq_cfi r11, R11-RBP + movq_cfi rdi, (RDI-RBP) + movq_cfi rsi, (RSI-RBP) + movq_cfi rdx, (RDX-RBP) + movq_cfi rcx, (RCX-RBP) + movq_cfi rax, (RAX-RBP) + movq_cfi r8, (R8-RBP) + movq_cfi r9, (R9-RBP) + movq_cfi r10, (R10-RBP) + movq_cfi r11, (R11-RBP) /* Save rbp so that we can unwind from get_irq_regs() */ movq_cfi rbp, 0 @@ -384,7 +487,7 @@ ENDPROC(native_usergs_sysret64) .endm ENTRY(save_rest) - PARTIAL_FRAME 1 REST_SKIP+8 + PARTIAL_FRAME 1 (REST_SKIP+8) movq 5*8+16(%rsp), %r11 /* save return address */ movq_cfi rbx, RBX+16 movq_cfi rbp, RBP+16 @@ -440,7 +543,7 @@ ENTRY(ret_from_fork) LOCK ; btr $TIF_FORK,TI_flags(%r8) - pushq_cfi kernel_eflags(%rip) + pushq_cfi $0x0002 popfq_cfi # reset kernel eflags call schedule_tail # rdi: 'prev' task parameter @@ -565,7 +668,7 @@ sysret_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi jmp sysret_check @@ -678,7 +781,7 @@ int_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF @@ -974,7 +1077,7 @@ retint_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi GET_THREAD_INFO(%rcx) DISABLE_INTERRUPTS(CLBR_NONE) @@ -1449,7 +1552,7 @@ paranoid_userspace: paranoid_schedule: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_ANY) - call schedule + SCHEDULE_USER DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF jmp paranoid_userspace diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c3a7cb4bf6e6..1d414029f1d8 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -206,6 +206,21 @@ static int ftrace_modify_code(unsigned long ip, unsigned const char *old_code, unsigned const char *new_code); +/* + * Should never be called: + * As it is only called by __ftrace_replace_code() which is called by + * ftrace_replace_code() that x86 overrides, and by ftrace_update_code() + * which is called to turn mcount into nops or nops into function calls + * but not to convert a function from not using regs to one that uses + * regs, which ftrace_modify_call() is for. + */ +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + WARN_ON(1); + return -EINVAL; +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret = ftrace_modify_code(ip, old, new); + /* Also update the regs callback function */ + if (!ret) { + ip = (unsigned long)(&ftrace_regs_call); + memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + } + atomic_dec(&modifying_ftrace_code); return ret; @@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec) return add_break(rec->ip, old); } +/* + * If the record has the FTRACE_FL_REGS set, that means that it + * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS + * is not not set, then it wants to convert to the normal callback. + */ +static unsigned long get_ftrace_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + +/* + * The FTRACE_FL_REGS_EN is set when the record already points to + * a function that saves all the regs. Basically the '_EN' version + * represents the current state of the function. + */ +static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS_EN) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + static int add_breakpoints(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; @@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: @@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) /* converting nop to call */ return add_brk_on_nop(rec); + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: + ftrace_addr = get_ftrace_old_addr(rec); + /* fall through */ case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return add_brk_on_call(rec, ftrace_addr); @@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec) * If not, don't touch the breakpoint, we make just create * a disaster. */ - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); + nop = ftrace_call_replace(ip, ftrace_addr); + + if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) + goto update; + + /* Check both ftrace_addr and ftrace_old_addr */ + ftrace_addr = get_ftrace_old_addr(rec); nop = ftrace_call_replace(ip, ftrace_addr); if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } + update: return probe_kernel_write((void *)ip, &nop[0], 1); } @@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return add_update_call(rec, ftrace_addr); @@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable) ret = ftrace_update_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return finish_update_call(rec, ftrace_addr); diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 36d1853e91af..9a5c460404dc 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -263,7 +263,7 @@ static void i8259A_shutdown(void) * out of. */ outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ } static struct syscore_ops i8259_syscore_ops = { diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e2f751efb7b1..57916c0d3cf6 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +#ifdef KPROBES_CAN_USE_FTRACE +static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); +} +#endif + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. @@ -599,6 +616,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } else if (kprobe_running()) { p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { +#ifdef KPROBES_CAN_USE_FTRACE + if (kprobe_ftrace(p)) { + skip_singlestep(p, regs, kcb); + return 1; + } +#endif setup_singlestep(p, regs, kcb, 0); return 1; } @@ -1052,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +#ifdef KPROBES_CAN_USE_FTRACE +/* Ftrace callback handler for kprobes */ +void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ + regs->ip = ip + sizeof(kprobe_opcode_t); + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) + skip_singlestep(p, regs, kcb); + /* + * If pre_handler returns !0, it sets regs->ip and + * resets current kprobe. + */ + } +end: + local_irq_restore(flags); +} + +int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} +#endif + int __init arch_init_kprobes(void) { return arch_init_optprobes(); diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 4873e62db6a1..9e5bcf1e2376 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -225,6 +225,9 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, if (do_microcode_update(buf, len) == 0) ret = (ssize_t)len; + if (ret > 0) + perf_check_microcode(); + mutex_unlock(µcode_mutex); put_online_cpus(); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index eb113693f043..a7c5661f8496 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -257,12 +257,14 @@ static int __init msr_init(void) goto out_chrdev; } msr_class->devnode = msr_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = msr_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&msr_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -271,6 +273,7 @@ out_class: i = 0; for_each_online_cpu(i) msr_device_destroy(i); + put_online_cpus(); class_destroy(msr_class); out_chrdev: __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); @@ -281,11 +284,13 @@ out: static void __exit msr_exit(void) { int cpu = 0; + get_online_cpus(); for_each_online_cpu(cpu) msr_device_destroy(cpu); class_destroy(msr_class); __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); unregister_hotcpu_notifier(&msr_class_cpu_notifier); + put_online_cpus(); } module_init(msr_init); diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c new file mode 100644 index 000000000000..e309cc5c276e --- /dev/null +++ b/arch/x86/kernel/perf_regs.c @@ -0,0 +1,105 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/bug.h> +#include <linux/stddef.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_X86_32 +#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX +#else +#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX +#endif + +#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) + +static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { + PT_REGS_OFFSET(PERF_REG_X86_AX, ax), + PT_REGS_OFFSET(PERF_REG_X86_BX, bx), + PT_REGS_OFFSET(PERF_REG_X86_CX, cx), + PT_REGS_OFFSET(PERF_REG_X86_DX, dx), + PT_REGS_OFFSET(PERF_REG_X86_SI, si), + PT_REGS_OFFSET(PERF_REG_X86_DI, di), + PT_REGS_OFFSET(PERF_REG_X86_BP, bp), + PT_REGS_OFFSET(PERF_REG_X86_SP, sp), + PT_REGS_OFFSET(PERF_REG_X86_IP, ip), + PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags), + PT_REGS_OFFSET(PERF_REG_X86_CS, cs), + PT_REGS_OFFSET(PERF_REG_X86_SS, ss), +#ifdef CONFIG_X86_32 + PT_REGS_OFFSET(PERF_REG_X86_DS, ds), + PT_REGS_OFFSET(PERF_REG_X86_ES, es), + PT_REGS_OFFSET(PERF_REG_X86_FS, fs), + PT_REGS_OFFSET(PERF_REG_X86_GS, gs), +#else + /* + * The pt_regs struct does not store + * ds, es, fs, gs in 64 bit mode. + */ + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, +#endif +#ifdef CONFIG_X86_64 + PT_REGS_OFFSET(PERF_REG_X86_R8, r8), + PT_REGS_OFFSET(PERF_REG_X86_R9, r9), + PT_REGS_OFFSET(PERF_REG_X86_R10, r10), + PT_REGS_OFFSET(PERF_REG_X86_R11, r11), + PT_REGS_OFFSET(PERF_REG_X86_R12, r12), + PT_REGS_OFFSET(PERF_REG_X86_R13, r13), + PT_REGS_OFFSET(PERF_REG_X86_R14, r14), + PT_REGS_OFFSET(PERF_REG_X86_R15, r15), +#endif +}; + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) + return 0; + + return regs_get_register(regs, pt_regs_offset[idx]); +} + +#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) + +#ifdef CONFIG_X86_32 +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_32; +} +#else /* CONFIG_X86_64 */ +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ + (1ULL << PERF_REG_X86_ES) | \ + (1ULL << PERF_REG_X86_FS) | \ + (1ULL << PERF_REG_X86_GS)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + if (mask & REG_NOSUPPORT) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_IA32)) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} +#endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index c4c6a5c2bf0f..9f94f8ec26e4 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,6 +21,7 @@ #include <linux/signal.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -1463,6 +1464,8 @@ long syscall_trace_enter(struct pt_regs *regs) { long ret = 0; + rcu_user_exit(); + /* * If we stepped into a sysenter/syscall insn, it trapped in * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. @@ -1526,4 +1529,6 @@ void syscall_trace_leave(struct pt_regs *regs) !test_thread_flag(TIF_SYSCALL_EMU); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); + + rcu_user_enter(); } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b280908a376e..bca0ab903e57 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -779,6 +779,8 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { + rcu_user_exit(); + #ifdef CONFIG_X86_MCE /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) @@ -804,6 +806,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) #ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); #endif /* CONFIG_X86_32 */ + + rcu_user_enter(); } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7c5a8c314c02..c80a33bc528b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) unsigned long boot_error = 0; int timeout; - alternatives_smp_switch(1); + /* Just in case we booted with a single CPU. */ + alternatives_enable_smp(); idle->thread.sp = (unsigned long) (((struct pt_regs *) (THREAD_SIZE + task_stack_page(idle))) - 1); @@ -1053,20 +1054,6 @@ out: preempt_enable(); } -void arch_disable_nonboot_cpus_begin(void) -{ - /* - * Avoid the smp alternatives switch during the disable_nonboot_cpus(). - * In the suspend path, we will be back in the SMP mode shortly anyways. - */ - skip_smp_alternatives = true; -} - -void arch_disable_nonboot_cpus_end(void) -{ - skip_smp_alternatives = false; -} - void arch_enable_nonboot_cpus_begin(void) { set_mtrr_aps_delayed_init(); @@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu) if (per_cpu(cpu_state, cpu) == CPU_DEAD) { if (system_state == SYSTEM_RUNNING) pr_info("CPU %u is now offline\n", cpu); - - if (1 == num_online_cpus()) - alternatives_smp_switch(0); return; } msleep(100); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index c346d1161488..cd3b2438a980 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child) return 1; } +void set_task_blockstep(struct task_struct *task, bool on) +{ + unsigned long debugctl; + + /* + * Ensure irq/preemption can't change debugctl in between. + * Note also that both TIF_BLOCKSTEP and debugctl should + * be changed atomically wrt preemption. + * FIXME: this means that set/clear TIF_BLOCKSTEP is simply + * wrong if task != current, SIGKILL can wakeup the stopped + * tracee and set/clear can play with the running task, this + * can confuse the next __switch_to_xtra(). + */ + local_irq_disable(); + debugctl = get_debugctlmsr(); + if (on) { + debugctl |= DEBUGCTLMSR_BTF; + set_tsk_thread_flag(task, TIF_BLOCKSTEP); + } else { + debugctl &= ~DEBUGCTLMSR_BTF; + clear_tsk_thread_flag(task, TIF_BLOCKSTEP); + } + if (task == current) + update_debugctlmsr(debugctl); + local_irq_enable(); +} + /* * Enable single or block step. */ @@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block) * So no one should try to use debugger block stepping in a program * that uses user-mode single stepping itself. */ - if (enable_single_step(child) && block) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - set_tsk_thread_flag(child, TIF_BLOCKSTEP); - } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (enable_single_step(child) && block) + set_task_blockstep(child, true); + else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); } void user_enable_single_step(struct task_struct *child) @@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child) /* * Make sure block stepping (BTF) is disabled. */ - if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); /* Always clear TIF_SINGLESTEP... */ clear_tsk_thread_flag(child, TIF_SINGLESTEP); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b481341c9369..cfbe3fc41586 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -55,6 +55,7 @@ #include <asm/i387.h> #include <asm/fpu-internal.h> #include <asm/mce.h> +#include <asm/rcu.h> #include <asm/mach_traps.h> @@ -107,30 +108,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) dec_preempt_count(); } -static void __kprobes -do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, - long error_code, siginfo_t *info) +static int __kprobes +do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, + struct pt_regs *regs, long error_code) { - struct task_struct *tsk = current; - #ifdef CONFIG_X86_32 if (regs->flags & X86_VM_MASK) { /* - * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. + * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86. * On nmi (interrupt 2), do_trap should not be called. */ - if (trapnr < X86_TRAP_UD) - goto vm86_trap; - goto trap_signal; + if (trapnr < X86_TRAP_UD) { + if (!handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, trapnr)) + return 0; + } + return -1; } #endif + if (!user_mode(regs)) { + if (!fixup_exception(regs)) { + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = trapnr; + die(str, regs, error_code); + } + return 0; + } - if (!user_mode(regs)) - goto kernel_trap; + return -1; +} -#ifdef CONFIG_X86_32 -trap_signal: -#endif +static void __kprobes +do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, + long error_code, siginfo_t *info) +{ + struct task_struct *tsk = current; + + + if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) + return; /* * We want error_code and trap_nr set for userspace faults and * kernelspace faults which result in die(), but not @@ -158,33 +174,20 @@ trap_signal: force_sig_info(signr, info, tsk); else force_sig(signr, tsk); - return; - -kernel_trap: - if (!fixup_exception(regs)) { - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = trapnr; - die(str, regs, error_code); - } - return; - -#ifdef CONFIG_X86_32 -vm86_trap: - if (handle_vm86_trap((struct kernel_vm86_regs *) regs, - error_code, trapnr)) - goto trap_signal; - return; -#endif } #define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + exception_enter(regs); \ + if (notify_die(DIE_TRAP, str, regs, error_code, \ + trapnr, signr) == NOTIFY_STOP) { \ + exception_exit(regs); \ return; \ + } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, NULL); \ + exception_exit(regs); \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ @@ -195,11 +198,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + exception_enter(regs); \ + if (notify_die(DIE_TRAP, str, regs, error_code, \ + trapnr, signr) == NOTIFY_STOP) { \ + exception_exit(regs); \ return; \ + } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, &info); \ + exception_exit(regs); \ } DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, @@ -222,12 +229,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, /* Runs on IST stack */ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) { + exception_enter(regs); if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - X86_TRAP_SS, SIGBUS) == NOTIFY_STOP) - return; - preempt_conditional_sti(regs); - do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); + X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { + preempt_conditional_sti(regs); + do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); + preempt_conditional_cli(regs); + } + exception_exit(regs); } dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) @@ -235,6 +244,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) static const char str[] = "double fault"; struct task_struct *tsk = current; + exception_enter(regs); /* Return not checked because double check cannot be ignored */ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); @@ -255,16 +265,29 @@ do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; + exception_enter(regs); conditional_sti(regs); #ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto gp_in_vm86; + if (regs->flags & X86_VM_MASK) { + local_irq_enable(); + handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); + goto exit; + } #endif tsk = current; - if (!user_mode(regs)) - goto gp_in_kernel; + if (!user_mode(regs)) { + if (fixup_exception(regs)) + goto exit; + + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = X86_TRAP_GP; + if (notify_die(DIE_GPF, "general protection fault", regs, error_code, + X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) + die("general protection fault", regs, error_code); + goto exit; + } tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; @@ -279,25 +302,8 @@ do_general_protection(struct pt_regs *regs, long error_code) } force_sig(SIGSEGV, tsk); - return; - -#ifdef CONFIG_X86_32 -gp_in_vm86: - local_irq_enable(); - handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); - return; -#endif - -gp_in_kernel: - if (fixup_exception(regs)) - return; - - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = X86_TRAP_GP; - if (notify_die(DIE_GPF, "general protection fault", regs, error_code, - X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP) - return; - die("general protection fault", regs, error_code); +exit: + exception_exit(regs); } /* May run on IST stack. */ @@ -312,15 +318,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co ftrace_int3_handler(regs)) return; #endif + exception_enter(regs); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; /* * Let others (NMI) know that the debug stack is in use @@ -331,6 +338,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); preempt_conditional_cli(regs); debug_stack_usage_dec(); +exit: + exception_exit(regs); } #ifdef CONFIG_X86_64 @@ -391,6 +400,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) unsigned long dr6; int si_code; + exception_enter(regs); + get_debugreg(dr6, 6); /* Filter out all the reserved bits which are preset to 1 */ @@ -406,7 +417,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Catch kmemcheck conditions first of all! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) - return; + goto exit; /* DR6 may or may not be cleared by the CPU */ set_debugreg(0, 6); @@ -421,7 +432,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; /* * Let others (NMI) know that the debug stack is in use @@ -437,7 +448,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) X86_TRAP_DB); preempt_conditional_cli(regs); debug_stack_usage_dec(); - return; + goto exit; } /* @@ -458,7 +469,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); debug_stack_usage_dec(); - return; +exit: + exception_exit(regs); } /* @@ -555,14 +567,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 ignore_fpu_irq = 1; #endif - + exception_enter(regs); math_error(regs, error_code, X86_TRAP_MF); + exception_exit(regs); } dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { + exception_enter(regs); math_error(regs, error_code, X86_TRAP_XF); + exception_exit(regs); } dotraplinkage void @@ -629,6 +644,7 @@ EXPORT_SYMBOL_GPL(math_state_restore); dotraplinkage void __kprobes do_device_not_available(struct pt_regs *regs, long error_code) { + exception_enter(regs); #ifdef CONFIG_MATH_EMULATION if (read_cr0() & X86_CR0_EM) { struct math_emu_info info = { }; @@ -637,6 +653,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) info.regs = regs; math_emulate(&info); + exception_exit(regs); return; } #endif @@ -644,12 +661,15 @@ do_device_not_available(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 conditional_sti(regs); #endif + exception_exit(regs); } #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { siginfo_t info; + + exception_enter(regs); local_irq_enable(); info.si_signo = SIGILL; @@ -657,10 +677,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) info.si_code = ILL_BADSTK; info.si_addr = NULL; if (notify_die(DIE_TRAP, "iret exception", regs, error_code, - X86_TRAP_IRET, SIGILL) == NOTIFY_STOP) - return; - do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, - &info); + X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) { + do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, + &info); + } + exception_exit(regs); } #endif diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 36fd42091fa7..9538f00827a9 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -41,6 +41,9 @@ /* Adjust the return address of a call insn */ #define UPROBE_FIX_CALL 0x2 +/* Instruction will modify TF, don't change it */ +#define UPROBE_FIX_SETF 0x4 + #define UPROBE_FIX_RIP_AX 0x8000 #define UPROBE_FIX_RIP_CX 0x4000 @@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) insn_get_opcode(insn); /* should be a nop */ switch (OPCODE1(insn)) { + case 0x9d: + /* popf */ + auprobe->fixups |= UPROBE_FIX_SETF; + break; case 0xc3: /* ret/lret */ case 0xcb: case 0xc2: @@ -646,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * Skip these instructions as per the currently known x86 ISA. * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } */ -bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { int i; @@ -673,3 +680,46 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) } return false; } + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + bool ret = __skip_sstep(auprobe, regs); + if (ret && (regs->flags & X86_EFLAGS_TF)) + send_sig(SIGTRAP, current, 0); + return ret; +} + +void arch_uprobe_enable_step(struct arch_uprobe *auprobe) +{ + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + struct pt_regs *regs = task_pt_regs(task); + + autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); + + regs->flags |= X86_EFLAGS_TF; + if (test_tsk_thread_flag(task, TIF_BLOCKSTEP)) + set_task_blockstep(task, false); +} + +void arch_uprobe_disable_step(struct arch_uprobe *auprobe) +{ + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED); + struct pt_regs *regs = task_pt_regs(task); + /* + * The state of TIF_BLOCKSTEP was not saved so we can get an extra + * SIGTRAP if we do not clear TF. We need to examine the opcode to + * make it right. + */ + if (unlikely(trapped)) { + if (!autask->saved_tf) + regs->flags &= ~X86_EFLAGS_TF; + } else { + if (autask->saved_tf) + send_sig(SIGTRAP, task, 0); + else if (!(auprobe->fixups & UPROBE_FIX_SETF)) + regs->flags &= ~X86_EFLAGS_TF; + } +} diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 6020f6f5927c..1330dd102950 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -13,9 +13,13 @@ #include <asm/ftrace.h> #ifdef CONFIG_FUNCTION_TRACER -/* mcount is defined in assembly */ +/* mcount and __fentry__ are defined in assembly */ +#ifdef CC_USING_FENTRY +EXPORT_SYMBOL(__fentry__); +#else EXPORT_SYMBOL(mcount); #endif +#endif EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index e498b18f010c..9fc9aa7ac703 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -318,7 +318,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) if (val & 0x10) { u8 edge_irr = s->irr & ~s->elcr; int i; - bool found; + bool found = false; struct kvm_vcpu *vcpu; s->init4 = val & 1; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a71faf727ff3..bca63f04dccb 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -183,95 +183,6 @@ TRACE_EVENT(kvm_apic, #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 -#define VMX_EXIT_REASONS \ - { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ - { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ - { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ - { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ - { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ - { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ - { EXIT_REASON_CPUID, "CPUID" }, \ - { EXIT_REASON_HLT, "HLT" }, \ - { EXIT_REASON_INVLPG, "INVLPG" }, \ - { EXIT_REASON_RDPMC, "RDPMC" }, \ - { EXIT_REASON_RDTSC, "RDTSC" }, \ - { EXIT_REASON_VMCALL, "VMCALL" }, \ - { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ - { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ - { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ - { EXIT_REASON_VMPTRST, "VMPTRST" }, \ - { EXIT_REASON_VMREAD, "VMREAD" }, \ - { EXIT_REASON_VMRESUME, "VMRESUME" }, \ - { EXIT_REASON_VMWRITE, "VMWRITE" }, \ - { EXIT_REASON_VMOFF, "VMOFF" }, \ - { EXIT_REASON_VMON, "VMON" }, \ - { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ - { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ - { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ - { EXIT_REASON_MSR_READ, "MSR_READ" }, \ - { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ - { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ - { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ - { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ - { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ - { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ - { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ - { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ - { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ - { EXIT_REASON_WBINVD, "WBINVD" } - -#define SVM_EXIT_REASONS \ - { SVM_EXIT_READ_CR0, "read_cr0" }, \ - { SVM_EXIT_READ_CR3, "read_cr3" }, \ - { SVM_EXIT_READ_CR4, "read_cr4" }, \ - { SVM_EXIT_READ_CR8, "read_cr8" }, \ - { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ - { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ - { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ - { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ - { SVM_EXIT_READ_DR0, "read_dr0" }, \ - { SVM_EXIT_READ_DR1, "read_dr1" }, \ - { SVM_EXIT_READ_DR2, "read_dr2" }, \ - { SVM_EXIT_READ_DR3, "read_dr3" }, \ - { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ - { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ - { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ - { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ - { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ - { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ - { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ - { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ - { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ - { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ - { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ - { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ - { SVM_EXIT_INTR, "interrupt" }, \ - { SVM_EXIT_NMI, "nmi" }, \ - { SVM_EXIT_SMI, "smi" }, \ - { SVM_EXIT_INIT, "init" }, \ - { SVM_EXIT_VINTR, "vintr" }, \ - { SVM_EXIT_CPUID, "cpuid" }, \ - { SVM_EXIT_INVD, "invd" }, \ - { SVM_EXIT_HLT, "hlt" }, \ - { SVM_EXIT_INVLPG, "invlpg" }, \ - { SVM_EXIT_INVLPGA, "invlpga" }, \ - { SVM_EXIT_IOIO, "io" }, \ - { SVM_EXIT_MSR, "msr" }, \ - { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ - { SVM_EXIT_SHUTDOWN, "shutdown" }, \ - { SVM_EXIT_VMRUN, "vmrun" }, \ - { SVM_EXIT_VMMCALL, "hypercall" }, \ - { SVM_EXIT_VMLOAD, "vmload" }, \ - { SVM_EXIT_VMSAVE, "vmsave" }, \ - { SVM_EXIT_STGI, "stgi" }, \ - { SVM_EXIT_CLGI, "clgi" }, \ - { SVM_EXIT_SKINIT, "skinit" }, \ - { SVM_EXIT_WBINVD, "wbinvd" }, \ - { SVM_EXIT_MONITOR, "monitor" }, \ - { SVM_EXIT_MWAIT, "mwait" }, \ - { SVM_EXIT_XSETBV, "xsetbv" }, \ - { SVM_EXIT_NPF, "npf" } - /* * Tracepoint for kvm guest exit: */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c00f03de1b79..b06737d122f4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3619,6 +3619,7 @@ static void seg_setup(int seg) static int alloc_apic_access_page(struct kvm *kvm) { + struct page *page; struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; @@ -3633,7 +3634,13 @@ static int alloc_apic_access_page(struct kvm *kvm) if (r) goto out; - kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); + page = gfn_to_page(kvm, 0xfee00); + if (is_error_page(page)) { + r = -EFAULT; + goto out; + } + + kvm->arch.apic_access_page = page; out: mutex_unlock(&kvm->slots_lock); return r; @@ -3641,6 +3648,7 @@ out: static int alloc_identity_pagetable(struct kvm *kvm) { + struct page *page; struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; @@ -3656,8 +3664,13 @@ static int alloc_identity_pagetable(struct kvm *kvm) if (r) goto out; - kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, - kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); + page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); + if (is_error_page(page)) { + r = -EFAULT; + goto out; + } + + kvm->arch.ept_identity_pagetable = page; out: mutex_unlock(&kvm->slots_lock); return r; @@ -4530,7 +4543,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_SET_TPR; return 0; } - }; + } break; case 2: /* clts */ handle_clts(vcpu); @@ -6575,7 +6588,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) /* Exposing INVPCID only when PCID is exposed */ best = kvm_find_cpuid_entry(vcpu, 0x7, 0); if (vmx_invpcid_supported() && - best && (best->ecx & bit(X86_FEATURE_INVPCID)) && + best && (best->ebx & bit(X86_FEATURE_INVPCID)) && guest_cpuid_has_pcid(vcpu)) { exec_control |= SECONDARY_EXEC_ENABLE_INVPCID; vmcs_write32(SECONDARY_VM_EXEC_CONTROL, @@ -6585,7 +6598,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); if (best) - best->ecx &= ~bit(X86_FEATURE_INVPCID); + best->ebx &= ~bit(X86_FEATURE_INVPCID); } } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 148ed666e311..2966c847d489 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5113,17 +5113,20 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) !kvm_event_needs_reinjection(vcpu); } -static void vapic_enter(struct kvm_vcpu *vcpu) +static int vapic_enter(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; struct page *page; if (!apic || !apic->vapic_addr) - return; + return 0; page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); + if (is_error_page(page)) + return -EFAULT; vcpu->arch.apic->vapic_page = page; + return 0; } static void vapic_exit(struct kvm_vcpu *vcpu) @@ -5430,7 +5433,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - vapic_enter(vcpu); + r = vapic_enter(vcpu); + if (r) { + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); + return r; + } r = 1; while (r > 0) { diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 76dcd9d8e0bc..7dde46d68a25 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -18,6 +18,7 @@ #include <asm/pgalloc.h> /* pgd_*(), ... */ #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ #include <asm/fixmap.h> /* VSYSCALL_START */ +#include <asm/rcu.h> /* exception_enter(), ... */ /* * Page fault error code bits: @@ -1000,8 +1001,8 @@ static int fault_in_kernel_space(unsigned long address) * and the problem, and then passes it off to one of the appropriate * routines. */ -dotraplinkage void __kprobes -do_page_fault(struct pt_regs *regs, unsigned long error_code) +static void __kprobes +__do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -1209,3 +1210,11 @@ good_area: up_read(&mm->mmap_sem); } + +dotraplinkage void __kprobes +do_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + exception_enter(regs); + __do_page_fault(regs, error_code); + exception_exit(regs); +} diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index e0e6990723e9..ab1f6a93b527 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -319,7 +319,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, */ int devmem_is_allowed(unsigned long pagenr) { - if (pagenr <= 256) + if (pagenr < 256) return 1; if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) return 0; diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 937bcece7006..704b9ec043d7 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -585,7 +585,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) while (i >= sizeof(struct acpi_mcfg_allocation)) { entries++; i -= sizeof(struct acpi_mcfg_allocation); - }; + } if (entries == 0) { pr_err(PREFIX "MMCONFIG has no entries\n"); return -ENODEV; diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 9926e11a772d..aeaff8bef2f1 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -21,6 +21,7 @@ config 64BIT config X86_32 def_bool !64BIT select HAVE_AOUT + select ARCH_WANT_IPC_PARSE_VERSION config X86_64 def_bool 64BIT diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h index 5868526b5eef..46a9df99f3c5 100644 --- a/arch/x86/um/shared/sysdep/kernel-offsets.h +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -7,9 +7,6 @@ #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) -#define STR(x) #x -#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : ) - #define BLANK() asm volatile("\n->" : : ) #define OFFSET(sym, str, mem) \ diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h index bd9a89b67e41..ca255a805ed9 100644 --- a/arch/x86/um/shared/sysdep/syscalls.h +++ b/arch/x86/um/shared/sysdep/syscalls.h @@ -1,3 +1,5 @@ +extern long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid); #ifdef __i386__ #include "syscalls_32.h" #else diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index a508cea13503..ba7363ecf896 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -416,9 +416,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) 0; PT_REGS_CX(regs) = (unsigned long) 0; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } @@ -466,9 +463,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, PT_REGS_AX(regs) = (unsigned long) sig; PT_REGS_DX(regs) = (unsigned long) &frame->info; PT_REGS_CX(regs) = (unsigned long) &frame->uc; - - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) - ptrace_notify(SIGTRAP); return 0; } diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 68d1dc91b37b..b5408cecac6c 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -28,7 +28,7 @@ #define ptregs_execve sys_execve #define ptregs_iopl sys_iopl #define ptregs_vm86old sys_vm86old -#define ptregs_clone sys_clone +#define ptregs_clone i386_clone #define ptregs_vm86 sys_vm86 #define ptregs_sigaltstack sys_sigaltstack #define ptregs_vfork sys_vfork diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c index b853e8600b9d..db444c7218fe 100644 --- a/arch/x86/um/syscalls_32.c +++ b/arch/x86/um/syscalls_32.c @@ -3,37 +3,24 @@ * Licensed under the GPL */ -#include "linux/sched.h" -#include "linux/shm.h" -#include "linux/ipc.h" -#include "linux/syscalls.h" -#include "asm/mman.h" -#include "asm/uaccess.h" -#include "asm/unistd.h" +#include <linux/syscalls.h> +#include <sysdep/syscalls.h> /* * The prototype on i386 is: * - * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr) + * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls * * and the "newtls" arg. on i386 is read by copy_thread directly from the * register saved on the stack. */ -long sys_clone(unsigned long clone_flags, unsigned long newsp, - int __user *parent_tid, void *newtls, int __user *child_tid) +long i386_clone(unsigned long clone_flags, unsigned long newsp, + int __user *parent_tid, void *newtls, int __user *child_tid) { - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; + return sys_clone(clone_flags, newsp, parent_tid, child_tid); } + long sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index f3d82bb6e15a..adb08eb5c22a 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -5,12 +5,9 @@ * Licensed under the GPL */ -#include "linux/linkage.h" -#include "linux/personality.h" -#include "linux/utsname.h" -#include "asm/prctl.h" /* XXX This should get the constants from libc */ -#include "asm/uaccess.h" -#include "os.h" +#include <linux/sched.h> +#include <asm/prctl.h> /* XXX This should get the constants from libc */ +#include <os.h> long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) { @@ -79,20 +76,6 @@ long sys_arch_prctl(int code, unsigned long addr) return arch_prctl(current, code, (unsigned long __user *) addr); } -long sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tid, void __user *child_tid) -{ - long ret; - - if (!newsp) - newsp = UPT_SP(¤t->thread.regs.regs); - current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, - child_tid); - current->thread.forking = 0; - return ret; -} - void arch_switch_to(struct task_struct *to) { if ((to->thread.arch.fs == 0) || (to->mm == NULL)) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9642d4a38602..1fbe75a95f15 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1452,6 +1452,10 @@ asmlinkage void __init xen_start_kernel(void) pci_request_acs(); xen_acpi_sleep_register(); + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; } #ifdef CONFIG_PCI /* PCI BIOS service won't work from a PV guest. */ diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index b65a76133f4f..5141d808e751 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1283,7 +1283,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; - if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) { + if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) { args->op.cmd = MMUEXT_INVLPG_MULTI; args->op.arg1.linear_addr = start; } diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index d4b255463253..72213da605f5 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -599,7 +599,7 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_ if (p2m_index(set_pfn)) return false; - for (pfn = 0; pfn <= MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { + for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { topidx = p2m_top_index(pfn); if (!p2m_top[topidx]) @@ -828,9 +828,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, xen_mc_issue(PARAVIRT_LAZY_MMU); } - /* let's use dev_bus_addr to record the old mfn instead */ - kmap_op->dev_bus_addr = page->index; - page->index = (unsigned long) kmap_op; } spin_lock_irqsave(&m2p_override_lock, flags); list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); @@ -857,7 +854,8 @@ int m2p_add_override(unsigned long mfn, struct page *page, return 0; } EXPORT_SYMBOL_GPL(m2p_add_override); -int m2p_remove_override(struct page *page, bool clear_pte) +int m2p_remove_override(struct page *page, + struct gnttab_map_grant_ref *kmap_op) { unsigned long flags; unsigned long mfn; @@ -887,10 +885,8 @@ int m2p_remove_override(struct page *page, bool clear_pte) WARN_ON(!PagePrivate(page)); ClearPagePrivate(page); - if (clear_pte) { - struct gnttab_map_grant_ref *map_op = - (struct gnttab_map_grant_ref *) page->index; - set_phys_to_machine(pfn, map_op->dev_bus_addr); + set_phys_to_machine(pfn, page->index); + if (kmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; struct gnttab_unmap_grant_ref *unmap_op; @@ -902,13 +898,13 @@ int m2p_remove_override(struct page *page, bool clear_pte) * issued. In this case handle is going to -1 because * it hasn't been modified yet. */ - if (map_op->handle == -1) + if (kmap_op->handle == -1) xen_mc_flush(); /* - * Now if map_op->handle is negative it means that the + * Now if kmap_op->handle is negative it means that the * hypercall actually returned an error. */ - if (map_op->handle == GNTST_general_error) { + if (kmap_op->handle == GNTST_general_error) { printk(KERN_WARNING "m2p_remove_override: " "pfn %lx mfn %lx, failed to modify kernel mappings", pfn, mfn); @@ -918,8 +914,8 @@ int m2p_remove_override(struct page *page, bool clear_pte) mcs = xen_mc_entry( sizeof(struct gnttab_unmap_grant_ref)); unmap_op = mcs.args; - unmap_op->host_addr = map_op->host_addr; - unmap_op->handle = map_op->handle; + unmap_op->host_addr = kmap_op->host_addr; + unmap_op->handle = kmap_op->handle; unmap_op->dev_bus_addr = 0; MULTI_grant_table_op(mcs.mc, @@ -930,10 +926,9 @@ int m2p_remove_override(struct page *page, bool clear_pte) set_pte_at(&init_mm, address, ptep, pfn_pte(pfn, PAGE_KERNEL)); __flush_tlb_single(address); - map_op->host_addr = 0; + kmap_op->host_addr = 0; } - } else - set_phys_to_machine(pfn, page->index); + } /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present * somewhere in this domain, even before being added to the diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d11ca11d14fc..e2d62d697b5d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -17,6 +17,7 @@ #include <asm/e820.h> #include <asm/setup.h> #include <asm/acpi.h> +#include <asm/numa.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -544,4 +545,7 @@ void __init xen_arch_setup(void) disable_cpufreq(); WARN_ON(set_pm_idle_to_default()); fiddle_vdso(); +#ifdef CONFIG_NUMA + numa_off = 1; +#endif } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index f58dca7a6e52..353c50f18702 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -377,7 +377,8 @@ static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle) return rc; if (num_online_cpus() == 1) - alternatives_smp_switch(1); + /* Just in case we booted with a single CPU. */ + alternatives_enable_smp(); rc = xen_smp_intr_init(cpu); if (rc) @@ -424,9 +425,6 @@ static void xen_cpu_die(unsigned int cpu) unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); xen_uninit_lock_cpu(cpu); xen_teardown_timer(cpu); - - if (num_online_cpus() == 1) - alternatives_smp_switch(0); } static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 2c8d6a3d250a..bc44311aa18c 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -31,6 +31,7 @@ #include <linux/mqueue.h> #include <linux/fs.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -110,8 +111,10 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { + rcu_idle_enter(); while (!need_resched()) platform_idle(); + rcu_idle_exit(); schedule_preempt_disabled(); } } |