diff options
Diffstat (limited to 'arch')
111 files changed, 729 insertions, 387 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index e9734796531f..15996290fed4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -226,8 +226,8 @@ config ARCH_INIT_TASK config ARCH_TASK_STRUCT_ALLOCATOR bool -# Select if arch has its private alloc_thread_info() function -config ARCH_THREAD_INFO_ALLOCATOR +# Select if arch has its private alloc_thread_stack() function +config ARCH_THREAD_STACK_ALLOCATOR bool # Select if arch wants to size task_struct dynamically via arch_task_struct_size: diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index aab14a019c20..c2ebb6f36c9d 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -40,7 +40,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return ret; } @@ -53,7 +53,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } diff --git a/arch/arc/Makefile b/arch/arc/Makefile index d4df6be66d58..85814e74677d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -66,8 +66,6 @@ endif endif -cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables - # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok ifeq ($(atleast_gcc48),y) cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2 diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 86ed671286df..3749234b7419 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -95,7 +95,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, + pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, __get_order_pte()); return pte; @@ -107,7 +107,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) pgtable_t pte_pg; struct page *page; - pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); + pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte()); if (!pte_pg) return 0; memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index e0efff15a5ae..b9192a653b7e 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, * prelogue is setup (callee regs saved and then fp set and not other * way around */ - pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); + pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); return 0; #endif diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 19cfab526d13..20febb368844 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -29,7 +29,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + return (pmd_t *)get_zeroed_page(GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 893941ec98dc..f1bde7c4e736 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -263,6 +263,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_timer_vcpu_terminate(vcpu); kvm_vgic_vcpu_destroy(vcpu); kvm_pmu_vcpu_destroy(vcpu); + kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7085e322dc42..648a32c89541 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -95,7 +95,7 @@ boot := arch/arm64/boot Image: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -Image.%: vmlinux +Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ zinstall install: diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 87e1985f3be8..9d9fd4b9a72e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -80,12 +80,14 @@ #define APM_CPU_PART_POTENZA 0x000 #define CAVIUM_CPU_PART_THUNDERX 0x0A1 +#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 #define BRCM_CPU_PART_VULCAN 0x516 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) +#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index ff98585d085a..d25f4f137c2a 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,7 +26,7 @@ #define check_pgt_cache() do { } while (0) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index a307eb6e7fa8..7f94755089e2 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -117,6 +117,8 @@ struct pt_regs { }; u64 orig_x0; u64 syscallno; + u64 orig_addr_limit; + u64 unused; // maintain 16 byte alignment }; #define arch_has_single_step() (1) diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 433e50405274..022644704a93 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -124,6 +124,18 @@ static inline void cpu_panic_kernel(void) cpu_park_loop(); } +/* + * If a secondary CPU enters the kernel but fails to come online, + * (e.g. due to mismatched features), and cannot exit the kernel, + * we increment cpus_stuck_in_kernel and leave the CPU in a + * quiesecent loop within the kernel text. The memory containing + * this loop must not be re-used for anything else as the 'stuck' + * core is executing it. + * + * This function is used to inhibit features like kexec and hibernate. + */ +bool cpus_are_stuck_in_kernel(void); + #endif /* ifndef __ASSEMBLY__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f8e5d47f0880..2f4ba774488a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -60,6 +60,7 @@ int main(void) DEFINE(S_PC, offsetof(struct pt_regs, pc)); DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); + DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d42789499f17..af716b65110d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -98,6 +98,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_THUNDERX, 0x00, (1 << MIDR_VARIANT_SHIFT) | 1), }, + { + /* Cavium ThunderX, T81 pass 1.0 */ + .desc = "Cavium erratum 27456", + .capability = ARM64_WORKAROUND_CAVIUM_27456, + MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), + }, #endif { } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 12e8d2bcb3f9..6c3b7345a6c4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -28,6 +28,7 @@ #include <asm/errno.h> #include <asm/esr.h> #include <asm/irq.h> +#include <asm/memory.h> #include <asm/thread_info.h> #include <asm/unistd.h> @@ -97,7 +98,14 @@ mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE - .endif + get_thread_info tsk + /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ + ldr x20, [tsk, #TI_ADDR_LIMIT] + str x20, [sp, #S_ORIG_ADDR_LIMIT] + mov x20, #TASK_SIZE_64 + str x20, [tsk, #TI_ADDR_LIMIT] + ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) + .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] @@ -128,6 +136,14 @@ .endm .macro kernel_exit, el + .if \el != 0 + /* Restore the task's original addr_limit. */ + ldr x20, [sp, #S_ORIG_ADDR_LIMIT] + str x20, [tsk, #TI_ADDR_LIMIT] + + /* No need to restore UAO, it will be restored from SPSR_EL1 */ + .endif + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter @@ -406,7 +422,6 @@ el1_irq: bl trace_hardirqs_off #endif - get_thread_info tsk irq_handler #ifdef CONFIG_PREEMPT diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index f8df75d740f4..21ab5df9fa76 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -33,6 +33,7 @@ #include <asm/pgtable.h> #include <asm/pgtable-hwdef.h> #include <asm/sections.h> +#include <asm/smp.h> #include <asm/suspend.h> #include <asm/virt.h> @@ -236,6 +237,11 @@ int swsusp_arch_suspend(void) unsigned long flags; struct sleep_stack_data state; + if (cpus_are_stuck_in_kernel()) { + pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n"); + return -EBUSY; + } + local_dbg_save(flags); if (__cpu_suspend_enter(&state)) { diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 678e0842cb3b..62ff3c0622e2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -909,3 +909,21 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +static bool have_cpu_die(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int any_cpu = raw_smp_processor_id(); + + if (cpu_ops[any_cpu]->cpu_die) + return true; +#endif + return false; +} + +bool cpus_are_stuck_in_kernel(void) +{ + bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die()); + + return !!cpus_stuck_in_kernel || smp_spin_tables; +} diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index b7b397802088..efcf1f7ef1e4 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -179,7 +179,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) &asid_generation); flush_context(cpu); - /* We have at least 1 ASID per CPU, so this will always succeed */ + /* We have more ASIDs than CPUs, so this will always succeed */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); set_asid: @@ -227,8 +227,11 @@ switch_mm_fastpath: static int asids_init(void) { asid_bits = get_cpu_asid_bits(); - /* If we end up with more CPUs than ASIDs, expect things to crash */ - WARN_ON(NUM_USER_ASIDS < num_possible_cpus()); + /* + * Expect allocation after rollover to fail if we don't have at least + * one more ASID than CPUs. ASID #0 is reserved for init_mm. + */ + WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus()); atomic64_set(&asid_generation, ASID_FIRST_VERSION); asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map), GFP_KERNEL); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 013e2cbe7924..b1166d1e5955 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -280,7 +280,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, } if (permission_fault(esr) && (addr < USER_DS)) { - if (get_fs() == KERNEL_DS) + /* regs->orig_addr_limit may be 0 if we entered from EL0 */ + if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); if (!search_exception_tables(regs->pc)) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index dbd12ea8ce68..43a76b07eb32 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -71,10 +71,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) { struct page *page = pte_page(pte); - /* no flushing needed for anonymous pages */ - if (!page_mapping(page)) - return; - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) sync_icache_aliases(page_address(page), PAGE_SIZE << compound_order(page)); diff --git a/arch/avr32/include/asm/pgalloc.h b/arch/avr32/include/asm/pgalloc.h index 1aba19d68c5e..db039cb368be 100644 --- a/arch/avr32/include/asm/pgalloc.h +++ b/arch/avr32/include/asm/pgalloc.h @@ -43,7 +43,7 @@ static inline void pgd_ctor(void *x) */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return quicklist_alloc(QUICK_PGD, GFP_KERNEL | __GFP_REPEAT, pgd_ctor); + return quicklist_alloc(QUICK_PGD, GFP_KERNEL, pgd_ctor); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) @@ -54,7 +54,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -63,7 +63,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, struct page *page; void *pg; - pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); if (!pg) return NULL; diff --git a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h index 235ece437ddd..42f1affb9c2d 100644 --- a/arch/cris/include/asm/pgalloc.h +++ b/arch/cris/include/asm/pgalloc.h @@ -24,14 +24,14 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index 41907d25ed38..c9ed14f6c67d 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c @@ -22,7 +22,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE))); pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL); if (pte) clear_page(pte); return pte; @@ -33,9 +33,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *page; #ifdef CONFIG_HIGHPTE - page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); + page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0); #else - page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + page = alloc_pages(GFP_KERNEL, 0); #endif if (!page) return NULL; diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index 77da3b0ae3c2..eeebf862c46c 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -64,7 +64,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + pte = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { @@ -78,7 +78,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO; return (pte_t *) __get_free_page(flags); } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index f80758cb7157..e109ee95e919 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -45,7 +45,7 @@ config IA64 select GENERIC_SMP_IDLE_THREAD select ARCH_INIT_TASK select ARCH_TASK_STRUCT_ALLOCATOR - select ARCH_THREAD_INFO_ALLOCATOR + select ARCH_THREAD_STACK_ALLOCATOR select ARCH_CLOCKSOURCE_DATA select GENERIC_TIME_VSYSCALL_OLD select SYSCTL_ARCH_UNALIGN_NO_WARN diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index aa995b67c3f5..d1212b84fb83 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -48,15 +48,15 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_info_node(tsk, node) \ - ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#define alloc_thread_stack_node(tsk, node) \ + ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_info_node(tsk, node) ((struct thread_info *) 0) +#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_info(ti) /* nothing */ +#define free_thread_stack(ti) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index f9efe9739d3f..0eaa89f3defd 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -26,6 +26,7 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * handled. This is done by having a special ".data..init_task" section... */ #define init_thread_info init_task_mem.s.thread_info +#define init_stack init_task_mem.stack union { struct { diff --git a/arch/m32r/boot/compressed/m32r_sio.c b/arch/m32r/boot/compressed/m32r_sio.c index 01d877c6868f..cf3023dced49 100644 --- a/arch/m32r/boot/compressed/m32r_sio.c +++ b/arch/m32r/boot/compressed/m32r_sio.c @@ -8,12 +8,13 @@ #include <asm/processor.h> -static void putc(char c); +static void m32r_putc(char c); static int puts(const char *s) { char c; - while ((c = *s++)) putc(c); + while ((c = *s++)) + m32r_putc(c); return 0; } @@ -41,7 +42,7 @@ static int puts(const char *s) #define BOOT_SIO0TXB PLD_ESIO0TXB #endif -static void putc(char c) +static void m32r_putc(char c) { while ((*BOOT_SIO0STS & 0x3) != 0x3) cpu_relax(); @@ -61,7 +62,7 @@ static void putc(char c) #define SIO0TXB (volatile unsigned short *)(0x00efd000 + 30) #endif -static void putc(char c) +static void m32r_putc(char c) { while ((*SIO0STS & 0x1) == 0) cpu_relax(); diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index f9924fbcfe42..fb95aed5f428 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -14,7 +14,7 @@ extern const char bad_pmd_string[]; extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - unsigned long page = __get_free_page(GFP_DMA|__GFP_REPEAT); + unsigned long page = __get_free_page(GFP_DMA); if (!page) return NULL; @@ -51,7 +51,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page, static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_DMA|__GFP_REPEAT, 0); + struct page *page = alloc_pages(GFP_DMA, 0); pte_t *pte; if (!page) diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h index 24bcba496c75..c895b987202c 100644 --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -11,7 +11,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad { pte_t *pte; - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); if (pte) { __flush_page_to_ram(pte); flush_tlb_kernel_page(pte); @@ -32,7 +32,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addres struct page *page; pte_t *pte; - page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + page = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0); if(!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 0931388de47f..1901f61f926f 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -37,7 +37,7 @@ do { \ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - unsigned long page = __get_free_page(GFP_KERNEL|__GFP_REPEAT); + unsigned long page = __get_free_page(GFP_KERNEL); if (!page) return NULL; @@ -49,7 +49,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + struct page *page = alloc_pages(GFP_KERNEL, 0); if (page == NULL) return NULL; diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h index 3104df0a4822..c2caa1ee4360 100644 --- a/arch/metag/include/asm/pgalloc.h +++ b/arch/metag/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | - __GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); return pte; } @@ -51,7 +50,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index 61436d69775c..7c89390c0c13 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -116,9 +116,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *ptepage; #ifdef CONFIG_HIGHPTE - int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; + int flags = GFP_KERNEL | __GFP_HIGHMEM; #else - int flags = GFP_KERNEL | __GFP_REPEAT; + int flags = GFP_KERNEL; #endif ptepage = alloc_pages(flags, 0); diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 4f4520e779a5..eb99fcc76088 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -239,8 +239,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; if (mem_init_done) { - pte = (pte_t *)__get_free_page(GFP_KERNEL | - __GFP_REPEAT | __GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } else { pte = (pte_t *)early_get_page(); if (pte) diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index b336037e8768..93c079a1cfc8 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -69,7 +69,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -79,7 +79,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (!pte) return NULL; clear_highpage(pte); @@ -113,7 +113,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { pmd_t *pmd; - pmd = (pmd_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PMD_ORDER); + pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ORDER); if (pmd) pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); return pmd; diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index a6b611f1da43..7d44e888134f 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -24,7 +24,7 @@ struct mm_struct; struct vm_area_struct; #define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \ - _CACHE_CACHABLE_NONCOHERENT) + _page_cachable_default) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \ _page_cachable_default) #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \ @@ -476,7 +476,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) pte.pte_low &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK); pte.pte_high &= (_PFN_MASK | _CACHE_MASK); pte.pte_low |= pgprot_val(newprot) & ~_PFNX_MASK; - pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK; + pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK); return pte; } #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) @@ -491,7 +491,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #else static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); } #endif @@ -632,7 +633,8 @@ static inline struct page *pmd_page(pmd_t pmd) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | pgprot_val(newprot); + pmd_val(pmd) = (pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HUGE)) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK); return pmd; } diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index 4861a78c7160..f5f90bbf019d 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -115,7 +115,7 @@ static inline unsigned long current_stack_pointer(void) } #ifndef CONFIG_KGDB -void arch_release_thread_info(struct thread_info *ti); +void arch_release_thread_stack(unsigned long *stack); #endif #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c index 99770823451a..2d7986c386fe 100644 --- a/arch/mn10300/kernel/kgdb.c +++ b/arch/mn10300/kernel/kgdb.c @@ -397,8 +397,9 @@ static bool kgdb_arch_undo_singlestep(struct pt_regs *regs) * single-step state is cleared. At this point the breakpoints should have * been removed by __switch_to(). */ -void arch_release_thread_info(struct thread_info *ti) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *ti = (void *)stack; if (kgdb_sstep_thread == ti) { kgdb_sstep_thread = NULL; diff --git a/arch/mn10300/mm/pgtable.c b/arch/mn10300/mm/pgtable.c index e77a7c728081..9577cf768875 100644 --- a/arch/mn10300/mm/pgtable.c +++ b/arch/mn10300/mm/pgtable.c @@ -63,7 +63,7 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL); if (pte) clear_page(pte); return pte; @@ -74,9 +74,9 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *pte; #ifdef CONFIG_HIGHPTE - pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0); #else - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL, 0); #endif if (!pte) return NULL; diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 6e2985e0a7b9..bb47d08c8ef7 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, - PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -53,7 +52,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (pte) { if (!pgtable_page_ctor(pte)) { __free_page(pte); diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 21484e5b9e9a..87eebd185089 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -77,7 +77,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL, 0); if (!pte) return NULL; clear_page(page_address(pte)); diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 62b08ef392be..5b2a95116e8f 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -122,7 +122,7 @@ pte_t __init_refok *pte_alloc_one_kernel(struct mm_struct *mm, pte_t *pte; if (likely(mem_init_done)) { - pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT); + pte = (pte_t *) __get_free_page(GFP_KERNEL); } else { pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); #if 0 diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index f2fd327dce2e..f08dda3f0995 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -63,8 +63,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, - PMD_ORDER); + pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL, PMD_ORDER); if (pmd) memset(pmd, 0, PAGE_SIZE<<PMD_ORDER); return pmd; @@ -124,7 +123,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO); if (!page) return NULL; if (!pgtable_page_ctor(page)) { @@ -137,7 +136,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 01f7464d9fea..0a9d439bcda6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -128,7 +128,7 @@ config PPC select IRQ_FORCED_THREADING select HAVE_RCU_TABLE_FREE if SMP select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT + select HAVE_CBPF_JIT if CPU_BIG_ENDIAN select HAVE_ARCH_JUMP_LABEL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index a2350194fc76..8e21bb492dca 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -102,7 +102,6 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb, static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { - tlb_flush_pgtable(tlb, address); pgtable_page_dtor(table); pgtable_free_tlb(tlb, page_address(table), 0); } diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 290157e8d5b2..74839f24f412 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -88,6 +88,7 @@ #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) #define HPTE_R_PP ASM_CONST(0x0000000000000003) +#define HPTE_R_PPP ASM_CONST(0x8000000000000003) #define HPTE_R_N ASM_CONST(0x0000000000000004) #define HPTE_R_G ASM_CONST(0x0000000000000008) #define HPTE_R_M ASM_CONST(0x0000000000000010) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 488279edb1f0..cd5e7aa8cc34 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -41,7 +41,7 @@ extern struct kmem_cache *pgtable_cache[]; pgtable_cache[(shift) - 1]; \ }) -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int); extern void pte_fragment_free(unsigned long *, int); @@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) return (pgd_t *)__get_free_page(PGALLOC_GFP); #else struct page *page; - page = alloc_pages(PGALLOC_GFP, 4); + page = alloc_pages(PGALLOC_GFP | __GFP_REPEAT, 4); if (!page) return NULL; return (pgd_t *) page_address(page); @@ -93,8 +93,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -110,13 +109,17 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long address) { + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE); } static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) @@ -127,6 +130,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long address) { + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX); } @@ -151,7 +159,7 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -198,7 +206,11 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { - tlb_flush_pgtable(tlb, address); + /* + * By now all the pud entries should be none entries. So go + * ahead and flush the page walk cache + */ + flush_tlb_pgtable(tlb, address); pgtable_free_tlb(tlb, table, 0); } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 88a5ecaa157b..ab84c89c9e98 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -230,6 +230,7 @@ extern unsigned long __kernel_virt_size; #define KERN_VIRT_SIZE __kernel_virt_size extern struct page *vmemmap; extern unsigned long ioremap_bot; +extern unsigned long pci_io_base; #endif /* __ASSEMBLY__ */ #include <asm/book3s/64/hash.h> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 937d4e247ac3..df294224e280 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -228,5 +228,20 @@ extern void radix__vmemmap_remove_mapping(unsigned long start, extern int radix__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, unsigned int psz); + +static inline unsigned long radix__get_tree_size(void) +{ + unsigned long rts_field; + /* + * we support 52 bits, hence 52-31 = 21, 0b10101 + * RTS encoding details + * bits 0 - 3 of rts -> bits 6 - 8 unsigned long + * bits 4 - 5 of rts -> bits 62 - 63 of unsigned long + */ + rts_field = (0x5UL << 5); /* 6 - 8 bits */ + rts_field |= (0x2UL << 61); + + return rts_field; +} #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 13ef38828dfe..3fa94fcac628 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -18,16 +18,19 @@ extern void radix__local_flush_tlb_mm(struct mm_struct *mm); extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid); +extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__tlb_flush(struct mmu_gather *tlb); #ifdef CONFIG_SMP extern void radix__flush_tlb_mm(struct mm_struct *mm); extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); extern void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid); +extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); #else #define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm) #define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr) #define radix___flush_tlb_page(mm,addr,p,i) radix___local_flush_tlb_page(mm,addr,p,i) +#define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr) #endif #endif diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index d98424ae356c..96e5769b18b0 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -72,5 +72,19 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) #define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) #endif /* CONFIG_SMP */ +/* + * flush the page walk cache for the address + */ +static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long address) +{ + /* + * Flush the page table walk cache on freeing a page table. We already + * have marked the upper/higher level page table entry none by now. + * So it is safe to flush PWC here. + */ + if (!radix_enabled()) + return; + radix__flush_tlb_pwc(tlb, address); +} #endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */ diff --git a/arch/powerpc/include/asm/book3s/pgalloc.h b/arch/powerpc/include/asm/book3s/pgalloc.h index 54f591e9572e..c0a69ae92256 100644 --- a/arch/powerpc/include/asm/book3s/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/pgalloc.h @@ -4,11 +4,6 @@ #include <linux/mm.h> extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -static inline void tlb_flush_pgtable(struct mmu_gather *tlb, - unsigned long address) -{ - -} #ifdef CONFIG_PPC64 #include <asm/book3s/64/pgalloc.h> diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 069369f6414b..897d2e1c8a9b 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -57,8 +57,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -88,7 +87,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -190,8 +189,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 2714a3b81d24..d70101e1e25c 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -642,13 +642,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (pe->type & EEH_PE_VF) { eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_lock_rescan_remove(); pci_hp_remove_devices(bus); pci_unlock_rescan_remove(); } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); + eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } /* @@ -692,10 +691,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, */ edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - if (pe->type & EEH_PE_VF) + if (pe->type & EEH_PE_VF) { eeh_add_virt_device(edev, NULL); - else + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_hp_add_devices(bus); + } } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4c9440629128..8bcc1b457115 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1399,11 +1399,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ mtlr r10 -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) andi. r10,r12,MSR_RI /* check for unrecoverable exception */ +BEGIN_MMU_FTR_SECTION beq- 2f +FTR_SECTION_ELSE + b 2f +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX) .machine push .machine "power4" diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 3759df52bd67..a5ae49a2dcc4 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -47,7 +47,6 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); - pci_io_base = ISA_IO_BASE; /* For now, override phys_mem_access_prot. If we need it,g * later, we may move that initialization to each ppc_md */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e2f12cbcade9..0b93893424f5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1505,6 +1505,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.regs = regs - 1; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * Clear any transactional state, we're exec()ing. The cause is + * not important as there will never be a recheckpoint so it's not + * user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); +#endif + memset(regs->gpr, 0, sizeof(regs->gpr)); regs->ctr = 0; regs->link = 0; diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index bf8f34a58670..b7019b559ddb 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim) std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) - /* We need to setup MSR for VSX register save instructions. Here we - * also clear the MSR RI since when we do the treclaim, we won't have a - * valid kernel pointer for a while. We clear RI here as it avoids - * adding another mtmsr closer to the treclaim. This makes the region - * maked as non-recoverable wider than it needs to be but it saves on - * inserting another mtmsrd later. - */ + /* We need to setup MSR for VSX register save instructions. */ mfmsr r14 mr r15, r14 ori r15, r15, MSR_FP - li r16, MSR_RI + li r16, 0 ori r16, r16, MSR_EE /* IRQs hard off */ andc r15, r15, r16 oris r15, r15, MSR_VEC@h @@ -176,7 +170,17 @@ dont_backup_fp: 1: tdeqi r6, 0 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 - /* The moment we treclaim, ALL of our GPRs will switch + /* Clear MSR RI since we are about to change r1, EE is already off. */ + li r4, 0 + mtmsrd r4, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + * + * The moment we treclaim, ALL of our GPRs will switch * to user register state. (FPRs, CCR etc. also!) * Use an sprg and a tm_scratch in the PACA to shuffle. */ @@ -197,6 +201,11 @@ dont_backup_fp: /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + + /* Reset MSR RI so we can take SLB faults again */ + li r11, MSR_RI + mtmsrd r11, 1 + mfspr r11, SPRN_PPR HMT_MEDIUM @@ -397,11 +406,6 @@ restore_gprs: ld r5, THREAD_TM_DSCR(r3) ld r6, THREAD_TM_PPR(r3) - /* Clear the MSR RI since we are about to change R1. EE is already off - */ - li r4, 0 - mtmsrd r4, 1 - REST_GPR(0, r7) /* GPR0 */ REST_2GPRS(2, r7) /* GPR2-3 */ REST_GPR(4, r7) /* GPR4 */ @@ -439,10 +443,33 @@ restore_gprs: ld r6, _CCR(r7) mtcr r6 - REST_GPR(1, r7) /* GPR1 */ - REST_GPR(5, r7) /* GPR5-7 */ REST_GPR(6, r7) - ld r7, GPR7(r7) + + /* + * Store r1 and r5 on the stack so that we can access them + * after we clear MSR RI. + */ + + REST_GPR(5, r7) + std r5, -8(r1) + ld r5, GPR1(r7) + std r5, -16(r1) + + REST_GPR(7, r7) + + /* Clear MSR RI since we are about to change r1. EE is already off */ + li r5, 0 + mtmsrd r5, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + */ + + ld r5, -8(r1) + ld r1, -16(r1) /* Commit register state as checkpointed state: */ TRECHKPT diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 40e05e7f43de..f8a871a72985 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -316,8 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> hit\n"); /* Update the HPTE */ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & - ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N | + ~(HPTE_R_PPP | HPTE_R_N)) | + (newpp & (HPTE_R_PPP | HPTE_R_N | HPTE_R_C))); } native_unlock_hpte(hptep); @@ -385,8 +385,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, /* Update the HPTE */ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & - ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N))); + ~(HPTE_R_PPP | HPTE_R_N)) | + (newpp & (HPTE_R_PPP | HPTE_R_N))); /* * Ensure it is out of the tlb too. Bolted entries base and * actual page size will be same. diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b2740c67e172..2971ea18c768 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -201,9 +201,8 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) /* * We can't allow hardware to update hpte bits. Hence always * set 'R' bit and set 'C' if it is a write fault - * Memory coherence is always enabled */ - rflags |= HPTE_R_R | HPTE_R_M; + rflags |= HPTE_R_R; if (pteflags & _PAGE_DIRTY) rflags |= HPTE_R_C; @@ -213,10 +212,15 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) rflags |= HPTE_R_I; - if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT) + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT) rflags |= (HPTE_R_I | HPTE_R_G); - if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) - rflags |= (HPTE_R_I | HPTE_R_W); + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) + rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); + else + /* + * Add memory coherence if cache inhibited is not set + */ + rflags |= HPTE_R_M; return rflags; } @@ -918,6 +922,10 @@ void __init hash__early_init_mmu(void) vmemmap = (struct page *)H_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 5aac1a3f86cd..119d18611500 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -73,7 +73,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, cachep = PGT_CACHE(pdshift - pshift); #endif - new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT); + new = kmem_cache_zalloc(cachep, GFP_KERNEL); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 227b2a6c4544..196222227e82 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -65,7 +65,7 @@ static int radix__init_new_context(struct mm_struct *mm, int index) /* * set the process table entry, */ - rts_field = 3ull << PPC_BITLSHIFT(2); + rts_field = radix__get_tree_size(); process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE); return 0; } diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index c939e6e57a9e..7931e1496f0d 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -160,9 +160,8 @@ redo: process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. - * we support 52 bits, hence 52-28 = 24, 11000 */ - rts_field = 3ull << PPC_BITLSHIFT(2); + rts_field = radix__get_tree_size(); process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE); /* * Fill in the partition table. We are suppose to use effective address @@ -176,10 +175,8 @@ redo: static void __init radix_init_partition_table(void) { unsigned long rts_field; - /* - * we support 52 bits, hence 52-28 = 24, 11000 - */ - rts_field = 3ull << PPC_BITLSHIFT(2); + + rts_field = radix__get_tree_size(); BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); @@ -331,6 +328,11 @@ void __init radix__early_init_mmu(void) __vmalloc_end = RADIX_VMALLOC_END; vmemmap = (struct page *)RADIX_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; + +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* * For now radix also use the same frag size */ diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index bf7bf32b54f8..7f922f557936 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -84,7 +84,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add pte_t *pte; if (slab_is_available()) { - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); } else { pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); if (pte) @@ -97,7 +97,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *ptepage; - gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO; ptepage = alloc_pages(flags, 0); if (!ptepage) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e009e0604a8a..f5e8d4edb808 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -350,8 +350,7 @@ static pte_t *get_from_cache(struct mm_struct *mm) static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) { void *ret = NULL; - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); if (!page) return NULL; if (!kernel && !pgtable_page_ctor(page)) { diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 54efba2fd66e..ab2f60e812e2 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -18,16 +18,20 @@ static DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbiel_pid(unsigned long pid, int set) +#define RIC_FLUSH_TLB 0 +#define RIC_FLUSH_PWC 1 +#define RIC_FLUSH_ALL 2 + +static inline void __tlbiel_pid(unsigned long pid, int set, + unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rb |= set << PPC_BITLSHIFT(51); rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 2; /* invalidate all the caches */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" @@ -39,25 +43,24 @@ static inline void __tlbiel_pid(unsigned long pid, int set) /* * We use 128 set in radix mode and 256 set in hpt mode. */ -static inline void _tlbiel_pid(unsigned long pid) +static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) { int set; for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) { - __tlbiel_pid(pid, set); + __tlbiel_pid(pid, set, ric); } return; } -static inline void _tlbie_pid(unsigned long pid) +static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 2; /* invalidate all the caches */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" @@ -67,16 +70,15 @@ static inline void _tlbie_pid(unsigned long pid) } static inline void _tlbiel_va(unsigned long va, unsigned long pid, - unsigned long ap) + unsigned long ap, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = va & ~(PPC_BITMASK(52, 63)); rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 0; /* no cluster flush yet */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" @@ -86,16 +88,15 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, } static inline void _tlbie_va(unsigned long va, unsigned long pid, - unsigned long ap) + unsigned long ap, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = va & ~(PPC_BITMASK(52, 63)); rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 0; /* no cluster flush yet */ asm volatile("ptesync": : :"memory"); asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" @@ -122,11 +123,26 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm) preempt_disable(); pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - _tlbiel_pid(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_tlb_mm); +void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + unsigned long pid; + struct mm_struct *mm = tlb->mm; + + preempt_disable(); + + pid = mm->context.id; + if (pid != MMU_NO_CONTEXT) + _tlbiel_pid(pid, RIC_FLUSH_PWC); + + preempt_enable(); +} +EXPORT_SYMBOL(radix__local_flush_tlb_pwc); + void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { @@ -135,7 +151,7 @@ void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, preempt_disable(); pid = mm ? mm->context.id : 0; if (pid != MMU_NO_CONTEXT) - _tlbiel_va(vmaddr, pid, ap); + _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); preempt_enable(); } @@ -172,16 +188,42 @@ void radix__flush_tlb_mm(struct mm_struct *mm) if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_pid(pid); + _tlbie_pid(pid, RIC_FLUSH_ALL); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else - _tlbiel_pid(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); no_context: preempt_enable(); } EXPORT_SYMBOL(radix__flush_tlb_mm); +void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + unsigned long pid; + struct mm_struct *mm = tlb->mm; + + preempt_disable(); + + pid = mm->context.id; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto no_context; + + if (!mm_is_core_local(mm)) { + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + raw_spin_lock(&native_tlbie_lock); + _tlbie_pid(pid, RIC_FLUSH_PWC); + if (lock_tlbie) + raw_spin_unlock(&native_tlbie_lock); + } else + _tlbiel_pid(pid, RIC_FLUSH_PWC); +no_context: + preempt_enable(); +} +EXPORT_SYMBOL(radix__flush_tlb_pwc); + void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { @@ -196,11 +238,11 @@ void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_va(vmaddr, pid, ap); + _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else - _tlbiel_va(vmaddr, pid, ap); + _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); bail: preempt_enable(); } @@ -224,7 +266,7 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_pid(0); + _tlbie_pid(0, RIC_FLUSH_ALL); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index 5e04f3cbd320..8ae236b0f80b 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -22,7 +22,7 @@ static inline int test_fp_ctl(u32 fpc) " la %0,0\n" "1:\n" EX_TABLE(0b,1b) - : "=d" (rc), "=d" (orig_fpc) + : "=d" (rc), "=&d" (orig_fpc) : "d" (fpc), "0" (-EINVAL)); return rc; } diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index f20abdb5630a..d14069d4b88d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2064,12 +2064,5 @@ void s390_reset_system(void) S390_lowcore.program_new_psw.addr = (unsigned long) s390_base_pgm_handler; - /* - * Clear subchannel ID and number to signal new kernel that no CCW or - * SCSI IPL has been done (for kexec and kdump) - */ - S390_lowcore.subchannel_id = 0; - S390_lowcore.subchannel_nr = 0; - do_reset_calls(); } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 59215c518f37..7ec63b1d920d 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -649,6 +649,8 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) /* Performance monitoring unit for s390x */ static struct pmu cpumf_pmu = { + .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, .pmu_enable = cpumf_pmu_enable, .pmu_disable = cpumf_pmu_disable, .event_init = cpumf_pmu_event_init, @@ -708,12 +710,6 @@ static int __init cpumf_pmu_init(void) goto out; } - /* The CPU measurement counter facility does not have overflow - * interrupts to do sampling. Sampling must be provided by - * external means, for example, by timers. - */ - cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - cpumf_pmu.attr_groups = cpumf_cf_event_group(); rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); if (rc) { diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index e8b5962ac12a..e2565d2d0c32 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -169,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) return table; } /* Allocate a fresh page */ - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + page = alloc_page(GFP_KERNEL); if (!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4324b87f9398..9f0ce0e6eeb4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -437,7 +437,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pgste = pgste_get_lock(ptep); pgstev = pgste_val(pgste); pte = *ptep; - if (pte_swap(pte) && + if (!reset && pte_swap(pte) && ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || (pgstev & _PGSTE_GPS_ZERO))) { ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); diff --git a/arch/score/include/asm/pgalloc.h b/arch/score/include/asm/pgalloc.h index 2e067657db98..49b012d78c1a 100644 --- a/arch/score/include/asm/pgalloc.h +++ b/arch/score/include/asm/pgalloc.h @@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, - PTE_ORDER); + pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); return pte; } @@ -53,7 +52,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; - pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); + pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (!pte) return NULL; clear_highpage(pte); diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index a33673b3687d..f3f42c84c40f 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -34,7 +34,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -43,7 +43,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, struct page *page; void *pg; - pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); if (!pg) return NULL; page = virt_to_page(pg); diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c index 26e03a1f7ca4..a62bd8696779 100644 --- a/arch/sh/mm/pgtable.c +++ b/arch/sh/mm/pgtable.c @@ -1,7 +1,7 @@ #include <linux/mm.h> #include <linux/slab.h> -#define PGALLOC_GFP GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_ZERO static struct kmem_cache *pgd_cachep; #if PAGETABLE_LEVELS > 2 diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 5e3187185b4a..3529f1378cd8 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -41,8 +41,7 @@ static inline void __pud_populate(pud_t *pud, pmd_t *pmd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(pgtable_cache, - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -52,8 +51,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(pgtable_cache, - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 14bb0d5ed3c6..aec508e37490 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2704,8 +2704,7 @@ void __flush_tlb_all(void) pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); pte_t *pte = NULL; if (page) @@ -2717,8 +2716,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); if (!page) return NULL; if (!pgtable_page_ctor(page)) { diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 4b7cef9e94e0..c1467ac59ce6 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -78,7 +78,7 @@ struct thread_info { #ifndef __ASSEMBLY__ -void arch_release_thread_info(struct thread_info *info); +void arch_release_thread_stack(unsigned long *stack); /* How to get the thread information struct from C. */ register unsigned long stack_pointer __asm__("sp"); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 6b705ccc9cc1..a465d8372edd 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -73,8 +73,9 @@ void arch_cpu_idle(void) /* * Release a thread_info structure */ -void arch_release_thread_info(struct thread_info *info) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *info = (void *)stack; struct single_step_state *step_state = info->step_state; if (step_state) { diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 7bf2491a9c1f..c4d5bf841a7f 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -231,7 +231,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, int order) { - gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO; + gfp_t flags = GFP_KERNEL|__GFP_ZERO; struct page *p; int i; diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index b2a2dff50b4e..e7437ec62710 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -204,7 +204,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); return pte; } @@ -212,7 +212,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; - pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = alloc_page(GFP_KERNEL|__GFP_ZERO); if (!pte) return NULL; if (!pgtable_page_ctor(pte)) { diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h index 2e02d1356fdf..26775793c204 100644 --- a/arch/unicore32/include/asm/pgalloc.h +++ b/arch/unicore32/include/asm/pgalloc.h @@ -28,7 +28,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd); #define pgd_alloc(mm) get_pgd_slow(mm) #define pgd_free(mm, pgd) free_pgd_slow(mm, pgd) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) /* * Allocate one PTE table. diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 33787ee817f0..91eac39625be 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -263,7 +263,7 @@ static bool check_hw_exists(void) msr_fail: pr_cont("Broken PMU hardware detected, using software events only.\n"); - pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n", + printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n", boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR, reg, val_new); @@ -2319,7 +2319,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { struct stack_frame frame; - const void __user *fp; + const unsigned long __user *fp; if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ @@ -2332,7 +2332,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) return; - fp = (void __user *)regs->bp; + fp = (unsigned long __user *)regs->bp; perf_callchain_store(entry, regs->ip); @@ -2345,16 +2345,17 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs pagefault_disable(); while (entry->nr < entry->max_stack) { unsigned long bytes; + frame.next_frame = NULL; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, 16)) + if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2)) break; - bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8); + bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); if (bytes != 0) break; - bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8); + bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp)); if (bytes != 0) break; diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile index 3660b2cf245a..06c2baa51814 100644 --- a/arch/x86/events/intel/Makefile +++ b/arch/x86/events/intel/Makefile @@ -1,8 +1,8 @@ obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o -obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o -intel-rapl-objs := rapl.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o +intel-rapl-perf-objs := rapl.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7c666958a625..9b4f9d3ce465 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -115,6 +115,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -139,6 +143,10 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -182,6 +190,16 @@ struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ + + /* + * when HT is off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc6, 0xf), /* FRONTEND_RETIRED.* */ + EVENT_CONSTRAINT_END }; @@ -250,6 +268,10 @@ static struct event_constraint intel_hsw_event_constraints[] = { /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -264,6 +286,13 @@ struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ + /* + * when HT is off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ EVENT_CONSTRAINT_END }; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4a413485f9eb..c64b1e9c5d1a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -301,10 +301,6 @@ #define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ #define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ -#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */ - - #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional @@ -312,5 +308,7 @@ */ #define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4421b5da409d..d1d1e5094c28 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -38,12 +38,11 @@ typedef u8 kprobe_opcode_t; #define RELATIVECALL_OPCODE 0xe8 #define RELATIVE_ADDR_SIZE 4 #define MAX_STACK_SIZE 64 -#define MIN_STACK_SIZE(ADDR) \ - (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) \ - ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) +#define CUR_STACK_SIZE(ADDR) \ + (current_top_of_stack() - (unsigned long)(ADDR)) +#define MIN_STACK_SIZE(ADDR) \ + (MAX_STACK_SIZE < CUR_STACK_SIZE(ADDR) ? \ + MAX_STACK_SIZE : CUR_STACK_SIZE(ADDR)) #define flush_insn_slot(p) do { } while (0) diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index bf7f8b55b0f9..574c23cf761a 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -81,7 +81,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { struct page *page; - page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); if (!page) return NULL; if (!pgtable_pmd_page_ctor(page)) { @@ -125,7 +125,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); + return (pud_t *)get_zeroed_page(GFP_KERNEL); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index fdcc04020636..7c1c89598688 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -69,29 +69,22 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) } static __always_inline -u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) -{ - u64 delta = rdtsc_ordered() - src->tsc_timestamp; - return pvclock_scale_delta(delta, src->tsc_to_system_mul, - src->tsc_shift); -} - -static __always_inline unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, cycle_t *cycles, u8 *flags) { unsigned version; - cycle_t ret, offset; - u8 ret_flags; + cycle_t offset; + u64 delta; version = src->version; + /* Make the latest version visible */ + smp_rmb(); - offset = pvclock_get_nsec_offset(src); - ret = src->system_time + offset; - ret_flags = src->flags; - - *cycles = ret; - *flags = ret_flags; + delta = rdtsc_ordered() - src->tsc_timestamp; + offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, + src->tsc_shift); + *cycles = src->system_time + offset; + *flags = src->flags; return version; } diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 7c247e7404be..0944218af9e2 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -14,7 +14,7 @@ extern int kstack_depth_to_print; struct thread_info; struct stacktrace_ops; -typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, +typedef unsigned long (*walk_stack_t)(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, @@ -23,13 +23,13 @@ typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, int *graph); extern unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); extern unsigned long -print_context_stack_bp(struct thread_info *tinfo, +print_context_stack_bp(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index a147e676fc7b..e991d5c8bb3a 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -71,8 +71,8 @@ int amd_cache_northbridges(void) while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL) i++; - if (i == 0) - return 0; + if (!i) + return -ENODEV; nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); if (!nb) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2bb25c3fe2e8..ef8017ca5ba9 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -42,16 +42,14 @@ void printk_address(unsigned long address) static void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { - struct task_struct *task; unsigned long ret_addr; int index; if (addr != (unsigned long)return_to_handler) return; - task = tinfo->task; index = task->curr_ret_stack; if (!task->ret_stack || index < *graph) @@ -68,7 +66,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data, static inline void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { } #endif @@ -79,10 +77,10 @@ print_ftrace_graph_addr(unsigned long addr, void *data, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -static inline int valid_stack_ptr(struct thread_info *tinfo, +static inline int valid_stack_ptr(struct task_struct *task, void *p, unsigned int size, void *end) { - void *t = tinfo; + void *t = task_stack_page(task); if (end) { if (p < end && p >= (end-THREAD_SIZE)) return 1; @@ -93,14 +91,14 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, } unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) { struct stack_frame *frame = (struct stack_frame *)bp; - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { + while (valid_stack_ptr(task, stack, sizeof(*stack), end)) { unsigned long addr; addr = *stack; @@ -112,7 +110,7 @@ print_context_stack(struct thread_info *tinfo, } else { ops->address(data, addr, 0); } - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } stack++; } @@ -121,7 +119,7 @@ print_context_stack(struct thread_info *tinfo, EXPORT_SYMBOL_GPL(print_context_stack); unsigned long -print_context_stack_bp(struct thread_info *tinfo, +print_context_stack_bp(struct task_struct *task, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) @@ -129,7 +127,7 @@ print_context_stack_bp(struct thread_info *tinfo, struct stack_frame *frame = (struct stack_frame *)bp; unsigned long *ret_addr = &frame->return_address; - while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { + while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) { unsigned long addr = *ret_addr; if (!__kernel_text_address(addr)) @@ -139,7 +137,7 @@ print_context_stack_bp(struct thread_info *tinfo, break; frame = frame->next_frame; ret_addr = &frame->return_address; - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } return (unsigned long)frame; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 464ffd69b92e..fef917e79b9d 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -61,15 +61,13 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, bp = stack_frame(task, regs); for (;;) { - struct thread_info *context; void *end_stack; end_stack = is_hardirq_stack(stack, cpu); if (!end_stack) end_stack = is_softirq_stack(stack, cpu); - context = task_thread_info(task); - bp = ops->walk_stack(context, stack, bp, ops, data, + bp = ops->walk_stack(task, stack, bp, ops, data, end_stack, &graph); /* Stop if not on irq stack */ diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 5f1c6266eb30..d558a8a49016 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -153,7 +153,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - struct thread_info *tinfo; unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned long dummy; unsigned used = 0; @@ -179,7 +178,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * current stack address. If the stacks consist of nested * exceptions */ - tinfo = task_thread_info(task); while (!done) { unsigned long *stack_end; enum stack_type stype; @@ -202,7 +200,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, id) < 0) break; - bp = ops->walk_stack(tinfo, stack, bp, ops, + bp = ops->walk_stack(task, stack, bp, ops, data, stack_end, &graph); ops->stack(data, "<EOE>"); /* @@ -218,7 +216,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, "IRQ") < 0) break; - bp = ops->walk_stack(tinfo, stack, bp, + bp = ops->walk_stack(task, stack, bp, ops, data, stack_end, &graph); /* * We link to the next stack (which would be @@ -240,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, /* * This handles the process stack: */ - bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); + bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph); put_cpu(); } EXPORT_SYMBOL(dump_trace); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index bca14c899137..57b71373bae3 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -11,7 +11,11 @@ #include <linux/pci.h> #include <linux/acpi.h> +#include <linux/delay.h> +#include <linux/dmi.h> #include <linux/pci_ids.h> +#include <linux/bcma/bcma.h> +#include <linux/bcma/bcma_regs.h> #include <drm/i915_drm.h> #include <asm/pci-direct.h> #include <asm/dma.h> @@ -21,6 +25,9 @@ #include <asm/iommu.h> #include <asm/gart.h> #include <asm/irq_remapping.h> +#include <asm/early_ioremap.h> + +#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg) static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -76,6 +83,13 @@ static void __init nvidia_bugs(int num, int slot, int func) #ifdef CONFIG_ACPI #ifdef CONFIG_X86_IO_APIC /* + * Only applies to Nvidia root ports (bus 0) and not to + * Nvidia graphics cards with PCI ports on secondary buses. + */ + if (num) + return; + + /* * All timer overrides on Nvidia are * wrong unless HPET is enabled. * Unfortunately that's not true on many Asus boards. @@ -590,6 +604,61 @@ static void __init force_disable_hpet(int num, int slot, int func) #endif } +#define BCM4331_MMIO_SIZE 16384 +#define BCM4331_PM_CAP 0x40 +#define bcma_aread32(reg) ioread32(mmio + 1 * BCMA_CORE_SIZE + reg) +#define bcma_awrite32(reg, val) iowrite32(val, mmio + 1 * BCMA_CORE_SIZE + reg) + +static void __init apple_airport_reset(int bus, int slot, int func) +{ + void __iomem *mmio; + u16 pmcsr; + u64 addr; + int i; + + if (!dmi_match(DMI_SYS_VENDOR, "Apple Inc.")) + return; + + /* Card may have been put into PCI_D3hot by grub quirk */ + pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); + + if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { + pmcsr &= ~PCI_PM_CTRL_STATE_MASK; + write_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL, pmcsr); + mdelay(10); + + pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); + if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { + dev_err("Cannot power up Apple AirPort card\n"); + return; + } + } + + addr = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); + addr |= (u64)read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_1) << 32; + addr &= PCI_BASE_ADDRESS_MEM_MASK; + + mmio = early_ioremap(addr, BCM4331_MMIO_SIZE); + if (!mmio) { + dev_err("Cannot iomap Apple AirPort card\n"); + return; + } + + pr_info("Resetting Apple AirPort card (left enabled by EFI)\n"); + + for (i = 0; bcma_aread32(BCMA_RESET_ST) && i < 30; i++) + udelay(10); + + bcma_awrite32(BCMA_RESET_CTL, BCMA_RESET_CTL_RESET); + bcma_aread32(BCMA_RESET_CTL); + udelay(1); + + bcma_awrite32(BCMA_RESET_CTL, 0); + bcma_aread32(BCMA_RESET_CTL); + udelay(10); + + early_iounmap(mmio, BCM4331_MMIO_SIZE); +} #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 @@ -603,12 +672,6 @@ struct chipset { void (*f)(int num, int slot, int func); }; -/* - * Only works for devices on the root bus. If you add any devices - * not on bus 0 readd another loop level in early_quirks(). But - * be careful because at least the Nvidia quirk here relies on - * only matching on bus 0. - */ static struct chipset early_qrk[] __initdata = { { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, @@ -638,9 +701,13 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_BROADCOM, 0x4331, + PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} }; +static void __init early_pci_scan_bus(int bus); + /** * check_dev_quirk - apply early quirks to a given PCI device * @num: bus number @@ -649,7 +716,7 @@ static struct chipset early_qrk[] __initdata = { * * Check the vendor & device ID against the early quirks table. * - * If the device is single function, let early_quirks() know so we don't + * If the device is single function, let early_pci_scan_bus() know so we don't * poke at this device again. */ static int __init check_dev_quirk(int num, int slot, int func) @@ -658,6 +725,7 @@ static int __init check_dev_quirk(int num, int slot, int func) u16 vendor; u16 device; u8 type; + u8 sec; int i; class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); @@ -685,25 +753,36 @@ static int __init check_dev_quirk(int num, int slot, int func) type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); + + if ((type & 0x7f) == PCI_HEADER_TYPE_BRIDGE) { + sec = read_pci_config_byte(num, slot, func, PCI_SECONDARY_BUS); + if (sec > num) + early_pci_scan_bus(sec); + } + if (!(type & 0x80)) return -1; return 0; } -void __init early_quirks(void) +static void __init early_pci_scan_bus(int bus) { int slot, func; - if (!early_pci_allowed()) - return; - /* Poor man's PCI discovery */ - /* Only scan the root bus */ for (slot = 0; slot < 32; slot++) for (func = 0; func < 8; func++) { /* Only probe function 0 on single fn devices */ - if (check_dev_quirk(0, slot, func)) + if (check_dev_quirk(bus, slot, func)) break; } } + +void __init early_quirks(void) +{ + if (!early_pci_allowed()) + return; + + early_pci_scan_bus(0); +} diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 4d38416e2a7f..04f89caef9c4 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -57,7 +57,7 @@ # error "Need more than one PGD for the ESPFIX hack" #endif -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) /* This contains the *bottom* address of the espfix stack */ DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 38da8f29a9c8..c627bf8d98ad 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -130,11 +130,9 @@ void irq_ctx_init(int cpu) void do_softirq_own_stack(void) { - struct thread_info *curstk; struct irq_stack *irqstk; u32 *isp, *prev_esp; - curstk = current_stack(); irqstk = __this_cpu_read(softirq_stack); /* build the stack frame on the softirq stack */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 38cf7a741250..7847e5c0e0b5 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -961,7 +961,19 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * normal page fault. */ regs->ip = (unsigned long)cur->addr; + /* + * Trap flag (TF) has been set here because this fault + * happened where the single stepping will be done. + * So clear it by resetting the current kprobe: + */ + regs->flags &= ~X86_EFLAGS_TF; + + /* + * If the TF flag was set before the kprobe hit, + * don't touch it: + */ regs->flags |= kcb->kprobe_old_flags; + if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 99bfc025111d..06c58ce46762 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -61,11 +61,16 @@ void pvclock_resume(void) u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) { unsigned version; - cycle_t ret; u8 flags; do { - version = __pvclock_read_cycles(src, &ret, &flags); + version = src->version; + /* Make the latest version visible */ + smp_rmb(); + + flags = src->flags; + /* Make sure that the version double-check is last. */ + smp_rmb(); } while ((src->version & 1) || version != src->version); return flags & valid_flags; @@ -80,6 +85,8 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) do { version = __pvclock_read_cycles(src, &ret, &flags); + /* Make sure that the version double-check is last. */ + smp_rmb(); } while ((src->version & 1) || version != src->version); if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bbb5b283ff63..a397200281c1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1310,7 +1310,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ if (guest_tsc < tsc_deadline) - __delay(tsc_deadline - guest_tsc); + __delay(min(tsc_deadline - guest_tsc, + nsec_to_cycles(vcpu, lapic_timer_advance_ns))); } static void start_apic_timer(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 003618e324ce..64a79f271276 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6671,7 +6671,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, /* Checks for #GP/#SS exceptions. */ exn = false; - if (is_protmode(vcpu)) { + if (is_long_mode(vcpu)) { + /* Long mode: #GP(0)/#SS(0) if the memory address is in a + * non-canonical form. This is the only check on the memory + * destination for long mode! + */ + exn = is_noncanonical_address(*ret); + } else if (is_protmode(vcpu)) { /* Protected mode: apply checks for segment validity in the * following order: * - segment type check (#GP(0) may be thrown) @@ -6688,17 +6694,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, * execute-only code segment */ exn = ((s.type & 0xa) == 8); - } - if (exn) { - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; - } - if (is_long_mode(vcpu)) { - /* Long mode: #GP(0)/#SS(0) if the memory address is in a - * non-canonical form. This is an only check for long mode. - */ - exn = is_noncanonical_address(*ret); - } else if (is_protmode(vcpu)) { + if (exn) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. */ exn = (s.unusable != 0); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 902d9da12392..7da5dd2057a9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1244,12 +1244,6 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); static unsigned long max_tsc_khz; -static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) -{ - return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, - vcpu->arch.virtual_tsc_shift); -} - static u32 adjust_tsc_khz(u32 khz, s32 ppm) { u64 v = (u64)khz * (1000000 + ppm); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 7ce3634ab5fe..a82ca466b62e 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -2,6 +2,7 @@ #define ARCH_X86_KVM_X86_H #include <linux/kvm_host.h> +#include <asm/pvclock.h> #include "kvm_cache_regs.h" #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL @@ -195,6 +196,12 @@ extern unsigned int lapic_timer_advance_ns; extern struct static_key kvm_no_apic_vcpu; +static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) +{ + return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, + vcpu->arch.virtual_tsc_shift); +} + /* Same "calling convention" as do_div: * - divide (n << 32) by base * - put result in n diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 1b1110fa0057..0493c17b8a51 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -54,8 +54,8 @@ static int kasan_die_handler(struct notifier_block *self, void *data) { if (val == DIE_GPF) { - pr_emerg("CONFIG_KASAN_INLINE enabled"); - pr_emerg("GPF could be caused by NULL-ptr deref or user memory access"); + pr_emerg("CONFIG_KASAN_INLINE enabled\n"); + pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n"); } return NOTIFY_OK; } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 4eb287e25043..aa0ff4b02a96 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -6,7 +6,7 @@ #include <asm/fixmap.h> #include <asm/mtrr.h> -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO +#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO #ifdef CONFIG_HIGHPTE #define PGALLOC_USER_GFP __GFP_HIGHMEM diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index b2a4e2a61f6b..3cd69832d7f4 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -396,6 +396,7 @@ int __init pci_acpi_init(void) return -ENODEV; printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + acpi_irq_penalty_init(); pcibios_enable_irq = acpi_pci_irq_enable; pcibios_disable_irq = acpi_pci_irq_disable; x86_init.pci.init_irq = x86_init_noop; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 6e7242be1c87..b226b3f497f1 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -139,7 +139,7 @@ int __init efi_alloc_page_tables(void) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO; + gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; efi_pgd = (pgd_t *)__get_free_page(gfp_mask); if (!efi_pgd) return -ENOMEM; diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 009947d419a6..f2b5e6a5cf95 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -19,6 +19,7 @@ #include <asm/mtrr.h> #include <asm/sections.h> #include <asm/suspend.h> +#include <asm/tlbflush.h> /* Defined in hibernate_asm_64.S */ extern asmlinkage __visible int restore_image(void); @@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_image(void); * kernel's text (this value is passed in the image header). */ unsigned long restore_jump_address __visible; +unsigned long jump_address_phys; /* * Value of the cr3 register from before the hibernation (this value is passed @@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible; pgd_t *temp_level4_pgt __visible; -void *relocated_restore_code __visible; +unsigned long relocated_restore_code __visible; + +static int set_up_temporary_text_mapping(void) +{ + pmd_t *pmd; + pud_t *pud; + + /* + * The new mapping only has to cover the page containing the image + * kernel's entry point (jump_address_phys), because the switch over to + * it is carried out by relocated code running from a page allocated + * specifically for this purpose and covered by the identity mapping, so + * the temporary kernel text mapping is only needed for the final jump. + * Moreover, in that mapping the virtual address of the image kernel's + * entry point must be the same as its virtual address in the image + * kernel (restore_jump_address), so the image kernel's + * restore_registers() code doesn't find itself in a different area of + * the virtual address space after switching over to the original page + * tables used by the image kernel. + */ + pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!pud) + return -ENOMEM; + + pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!pmd) + return -ENOMEM; + + set_pmd(pmd + pmd_index(restore_jump_address), + __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC)); + set_pud(pud + pud_index(restore_jump_address), + __pud(__pa(pmd) | _KERNPG_TABLE)); + set_pgd(temp_level4_pgt + pgd_index(restore_jump_address), + __pgd(__pa(pud) | _KERNPG_TABLE)); + + return 0; +} static void *alloc_pgt_page(void *context) { @@ -59,9 +97,10 @@ static int set_up_temporary_mappings(void) if (!temp_level4_pgt) return -ENOMEM; - /* It is safe to reuse the original kernel mapping */ - set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), - init_level4_pgt[pgd_index(__START_KERNEL_map)]); + /* Prepare a temporary mapping for the kernel text */ + result = set_up_temporary_text_mapping(); + if (result) + return result; /* Set up the direct mapping from scratch */ for (i = 0; i < nr_pfn_mapped; i++) { @@ -78,19 +117,50 @@ static int set_up_temporary_mappings(void) return 0; } +static int relocate_restore_code(void) +{ + pgd_t *pgd; + pud_t *pud; + + relocated_restore_code = get_safe_page(GFP_ATOMIC); + if (!relocated_restore_code) + return -ENOMEM; + + memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE); + + /* Make the page containing the relocated code executable */ + pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code); + pud = pud_offset(pgd, relocated_restore_code); + if (pud_large(*pud)) { + set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); + } else { + pmd_t *pmd = pmd_offset(pud, relocated_restore_code); + + if (pmd_large(*pmd)) { + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX)); + } else { + pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code); + + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX)); + } + } + __flush_tlb_all(); + + return 0; +} + int swsusp_arch_resume(void) { int error; /* We have got enough memory and from now on we cannot recover */ - if ((error = set_up_temporary_mappings())) + error = set_up_temporary_mappings(); + if (error) return error; - relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC); - if (!relocated_restore_code) - return -ENOMEM; - memcpy(relocated_restore_code, &core_restore_code, - &restore_registers - &core_restore_code); + error = relocate_restore_code(); + if (error) + return error; restore_image(); return 0; @@ -109,11 +179,12 @@ int pfn_is_nosave(unsigned long pfn) struct restore_data_record { unsigned long jump_address; + unsigned long jump_address_phys; unsigned long cr3; unsigned long magic; }; -#define RESTORE_MAGIC 0x0123456789ABCDEFUL +#define RESTORE_MAGIC 0x123456789ABCDEF0UL /** * arch_hibernation_header_save - populate the architecture specific part @@ -126,7 +197,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) if (max_size < sizeof(struct restore_data_record)) return -EOVERFLOW; - rdr->jump_address = restore_jump_address; + rdr->jump_address = (unsigned long)&restore_registers; + rdr->jump_address_phys = __pa_symbol(&restore_registers); rdr->cr3 = restore_cr3; rdr->magic = RESTORE_MAGIC; return 0; @@ -142,6 +214,7 @@ int arch_hibernation_header_restore(void *addr) struct restore_data_record *rdr = addr; restore_jump_address = rdr->jump_address; + jump_address_phys = rdr->jump_address_phys; restore_cr3 = rdr->cr3; return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL; } diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 4400a43b9e28..3177c2bc26f6 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -44,9 +44,6 @@ ENTRY(swsusp_arch_suspend) pushfq popq pt_regs_flags(%rax) - /* save the address of restore_registers */ - movq $restore_registers, %rax - movq %rax, restore_jump_address(%rip) /* save cr3 */ movq %cr3, %rax movq %rax, restore_cr3(%rip) @@ -57,31 +54,34 @@ ENTRY(swsusp_arch_suspend) ENDPROC(swsusp_arch_suspend) ENTRY(restore_image) - /* switch to temporary page tables */ - movq $__PAGE_OFFSET, %rdx - movq temp_level4_pgt(%rip), %rax - subq %rdx, %rax - movq %rax, %cr3 - /* Flush TLB */ - movq mmu_cr4_features(%rip), %rax - movq %rax, %rdx - andq $~(X86_CR4_PGE), %rdx - movq %rdx, %cr4; # turn off PGE - movq %cr3, %rcx; # flush TLB - movq %rcx, %cr3; - movq %rax, %cr4; # turn PGE back on - /* prepare to jump to the image kernel */ - movq restore_jump_address(%rip), %rax - movq restore_cr3(%rip), %rbx + movq restore_jump_address(%rip), %r8 + movq restore_cr3(%rip), %r9 + + /* prepare to switch to temporary page tables */ + movq temp_level4_pgt(%rip), %rax + movq mmu_cr4_features(%rip), %rbx /* prepare to copy image data to their original locations */ movq restore_pblist(%rip), %rdx + + /* jump to relocated restore code */ movq relocated_restore_code(%rip), %rcx jmpq *%rcx /* code below has been relocated to a safe page */ ENTRY(core_restore_code) + /* switch to temporary page tables */ + movq $__PAGE_OFFSET, %rcx + subq %rcx, %rax + movq %rax, %cr3 + /* flush TLB */ + movq %rbx, %rcx + andq $~(X86_CR4_PGE), %rcx + movq %rcx, %cr4; # turn off PGE + movq %cr3, %rcx; # flush TLB + movq %rcx, %cr3; + movq %rbx, %cr4; # turn PGE back on .Lloop: testq %rdx, %rdx jz .Ldone @@ -96,24 +96,17 @@ ENTRY(core_restore_code) /* progress to the next pbe */ movq pbe_next(%rdx), %rdx jmp .Lloop + .Ldone: /* jump to the restore_registers address from the image header */ - jmpq *%rax - /* - * NOTE: This assumes that the boot kernel's text mapping covers the - * image kernel's page containing restore_registers and the address of - * this page is the same as in the image kernel's text mapping (it - * should always be true, because the text mapping is linear, starting - * from 0, and is supposed to cover the entire kernel text for every - * kernel). - * - * code below belongs to the image kernel - */ + jmpq *%r8 + /* code below belongs to the image kernel */ + .align PAGE_SIZE ENTRY(restore_registers) FRAME_BEGIN /* go back to the original page tables */ - movq %rbx, %cr3 + movq %r9, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movq mmu_cr4_features(%rip), %rax diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 478a2de543a5..67433714b791 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1113,7 +1113,7 @@ static void __init xen_cleanhighmap(unsigned long vaddr, /* NOTE: The loop is more greedy than the cleanup_highmap variant. * We include the PMD passed in on _both_ boundaries. */ - for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE)); + for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD)); pmd++, vaddr += PMD_SIZE) { if (pmd_none(*pmd)) continue; @@ -1551,41 +1551,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } -#ifdef CONFIG_X86_32 -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) -{ - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); - - return pte; -} -#else /* CONFIG_X86_64 */ -static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) -{ - unsigned long pfn; - - if (xen_feature(XENFEAT_writable_page_tables) || - xen_feature(XENFEAT_auto_translated_physmap) || - xen_start_info->mfn_list >= __START_KERNEL_map) - return pte; - - /* - * Pages belonging to the initial p2m list mapped outside the default - * address range must be mapped read-only. This region contains the - * page tables for mapping the p2m list, too, and page tables MUST be - * mapped read-only. - */ - pfn = pte_pfn(pte); - if (pfn >= xen_start_info->first_p2m_pfn && - pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) - pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW); - - return pte; -} -#endif /* CONFIG_X86_64 */ - /* * Init-time set_pte while constructing initial pagetables, which * doesn't allow RO page table pages to be remapped RW. @@ -1600,13 +1565,37 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) * so always write the PTE directly and rely on Xen trapping and * emulating any updates as necessary. */ -static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) +__visible pte_t xen_make_pte_init(pteval_t pte) { - if (pte_mfn(pte) != INVALID_P2M_ENTRY) - pte = mask_rw_pte(ptep, pte); - else - pte = __pte_ma(0); +#ifdef CONFIG_X86_64 + unsigned long pfn; + + /* + * Pages belonging to the initial p2m list mapped outside the default + * address range must be mapped read-only. This region contains the + * page tables for mapping the p2m list, too, and page tables MUST be + * mapped read-only. + */ + pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT; + if (xen_start_info->mfn_list < __START_KERNEL_map && + pfn >= xen_start_info->first_p2m_pfn && + pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames) + pte &= ~_PAGE_RW; +#endif + pte = pte_pfn_to_mfn(pte); + return native_make_pte(pte); +} +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init); +static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) +{ +#ifdef CONFIG_X86_32 + /* If there's an existing pte, then don't allow _PAGE_RW to be set */ + if (pte_mfn(pte) != INVALID_P2M_ENTRY + && pte_val_ma(*ptep) & _PAGE_PRESENT) + pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & + pte_val_ma(pte)); +#endif native_set_pte(ptep, pte); } @@ -2407,6 +2396,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.alloc_pud = xen_alloc_pud; pv_mmu_ops.release_pud = xen_release_pud; #endif + pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte); #ifdef CONFIG_X86_64 pv_mmu_ops.write_cr3 = &xen_write_cr3; @@ -2455,7 +2445,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .pte_val = PV_CALLEE_SAVE(xen_pte_val), .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), - .make_pte = PV_CALLEE_SAVE(xen_make_pte), + .make_pte = PV_CALLEE_SAVE(xen_make_pte_init), .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), #ifdef CONFIG_X86_PAE diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index cab9f766bb06..dd2a49a8aacc 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -182,7 +182,7 @@ static void * __ref alloc_p2m_page(void) if (unlikely(!slab_is_available())) return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); - return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); + return (void *)__get_free_page(GFP_KERNEL); } static void __ref free_p2m_page(void *p) diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index d38eb9237e64..1065bc8bcae5 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -44,7 +44,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pte_t *ptep; int i; - ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + ptep = (pte_t *)__get_free_page(GFP_KERNEL); if (!ptep) return NULL; for (i = 0; i < 1024; i++) |