diff options
Diffstat (limited to 'arch')
55 files changed, 585 insertions, 290 deletions
diff --git a/arch/alpha/include/asm/hardirq.h b/arch/alpha/include/asm/hardirq.h index d953e234daa8..88971460fa6c 100644 --- a/arch/alpha/include/asm/hardirq.h +++ b/arch/alpha/include/asm/hardirq.h @@ -14,17 +14,4 @@ typedef struct { void ack_bad_irq(unsigned int irq); -#define HARDIRQ_BITS 12 - -/* - * The hardirq mask has to be large enough to have - * space for potentially nestable IRQ sources in the system - * to nest on a single CPU. On Alpha, interrupts are masked at the CPU - * by IPL as well as at the system level. We only have 8 IPLs (UNIX PALcode) - * so we really only have 8 nestable IRQs, but allow some overhead - */ -#if (1 << HARDIRQ_BITS) < 16 -#error HARDIRQ_BITS is too low! -#endif - #endif /* _ALPHA_HARDIRQ_H */ diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index f238370c907d..8d0097f10208 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -93,8 +93,8 @@ common_shutdown_1(void *generic_ptr) if (cpuid != boot_cpuid) { flags |= 0x00040000UL; /* "remain halted" */ *pflags = flags; - cpu_clear(cpuid, cpu_present_map); - cpu_clear(cpuid, cpu_possible_map); + set_cpu_present(cpuid, false); + set_cpu_possible(cpuid, false); halt(); } #endif @@ -120,8 +120,8 @@ common_shutdown_1(void *generic_ptr) #ifdef CONFIG_SMP /* Wait for the secondaries to halt. */ - cpu_clear(boot_cpuid, cpu_present_map); - cpu_clear(boot_cpuid, cpu_possible_map); + set_cpu_present(boot_cpuid, false); + set_cpu_possible(boot_cpuid, false); while (cpus_weight(cpu_present_map)) barrier(); #endif diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 00f1dc3dfd5f..b1fe5674c3a1 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -120,12 +120,12 @@ void __cpuinit smp_callin(void) { int cpuid = hard_smp_processor_id(); - cpumask_t mask = cpu_online_map; - if (cpu_test_and_set(cpuid, mask)) { + if (cpu_online(cpuid)) { printk("??, cpu 0x%x already present??\n", cpuid); BUG(); } + set_cpu_online(cpuid, true); /* Turn on machine checks. */ wrmces(7); @@ -436,8 +436,8 @@ setup_smp(void) ((char *)cpubase + i*hwrpb->processor_size); if ((cpu->flags & 0x1cc) == 0x1cc) { smp_num_probed++; - cpu_set(i, cpu_possible_map); - cpu_set(i, cpu_present_map); + set_cpu_possible(i, true); + set_cpu_present(i, true); cpu->pal_revision = boot_cpu_palrev; } @@ -470,8 +470,8 @@ smp_prepare_cpus(unsigned int max_cpus) /* Nothing to do on a UP box, or when told not to. */ if (smp_num_probed == 1 || max_cpus == 0) { - cpu_possible_map = cpumask_of_cpu(boot_cpuid); - cpu_present_map = cpumask_of_cpu(boot_cpuid); + init_cpu_possible(cpumask_of(boot_cpuid)); + init_cpu_present(cpumask_of(boot_cpuid)); printk(KERN_INFO "SMP mode deactivated.\n"); return; } diff --git a/arch/avr32/include/asm/hardirq.h b/arch/avr32/include/asm/hardirq.h index 267354356f60..015bc75ea798 100644 --- a/arch/avr32/include/asm/hardirq.h +++ b/arch/avr32/include/asm/hardirq.h @@ -20,15 +20,4 @@ void ack_bad_irq(unsigned int irq); #endif /* __ASSEMBLY__ */ -#define HARDIRQ_BITS 12 - -/* - * The hardirq mask has to be large enough to have - * space for potentially all IRQ sources in the system - * nesting on a single CPU: - */ -#if (1 << HARDIRQ_BITS) < NR_IRQS -# error HARDIRQ_BITS is too low! -#endif - #endif /* __ASM_AVR32_HARDIRQ_H */ diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 6183aeccecf1..8b6a8a554afa 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -22,6 +22,9 @@ config IA64 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_DYNAMIC_FTRACE if (!ITANIUM) + select HAVE_FUNCTION_TRACER select HAVE_DMA_ATTRS select HAVE_KVM select HAVE_ARCH_TRACEHOOK diff --git a/arch/ia64/include/asm/ftrace.h b/arch/ia64/include/asm/ftrace.h new file mode 100644 index 000000000000..d20db3c2a656 --- /dev/null +++ b/arch/ia64/include/asm/ftrace.h @@ -0,0 +1,28 @@ +#ifndef _ASM_IA64_FTRACE_H +#define _ASM_IA64_FTRACE_H + +#ifdef CONFIG_FUNCTION_TRACER +#define MCOUNT_INSN_SIZE 32 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0); +#define mcount _mcount + +#include <asm/kprobes.h> +/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */ +#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip) +#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip) + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + /* second bundle, insn 2 */ + return addr - 0x12; +} + +struct dyn_arch_ftrace { +}; +#endif + +#endif /* CONFIG_FUNCTION_TRACER */ + +#endif /* _ASM_IA64_FTRACE_H */ diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h index 140e495b8e0e..d514cd9edb49 100644 --- a/arch/ia64/include/asm/hardirq.h +++ b/arch/ia64/include/asm/hardirq.h @@ -20,16 +20,6 @@ #define local_softirq_pending() (local_cpu_data->softirq_pending) -#define HARDIRQ_BITS 14 - -/* - * The hardirq mask has to be large enough to have space for potentially all IRQ sources - * in the system nesting on a single CPU: - */ -#if (1 << HARDIRQ_BITS) < NR_IRQS -# error HARDIRQ_BITS is too low! -#endif - extern void __iomem *ipi_base_addr; void ack_bad_irq(unsigned int irq); diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index 68aa6da807c1..bfa86b6af7cd 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -25,6 +25,10 @@ #include <linux/ioctl.h> +/* Select x86 specific features in <linux/kvm.h> */ +#define __KVM_HAVE_IOAPIC +#define __KVM_HAVE_DEVICE_ASSIGNMENT + /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/ia64/include/asm/mmzone.h b/arch/ia64/include/asm/mmzone.h index 34efe88eb849..f2ca32069b3f 100644 --- a/arch/ia64/include/asm/mmzone.h +++ b/arch/ia64/include/asm/mmzone.h @@ -31,10 +31,6 @@ static inline int pfn_to_nid(unsigned long pfn) #endif } -#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -extern int early_pfn_to_nid(unsigned long pfn); -#endif - #ifdef CONFIG_IA64_DIG /* DIG systems are small */ # define MAX_PHYSNODE_ID 8 # define NR_NODE_MEMBLKS (MAX_NUMNODES * 8) diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index c381ea954892..ab6e7ec0bba3 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -2,6 +2,10 @@ # Makefile for the linux kernel. # +ifdef CONFIG_DYNAMIC_FTRACE +CFLAGS_REMOVE_ftrace.o = -pg +endif + extra-y := head.o init_task.o vmlinux.lds obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ @@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index e5341e2c1175..7e3382b06d56 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -47,6 +47,7 @@ #include <asm/processor.h> #include <asm/thread_info.h> #include <asm/unistd.h> +#include <asm/ftrace.h> #include "minstate.h" @@ -1404,6 +1405,105 @@ GLOBAL_ENTRY(unw_init_running) br.ret.sptk.many rp END(unw_init_running) +#ifdef CONFIG_FUNCTION_TRACER +#ifdef CONFIG_DYNAMIC_FTRACE +GLOBAL_ENTRY(_mcount) + br ftrace_stub +END(_mcount) + +.here: + br.ret.sptk.many b0 + +GLOBAL_ENTRY(ftrace_caller) + alloc out0 = ar.pfs, 8, 0, 4, 0 + mov out3 = r0 + ;; + mov out2 = b0 + add r3 = 0x20, r3 + mov out1 = r1; + br.call.sptk.many b0 = ftrace_patch_gp + //this might be called from module, so we must patch gp +ftrace_patch_gp: + movl gp=__gp + mov b0 = r3 + ;; +.global ftrace_call; +ftrace_call: +{ + .mlx + nop.m 0x0 + movl r3 = .here;; +} + alloc loc0 = ar.pfs, 4, 4, 2, 0 + ;; + mov loc1 = b0 + mov out0 = b0 + mov loc2 = r8 + mov loc3 = r15 + ;; + adds out0 = -MCOUNT_INSN_SIZE, out0 + mov out1 = in2 + mov b6 = r3 + + br.call.sptk.many b0 = b6 + ;; + mov ar.pfs = loc0 + mov b0 = loc1 + mov r8 = loc2 + mov r15 = loc3 + br ftrace_stub + ;; +END(ftrace_caller) + +#else +GLOBAL_ENTRY(_mcount) + movl r2 = ftrace_stub + movl r3 = ftrace_trace_function;; + ld8 r3 = [r3];; + ld8 r3 = [r3];; + cmp.eq p7,p0 = r2, r3 +(p7) br.sptk.many ftrace_stub + ;; + + alloc loc0 = ar.pfs, 4, 4, 2, 0 + ;; + mov loc1 = b0 + mov out0 = b0 + mov loc2 = r8 + mov loc3 = r15 + ;; + adds out0 = -MCOUNT_INSN_SIZE, out0 + mov out1 = in2 + mov b6 = r3 + + br.call.sptk.many b0 = b6 + ;; + mov ar.pfs = loc0 + mov b0 = loc1 + mov r8 = loc2 + mov r15 = loc3 + br ftrace_stub + ;; +END(_mcount) +#endif + +GLOBAL_ENTRY(ftrace_stub) + mov r3 = b0 + movl r2 = _mcount_ret_helper + ;; + mov b6 = r2 + mov b7 = r3 + br.ret.sptk.many b6 + +_mcount_ret_helper: + mov b0 = r42 + mov r1 = r41 + mov ar.pfs = r40 + br b7 +END(ftrace_stub) + +#endif /* CONFIG_FUNCTION_TRACER */ + .rodata .align 8 .globl sys_call_table diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c new file mode 100644 index 000000000000..7fc8c961b1f7 --- /dev/null +++ b/arch/ia64/kernel/ftrace.c @@ -0,0 +1,206 @@ +/* + * Dynamic function tracing support. + * + * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com> + * + * For licencing details, see COPYING. + * + * Defines low-level handling of mcount calls when the kernel + * is compiled with the -pg flag. When using dynamic ftrace, the + * mcount call-sites get patched lazily with NOP till they are + * enabled. All code mutation routines here take effect atomically. + */ + +#include <linux/uaccess.h> +#include <linux/ftrace.h> + +#include <asm/cacheflush.h> +#include <asm/patch.h> + +/* In IA64, each function will be added below two bundles with -pg option */ +static unsigned char __attribute__((aligned(8))) +ftrace_orig_code[MCOUNT_INSN_SIZE] = { + 0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */ + 0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */ + 0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */ + 0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */ + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */ + 0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */ +}; + +struct ftrace_orig_insn { + u64 dummy1, dummy2, dummy3; + u64 dummy4:64-41+13; + u64 imm20:20; + u64 dummy5:3; + u64 sign:1; + u64 dummy6:4; +}; + +/* mcount stub will be converted below for nop */ +static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */ + 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */ + 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */ + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */ + 0x00, 0x00, 0x04, 0x00 +}; + +static unsigned char *ftrace_nop_replace(void) +{ + return ftrace_nop_code; +} + +/* + * mcount stub will be converted below for call + * Note: Just the last instruction is changed against nop + * */ +static unsigned char __attribute__((aligned(8))) +ftrace_call_code[MCOUNT_INSN_SIZE] = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */ + 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */ + 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */ + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */ + 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/ + 0xf8, 0xff, 0xff, 0xc8 +}; + +struct ftrace_call_insn { + u64 dummy1, dummy2; + u64 dummy3:48; + u64 imm39_l:16; + u64 imm39_h:23; + u64 dummy4:13; + u64 imm20:20; + u64 dummy5:3; + u64 i:1; + u64 dummy6:4; +}; + +static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + struct ftrace_call_insn *code = (void *)ftrace_call_code; + unsigned long offset = addr - (ip + 0x10); + + code->imm39_l = offset >> 24; + code->imm39_h = offset >> 40; + code->imm20 = offset >> 4; + code->i = offset >> 63; + return ftrace_call_code; +} + +static int +ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code, int do_check) +{ + unsigned char replaced[MCOUNT_INSN_SIZE]; + + /* + * Note: Due to modules and __init, code can + * disappear and change, we need to protect against faulting + * as well as code changing. We do this by using the + * probe_kernel_* functions. + * + * No real locking needed, this code is run through + * kstop_machine, or before SMP starts. + */ + + if (!do_check) + goto skip_check; + + /* read the text we want to modify */ + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* Make sure it is what we expect it to be */ + if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + +skip_check: + /* replace the text with the new text */ + if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE)) + return -EPERM; + flush_icache_range(ip, ip + MCOUNT_INSN_SIZE); + + return 0; +} + +static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE]; + unsigned long ip = rec->ip; + + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + if (rec->flags & FTRACE_FL_CONVERTED) { + struct ftrace_call_insn *call_insn, *tmp_call; + + call_insn = (void *)ftrace_call_code; + tmp_call = (void *)replaced; + call_insn->imm39_l = tmp_call->imm39_l; + call_insn->imm39_h = tmp_call->imm39_h; + call_insn->imm20 = tmp_call->imm20; + call_insn->i = tmp_call->i; + if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + return 0; + } else { + struct ftrace_orig_insn *call_insn, *tmp_call; + + call_insn = (void *)ftrace_orig_code; + tmp_call = (void *)replaced; + call_insn->sign = tmp_call->sign; + call_insn->imm20 = tmp_call->imm20; + if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + return 0; + } +} + +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + int ret; + char *new; + + ret = ftrace_make_nop_check(rec, addr); + if (ret) + return ret; + new = ftrace_nop_replace(); + return ftrace_modify_code(rec->ip, NULL, new, 0); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + unsigned char *old, *new; + + old= ftrace_nop_replace(); + new = ftrace_call_replace(ip, addr); + return ftrace_modify_code(ip, old, new, 1); +} + +/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip; + unsigned long addr = ((struct fnptr *)ftrace_call)->ip; + + if (func == ftrace_stub) + return 0; + ip = ((struct fnptr *)func)->ip; + + ia64_patch_imm64(addr + 2, ip); + + flush_icache_range(addr, addr + 16); + return 0; +} + +/* run from kstop_machine */ +int __init ftrace_dyn_arch_init(void *data) +{ + *(unsigned long *)data = 0; + + return 0; +} diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 6da1f20d7372..2d311864e359 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -112,3 +112,9 @@ EXPORT_SYMBOL_GPL(esi_call_phys); #endif extern char ia64_ivt[]; EXPORT_SYMBOL(ia64_ivt); + +#include <asm/ftrace.h> +#ifdef CONFIG_FUNCTION_TRACER +/* mcount is defined in assembly */ +EXPORT_SYMBOL(_mcount); +#endif diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 4e586f6110aa..28f982045f29 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1337,6 +1337,10 @@ static void kvm_release_vm_pages(struct kvm *kvm) } } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_iommu_unmap_guest(kvm); diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c index 552d07724207..230eae482f32 100644 --- a/arch/ia64/kvm/process.c +++ b/arch/ia64/kvm/process.c @@ -455,13 +455,18 @@ fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr, if (!vmm_fpswa_interface) return (fpswa_ret_t) {-1, 0, 0, 0}; - /* - * Just let fpswa driver to use hardware fp registers. - * No fp register is valid in memory. - */ memset(&fp_state, 0, sizeof(fp_state_t)); /* + * compute fp_state. only FP registers f6 - f11 are used by the + * vmm, so set those bits in the mask and set the low volatile + * pointer to point to these registers. + */ + fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ + + fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) ®s->f6; + + /* * unsigned long (*EFI_FPSWA) ( * unsigned long trap_type, * void *Bundle, @@ -545,10 +550,6 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim, status = vmm_handle_fpu_swa(0, regs, isr); if (!status) return ; - else if (-EAGAIN == status) { - vcpu_decrement_iip(vcpu); - return ; - } break; } diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index b73bf1838e57..3efea7d0a351 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -58,7 +58,7 @@ paddr_to_nid(unsigned long paddr) * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where * the section resides. */ -int early_pfn_to_nid(unsigned long pfn) +int __meminit __early_pfn_to_nid(unsigned long pfn) { int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec; @@ -70,7 +70,7 @@ int early_pfn_to_nid(unsigned long pfn) return node_memblk[i].nid; } - return 0; + return -1; } #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/arch/powerpc/include/asm/pgtable-4k.h b/arch/powerpc/include/asm/pgtable-4k.h index 6b18ba9d2d85..1dbca4e7de67 100644 --- a/arch/powerpc/include/asm/pgtable-4k.h +++ b/arch/powerpc/include/asm/pgtable-4k.h @@ -60,7 +60,7 @@ /* It should be preserving the high 48 bits and then specifically */ /* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */ #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_HPTEFLAGS) + _PAGE_HPTEFLAGS | _PAGE_SPECIAL) /* Bits to mask out from a PMD to get to the PTE page */ #define PMD_MASKED_BITS 0 diff --git a/arch/powerpc/include/asm/pgtable-64k.h b/arch/powerpc/include/asm/pgtable-64k.h index 07b0d8f09cb6..7389003349a6 100644 --- a/arch/powerpc/include/asm/pgtable-64k.h +++ b/arch/powerpc/include/asm/pgtable-64k.h @@ -114,7 +114,7 @@ static inline struct subpage_prot_table *pgd_subpage_prot(pgd_t *pgd) * pgprot changes */ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ - _PAGE_ACCESSED) + _PAGE_ACCESSED | _PAGE_SPECIAL) /* Bits to mask out from a PMD to get to the PTE page */ #define PMD_MASKED_BITS 0x1ff diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index f69a4d977729..820b5f0a35ce 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -429,7 +429,8 @@ extern int icache_44x_need_flush; #define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE() #endif -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \ + _PAGE_SPECIAL) #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 5af4e9b2dbe2..ada06924a423 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -646,11 +646,16 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg, unsigned int areg, struct pt_regs *regs, unsigned int flags, unsigned int length) { - char *ptr = (char *) ¤t->thread.TS_FPR(reg); + char *ptr; int ret = 0; flush_vsx_to_thread(current); + if (reg < 32) + ptr = (char *) ¤t->thread.TS_FPR(reg); + else + ptr = (char *) ¤t->thread.vr[reg - 32]; + if (flags & ST) ret = __copy_to_user(addr, ptr, length); else { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2822c8ccfaaf..5f81256287f5 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -125,6 +125,10 @@ static void kvmppc_free_vcpus(struct kvm *kvm) } } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvmppc_free_vcpus(kvm); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 7393bd76d698..5ac08b8ab654 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -19,6 +19,7 @@ #include <linux/notifier.h> #include <linux/lmb.h> #include <linux/of.h> +#include <linux/pfn.h> #include <asm/sparsemem.h> #include <asm/prom.h> #include <asm/system.h> @@ -882,7 +883,7 @@ static void mark_reserved_regions_for_nid(int nid) unsigned long physbase = lmb.reserved.region[i].base; unsigned long size = lmb.reserved.region[i].size; unsigned long start_pfn = physbase >> PAGE_SHIFT; - unsigned long end_pfn = ((physbase + size) >> PAGE_SHIFT); + unsigned long end_pfn = PFN_UP(physbase + size); struct node_active_region node_ar; unsigned long node_end_pfn = node->node_start_pfn + node->node_spanned_pages; @@ -908,7 +909,7 @@ static void mark_reserved_regions_for_nid(int nid) */ if (end_pfn > node_ar.end_pfn) reserve_size = (node_ar.end_pfn << PAGE_SHIFT) - - (start_pfn << PAGE_SHIFT); + - physbase; /* * Only worry about *this* node, others may not * yet have valid NODE_DATA(). diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 67de6bf3db3d..d281cc0bca71 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -328,7 +328,7 @@ static int __init ps3_mm_add_memory(void) return result; } -core_initcall(ps3_mm_add_memory); +device_initcall(ps3_mm_add_memory); /*============================================================================*/ /* dma routines */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index be8497186b96..0d33893e1e89 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -212,6 +212,10 @@ static void kvm_free_vcpus(struct kvm *kvm) } } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9c39095b33fc..8fc9a847cf48 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -34,6 +34,7 @@ config X86 select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER select HAVE_ARCH_TRACEHOOK diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 10d6cc3fd052..e1983fa025d2 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -174,28 +174,8 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config MMIOTRACE - bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL && PCI - select TRACING - help - Mmiotrace traces Memory Mapped I/O access and is meant for - debugging and reverse engineering. It is called from the ioremap - implementation and works via page faults. Tracing is disabled by - default and can be enabled at run-time. - - See Documentation/tracers/mmiotrace.txt. - If you are not helping to develop drivers, say N. - -config MMIOTRACE_TEST - tristate "Test module for mmiotrace" - depends on MMIOTRACE && m - help - This is a dumb module for testing mmiotrace. It is very dangerous - as it will write garbage to IO memory starting at a given address. - However, it should be safe to use on e.g. unused portion of VRAM. - - Say N, unless you absolutely know what you are doing. +config HAVE_MMIOTRACE_SUPPORT + def_bool y # # IO delay types: diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index d2e3bf3608af..886c9402ec45 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,13 @@ #include <linux/types.h> #include <linux/ioctl.h> +/* Select x86 specific features in <linux/kvm.h> */ +#define __KVM_HAVE_PIT +#define __KVM_HAVE_IOAPIC +#define __KVM_HAVE_DEVICE_ASSIGNMENT +#define __KVM_HAVE_MSI +#define __KVM_HAVE_USER_NMI + /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 07f1af494ca5..105fb90a0635 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void) get_memcfg_numa_flat(); } -extern int early_pfn_to_nid(unsigned long pfn); - extern void resume_map_numa_kva(pgd_t *pgd); #else /* !CONFIG_NUMA */ diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index a5b3817d4b9e..a29f48c2a322 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h @@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ NODE_DATA(nid)->node_spanned_pages) -extern int early_pfn_to_nid(unsigned long pfn); - #ifdef CONFIG_NUMA_EMU #define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index e9873a2e8695..776579119a00 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t; typedef struct { pgprotval_t pgprot; } pgprot_t; extern int page_is_ram(unsigned long pagenr); -extern int pagerange_is_ram(unsigned long start, unsigned long end); extern int devmem_is_allowed(unsigned long pagenr); extern void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index c09a14127584..e299287e8e33 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void) PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); } -static inline void arch_flush_lazy_cpu_mode(void) -{ - if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) { - arch_leave_lazy_cpu_mode(); - arch_enter_lazy_cpu_mode(); - } -} - +void arch_flush_lazy_cpu_mode(void); #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) @@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void) PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); } -static inline void arch_flush_lazy_mmu_mode(void) -{ - if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } -} +void arch_flush_lazy_mmu_mode(void); static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, unsigned long phys, pgprot_t flags) diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 4b1c319d30c3..c5d737cdb365 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -33,7 +33,7 @@ #include <linux/cpufreq.h> #include <linux/compiler.h> #include <linux/dmi.h> -#include <linux/ftrace.h> +#include <trace/power.h> #include <linux/acpi.h> #include <acpi/processor.h> @@ -70,6 +70,8 @@ struct acpi_cpufreq_data { static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); +DEFINE_TRACE(power_mark); + /* acpi_perf_data is a pointer to percpu data. */ static struct acpi_processor_performance *acpi_perf_data; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index fb039cd345d8..6428aa17b40e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) data->cpu = pol->cpu; data->currpstate = HW_PSTATE_INVALID; - rc = powernow_k8_cpu_init_acpi(data); - if (rc) { + if (powernow_k8_cpu_init_acpi(data)) { /* * Use the PSB BIOS structure. This is only availabe on * an UP version, and is deprecated by AMD. @@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) "ACPI maintainers and complain to your BIOS " "vendor.\n"); #endif - goto err_out; + kfree(data); + return -ENODEV; } if (pol->cpu != 0) { printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " "CPU other than CPU0. Complain to your BIOS " "vendor.\n"); - goto err_out; + kfree(data); + return -ENODEV; } rc = find_psb_table(data); if (rc) { - goto err_out; + kfree(data); + return -ENODEV; } /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c0852291b623..4325165753d8 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -15,6 +15,7 @@ #include <linux/bug.h> #include <linux/nmi.h> #include <linux/sysfs.h> +#include <linux/ftrace.h> #include <asm/stacktrace.h> @@ -196,6 +197,11 @@ unsigned __kprobes long oops_begin(void) int cpu; unsigned long flags; + /* notify the hw-branch tracer so it may disable tracing and + add the last trace to the trace buffer - + the earlier this happens, the more useful the trace. */ + trace_hw_branch_oops(); + oops_enter(); /* racy, but better than risking deadlock. */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 76f7141e0f91..c2e057d9f88c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -82,7 +82,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) * are the same as what exists. */ -static atomic_t in_nmi = ATOMIC_INIT(0); +static atomic_t nmi_running = ATOMIC_INIT(0); static int mod_code_status; /* holds return value of text write */ static int mod_code_write; /* set when NMI should do the write */ static void *mod_code_ip; /* holds the IP to write to */ @@ -115,8 +115,8 @@ static void ftrace_mod_code(void) void ftrace_nmi_enter(void) { - atomic_inc(&in_nmi); - /* Must have in_nmi seen before reading write flag */ + atomic_inc(&nmi_running); + /* Must have nmi_running seen before reading write flag */ smp_mb(); if (mod_code_write) { ftrace_mod_code(); @@ -126,22 +126,21 @@ void ftrace_nmi_enter(void) void ftrace_nmi_exit(void) { - /* Finish all executions before clearing in_nmi */ + /* Finish all executions before clearing nmi_running */ smp_wmb(); - atomic_dec(&in_nmi); + atomic_dec(&nmi_running); } static void wait_for_nmi(void) { - int waited = 0; + if (!atomic_read(&nmi_running)) + return; - while (atomic_read(&in_nmi)) { - waited = 1; + do { cpu_relax(); - } + } while (atomic_read(&nmi_running)); - if (waited) - nmi_wait_count++; + nmi_wait_count++; } static int @@ -368,25 +367,6 @@ int ftrace_disable_ftrace_graph_caller(void) return ftrace_mod_jmp(ip, old_offset, new_offset); } -#else /* CONFIG_DYNAMIC_FTRACE */ - -/* - * These functions are picked from those used on - * this page for dynamic ftrace. They have been - * simplified to ignore all traces in NMI context. - */ -static atomic_t in_nmi; - -void ftrace_nmi_enter(void) -{ - atomic_inc(&in_nmi); -} - -void ftrace_nmi_exit(void) -{ - atomic_dec(&in_nmi); -} - #endif /* !CONFIG_DYNAMIC_FTRACE */ /* @@ -403,7 +383,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) &return_to_handler; /* Nmi's are currently unsupported */ - if (unlikely(atomic_read(&in_nmi))) + if (unlikely(in_nmi())) return; if (unlikely(atomic_read(¤t->tracing_graph_pause))) @@ -439,13 +419,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - if (unlikely(!__kernel_text_address(old))) { - ftrace_graph_stop(); - *parent = old; - WARN_ON(1); - return; - } - calltime = cpu_clock(raw_smp_processor_id()); if (ftrace_push_return_trace(old, calltime, diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 388254f69a2a..a00545fe5cdd 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode, now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned long) delta; cfg = hpet_readl(HPET_Tn_CFG(timer)); + /* Make sure we use edge triggered interrupts */ + cfg &= ~HPET_TN_LEVEL; cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; hpet_writel(cfg, HPET_Tn_CFG(timer)); diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 7a13fac63a1f..4006c522adc7 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -203,7 +203,7 @@ static void __init platform_detect(void) static void __init platform_detect(void) { /* stopgap until OFW support is added to the kernel */ - olpc_platform_info.boardrev = 0xc2; + olpc_platform_info.boardrev = olpc_board(0xc2); } #endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb608873..c6520a4e85d4 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -268,6 +268,32 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } +void arch_flush_lazy_mmu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + WARN_ON(preempt_count() == 1); + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + +void arch_flush_lazy_cpu_mode(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { + WARN_ON(preempt_count() == 1); + arch_leave_lazy_cpu_mode(); + arch_enter_lazy_cpu_mode(); + } + + preempt_enable(); +} + struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6d12f7e37f8c..23b328edc2b3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -8,7 +8,7 @@ #include <linux/module.h> #include <linux/pm.h> #include <linux/clockchips.h> -#include <linux/ftrace.h> +#include <trace/power.h> #include <asm/system.h> #include <asm/apic.h> @@ -19,6 +19,9 @@ EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; +DEFINE_TRACE(power_start); +DEFINE_TRACE(power_end); + int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { *dst = *src; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a5df5f82fb9..5a4c23d89892 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child) static void ptrace_bts_detach(struct task_struct *child) { - if (unlikely(child->bts)) { - ds_release_bts(child->bts); - child->bts = NULL; - - ptrace_bts_free_buffer(child); - } + /* + * Ptrace_detach() races with ptrace_untrace() in case + * the child dies and is reaped by another thread. + * + * We only do the memory accounting at this point and + * leave the buffer deallocation and the bts tracer + * release to ptrace_bts_untrace() which will be called + * later on with tasklist_lock held. + */ + release_locked_buffer(child->bts_buffer, child->bts_size); } #else static inline void ptrace_bts_fork(struct task_struct *tsk) {} diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7932338d7cb3..a9e7548e1790 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs) local_irq_enable(); } +static inline void conditional_cli(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_disable(); +} + static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -626,8 +632,10 @@ clear_dr7: #ifdef CONFIG_X86_32 debug_vm86: + /* reenable preemption: handle_vm86_trap() might sleep */ + dec_preempt_count(); handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - preempt_conditional_cli(regs); + conditional_cli(regs); return; #endif diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b81125f0bdee..c7da3683f4c5 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -55,7 +55,8 @@ config KVM_AMD config KVM_TRACE bool "KVM trace support" - depends on KVM && MARKERS && SYSFS + depends on KVM && SYSFS + select MARKERS select RELAY select DEBUG_FS default n diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index e665d1c623ca..72bd275a9b5c 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps) hrtimer_add_expires_ns(&pt->timer, pt->period); pt->scheduled = hrtimer_get_expires_ns(&pt->timer); if (pt->period) - ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer); + ps->channels[0].count_load_time = ktime_get(); return (pt->period == 0 ? 0 : 1); } diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index c019b8edcdb7..cf17ed52f6fb 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); -void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) -{ - kvm_apic_timer_intr_post(vcpu, vec); - /* TODO: PIT, RTC etc. */ -} -EXPORT_SYMBOL_GPL(kvm_timer_intr_post); - void __kvm_migrate_timers(struct kvm_vcpu *vcpu) { __kvm_migrate_apic_timer(vcpu); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2bf32a03ceec..82579ee538d0 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm) void kvm_pic_reset(struct kvm_kpic_state *s); -void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index afac68c0815c..f0b67f2cdd69 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -35,6 +35,12 @@ #include "kvm_cache_regs.h" #include "irq.h" +#ifndef CONFIG_X86_64 +#define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) +#else +#define mod_64(x, y) ((x) % (y)) +#endif + #define PRId64 "d" #define PRIx64 "llx" #define PRIu64 "u" @@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic) static u32 apic_get_tmcct(struct kvm_lapic *apic) { - u64 counter_passed; - ktime_t passed, now; + ktime_t remaining; + s64 ns; u32 tmcct; ASSERT(apic != NULL); - now = apic->timer.dev.base->get_time(); - tmcct = apic_get_reg(apic, APIC_TMICT); - /* if initial count is 0, current count should also be 0 */ - if (tmcct == 0) + if (apic_get_reg(apic, APIC_TMICT) == 0) return 0; - if (unlikely(ktime_to_ns(now) <= - ktime_to_ns(apic->timer.last_update))) { - /* Wrap around */ - passed = ktime_add(( { - (ktime_t) { - .tv64 = KTIME_MAX - - (apic->timer.last_update).tv64}; } - ), now); - apic_debug("time elapsed\n"); - } else - passed = ktime_sub(now, apic->timer.last_update); - - counter_passed = div64_u64(ktime_to_ns(passed), - (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); - - if (counter_passed > tmcct) { - if (unlikely(!apic_lvtt_period(apic))) { - /* one-shot timers stick at 0 until reset */ - tmcct = 0; - } else { - /* - * periodic timers reset to APIC_TMICT when they - * hit 0. The while loop simulates this happening N - * times. (counter_passed %= tmcct) would also work, - * but might be slower or not work on 32-bit?? - */ - while (counter_passed > tmcct) - counter_passed -= tmcct; - tmcct -= counter_passed; - } - } else { - tmcct -= counter_passed; - } + remaining = hrtimer_expires_remaining(&apic->timer.dev); + if (ktime_to_ns(remaining) < 0) + remaining = ktime_set(0, 0); + + ns = mod_64(ktime_to_ns(remaining), apic->timer.period); + tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); return tmcct; } @@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now = apic->timer.dev.base->get_time(); - apic->timer.last_update = now; - apic->timer.period = apic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->timer.divide_count; atomic_set(&apic->timer.pending, 0); @@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) } } -void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - - if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec) - apic->timer.last_update = ktime_add_ns( - apic->timer.last_update, - apic->timer.period); -} - int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) { int vector = kvm_apic_has_interrupt(vcpu); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 81858881287e..45ab6ee71209 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -12,7 +12,6 @@ struct kvm_lapic { atomic_t pending; s64 period; /* unit: ns */ u32 divide_count; - ktime_t last_update; struct hrtimer dev; } timer; struct kvm_vcpu *vcpu; @@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); int kvm_lapic_enabled(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); -void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 83f11c7474a1..2d4477c71473 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, if (largepage) spte |= PT_PAGE_SIZE_MASK; if (mt_mask) { - mt_mask = get_memory_type(vcpu, gfn) << - kvm_x86_ops->get_mt_mask_shift(); + if (!kvm_is_mmio_pfn(pfn)) { + mt_mask = get_memory_type(vcpu, gfn) << + kvm_x86_ops->get_mt_mask_shift(); + mt_mask |= VMX_EPT_IGMT_BIT; + } else + mt_mask = MTRR_TYPE_UNCACHABLE << + kvm_x86_ops->get_mt_mask_shift(); spte |= mt_mask; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1452851ae258..a9e769e4e251 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) /* Okay, we can deliver the interrupt: grab it and update PIC state. */ intr_vector = kvm_cpu_get_interrupt(vcpu); svm_inject_irq(svm, intr_vector); - kvm_timer_intr_post(vcpu, intr_vector); out: update_cr8_intercept(vcpu); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6259d7467648..7611af576829 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) data = vmcs_readl(GUEST_SYSENTER_ESP); break; default: + vmx_load_host_state(to_vmx(vcpu)); msr = find_msr_entry(to_vmx(vcpu), msr_index); if (msr) { data = msr->data; @@ -3285,7 +3286,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) } if (vcpu->arch.interrupt.pending) { vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); - kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); if (kvm_cpu_has_interrupt(vcpu)) enable_irq_window(vcpu); } @@ -3687,8 +3687,7 @@ static int __init vmx_init(void) if (vm_need_ept()) { bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | - VMX_EPT_WRITABLE_MASK | - VMX_EPT_IGMT_BIT); + VMX_EPT_WRITABLE_MASK); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK, VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cc17546a2406..758b7a155ae9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: - case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: @@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOMMU: r = iommu_found(); break; + case KVM_CAP_CLOCKSOURCE: + r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC); + break; default: r = 0; break; @@ -4127,9 +4129,13 @@ static void kvm_free_vcpus(struct kvm *kvm) } -void kvm_arch_destroy_vm(struct kvm *kvm) +void kvm_arch_sync_events(struct kvm *kvm) { kvm_free_all_assigned_devices(kvm); +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ kvm_iommu_unmap_guest(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index af750ab973b6..f45d5e29a72e 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr) return 0; } -int pagerange_is_ram(unsigned long start, unsigned long end) -{ - int ram_page = 0, not_rampage = 0; - unsigned long page_nr; - - for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); - ++page_nr) { - if (page_is_ram(page_nr)) - ram_page = 1; - else - not_rampage = 1; - - if (ram_page == not_rampage) - return -1; - } - - return ram_page; -} - /* * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 71a14f89f89e..f3516da035d1 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes, return shift; } -int early_pfn_to_nid(unsigned long pfn) +int __meminit __early_pfn_to_nid(unsigned long pfn) { return phys_to_nid(pfn << PAGE_SHIFT); } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 84ba74820ad6..8ca0d8566fc8 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -575,7 +575,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) address = cpa->vaddr[cpa->curpage]; else address = *cpa->vaddr; - repeat: kpte = lookup_address(address, &level); if (!kpte) @@ -812,6 +811,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, vm_unmap_aliases(); + /* + * If we're called with lazy mmu updates enabled, the + * in-memory pte state may be stale. Flush pending updates to + * bring them up to date. + */ + arch_flush_lazy_mmu_mode(); + cpa.vaddr = addr; cpa.numpages = numpages; cpa.mask_set = mask_set; @@ -854,6 +860,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, } else cpa_flush_all(cache); + /* + * If we've been called with lazy mmu updates enabled, then + * make sure that everything gets flushed out before we + * return. + */ + arch_flush_lazy_mmu_mode(); + out: return ret; } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 7b61036427df..aebbf67a79d0 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -211,6 +211,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) static struct memtype *cached_entry; static u64 cached_start; +static int pat_pagerange_is_ram(unsigned long start, unsigned long end) +{ + int ram_page = 0, not_rampage = 0; + unsigned long page_nr; + + for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT); + ++page_nr) { + /* + * For legacy reasons, physical address range in the legacy ISA + * region is tracked as non-RAM. This will allow users of + * /dev/mem to map portions of legacy ISA region, even when + * some of those portions are listed(or not even listed) with + * different e820 types(RAM/reserved/..) + */ + if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) && + page_is_ram(page_nr)) + ram_page = 1; + else + not_rampage = 1; + + if (ram_page == not_rampage) + return -1; + } + + return ram_page; +} + /* * For RAM pages, mark the pages as non WB memory type using * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or @@ -336,20 +363,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (new_type) *new_type = actual_type; - /* - * For legacy reasons, some parts of the physical address range in the - * legacy 1MB region is treated as non-RAM (even when listed as RAM in - * the e820 tables). So we will track the memory attributes of this - * legacy 1MB region using the linear memtype_list always. - */ - if (end >= ISA_END_ADDRESS) { - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return reserve_ram_pages_type(start, end, req_type, - new_type); - else if (is_range_ram < 0) - return -EINVAL; - } + is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) + return reserve_ram_pages_type(start, end, req_type, + new_type); + else if (is_range_ram < 0) + return -EINVAL; new = kmalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) @@ -446,19 +465,11 @@ int free_memtype(u64 start, u64 end) if (is_ISA_range(start, end - 1)) return 0; - /* - * For legacy reasons, some parts of the physical address range in the - * legacy 1MB region is treated as non-RAM (even when listed as RAM in - * the e820 tables). So we will track the memory attributes of this - * legacy 1MB region using the linear memtype_list always. - */ - if (end >= ISA_END_ADDRESS) { - is_range_ram = pagerange_is_ram(start, end); - if (is_range_ram == 1) - return free_ram_pages_type(start, end); - else if (is_range_ram < 0) - return -EINVAL; - } + is_range_ram = pat_pagerange_is_ram(start, end); + if (is_range_ram == 1) + return free_ram_pages_type(start, end); + else if (is_range_ram < 0) + return -EINVAL; spin_lock(&memtype_lock); list_for_each_entry(entry, &memtype_list, nd) { @@ -626,17 +637,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, unsigned long flags; unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); - is_ram = pagerange_is_ram(paddr, paddr + size); + is_ram = pat_pagerange_is_ram(paddr, paddr + size); - if (is_ram != 0) { - /* - * For mapping RAM pages, drivers need to call - * set_memory_[uc|wc|wb] directly, for reserve and free, before - * setting up the PTE. - */ - WARN_ON_ONCE(1); - return 0; - } + /* + * reserve_pfn_range() doesn't support RAM pages. + */ + if (is_ram != 0) + return -EINVAL; ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); if (ret) @@ -693,7 +700,7 @@ static void free_pfn_range(u64 paddr, unsigned long size) { int is_ram; - is_ram = pagerange_is_ram(paddr, paddr + size); + is_ram = pat_pagerange_is_ram(paddr, paddr + size); if (is_ram == 0) free_memtype(paddr, paddr + size); } |