diff options
Diffstat (limited to 'arch/x86/include')
37 files changed, 555 insertions, 142 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 55d106b5e31b..211ca3f7fd16 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -185,17 +185,16 @@ struct bootnode; #ifdef CONFIG_ACPI_NUMA extern int acpi_numa; -extern int acpi_get_nodes(struct bootnode *physnodes); +extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start, + unsigned long end); extern int acpi_scan_nodes(unsigned long start, unsigned long end); #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) + +#ifdef CONFIG_NUMA_EMU extern void acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes); -#else -static inline void acpi_fake_nodes(const struct bootnode *fake_nodes, - int num_nodes) -{ -} #endif +#endif /* CONFIG_ACPI_NUMA */ #define acpi_unlazy_tlb(x) leave_mm(x) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 6aee50d655d1..64dc82ee19f0 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -3,16 +3,27 @@ #include <linux/pci.h> +struct amd_nb_bus_dev_range { + u8 bus; + u8 dev_base; + u8 dev_limit; +}; + extern struct pci_device_id amd_nb_misc_ids[]; +extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; struct bootnode; extern int early_is_amd_nb(u32 value); extern int amd_cache_northbridges(void); extern void amd_flush_garts(void); -extern int amd_get_nodes(struct bootnode *nodes); extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn); extern int amd_scan_nodes(void); +#ifdef CONFIG_NUMA_EMU +extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes); +extern void amd_get_nodes(struct bootnode *nodes); +#endif + struct amd_northbridge { struct pci_dev *misc; }; diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 3b62ab56c7a0..5e1a2eef3e7c 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -32,11 +32,7 @@ #define BOOT_HEAP_SIZE 0x400000 #else /* !CONFIG_KERNEL_BZIP2 */ -#ifdef CONFIG_X86_64 -#define BOOT_HEAP_SIZE 0x7000 -#else -#define BOOT_HEAP_SIZE 0x4000 -#endif +#define BOOT_HEAP_SIZE 0x8000 #endif /* !CONFIG_KERNEL_BZIP2 */ diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 63e35ec9075c..62f084478f7e 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -1,48 +1,8 @@ #ifndef _ASM_X86_CACHEFLUSH_H #define _ASM_X86_CACHEFLUSH_H -/* Keep includes the same across arches. */ -#include <linux/mm.h> - /* Caches aren't brain-dead on the intel. */ -static inline void flush_cache_all(void) { } -static inline void flush_cache_mm(struct mm_struct *mm) { } -static inline void flush_cache_dup_mm(struct mm_struct *mm) { } -static inline void flush_cache_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) { } -static inline void flush_cache_page(struct vm_area_struct *vma, - unsigned long vmaddr, unsigned long pfn) { } -#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 -static inline void flush_dcache_page(struct page *page) { } -static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } -static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } -static inline void flush_icache_range(unsigned long start, - unsigned long end) { } -static inline void flush_icache_page(struct vm_area_struct *vma, - struct page *page) { } -static inline void flush_icache_user_range(struct vm_area_struct *vma, - struct page *page, - unsigned long addr, - unsigned long len) { } -static inline void flush_cache_vmap(unsigned long start, unsigned long end) { } -static inline void flush_cache_vunmap(unsigned long start, - unsigned long end) { } - -static inline void copy_to_user_page(struct vm_area_struct *vma, - struct page *page, unsigned long vaddr, - void *dst, const void *src, - unsigned long len) -{ - memcpy(dst, src, len); -} - -static inline void copy_from_user_page(struct vm_area_struct *vma, - struct page *page, unsigned long vaddr, - void *dst, const void *src, - unsigned long len) -{ - memcpy(dst, src, len); -} +#include <asm-generic/cacheflush.h> #ifdef CONFIG_X86_PAT /* diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 4fab24de26b1..6e6e7558e702 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -32,5 +32,6 @@ extern void arch_unregister_cpu(int); DECLARE_PER_CPU(int, cpu_state); +int __cpuinit mwait_usable(const struct cpuinfo_x86 *); #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 0141b234406f..4729b2b63117 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -116,11 +116,11 @@ enum fixed_addresses { #endif FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ - __end_of_permanent_fixed_addresses, - #ifdef CONFIG_X86_MRST FIX_LNW_VRTC, #endif + __end_of_permanent_fixed_addresses, + /* * 256 temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. diff --git a/arch/x86/include/asm/gpio.h b/arch/x86/include/asm/gpio.h index 49dbfdfa50f9..91d915a65259 100644 --- a/arch/x86/include/asm/gpio.h +++ b/arch/x86/include/asm/gpio.h @@ -38,12 +38,9 @@ static inline int gpio_cansleep(unsigned int gpio) return __gpio_cansleep(gpio); } -/* - * Not implemented, yet. - */ static inline int gpio_to_irq(unsigned int gpio) { - return -ENOSYS; + return __gpio_to_irq(gpio); } static inline int irq_to_gpio(unsigned int irq) diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index ff2546ce7178..7a15153c675d 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -20,6 +20,9 @@ #ifndef _ASM_X86_HYPERVISOR_H #define _ASM_X86_HYPERVISOR_H +#include <asm/kvm_para.h> +#include <asm/xen/hypervisor.h> + extern void init_hypervisor(struct cpuinfo_x86 *c); extern void init_hypervisor_platform(void); @@ -47,4 +50,13 @@ extern const struct hypervisor_x86 x86_hyper_vmware; extern const struct hypervisor_x86 x86_hyper_ms_hyperv; extern const struct hypervisor_x86 x86_hyper_xen_hvm; +static inline bool hypervisor_x2apic_available(void) +{ + if (kvm_para_available()) + return true; + if (xen_x2apic_para_available()) + return true; + return false; +} + #endif diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ba870bb6dd8e..c704b38c57a2 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -10,6 +10,9 @@ #include <asm/apicdef.h> #include <asm/irq_vectors.h> +/* Even though we don't support this, supply it to appease OF */ +static inline void irq_dispose_mapping(unsigned int virq) { } + static inline int irq_canonicalize(int irq) { return ((irq == 2) ? 9 : irq); diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index f52d42e80585..574dbc22893a 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -14,7 +14,7 @@ do { \ asm goto("1:" \ JUMP_LABEL_INITIAL_NOP \ - ".pushsection __jump_table, \"a\" \n\t"\ + ".pushsection __jump_table, \"aw\" \n\t"\ _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ ".popsection \n\t" \ : : "i" (key) : : label); \ diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index f23eb2528464..ca242d35e873 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -18,7 +18,6 @@ enum die_val { DIE_TRAP, DIE_GPF, DIE_CALL, - DIE_NMI_IPI, DIE_PAGE_FAULT, DIE_NMIUNKNOWN, }; diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b36c6b3fe144..8e37deb1eb38 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -15,6 +15,14 @@ struct x86_emulate_ctxt; +struct x86_exception { + u8 vector; + bool error_code_valid; + u16 error_code; + bool nested_page_fault; + u64 address; /* cr2 or nested page fault gpa */ +}; + /* * x86_emulate_ops: * @@ -64,7 +72,8 @@ struct x86_emulate_ops { * @bytes: [IN ] Number of bytes to read from memory. */ int (*read_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); + unsigned int bytes, struct kvm_vcpu *vcpu, + struct x86_exception *fault); /* * write_std: Write bytes of standard (non-emulated/special) memory. @@ -74,7 +83,8 @@ struct x86_emulate_ops { * @bytes: [IN ] Number of bytes to write to memory. */ int (*write_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); + unsigned int bytes, struct kvm_vcpu *vcpu, + struct x86_exception *fault); /* * fetch: Read bytes of standard (non-emulated/special) memory. * Used for instruction fetch. @@ -83,7 +93,8 @@ struct x86_emulate_ops { * @bytes: [IN ] Number of bytes to read from memory. */ int (*fetch)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); + unsigned int bytes, struct kvm_vcpu *vcpu, + struct x86_exception *fault); /* * read_emulated: Read bytes from emulated/special memory area. @@ -94,7 +105,7 @@ struct x86_emulate_ops { int (*read_emulated)(unsigned long addr, void *val, unsigned int bytes, - unsigned int *error, + struct x86_exception *fault, struct kvm_vcpu *vcpu); /* @@ -107,7 +118,7 @@ struct x86_emulate_ops { int (*write_emulated)(unsigned long addr, const void *val, unsigned int bytes, - unsigned int *error, + struct x86_exception *fault, struct kvm_vcpu *vcpu); /* @@ -122,7 +133,7 @@ struct x86_emulate_ops { const void *old, const void *new, unsigned int bytes, - unsigned int *error, + struct x86_exception *fault, struct kvm_vcpu *vcpu); int (*pio_in_emulated)(int size, unsigned short port, void *val, @@ -159,7 +170,10 @@ struct operand { }; union { unsigned long *reg; - unsigned long mem; + struct segmented_address { + ulong ea; + unsigned seg; + } mem; } addr; union { unsigned long val; @@ -226,9 +240,8 @@ struct x86_emulate_ctxt { bool perm_ok; /* do not check permissions if true */ - int exception; /* exception that happens during emulation or -1 */ - u32 error_code; /* error code for exception */ - bool error_code_valid; + bool have_exception; + struct x86_exception exception; /* decode cache */ struct decode_cache decode; @@ -252,7 +265,7 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 #endif -int x86_decode_insn(struct x86_emulate_ctxt *ctxt); +int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); #define EMULATION_FAILED -1 #define EMULATION_OK 0 #define EMULATION_RESTART 1 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f702f82aa1eb..ffd7f8d29187 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -83,11 +83,14 @@ #define KVM_NR_FIXED_MTRR_REGION 88 #define KVM_NR_VAR_MTRR 8 +#define ASYNC_PF_PER_VCPU 64 + extern spinlock_t kvm_lock; extern struct list_head vm_list; struct kvm_vcpu; struct kvm; +struct kvm_async_pf; enum kvm_reg { VCPU_REGS_RAX = 0, @@ -114,6 +117,7 @@ enum kvm_reg { enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, + VCPU_EXREG_CR3, }; enum { @@ -238,16 +242,18 @@ struct kvm_mmu { void (*new_cr3)(struct kvm_vcpu *vcpu); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); - int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); - void (*inject_page_fault)(struct kvm_vcpu *vcpu); + int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, + bool prefault); + void (*inject_page_fault)(struct kvm_vcpu *vcpu, + struct x86_exception *fault); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, - u32 *error); + struct x86_exception *exception); gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); void (*prefetch_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page); int (*sync_page)(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, bool clear_unsync); + struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); hpa_t root_hpa; int root_level; @@ -315,16 +321,6 @@ struct kvm_vcpu_arch { */ struct kvm_mmu *walk_mmu; - /* - * This struct is filled with the necessary information to propagate a - * page fault into the guest - */ - struct { - u64 address; - unsigned error_code; - bool nested; - } fault; - /* only needed in kvm_pv_mmu_op() path, but it's hot so * put it here to avoid allocation */ struct kvm_pv_mmu_op_buffer mmu_op_buffer; @@ -412,6 +408,15 @@ struct kvm_vcpu_arch { u64 hv_vapic; cpumask_var_t wbinvd_dirty_mask; + + struct { + bool halted; + gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; + struct gfn_to_hva_cache data; + u64 msr_val; + u32 id; + bool send_user_only; + } apf; }; struct kvm_arch { @@ -456,6 +461,10 @@ struct kvm_arch { /* fields used by HYPER-V emulation */ u64 hv_guest_os_id; u64 hv_hypercall; + + #ifdef CONFIG_KVM_MMU_AUDIT + int audit_point; + #endif }; struct kvm_vm_stat { @@ -529,6 +538,7 @@ struct kvm_x86_ops { struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); + void (*decache_cr3)(struct kvm_vcpu *vcpu); void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); @@ -582,9 +592,17 @@ struct kvm_x86_ops { void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); const struct trace_print_flags *exit_reasons_str; }; +struct kvm_arch_async_pf { + u32 token; + gfn_t gfn; + unsigned long cr3; + bool direct_map; +}; + extern struct kvm_x86_ops *kvm_x86_ops; int kvm_mmu_module_init(void); @@ -594,7 +612,6 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); -void kvm_mmu_set_base_ptes(u64 base_pte); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, u64 dirty_mask, u64 nx_mask, u64 x_mask); @@ -623,8 +640,15 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -int emulate_instruction(struct kvm_vcpu *vcpu, - unsigned long cr2, u16 error_code, int emulation_type); +int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, + int emulation_type, void *insn, int insn_len); + +static inline int emulate_instruction(struct kvm_vcpu *vcpu, + int emulation_type) +{ + return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); +} + void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); @@ -650,7 +674,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); -void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); +int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); @@ -668,11 +692,11 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); -void kvm_inject_page_fault(struct kvm_vcpu *vcpu); +void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); -void kvm_propagate_fault(struct kvm_vcpu *vcpu); +void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); int kvm_pic_set_irq(void *opaque, int irq, int level); @@ -690,16 +714,21 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); -gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); -gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); -gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); +gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); +gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); +gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); +gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_fix_hypercall(struct kvm_vcpu *vcpu); -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, + void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_enable_tdp(void); @@ -766,20 +795,25 @@ enum { #define HF_VINTR_MASK (1 << 2) #define HF_NMI_MASK (1 << 3) #define HF_IRET_MASK (1 << 4) +#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ /* * Hardware virtualization extension instructions may fault if a * reboot turns off virtualization while processes are running. * Trap the fault and ignore the instruction if that happens. */ -asmlinkage void kvm_handle_fault_on_reboot(void); +asmlinkage void kvm_spurious_fault(void); +extern bool kvm_rebooting; #define __kvm_handle_fault_on_reboot(insn) \ "666: " insn "\n\t" \ + "668: \n\t" \ ".pushsection .fixup, \"ax\" \n" \ "667: \n\t" \ + "cmpb $0, kvm_rebooting \n\t" \ + "jne 668b \n\t" \ __ASM_SIZE(push) " $666b \n\t" \ - "jmp kvm_handle_fault_on_reboot \n\t" \ + "call kvm_spurious_fault \n\t" \ ".popsection \n\t" \ ".pushsection __ex_table, \"a\" \n\t" \ _ASM_PTR " 666b, 667b \n\t" \ @@ -788,6 +822,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_age_hva(struct kvm *kvm, unsigned long hva); +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); @@ -799,4 +834,15 @@ void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); +void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); +void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); +bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); +extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); + +void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 7b562b6184bc..a427bf77a93d 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -20,6 +20,7 @@ * are available. The use of 0x11 and 0x12 is deprecated */ #define KVM_FEATURE_CLOCKSOURCE2 3 +#define KVM_FEATURE_ASYNC_PF 4 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. @@ -32,9 +33,13 @@ /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */ #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 +#define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define KVM_MAX_MMU_OP_BATCH 32 +#define KVM_ASYNC_PF_ENABLED (1 << 0) +#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) + /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 #define KVM_MMU_OP_FLUSH_TLB 2 @@ -61,10 +66,20 @@ struct kvm_mmu_op_release_pt { __u64 pt_phys; }; +#define KVM_PV_REASON_PAGE_NOT_PRESENT 1 +#define KVM_PV_REASON_PAGE_READY 2 + +struct kvm_vcpu_pv_apf_data { + __u32 reason; + __u8 pad[60]; + __u32 enabled; +}; + #ifdef __KERNEL__ #include <asm/processor.h> extern void kvmclock_init(void); +extern int kvm_register_clock(char *txt); /* This instruction is vmcall. On non-VT architectures, it will generate a @@ -160,8 +175,17 @@ static inline unsigned int kvm_arch_para_features(void) #ifdef CONFIG_KVM_GUEST void __init kvm_guest_init(void); +void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wake(u32 token); +u32 kvm_read_and_reset_pf_reason(void); #else #define kvm_guest_init() do { } while (0) +#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wake(T) do {} while(0) +static inline u32 kvm_read_and_reset_pf_reason(void) +{ + return 0; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h index f7920601e472..72a8b52e7dfd 100644 --- a/arch/x86/include/asm/mach_traps.h +++ b/arch/x86/include/asm/mach_traps.h @@ -7,9 +7,19 @@ #include <asm/mc146818rtc.h> +#define NMI_REASON_PORT 0x61 + +#define NMI_REASON_SERR 0x80 +#define NMI_REASON_IOCHK 0x40 +#define NMI_REASON_MASK (NMI_REASON_SERR | NMI_REASON_IOCHK) + +#define NMI_REASON_CLEAR_SERR 0x04 +#define NMI_REASON_CLEAR_IOCHK 0x08 +#define NMI_REASON_CLEAR_MASK 0x0f + static inline unsigned char get_nmi_reason(void) { - return inb(0x61); + return inb(NMI_REASON_PORT); } static inline void reassert_nmi(void) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c4021b953510..c76f5b92b840 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -23,6 +23,26 @@ void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace #endif +/* + * Define some priorities for the nmi notifier call chain. + * + * Create a local nmi bit that has a higher priority than + * external nmis, because the local ones are more frequent. + * + * Also setup some default high/normal/low settings for + * subsystems to registers with. Using 4 bits to seperate + * the priorities. This can go alot higher if needed be. + */ + +#define NMI_LOCAL_SHIFT 16 /* randomly picked */ +#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) +#define NMI_HIGH_PRIOR (1ULL << 8) +#define NMI_NORMAL_PRIOR (1ULL << 4) +#define NMI_LOW_PRIOR (1ULL << 0) +#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) +#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) +#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) + void stop_nmi(void); void restart_nmi(void); diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h index a37229011b56..b0ef2b449a9d 100644 --- a/arch/x86/include/asm/numa_32.h +++ b/arch/x86/include/asm/numa_32.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_NUMA_32_H #define _ASM_X86_NUMA_32_H +extern int numa_off; + extern int pxm_to_nid(int pxm); extern void numa_remove_cpu(int cpu); diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 823e070e7c26..0493be39607c 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -38,8 +38,9 @@ extern void __cpuinit numa_add_cpu(int cpu); extern void __cpuinit numa_remove_cpu(int cpu); #ifdef CONFIG_NUMA_EMU -#define FAKE_NODE_MIN_SIZE ((u64)64 << 20) +#define FAKE_NODE_MIN_SIZE ((u64)32 << 20) #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) +void numa_emu_cmdline(char *); #endif /* CONFIG_NUMA_EMU */ #else static inline void init_cpu_to_node(void) { } diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h index 42a978c0c1b3..f482010350fb 100644 --- a/arch/x86/include/asm/olpc.h +++ b/arch/x86/include/asm/olpc.h @@ -107,10 +107,14 @@ extern int olpc_ec_mask_unset(uint8_t bits); /* GPIO assignments */ #define OLPC_GPIO_MIC_AC 1 -#define OLPC_GPIO_DCON_IRQ geode_gpio(7) +#define OLPC_GPIO_DCON_STAT0 5 +#define OLPC_GPIO_DCON_STAT1 6 +#define OLPC_GPIO_DCON_IRQ 7 #define OLPC_GPIO_THRM_ALRM geode_gpio(10) -#define OLPC_GPIO_SMB_CLK geode_gpio(14) -#define OLPC_GPIO_SMB_DATA geode_gpio(15) +#define OLPC_GPIO_DCON_LOAD 11 +#define OLPC_GPIO_DCON_BLANK 12 +#define OLPC_GPIO_SMB_CLK 14 +#define OLPC_GPIO_SMB_DATA 15 #define OLPC_GPIO_WORKAUX geode_gpio(24) #define OLPC_GPIO_LID geode_gpio(26) #define OLPC_GPIO_ECSCI geode_gpio(27) diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h index 2a8478140bb3..641988efe063 100644 --- a/arch/x86/include/asm/olpc_ofw.h +++ b/arch/x86/include/asm/olpc_ofw.h @@ -8,6 +8,8 @@ #ifdef CONFIG_OLPC_OPENFIRMWARE +extern bool olpc_ofw_is_installed(void); + /* run an OFW command by calling into the firmware */ #define olpc_ofw(name, args, res) \ __olpc_ofw((name), ARRAY_SIZE(args), args, ARRAY_SIZE(res), res) @@ -26,10 +28,17 @@ extern bool olpc_ofw_present(void); #else /* !CONFIG_OLPC_OPENFIRMWARE */ +static inline bool olpc_ofw_is_installed(void) { return false; } static inline void olpc_ofw_detect(void) { } static inline void setup_olpc_ofw_pgd(void) { } static inline bool olpc_ofw_present(void) { return false; } #endif /* !CONFIG_OLPC_OPENFIRMWARE */ +#ifdef CONFIG_OLPC_OPENFIRMWARE_DT +extern void olpc_dt_build_devicetree(void); +#else +static inline void olpc_dt_build_devicetree(void) { } +#endif /* CONFIG_OLPC_OPENFIRMWARE_DT */ + #endif /* _ASM_X86_OLPC_OFW_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 7709c12431b8..2071a8b2b32f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -435,6 +435,11 @@ static inline void pte_update(struct mm_struct *mm, unsigned long addr, { PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep); } +static inline void pmd_update(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp) +{ + PVOP_VCALL3(pv_mmu_ops.pmd_update, mm, addr, pmdp); +} static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -442,6 +447,12 @@ static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr, PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep); } +static inline void pmd_update_defer(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp) +{ + PVOP_VCALL3(pv_mmu_ops.pmd_update_defer, mm, addr, pmdp); +} + static inline pte_t __pte(pteval_t val) { pteval_t ret; @@ -543,6 +554,20 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ +#if PAGETABLE_LEVELS >= 3 + if (sizeof(pmdval_t) > sizeof(long)) + /* 5 arg words */ + pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd); + else + PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp, pmd.pmd); +#endif +} +#endif + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { pmdval_t val = native_pmd_val(pmd); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index b82bac975250..82885099c869 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -265,10 +265,16 @@ struct pv_mmu_ops { void (*set_pte_at)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); + void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmdval); void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (*pmd_update)(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp); + void (*pmd_update_defer)(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp); pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 8ee45167e817..3788f4649db4 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -414,8 +414,6 @@ do { \ #define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) #define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) #define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) @@ -432,8 +430,6 @@ do { \ #define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) #define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) #define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) -#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) -#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) #ifndef CONFIG_M386 #define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) @@ -475,11 +471,15 @@ do { \ #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) #define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) +#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) +#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) #define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) #define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) #define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) +#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) +#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) #endif /* This is not atomic against other CPUs -- CPU preemption needs to be off */ diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 295e2ff18a6a..e2f6a99f14ab 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -20,6 +20,9 @@ #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) #define ARCH_P4_MAX_CCCR (18) +#define ARCH_P4_CNTRVAL_BITS (40) +#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1) + #define P4_ESCR_EVENT_MASK 0x7e000000U #define P4_ESCR_EVENT_SHIFT 25 #define P4_ESCR_EVENTMASK_MASK 0x01fffe00U diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 271de94c3810..b4389a468fb6 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -92,7 +92,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, - unsigned long adddress) + unsigned long address) { ___pmd_free_tlb(tlb, pmd); } diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 2334982b339e..98391db840c6 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -46,6 +46,15 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #endif +#ifdef CONFIG_SMP +static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) +{ + return __pmd(xchg((pmdval_t *)xp, 0)); +} +#else +#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) +#endif + /* * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, * split up the 29 bits of offset into this range: diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 177b0165ea01..94b979d1b58d 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -104,6 +104,29 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep) #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #endif +#ifdef CONFIG_SMP +union split_pmd { + struct { + u32 pmd_low; + u32 pmd_high; + }; + pmd_t pmd; +}; +static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) +{ + union split_pmd res, *orig = (union split_pmd *)pmdp; + + /* xchg acts as a barrier before setting of the high bits */ + res.pmd_low = xchg(&orig->pmd_low, 0); + res.pmd_high = orig->pmd_high; + orig->pmd_high = 0; + + return res.pmd; +} +#else +#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) +#endif + /* * Bits 0, 6 and 7 are taken in the low part of the pte, * put the 32 bits of offset into the high part. diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index ada823a13c7c..18601c86fab1 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -35,6 +35,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); #else /* !CONFIG_PARAVIRT */ #define set_pte(ptep, pte) native_set_pte(ptep, pte) #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) +#define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd) #define set_pte_atomic(ptep, pte) \ native_set_pte_atomic(ptep, pte) @@ -59,6 +60,8 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) +#define pmd_update(mm, addr, ptep) do { } while (0) +#define pmd_update_defer(mm, addr, ptep) do { } while (0) #define pgd_val(x) native_pgd_val(x) #define __pgd(x) native_make_pgd(x) @@ -94,6 +97,11 @@ static inline int pte_young(pte_t pte) return pte_flags(pte) & _PAGE_ACCESSED; } +static inline int pmd_young(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_ACCESSED; +} + static inline int pte_write(pte_t pte) { return pte_flags(pte) & _PAGE_RW; @@ -142,6 +150,23 @@ static inline int pmd_large(pmd_t pte) (_PAGE_PSE | _PAGE_PRESENT); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline int pmd_trans_splitting(pmd_t pmd) +{ + return pmd_val(pmd) & _PAGE_SPLITTING; +} + +static inline int pmd_trans_huge(pmd_t pmd) +{ + return pmd_val(pmd) & _PAGE_PSE; +} + +static inline int has_transparent_hugepage(void) +{ + return cpu_has_pse; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + static inline pte_t pte_set_flags(pte_t pte, pteval_t set) { pteval_t v = native_pte_val(pte); @@ -216,6 +241,55 @@ static inline pte_t pte_mkspecial(pte_t pte) return pte_set_flags(pte, _PAGE_SPECIAL); } +static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) +{ + pmdval_t v = native_pmd_val(pmd); + + return __pmd(v | set); +} + +static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) +{ + pmdval_t v = native_pmd_val(pmd); + + return __pmd(v & ~clear); +} + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_ACCESSED); +} + +static inline pmd_t pmd_wrprotect(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_RW); +} + +static inline pmd_t pmd_mkdirty(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_DIRTY); +} + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_PSE); +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_ACCESSED); +} + +static inline pmd_t pmd_mkwrite(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_RW); +} + +static inline pmd_t pmd_mknotpresent(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_PRESENT); +} + /* * Mask out unsupported bits in a present pgprot. Non-present pgprots * can use those bits for other purposes, so leave them be. @@ -256,6 +330,16 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) return __pte(val); } +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + pmdval_t val = pmd_val(pmd); + + val &= _HPAGE_CHG_MASK; + val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK; + + return __pmd(val); +} + /* mprotect needs to preserve PAT bits when updating vm_page_prot */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) @@ -350,7 +434,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) +#define pmd_page(pmd) pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT) /* * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] @@ -524,12 +608,26 @@ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) return res; } +static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp) +{ + pmd_t res = *pmdp; + + native_pmd_clear(pmdp); + return res; +} + static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep , pte_t pte) { native_set_pte(ptep, pte); } +static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp , pmd_t pmd) +{ + native_set_pmd(pmdp, pmd); +} + #ifndef CONFIG_PARAVIRT /* * Rules for using pte_update - it must be called after any PTE update which @@ -607,6 +705,49 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, #define flush_tlb_fix_spurious_fault(vma, address) +#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) + +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +extern int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty); + +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +extern int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + + +#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH +extern void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp); + +#define __HAVE_ARCH_PMD_WRITE +static inline int pmd_write(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_RW; +} + +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp) +{ + pmd_t pmd = native_pmdp_get_and_clear(pmdp); + pmd_update(mm, addr, pmdp); + return pmd; +} + +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +static inline void pmdp_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); + pmd_update(mm, addr, pmdp); +} + /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index f86da20347f2..975f709e09ae 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -59,6 +59,16 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) native_set_pte(ptep, pte); } +static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} + +static inline void native_pmd_clear(pmd_t *pmd) +{ + native_set_pmd(pmd, native_make_pmd(0)); +} + static inline pte_t native_ptep_get_and_clear(pte_t *xp) { #ifdef CONFIG_SMP @@ -72,14 +82,17 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) #endif } -static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) +static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) { - *pmdp = pmd; -} - -static inline void native_pmd_clear(pmd_t *pmd) -{ - native_set_pmd(pmd, native_make_pmd(0)); +#ifdef CONFIG_SMP + return native_make_pmd(xchg(&xp->pmd, 0)); +#else + /* native_local_pmdp_get_and_clear, + but duplicated because of cyclic dependency */ + pmd_t ret = *xp; + native_pmd_clear(xp); + return ret; +#endif } static inline void native_set_pud(pud_t *pudp, pud_t pud) @@ -168,6 +181,7 @@ extern void cleanup_highmap(void); #define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) #define __HAVE_ARCH_PTE_SAME + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d1f4a760be23..7db7723d1f32 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -22,6 +22,7 @@ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 +#define _PAGE_BIT_SPLITTING _PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */ #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ /* If _PAGE_BIT_PRESENT is clear, we use these: */ @@ -45,6 +46,7 @@ #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +#define _PAGE_SPLITTING (_AT(pteval_t, 1) << _PAGE_BIT_SPLITTING) #define __HAVE_ARCH_PTE_SPECIAL #ifdef CONFIG_KMEMCHECK @@ -70,6 +72,7 @@ /* Set of bits not changed in pte_modify */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) #define _PAGE_CACHE_WB (0) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c6efecf85a6a..45636cefa186 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -761,10 +761,11 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c); extern void init_c1e_mask(void); extern unsigned long boot_option_idle_override; -extern unsigned long idle_halt; -extern unsigned long idle_nomwait; extern bool c1e_detected; +enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, + IDLE_POLL, IDLE_FORCE_MWAIT}; + extern void enable_sep_cpu(void); extern int sysenter_setup(void); @@ -901,7 +902,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); /* * The below -8 is to reserve 8 bytes on top of the ring0 stack. * This is necessary to guarantee that the entire "struct pt_regs" - * is accessable even if the CPU haven't stored the SS/ESP registers + * is accessible even if the CPU haven't stored the SS/ESP registers * on the stack (interrupt gate does not save these registers * when switching to the same priv ring). * Therefore beware: accessing the ss/esp fields of the diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h new file mode 100644 index 000000000000..b4ec95f07518 --- /dev/null +++ b/arch/x86/include/asm/prom.h @@ -0,0 +1 @@ +/* dummy prom.h; here to make linux/of.h's #includes happy */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 0e831059ac5a..f2b83bc7d784 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -47,14 +47,13 @@ enum { INTERCEPT_MONITOR, INTERCEPT_MWAIT, INTERCEPT_MWAIT_COND, + INTERCEPT_XSETBV, }; struct __attribute__ ((__packed__)) vmcb_control_area { - u16 intercept_cr_read; - u16 intercept_cr_write; - u16 intercept_dr_read; - u16 intercept_dr_write; + u32 intercept_cr; + u32 intercept_dr; u32 intercept_exceptions; u64 intercept; u8 reserved_1[42]; @@ -81,14 +80,19 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 event_inj_err; u64 nested_cr3; u64 lbr_ctl; - u64 reserved_5; + u32 clean; + u32 reserved_5; u64 next_rip; - u8 reserved_6[816]; + u8 insn_len; + u8 insn_bytes[15]; + u8 reserved_6[800]; }; #define TLB_CONTROL_DO_NOTHING 0 #define TLB_CONTROL_FLUSH_ALL_ASID 1 +#define TLB_CONTROL_FLUSH_ASID 3 +#define TLB_CONTROL_FLUSH_ASID_LOCAL 7 #define V_TPR_MASK 0x0f @@ -204,19 +208,31 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK #define SVM_SELECTOR_CODE_MASK (1 << 3) -#define INTERCEPT_CR0_MASK 1 -#define INTERCEPT_CR3_MASK (1 << 3) -#define INTERCEPT_CR4_MASK (1 << 4) -#define INTERCEPT_CR8_MASK (1 << 8) - -#define INTERCEPT_DR0_MASK 1 -#define INTERCEPT_DR1_MASK (1 << 1) -#define INTERCEPT_DR2_MASK (1 << 2) -#define INTERCEPT_DR3_MASK (1 << 3) -#define INTERCEPT_DR4_MASK (1 << 4) -#define INTERCEPT_DR5_MASK (1 << 5) -#define INTERCEPT_DR6_MASK (1 << 6) -#define INTERCEPT_DR7_MASK (1 << 7) +#define INTERCEPT_CR0_READ 0 +#define INTERCEPT_CR3_READ 3 +#define INTERCEPT_CR4_READ 4 +#define INTERCEPT_CR8_READ 8 +#define INTERCEPT_CR0_WRITE (16 + 0) +#define INTERCEPT_CR3_WRITE (16 + 3) +#define INTERCEPT_CR4_WRITE (16 + 4) +#define INTERCEPT_CR8_WRITE (16 + 8) + +#define INTERCEPT_DR0_READ 0 +#define INTERCEPT_DR1_READ 1 +#define INTERCEPT_DR2_READ 2 +#define INTERCEPT_DR3_READ 3 +#define INTERCEPT_DR4_READ 4 +#define INTERCEPT_DR5_READ 5 +#define INTERCEPT_DR6_READ 6 +#define INTERCEPT_DR7_READ 7 +#define INTERCEPT_DR0_WRITE (16 + 0) +#define INTERCEPT_DR1_WRITE (16 + 1) +#define INTERCEPT_DR2_WRITE (16 + 2) +#define INTERCEPT_DR3_WRITE (16 + 3) +#define INTERCEPT_DR4_WRITE (16 + 4) +#define INTERCEPT_DR5_WRITE (16 + 5) +#define INTERCEPT_DR6_WRITE (16 + 6) +#define INTERCEPT_DR7_WRITE (16 + 7) #define SVM_EVTINJ_VEC_MASK 0xff @@ -246,6 +262,8 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 #define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 +#define SVM_EXITINFO_REG_MASK 0x0F + #define SVM_EXIT_READ_CR0 0x000 #define SVM_EXIT_READ_CR3 0x003 #define SVM_EXIT_READ_CR4 0x004 @@ -316,6 +334,7 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXIT_MONITOR 0x08a #define SVM_EXIT_MWAIT 0x08b #define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_ERR -1 diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index f66cda56781d..0310da67307f 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -30,6 +30,7 @@ asmlinkage void segment_not_present(void); asmlinkage void stack_segment(void); asmlinkage void general_protection(void); asmlinkage void page_fault(void); +asmlinkage void async_page_fault(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void coprocessor_error(void); asmlinkage void alignment_check(void); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9f0cbd987d50..84471b810460 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -66,15 +66,23 @@ #define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 #define VM_EXIT_SAVE_IA32_PAT 0x00040000 #define VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 /* VMCS Encodings */ enum vmcs_field { @@ -239,6 +247,7 @@ enum vmcs_field { #define EXIT_REASON_TASK_SWITCH 9 #define EXIT_REASON_CPUID 10 #define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 #define EXIT_REASON_INVLPG 14 #define EXIT_REASON_RDPMC 15 #define EXIT_REASON_RDTSC 16 @@ -296,6 +305,12 @@ enum vmcs_field { #define GUEST_INTR_STATE_SMI 0x00000004 #define GUEST_INTR_STATE_NMI 0x00000008 +/* GUEST_ACTIVITY_STATE flags */ +#define GUEST_ACTIVITY_ACTIVE 0 +#define GUEST_ACTIVITY_HLT 1 +#define GUEST_ACTIVITY_SHUTDOWN 2 +#define GUEST_ACTIVITY_WAIT_SIPI 3 + /* * Exit Qualifications for MOV for Control Register Access */ diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 396ff4cc8ed4..66d0fff1ee84 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -37,4 +37,39 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; +#include <asm/processor.h> + +static inline uint32_t xen_cpuid_base(void) +{ + uint32_t base, eax, ebx, ecx, edx; + char signature[13]; + + for (base = 0x40000000; base < 0x40010000; base += 0x100) { + cpuid(base, &eax, &ebx, &ecx, &edx); + *(uint32_t *)(signature + 0) = ebx; + *(uint32_t *)(signature + 4) = ecx; + *(uint32_t *)(signature + 8) = edx; + signature[12] = 0; + + if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2)) + return base; + } + + return 0; +} + +#ifdef CONFIG_XEN +extern bool xen_hvm_need_lapic(void); + +static inline bool xen_x2apic_para_available(void) +{ + return xen_hvm_need_lapic(); +} +#else +static inline bool xen_x2apic_para_available(void) +{ + return (xen_cpuid_base() != 0); +} +#endif + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 8760cc60a21c..f25bdf238a33 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -42,6 +42,11 @@ extern unsigned int machine_to_phys_order; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); +extern int m2p_add_override(unsigned long mfn, struct page *page); +extern int m2p_remove_override(struct page *page); +extern struct page *m2p_find_override(unsigned long mfn); +extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); + static inline unsigned long pfn_to_mfn(unsigned long pfn) { unsigned long mfn; @@ -72,9 +77,6 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; - if (unlikely((mfn >> machine_to_phys_order) != 0)) - return ~0; - pfn = 0; /* * The array access can fail (e.g., device space beyond end of RAM). @@ -83,6 +85,14 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) */ __get_user(pfn, &machine_to_phys_mapping[mfn]); + /* + * If this appears to be a foreign mfn (because the pfn + * doesn't map back to the mfn), then check the local override + * table to see if there's a better pfn to use. + */ + if (get_phys_to_machine(pfn) != mfn) + pfn = m2p_find_override_pfn(mfn, pfn); + return pfn; } |