Diffstat (limited to 'arch/x86/include/asm')
46 files changed, 427 insertions, 391 deletions
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 00c88a01301d..da181ad1d5f8 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -3,6 +3,7 @@ #include <linux/ioport.h> #include <linux/pci.h> +#include <linux/refcount.h> struct amd_nb_bus_dev_range { u8 bus; @@ -55,7 +56,7 @@ struct threshold_bank { struct threshold_block *blocks; /* initialized to the number of CPUs on the node sharing this bank */ - atomic_t cpus; + refcount_t cpus; }; struct amd_northbridge { diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index bdffcd9eab2b..5f01671c68f2 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -252,6 +252,8 @@ static inline int x2apic_enabled(void) { return 0; } #define x2apic_supported() (0) #endif /* !CONFIG_X86_X2APIC */ +struct irq_data; + /* * Copyright 2004 James Cleverdon, IBM. * Subject to the GNU Public License, v.2 @@ -296,9 +298,9 @@ struct apic { /* Can't be NULL on 64-bit */ unsigned long (*set_apic_id)(unsigned int id); - int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid); + int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, + struct irq_data *irqdata, + unsigned int *apicid); /* ipi */ void (*send_IPI)(int cpu, int vector); @@ -540,28 +542,12 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) #endif -static inline int -flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid) -{ - unsigned long cpu_mask = cpumask_bits(cpumask)[0] & - cpumask_bits(andmask)[0] & - cpumask_bits(cpu_online_mask)[0] & - APIC_ALL_CPUS; - - if (likely(cpu_mask)) { - *apicid = (unsigned int)cpu_mask; - return 0; - } else { - return -EINVAL; - } -} - -extern int -default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask, - unsigned int *apicid); +extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, + struct irq_data *irqdata, + unsigned int *apicid); +extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask, + struct irq_data *irqdata, + unsigned int *apicid); static inline void flat_vector_allocation_domain(int cpu, struct cpumask *retmask, diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index caa5798c92f4..33380b871463 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -246,19 +246,6 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) return c; } -/** - * atomic_inc_short - increment of a short integer - * @v: pointer to type int - * - * Atomically adds 1 to @v - * Returns the new value of @u - */ -static __always_inline short int atomic_inc_short(short int *v) -{ - asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); - return *v; -} - #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> #else diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 24118c0b4640..5343c19814b3 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -116,7 +116,6 @@ struct compat_statfs { int f_spare[4]; }; -#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff #define COMPAT_RLIM_INFINITY 0xffffffff typedef u32 compat_old_sigset_t; /* at least 32 bits */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2701e5f8145b..ca3c48c0872f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -286,6 +286,7 @@ #define 
X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ +#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 08a0838b83fb..398c79889f5c 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -19,8 +19,6 @@ # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) #endif -#define DMA_ERROR_CODE 0 - extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; @@ -35,9 +33,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); #define arch_dma_alloc_attrs arch_dma_alloc_attrs -#define HAVE_ARCH_DMA_SUPPORTED 1 -extern int dma_supported(struct device *hwdev, u64 mask); - extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, unsigned long attrs); diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h index 8e0f8b85b209..a504adc661a4 100644 --- a/arch/x86/include/asm/e820/api.h +++ b/arch/x86/include/asm/e820/api.h @@ -4,6 +4,7 @@ #include <asm/e820/types.h> extern struct e820_table *e820_table; +extern struct e820_table *e820_table_kexec; extern struct e820_table *e820_table_firmware; extern unsigned long pci_mem_start; diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 2f77bcefe6b4..796ff6c1aa53 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -33,7 +33,7 @@ #ifdef CONFIG_X86_32 -extern unsigned long asmlinkage efi_call_phys(void *, ...); +extern asmlinkage unsigned long efi_call_phys(void *, ...); #define arch_efi_call_virt_setup() kernel_fpu_begin() #define arch_efi_call_virt_teardown() kernel_fpu_end() @@ -52,7 +52,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #define EFI_LOADER_SIGNATURE "EL64" -extern u64 asmlinkage efi_call(void *fp, ...); +extern asmlinkage u64 efi_call(void *fp, ...); #define efi_call_phys(f, args...) efi_call((f), args) @@ -74,7 +74,7 @@ struct efi_scratch { __kernel_fpu_begin(); \ \ if (efi_scratch.use_pgd) { \ - efi_scratch.prev_cr3 = read_cr3(); \ + efi_scratch.prev_cr3 = __read_cr3(); \ write_cr3((unsigned long)efi_scratch.efi_pgt); \ __flush_tlb_all(); \ } \ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index e8ab9a46bc68..1c18d83d3f09 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -245,12 +245,13 @@ extern int force_personality32; #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 -/* This is the location that an ET_DYN program is loaded if exec'ed. Typical - use of this is to invoke "./ld.so someprog" to test out a new version of - the loader. We need to make sure that it is out of the way of the program - that it will "exec", and that there is sufficient room for the brk. */ - -#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +/* + * This is the base location for PIE (ET_DYN with INTERP) loads. On + * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * space open for things that want to use the area for 32-bit pointers. + */ +#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 
0x000400000UL : \ + 0x100000000UL) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h index b8ad261d11dc..c66d19e3c23e 100644 --- a/arch/x86/include/asm/extable.h +++ b/arch/x86/include/asm/extable.h @@ -29,6 +29,7 @@ struct pt_regs; } while (0) extern int fixup_exception(struct pt_regs *regs, int trapnr); +extern int fixup_bug(struct pt_regs *regs, int trapnr); extern bool ex_has_fault_handler(unsigned long ip); extern void early_fixup_exception(struct pt_regs *regs, int trapnr); diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 59405a248fc2..9b76cd331990 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -22,8 +22,8 @@ typedef struct { #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; - unsigned int irq_tlb_count; #endif + unsigned int irq_tlb_count; #ifdef CONFIG_X86_THERMAL_VECTOR unsigned int irq_thermal_count; #endif diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index 3a106165e03a..535af0f2d8ac 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -85,4 +85,8 @@ static inline void arch_clear_hugepage_flags(struct page *page) { } +#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE +static inline bool gigantic_page_supported(void) { return true; } +#endif + #endif /* _ASM_X86_HUGETLB_H */ diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 793869879464..fca144a104e4 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -6,6 +6,8 @@ extern int force_iommu, no_iommu; extern int iommu_detected; extern int iommu_pass_through; +int x86_dma_supported(struct device *dev, u64 mask); + /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 16d3fa211962..668cca540025 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -29,7 +29,6 @@ struct irq_desc; #include <linux/cpumask.h> extern int check_irq_vectors_for_cpu_disable(void); extern void fixup_irqs(void); -extern void irq_force_complete_move(struct irq_desc *desc); #endif #ifdef CONFIG_HAVE_KVM diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index a210eba2727c..023b4a9fc846 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -55,7 +55,8 @@ extern struct irq_domain * irq_remapping_get_irq_domain(struct irq_alloc_info *info); /* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. 
*/ -extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent); +extern struct irq_domain * +arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id); /* Get parent irqdomain for interrupt remapping irqdomain */ static inline struct irq_domain *arch_get_ir_parent_domain(void) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 055962615779..fde36f189836 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -23,6 +23,7 @@ struct x86_exception { u16 error_code; bool nested_page_fault; u64 address; /* cr2 or nested page fault gpa */ + u8 async_page_fault; }; /* @@ -296,6 +297,7 @@ struct x86_emulate_ctxt { bool perm_ok; /* do not check permissions if true */ bool ud; /* inject an #UD if host doesn't support insn */ + bool tf; /* TF value before instruction (after for syscall/sysret) */ bool have_exception; struct x86_exception exception; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9c761fea0c98..87ac4fba6d8e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -43,33 +43,36 @@ #define KVM_PRIVATE_MEM_SLOTS 3 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) -#define KVM_HALT_POLL_NS_DEFAULT 400000 +#define KVM_HALT_POLL_NS_DEFAULT 200000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS /* x86-specific vcpu->requests bit members */ -#define KVM_REQ_MIGRATE_TIMER 8 -#define KVM_REQ_REPORT_TPR_ACCESS 9 -#define KVM_REQ_TRIPLE_FAULT 10 -#define KVM_REQ_MMU_SYNC 11 -#define KVM_REQ_CLOCK_UPDATE 12 -#define KVM_REQ_EVENT 14 -#define KVM_REQ_APF_HALT 15 -#define KVM_REQ_STEAL_UPDATE 16 -#define KVM_REQ_NMI 17 -#define KVM_REQ_PMU 18 -#define KVM_REQ_PMI 19 -#define KVM_REQ_SMI 20 -#define KVM_REQ_MASTERCLOCK_UPDATE 21 -#define KVM_REQ_MCLOCK_INPROGRESS (22 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_SCAN_IOAPIC (23 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 -#define KVM_REQ_APIC_PAGE_RELOAD (25 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_HV_CRASH 26 -#define KVM_REQ_IOAPIC_EOI_EXIT 27 -#define KVM_REQ_HV_RESET 28 -#define KVM_REQ_HV_EXIT 29 -#define KVM_REQ_HV_STIMER 30 +#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) +#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) +#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) +#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) +#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) +#define KVM_REQ_EVENT KVM_ARCH_REQ(6) +#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) +#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) +#define KVM_REQ_NMI KVM_ARCH_REQ(9) +#define KVM_REQ_PMU KVM_ARCH_REQ(10) +#define KVM_REQ_PMI KVM_ARCH_REQ(11) +#define KVM_REQ_SMI KVM_ARCH_REQ(12) +#define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) +#define KVM_REQ_MCLOCK_INPROGRESS \ + KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_SCAN_IOAPIC \ + KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_GLOBAL_CLOCK_UPDATE KVM_ARCH_REQ(16) +#define KVM_REQ_APIC_PAGE_RELOAD \ + KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18) +#define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19) +#define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) +#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) +#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -254,7 +257,8 @@ union 
kvm_mmu_page_role { unsigned cr0_wp:1; unsigned smep_andnot_wp:1; unsigned smap_andnot_wp:1; - unsigned :8; + unsigned ad_disabled:1; + unsigned :7; /* * This is left at the top of the word so that @@ -458,10 +462,12 @@ struct kvm_vcpu_hv_synic { DECLARE_BITMAP(auto_eoi_bitmap, 256); DECLARE_BITMAP(vec_bitmap, 256); bool active; + bool dont_zero_synic_pages; }; /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { + u32 vp_index; u64 hv_vapic; s64 runtime_offset; struct kvm_vcpu_hv_synic synic; @@ -545,6 +551,7 @@ struct kvm_vcpu_arch { bool reinject; u8 nr; u32 error_code; + u8 nested_apf; } exception; struct kvm_queued_interrupt { @@ -645,6 +652,9 @@ struct kvm_vcpu_arch { u64 msr_val; u32 id; bool send_user_only; + u32 host_apf_reason; + unsigned long nested_apf_token; + bool delivery_as_pf_vmexit; } apf; /* OSVW MSRs (AMD only) */ @@ -799,6 +809,7 @@ struct kvm_arch { int audit_point; #endif + bool backwards_tsc_observed; bool boot_vcpu_runs_old_kvmclock; u32 bsp_vcpu_id; @@ -948,9 +959,7 @@ struct kvm_x86_ops { unsigned char *hypercall_addr); void (*set_irq)(struct kvm_vcpu *vcpu); void (*set_nmi)(struct kvm_vcpu *vcpu); - void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, - bool has_error_code, u32 error_code, - bool reinject); + void (*queue_exception)(struct kvm_vcpu *vcpu); void (*cancel_injection)(struct kvm_vcpu *vcpu); int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*nmi_allowed)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4fd5195deed0..181264989db5 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -266,6 +266,7 @@ static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *s #endif int mce_available(struct cpuinfo_x86 *c); +bool mce_is_memory_error(struct mce *m); DECLARE_PER_CPU(unsigned, mce_exception_count); DECLARE_PER_CPU(unsigned, mce_poll_count); @@ -284,10 +285,6 @@ int mce_notify_irq(void); DECLARE_PER_CPU(struct mce, injectm); -extern void register_mce_write_callback(ssize_t (*)(struct file *filp, - const char __user *ubuf, - size_t usize, loff_t *off)); - /* Disable CMCI/polling for MCA bank claimed by firmware */ extern void mce_disable_bank(int bank); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index f9813b6d8b80..79b647a7ebd0 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -37,12 +37,6 @@ typedef struct { #endif } mm_context_t; -#ifdef CONFIG_SMP void leave_mm(int cpu); -#else -static inline void leave_mm(int cpu) -{ -} -#endif #endif /* _ASM_X86_MMU_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 68b329d77b3a..ecfcb6643c9b 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -47,7 +47,7 @@ struct ldt_struct { * allocations, but it's not worth trying to optimize. */ struct desc_struct *entries; - unsigned int size; + unsigned int nr_entries; }; /* @@ -87,22 +87,46 @@ static inline void load_mm_ldt(struct mm_struct *mm) */ if (unlikely(ldt)) - set_ldt(ldt->entries, ldt->size); + set_ldt(ldt->entries, ldt->nr_entries); else clear_LDT(); #else clear_LDT(); #endif +} + +static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) +{ +#ifdef CONFIG_MODIFY_LDT_SYSCALL + /* + * Load the LDT if either the old or new mm had an LDT. + * + * An mm will never go from having an LDT to not having an LDT. 
Two + * mms never share an LDT, so we don't gain anything by checking to + * see whether the LDT changed. There's also no guarantee that + * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, + * then prev->context.ldt will also be non-NULL. + * + * If we really cared, we could optimize the case where prev == next + * and we're exiting lazy mode. Most of the time, if this happens, + * we don't actually need to reload LDTR, but modify_ldt() is mostly + * used by legacy code and emulators where we don't need this level of + * performance. + * + * This uses | instead of || because it generates better code. + */ + if (unlikely((unsigned long)prev->context.ldt | + (unsigned long)next->context.ldt)) + load_mm_ldt(next); +#endif DEBUG_LOCKS_WARN_ON(preemptible()); } static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { -#ifdef CONFIG_SMP if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); -#endif } static inline int init_new_context(struct task_struct *tsk, @@ -220,18 +244,6 @@ static inline int vma_pkey(struct vm_area_struct *vma) } #endif -static inline bool __pkru_allows_pkey(u16 pkey, bool write) -{ - u32 pkru = read_pkru(); - - if (!__pkru_allows_read(pkru, pkey)) - return false; - if (write && !__pkru_allows_write(pkru, pkey)) - return false; - - return true; -} - /* * We only want to enforce protection keys on the current process * because we effectively have no access to PKRU for other @@ -268,4 +280,23 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return __pkru_allows_pkey(vma_pkey(vma), write); } + +/* + * This can be used from process context to figure out what the value of + * CR3 is without needing to do a (slow) __read_cr3(). + * + * It's intended to be used for code like KVM that sneakily changes CR3 + * and needs to restore it. It needs to be used very carefully. + */ +static inline unsigned long __get_current_cr3_fast(void) +{ + unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd); + + /* For now, be very restrictive about when this can be called. 
*/ + VM_WARN_ON(in_nmi() || !in_atomic()); + + VM_BUG_ON(cr3 != __read_cr3()); + return cr3; +} + #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index fba100713924..2b58c8c1eeaa 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -2,8 +2,7 @@ #define _ASM_X86_MSHYPER_H #include <linux/types.h> -#include <linux/interrupt.h> -#include <linux/clocksource.h> +#include <linux/atomic.h> #include <asm/hyperv.h> /* @@ -137,7 +136,6 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) } } -#define hv_get_current_tick(tick) rdmsrl(HV_X64_MSR_TIME_REF_COUNT, tick) #define hv_init_timer(timer, tick) wrmsrl(timer, tick) #define hv_init_timer_config(config, val) wrmsrl(config, val) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 673f9ac50f6d..5573c75f8e4c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -137,6 +137,8 @@ #define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 +#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) #define MSR_PEBS_FRONTEND 0x000003f7 @@ -249,9 +251,13 @@ #define HWP_MIN_PERF(x) (x & 0xff) #define HWP_MAX_PERF(x) ((x & 0xff) << 8) #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) -#define HWP_ENERGY_PERF_PREFERENCE(x) ((x & 0xff) << 24) -#define HWP_ACTIVITY_WINDOW(x) ((x & 0xff3) << 32) -#define HWP_PACKAGE_CONTROL(x) ((x & 0x1) << 42) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_EPP_PERFORMANCE 0x00 +#define HWP_EPP_BALANCE_PERFORMANCE 0x80 +#define HWP_EPP_BALANCE_POWERSAVE 0xC0 +#define HWP_EPP_POWERSAVE 0xFF +#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) /* IA32_HWP_STATUS */ #define HWP_GUARANTEED_CHANGE(x) (x & 0x1) @@ -420,6 +426,8 @@ #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 +#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc + #define MSR_IA32_XSS 0x00000da0 #define FEATURE_CONTROL_LOCKED (1<<0) @@ -474,9 +482,11 @@ #define MSR_MISC_PWR_MGMT 0x000001aa #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 -#define ENERGY_PERF_BIAS_PERFORMANCE 0 -#define ENERGY_PERF_BIAS_NORMAL 6 -#define ENERGY_PERF_BIAS_POWERSAVE 15 +#define ENERGY_PERF_BIAS_PERFORMANCE 0 +#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4 +#define ENERGY_PERF_BIAS_NORMAL 6 +#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8 +#define ENERGY_PERF_BIAS_POWERSAVE 15 #define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 55fa56fe4e45..9ccac1926587 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -61,7 +61,7 @@ static inline void write_cr2(unsigned long x) PVOP_VCALL1(pv_mmu_ops.write_cr2, x); } -static inline unsigned long read_cr3(void) +static inline unsigned long __read_cr3(void) { return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3); } @@ -118,7 +118,7 @@ static inline u64 paravirt_read_msr(unsigned msr) static inline void paravirt_write_msr(unsigned msr, unsigned low, unsigned high) { - return PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high); + PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high); } static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) @@ -312,11 +312,9 @@ static inline 
void __flush_tlb_single(unsigned long addr) } static inline void flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, - unsigned long end) + const struct flush_tlb_info *info) { - PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end); + PVOP_VCALL2(pv_mmu_ops.flush_tlb_others, cpumask, info); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 7465d6fe336f..cb976bab6299 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -51,6 +51,7 @@ struct mm_struct; struct desc_struct; struct task_struct; struct cpumask; +struct flush_tlb_info; /* * Wrapper type for pointers to code which uses the non-standard @@ -223,9 +224,7 @@ struct pv_mmu_ops { void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); void (*flush_tlb_others)(const struct cpumask *cpus, - struct mm_struct *mm, - unsigned long start, - unsigned long end); + const struct flush_tlb_info *info); /* Hooks for allocating and freeing a pagetable top-level */ int (*pgd_alloc)(struct mm_struct *mm); diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 0b1ff4c1c14e..fffb2794dd89 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -7,6 +7,7 @@ bool pat_enabled(void); void pat_disable(const char *reason); extern void pat_init(void); +extern void init_cache_modes(void); extern int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index f513cc231151..473a7295ab10 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -77,14 +77,8 @@ static inline bool is_vmd(struct pci_bus *bus) extern unsigned int pcibios_assign_all_busses(void); extern int pci_legacy_init(void); -# ifdef CONFIG_ACPI -# define x86_default_pci_init pci_acpi_init -# else -# define x86_default_pci_init pci_legacy_init -# endif #else -# define pcibios_assign_all_busses() 0 -# define x86_default_pci_init NULL +static inline int pcibios_assign_all_busses(void) { return 0; } #endif extern unsigned long pci_mem_start; diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 50d35e3185f5..c8821bab938f 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -212,4 +212,51 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp) #define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) #define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) +#define gup_get_pte gup_get_pte +/* + * WARNING: only to be used in the get_user_pages_fast() implementation. + * + * With get_user_pages_fast(), we walk down the pagetables without taking + * any locks. For this we would like to load the pointers atomically, + * but that is not possible (without expensive cmpxchg8b) on PAE. What + * we do have is the guarantee that a PTE will only either go from not + * present to present, or present to not present or both -- it will not + * switch to a completely different present page without a TLB flush in + * between; something that we are blocking by holding interrupts off. 
+ * + * Setting ptes from not present to present goes: + * + * ptep->pte_high = h; + * smp_wmb(); + * ptep->pte_low = l; + * + * And present to not present goes: + * + * ptep->pte_low = 0; + * smp_wmb(); + * ptep->pte_high = 0; + * + * We must ensure here that the load of pte_low sees 'l' iff pte_high + * sees 'h'. We load pte_high *after* loading pte_low, which ensures we + * don't see an older value of pte_high. *Then* we recheck pte_low, + * which ensures that we haven't picked up a changed pte high. We might + * have gotten rubbish values from pte_low and pte_high, but we are + * guaranteed that pte_low will not have the present bit set *unless* + * it is 'l'. Because get_user_pages_fast() only operates on present ptes + * we're safe. + */ +static inline pte_t gup_get_pte(pte_t *ptep) +{ + pte_t pte; + + do { + pte.pte_low = ptep->pte_low; + smp_rmb(); + pte.pte_high = ptep->pte_high; + smp_rmb(); + } while (unlikely(pte.pte_low != ptep->pte_low)); + + return pte; +} + #endif /* _ASM_X86_PGTABLE_3LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index f5af95a0c6b8..77037b6f1caa 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -244,6 +244,11 @@ static inline int pud_devmap(pud_t pud) return 0; } #endif + +static inline int pgd_devmap(pgd_t pgd) +{ + return 0; +} #endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -917,7 +922,7 @@ extern pgd_t trampoline_pgd_entry; static inline void __meminit init_trampoline_default(void) { /* Default trampoline pgd value */ - trampoline_pgd_entry = init_level4_pgt[pgd_index(__PAGE_OFFSET)]; + trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; } # ifdef CONFIG_RANDOMIZE_MEMORY void __meminit init_trampoline(void); @@ -1185,6 +1190,54 @@ static inline u16 pte_flags_pkey(unsigned long pte_flags) #endif } +static inline bool __pkru_allows_pkey(u16 pkey, bool write) +{ + u32 pkru = read_pkru(); + + if (!__pkru_allows_read(pkru, pkey)) + return false; + if (write && !__pkru_allows_write(pkru, pkey)) + return false; + + return true; +} + +/* + * 'pteval' can come from a PTE, PMD or PUD. We only check + * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the + * same value on all 3 types. 
+ */ +static inline bool __pte_access_permitted(unsigned long pteval, bool write) +{ + unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER; + + if (write) + need_pte_bits |= _PAGE_RW; + + if ((pteval & need_pte_bits) != need_pte_bits) + return 0; + + return __pkru_allows_pkey(pte_flags_pkey(pteval), write); +} + +#define pte_access_permitted pte_access_permitted +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + return __pte_access_permitted(pte_val(pte), write); +} + +#define pmd_access_permitted pmd_access_permitted +static inline bool pmd_access_permitted(pmd_t pmd, bool write) +{ + return __pte_access_permitted(pmd_val(pmd), write); +} + +#define pud_access_permitted pud_access_permitted +static inline bool pud_access_permitted(pud_t pud, bool write) +{ + return __pte_access_permitted(pud_val(pud), write); +} + #include <asm-generic/pgtable.h> #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 9991224f6238..2160c1fee920 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -14,15 +14,17 @@ #include <linux/bitops.h> #include <linux/threads.h> +extern p4d_t level4_kernel_pgt[512]; +extern p4d_t level4_ident_pgt[512]; extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; extern pmd_t level2_ident_pgt[512]; extern pte_t level1_fixmap_pgt[512]; -extern pgd_t init_level4_pgt[]; +extern pgd_t init_top_pgt[]; -#define swapper_pg_dir init_level4_pgt +#define swapper_pg_dir init_top_pgt extern void paging_init(void); @@ -227,6 +229,20 @@ extern void cleanup_highmap(void); extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); -#endif /* !__ASSEMBLY__ */ +#define gup_fast_permitted gup_fast_permitted +static inline bool gup_fast_permitted(unsigned long start, int nr_pages, + int write) +{ + unsigned long len, end; + + len = (unsigned long)nr_pages << PAGE_SHIFT; + end = start + len; + if (end < start) + return false; + if (end >> __VIRTUAL_MASK_SHIFT) + return false; + return true; +} +#endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h deleted file mode 100644 index 0ff8fe71b255..000000000000 --- a/arch/x86/include/asm/pmem.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright(c) 2015 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ -#ifndef __ASM_X86_PMEM_H__ -#define __ASM_X86_PMEM_H__ - -#include <linux/uaccess.h> -#include <asm/cacheflush.h> -#include <asm/cpufeature.h> -#include <asm/special_insns.h> - -#ifdef CONFIG_ARCH_HAS_PMEM_API -/** - * arch_memcpy_to_pmem - copy data to persistent memory - * @dst: destination buffer for the copy - * @src: source buffer for the copy - * @n: length of the copy in bytes - * - * Copy data to persistent memory media via non-temporal stores so that - * a subsequent pmem driver flush operation will drain posted write queues. 
- */ -static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) -{ - int rem; - - /* - * We are copying between two kernel buffers, if - * __copy_from_user_inatomic_nocache() returns an error (page - * fault) we would have already reported a general protection fault - * before the WARN+BUG. - */ - rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n); - if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n", - __func__, dst, src, rem)) - BUG(); -} - -/** - * arch_wb_cache_pmem - write back a cache range with CLWB - * @vaddr: virtual start address - * @size: number of bytes to write back - * - * Write back a cache range using the CLWB (cache line write back) - * instruction. Note that @size is internally rounded up to be cache - * line size aligned. - */ -static inline void arch_wb_cache_pmem(void *addr, size_t size) -{ - u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; - unsigned long clflush_mask = x86_clflush_size - 1; - void *vend = addr + size; - void *p; - - for (p = (void *)((unsigned long)addr & ~clflush_mask); - p < vend; p += x86_clflush_size) - clwb(p); -} - -/** - * arch_copy_from_iter_pmem - copy data from an iterator to PMEM - * @addr: PMEM destination address - * @bytes: number of bytes to copy - * @i: iterator with source data - * - * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. - */ -static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, - struct iov_iter *i) -{ - size_t len; - - /* TODO: skip the write-back by always using non-temporal stores */ - len = copy_from_iter_nocache(addr, bytes, i); - - /* - * In the iovec case on x86_64 copy_from_iter_nocache() uses - * non-temporal stores for the bulk of the transfer, but we need - * to manually flush if the transfer is unaligned. A cached - * memory copy is used when destination or size is not naturally - * aligned. That is: - * - Require 8-byte alignment when size is 8 bytes or larger. - * - Require 4-byte alignment when size is 4 bytes. - * - * In the non-iovec case the entire destination needs to be - * flushed. - */ - if (iter_is_iovec(i)) { - unsigned long flushed, dest = (unsigned long) addr; - - if (bytes < 8) { - if (!IS_ALIGNED(dest, 4) || (bytes != 4)) - arch_wb_cache_pmem(addr, bytes); - } else { - if (!IS_ALIGNED(dest, 8)) { - dest = ALIGN(dest, boot_cpu_data.x86_clflush_size); - arch_wb_cache_pmem(addr, 1); - } - - flushed = dest - (unsigned long) addr; - if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8)) - arch_wb_cache_pmem(addr + bytes - 1, 1); - } - } else - arch_wb_cache_pmem(addr, bytes); - - return len; -} - -/** - * arch_clear_pmem - zero a PMEM memory range - * @addr: virtual start address - * @size: number of bytes to zero - * - * Write zeros into the memory range starting at 'addr' for 'size' bytes. - */ -static inline void arch_clear_pmem(void *addr, size_t size) -{ - memset(addr, 0, size); - arch_wb_cache_pmem(addr, size); -} - -static inline void arch_invalidate_pmem(void *addr, size_t size) -{ - clflush_cache_range(addr, size); -} -#endif /* CONFIG_ARCH_HAS_PMEM_API */ -#endif /* __ASM_X86_PMEM_H__ */ diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 39fb618e2211..79aa2f98398d 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -8,4 +8,40 @@ #else #define X86_VM_MASK 0 /* No VM86 support */ #endif + +/* + * CR3's layout varies depending on several things. 
+ * + * If CR4.PCIDE is set (64-bit only), then CR3[11:0] is the address space ID. + * If PAE is enabled, then CR3[11:5] is part of the PDPT address + * (i.e. it's 32-byte aligned, not page-aligned) and CR3[4:0] is ignored. + * Otherwise (non-PAE, non-PCID), CR3[3] is PWT, CR3[4] is PCD, and + * CR3[2:0] and CR3[11:5] are ignored. + * + * In all cases, Linux puts zeros in the low ignored bits and in PWT and PCD. + * + * CR3[63] is always read as zero. If CR4.PCIDE is set, then CR3[63] may be + * written as 1 to prevent the write to CR3 from flushing the TLB. + * + * On systems with SME, one bit (in a variable position!) is stolen to indicate + * that the top-level paging structure is encrypted. + * + * All of the remaining bits indicate the physical address of the top-level + * paging structure. + * + * CR3_ADDR_MASK is the mask used by read_cr3_pa(). + */ +#ifdef CONFIG_X86_64 +/* Mask off the address space ID bits. */ +#define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull +#define CR3_PCID_MASK 0xFFFull +#else +/* + * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save + * a tiny bit of code size by setting all the bits. + */ +#define CR3_ADDR_MASK 0xFFFFFFFFull +#define CR3_PCID_MASK 0ull +#endif + #endif /* _ASM_X86_PROCESSOR_FLAGS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3cada998a402..6a79547e8ee0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -231,6 +231,14 @@ native_cpuid_reg(ebx) native_cpuid_reg(ecx) native_cpuid_reg(edx) +/* + * Friendlier CR3 helpers. + */ +static inline unsigned long read_cr3_pa(void) +{ + return __read_cr3() & CR3_ADDR_MASK; +} + static inline void load_cr3(pgd_t *pgdir) { write_cr3(__pa(pgdir)); @@ -860,8 +868,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - extern void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp); @@ -901,8 +907,13 @@ static inline int mpx_disable_management(void) } #endif /* CONFIG_X86_INTEL_MPX */ +#ifdef CONFIG_CPU_SUP_AMD extern u16 amd_get_nb_id(int cpu); extern u32 amd_get_nodes_per_socket(void); +#else +static inline u16 amd_get_nb_id(int cpu) { return 0; } +static inline u32 amd_get_nodes_per_socket(void) { return 0; } +#endif static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) { diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ac1d5da14734..e4585a393965 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -44,7 +44,6 @@ extern unsigned long saved_video_mode; extern void reserve_standard_io_resources(void); extern void i386_reserve_resources(void); -extern void setup_default_timer_irq(void); #ifdef CONFIG_X86_INTEL_MID extern void x86_intel_mid_early_setup(void); diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 12af3e35edfa..9efaabf5b54b 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -39,7 +39,7 @@ static inline void native_write_cr2(unsigned long val) asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); } -static inline unsigned long native_read_cr3(void) +static inline unsigned long __native_read_cr3(void) { unsigned long val; asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); @@ -159,9 +159,13 @@ static inline void write_cr2(unsigned long x) native_write_cr2(x); } -static inline unsigned 
long read_cr3(void) +/* + * Careful! CR3 contains more than just an address. You probably want + * read_cr3_pa() instead. + */ +static inline unsigned long __read_cr3(void) { - return native_read_cr3(); + return __native_read_cr3(); } static inline void write_cr3(unsigned long x) diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index dcbd9bcce714..8abedf1d650e 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -74,6 +74,7 @@ static __always_inline void boot_init_stack_canary(void) get_random_bytes(&canary, sizeof(canary)); tsc = rdtsc(); canary += tsc + (tsc << 32UL); + canary &= CANARY_MASK; current->stack_canary = canary; #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index 3d3e8353ee5c..e9ee84873de5 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -142,7 +142,9 @@ static __always_inline void *__constant_memcpy(void *to, const void *from, } #define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *, const void *, size_t); +#ifndef CONFIG_FORTIFY_SOURCE #ifdef CONFIG_X86_USE_3DNOW #include <asm/mmx.h> @@ -195,11 +197,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) #endif #endif +#endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMMOVE void *memmove(void *dest, const void *src, size_t n); +extern int memcmp(const void *, const void *, size_t); +#ifndef CONFIG_FORTIFY_SOURCE #define memcmp __builtin_memcmp +#endif #define __HAVE_ARCH_MEMCHR extern void *memchr(const void *cs, int c, size_t count); @@ -321,6 +327,8 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, : __memset_generic((s), (c), (count))) #define __HAVE_ARCH_MEMSET +extern void *memset(void *, int, size_t); +#ifndef CONFIG_FORTIFY_SOURCE #if (__GNUC__ >= 4) #define memset(s, c, count) __builtin_memset(s, c, count) #else @@ -330,6 +338,7 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, (count)) \ : __memset((s), (c), (count))) #endif +#endif /* !CONFIG_FORTIFY_SOURCE */ /* * find the first occurrence of byte 'c', or 1 past the area if none diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 733bae07fb29..2a8c822de1fc 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -31,6 +31,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len); +#ifndef CONFIG_FORTIFY_SOURCE #ifndef CONFIG_KMEMCHECK #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 #define memcpy(dst, src, len) \ @@ -51,6 +52,7 @@ extern void *__memcpy(void *to, const void *from, size_t len); */ #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) #endif +#endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMSET void *memset(void *s, int c, size_t n); @@ -77,6 +79,11 @@ int strcmp(const char *cs, const char *ct); #define memcpy(dst, src, len) __memcpy(dst, src, len) #define memmove(dst, src, len) __memmove(dst, src, len) #define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. 
*/ +#endif + #endif #define __HAVE_ARCH_MEMCPY_MCSAFE 1 @@ -109,6 +116,11 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt) return 0; } +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1 +void memcpy_flushcache(void *dst, const void *src, size_t cnt); +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_STRING_64_H */ diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 6136a18152af..2bd96b4df140 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -42,8 +42,7 @@ struct saved_context { set_debugreg((thread)->debugreg##register, register) /* routines for saving/restoring kernel state */ -extern int acpi_save_state_mem(void); -extern char core_restore_code; -extern char restore_registers; +extern char core_restore_code[]; +extern char restore_registers[]; #endif /* _ASM_X86_SUSPEND_64_H */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 14824fc78f7e..58fffe79e417 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -83,7 +83,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 event_inj; u32 event_inj_err; u64 nested_cr3; - u64 lbr_ctl; + u64 virt_ext; u32 clean; u32 reserved_5; u64 next_rip; @@ -119,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define AVIC_ENABLE_SHIFT 31 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) +#define LBR_CTL_ENABLE_MASK BIT_ULL(0) +#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1) + #define SVM_INTERRUPT_SHADOW_MASK 1 #define SVM_IOIO_STR_SHIFT 2 diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 27e9f9d769b8..2016962103df 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -29,11 +29,9 @@ struct cyc2ns_data { u32 cyc2ns_mul; u32 cyc2ns_shift; u64 cyc2ns_offset; - u32 __count; - /* u32 hole */ -}; /* 24 bytes -- do not grow */ +}; /* 16 bytes */ -extern struct cyc2ns_data *cyc2ns_read_begin(void); -extern void cyc2ns_read_end(struct cyc2ns_data *); +extern void cyc2ns_read_begin(struct cyc2ns_data *); +extern void cyc2ns_read_end(void); #endif /* _ASM_X86_TIMER_H */ diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h new file mode 100644 index 000000000000..f4a6ff352a0e --- /dev/null +++ b/arch/x86/include/asm/tlbbatch.h @@ -0,0 +1,14 @@ +#ifndef _ARCH_X86_TLBBATCH_H +#define _ARCH_X86_TLBBATCH_H + +#include <linux/cpumask.h> + +struct arch_tlbflush_unmap_batch { + /* + * Each bit set is a CPU that potentially has a TLB entry for one of + * the PFNs being flushed.. + */ + struct cpumask cpumask; +}; + +#endif /* _ARCH_X86_TLBBATCH_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6ed9ea469b48..50ea3482e1d1 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -7,6 +7,7 @@ #include <asm/processor.h> #include <asm/cpufeature.h> #include <asm/special_insns.h> +#include <asm/smp.h> static inline void __invpcid(unsigned long pcid, unsigned long addr, unsigned long type) @@ -65,10 +66,14 @@ static inline void invpcid_flush_all_nonglobals(void) #endif struct tlb_state { -#ifdef CONFIG_SMP - struct mm_struct *active_mm; + /* + * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts + * are on. This means that it may not match current->active_mm, + * which will contain the previous user mm when we're in lazy TLB + * mode even if we've already switched back to swapper_pg_dir. 
+ */ + struct mm_struct *loaded_mm; int state; -#endif /* * Access to this CR4 shadow and to H/W CR4 is protected by @@ -151,7 +156,7 @@ static inline void __native_flush_tlb(void) * back: */ preempt_disable(); - native_write_cr3(native_read_cr3()); + native_write_cr3(__native_read_cr3()); preempt_enable(); } @@ -220,84 +225,16 @@ static inline void __flush_tlb_one(unsigned long addr) * - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages - * - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus + * - flush_tlb_others(cpumask, info) flushes TLBs on other cpus * * ..but the i386 has somewhat limited tlb flushing capabilities, * and page-granular flushes are available only on i486 and up. */ - -#ifndef CONFIG_SMP - -/* "_up" is for UniProcessor. - * - * This is a helper for other header functions. *Not* intended to be called - * directly. All global TLB flushes need to either call this, or to bump the - * vm statistics themselves. - */ -static inline void __flush_tlb_up(void) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - __flush_tlb(); -} - -static inline void flush_tlb_all(void) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - __flush_tlb_all(); -} - -static inline void local_flush_tlb(void) -{ - __flush_tlb_up(); -} - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - if (mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_one(addr); -} - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void flush_tlb_mm_range(struct mm_struct *mm, - unsigned long start, unsigned long end, unsigned long vmflag) -{ - if (mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, - unsigned long end) -{ -} - -static inline void reset_lazy_tlbstate(void) -{ -} - -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - flush_tlb_all(); -} - -#else /* SMP */ - -#include <asm/smp.h> +struct flush_tlb_info { + struct mm_struct *mm; + unsigned long start; + unsigned long end; +}; #define local_flush_tlb() __flush_tlb() @@ -307,29 +244,32 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags) extern void flush_tlb_all(void); -extern void flush_tlb_page(struct vm_area_struct *, unsigned long); extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) +{ + flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE); +} + void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, unsigned long end); + const struct flush_tlb_info *info); #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 -static inline void reset_lazy_tlbstate(void) +static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm) { - this_cpu_write(cpu_tlbstate.state, 0); - this_cpu_write(cpu_tlbstate.active_mm, 
&init_mm); + cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); } -#endif /* SMP */ +extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); #ifndef CONFIG_PARAVIRT -#define flush_tlb_others(mask, mm, start, end) \ - native_flush_tlb_others(mask, mm, start, end) +#define flush_tlb_others(mask, info) \ + native_flush_tlb_others(mask, info) #endif #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index fd91722315c8..30269dafec47 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -319,10 +319,10 @@ do { \ #define __get_user_asm_u64(x, ptr, retval, errret) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ - asm volatile(ASM_STAC "\n" \ + asm volatile("\n" \ "1: movl %2,%%eax\n" \ "2: movl %3,%%edx\n" \ - "3: " ASM_CLAC "\n" \ + "3:\n" \ ".section .fixup,\"ax\"\n" \ "4: mov %4,%0\n" \ " xorl %%eax,%%eax\n" \ @@ -331,7 +331,7 @@ do { \ ".previous\n" \ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ - : "=r" (retval), "=A"(x) \ + : "=r" (retval), "=&A"(x) \ : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \ "i" (errret), "0" (retval)); \ }) @@ -562,7 +562,6 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n); extern __must_check long strncpy_from_user(char *dst, const char __user *src, long count); -extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); unsigned long __must_check clear_user(void __user *mem, unsigned long len); @@ -700,14 +699,15 @@ extern struct movsl_mask { #define unsafe_put_user(x, ptr, err_label) \ do { \ int __pu_err; \ - __put_user_size((x), (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __put_user_size(__pu_val, (ptr), sizeof(*(ptr)), __pu_err, -EFAULT); \ if (unlikely(__pu_err)) goto err_label; \ } while (0) #define unsafe_get_user(x, ptr, err_label) \ do { \ int __gu_err; \ - unsigned long __gu_val; \ + __inttype(*(ptr)) __gu_val; \ __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ if (unlikely(__gu_err)) goto err_label; \ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index c5504b9a472e..b16f6a1d8b26 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -171,6 +171,10 @@ unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigne extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); +extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size); +extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, + size_t len); + static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) @@ -179,6 +183,13 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src, return __copy_user_nocache(dst, src, size, 0); } +static inline int +__copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) +{ + kasan_check_write(dst, size); + return __copy_user_flushcache(dst, src, size); +} + unsigned long copy_user_handle_tail(char *to, char *from, unsigned len); diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 6686820feae9..b5a32231abd8 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_UV_UV_H #define _ASM_X86_UV_UV_H +#include 
<asm/tlbflush.h> + enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; struct cpumask; @@ -15,10 +17,7 @@ extern void uv_cpu_init(void); extern void uv_nmi_init(void); extern void uv_system_init(void); extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long start, - unsigned long end, - unsigned int cpu); + const struct flush_tlb_info *info); #else /* X86_UV */ @@ -28,8 +27,8 @@ static inline int is_uv_hubless(void) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } static inline const struct cpumask * -uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, - unsigned long start, unsigned long end, unsigned int cpu) +uv_flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) { return cpumask; } #endif /* X86_UV */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index f6d20f6cca12..11071fcd630e 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -43,6 +43,7 @@ #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/smap.h> #include <xen/interface/xen.h> #include <xen/interface/sched.h> @@ -50,6 +51,8 @@ #include <xen/interface/platform.h> #include <xen/interface/xen-mca.h> +struct xen_dm_op_buf; + /* * The hypercall asms have to meet several constraints: * - Work on 32- and 64-bit. @@ -214,10 +217,12 @@ privcmd_call(unsigned call, __HYPERCALL_DECLS; __HYPERCALL_5ARG(a1, a2, a3, a4, a5); + stac(); asm volatile("call *%[call]" : __HYPERCALL_5PARAM : [call] "a" (&hypercall_page[call]) : __HYPERCALL_CLOBBER5); + clac(); return (long)__res; } @@ -474,9 +479,13 @@ HYPERVISOR_xenpmu_op(unsigned int op, void *arg) static inline int HYPERVISOR_dm_op( - domid_t dom, unsigned int nr_bufs, void *bufs) + domid_t dom, unsigned int nr_bufs, struct xen_dm_op_buf *bufs) { - return _hypercall3(int, dm_op, dom, nr_bufs, bufs); + int ret; + stac(); + ret = _hypercall3(int, dm_op, dom, nr_bufs, bufs); + clac(); + return ret; } static inline void |
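
Note on the amd_nb.h hunk near the top: it converts threshold_bank::cpus from atomic_t to refcount_t. A minimal sketch, assuming the standard linux/refcount.h API, of the pattern such a shared-counter field follows; the example_obj type and helpers are invented here for illustration and are not part of the patch. Unlike a bare atomic_t, refcount_t saturates and warns on underflow/overflow instead of silently wrapping.

#include <linux/types.h>
#include <linux/refcount.h>

struct example_obj {
	refcount_t users;
};

static void example_obj_init(struct example_obj *obj)
{
	refcount_set(&obj->users, 1);	/* first user of the object */
}

static void example_obj_get(struct example_obj *obj)
{
	refcount_inc(&obj->users);	/* another CPU starts sharing it */
}

static bool example_obj_put(struct example_obj *obj)
{
	/* Returns true when the last reference is dropped. */
	return refcount_dec_and_test(&obj->users);
}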
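Note on the special_insns.h and processor.h hunks: read_cr3() is renamed to __read_cr3(), and a new read_cr3_pa() helper masks off the non-address CR3 bits using CR3_ADDR_MASK from processor-flags.h. An illustrative sketch, not part of the patch, of recovering the current top-level page-table pointer; example_current_pgd is a made-up name.

#include <asm/processor.h>
#include <asm/page.h>
#include <asm/pgtable.h>

static pgd_t *example_current_pgd(void)
{
	/* Physical address of the top-level page table, PCID/flag bits masked. */
	unsigned long pgd_pa = read_cr3_pa();

	/* Convert back to a kernel virtual pointer for inspection. */
	return (pgd_t *)__va(pgd_pa);
}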
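Note on the tlbflush.h, paravirt, and uv hunks: the (cpumask, mm, start, end) argument list of flush_tlb_others() is replaced by a single struct flush_tlb_info, so the native, paravirt, and UV remote-flush paths share one signature. A minimal caller sketch, not part of the patch; example_flush_range is a made-up name used only to show how the structure is filled in and passed.

#include <linux/mm_types.h>
#include <asm/tlbflush.h>

static void example_flush_range(struct mm_struct *mm,
				unsigned long start, unsigned long end)
{
	struct flush_tlb_info info = {
		.mm    = mm,
		.start = start,
		.end   = end,
	};

	/* Ask every other CPU that may cache translations for @mm to flush. */
	flush_tlb_others(mm_cpumask(mm), &info);
}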