diff options
Diffstat (limited to 'arch/x86/include/asm')
47 files changed, 961 insertions, 469 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 3a45668f6dc3..94c18ebfd68c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -32,6 +32,10 @@ #include <asm/mpspec.h> #include <asm/realmode.h> +#ifdef CONFIG_ACPI_APEI +# include <asm/pgtable_types.h> +#endif + #ifdef CONFIG_ACPI extern int acpi_lapic; extern int acpi_ioapic; @@ -147,4 +151,23 @@ extern int x86_acpi_numa_init(void); #define acpi_unlazy_tlb(x) leave_mm(x) +#ifdef CONFIG_ACPI_APEI +static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) +{ + /* + * We currently have no way to look up the EFI memory map + * attributes for a region in a consistent way, because the + * memmap is discarded after efi_free_boot_services(). So if + * you call efi_mem_attributes() during boot and at runtime, + * you could theoretically see different attributes. + * + * Since we are yet to see any x86 platforms that require + * anything other than PAGE_KERNEL (some arm64 platforms + * require the equivalent of PAGE_KERNEL_NOCACHE), return that + * until we know differently. + */ + return PAGE_KERNEL; +} +#endif + #endif /* _ASM_X86_ACPI_H */ diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 1a5da2e63aee..3c56ef1ae068 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -81,7 +81,7 @@ static inline struct amd_northbridge *node_to_amd_nb(int node) return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } -static inline u16 amd_get_node_id(struct pci_dev *pdev) +static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) { struct pci_dev *misc; int i; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index ebf6d5e5668c..a30316bf801a 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -115,6 +115,59 @@ static inline bool apic_is_x2apic_enabled(void) return msr & X2APIC_ENABLE; } +extern void enable_IR_x2apic(void); + +extern int get_physical_broadcast(void); + +extern int lapic_get_maxlvt(void); +extern void clear_local_APIC(void); +extern void disconnect_bsp_APIC(int virt_wire_setup); +extern void disable_local_APIC(void); +extern void lapic_shutdown(void); +extern void sync_Arb_IDs(void); +extern void init_bsp_APIC(void); +extern void setup_local_APIC(void); +extern void init_apic_mappings(void); +void register_lapic_address(unsigned long address); +extern void setup_boot_APIC_clock(void); +extern void setup_secondary_APIC_clock(void); +extern int APIC_init_uniprocessor(void); + +#ifdef CONFIG_X86_64 +static inline int apic_force_enable(unsigned long addr) +{ + return -1; +} +#else +extern int apic_force_enable(unsigned long addr); +#endif + +extern int apic_bsp_setup(bool upmode); +extern void apic_ap_setup(void); + +/* + * On 32bit this is mach-xxx local + */ +#ifdef CONFIG_X86_64 +extern int apic_is_clustered_box(void); +#else +static inline int apic_is_clustered_box(void) +{ + return 0; +} +#endif + +extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); + +#else /* !CONFIG_X86_LOCAL_APIC */ +static inline void lapic_shutdown(void) { } +#define local_apic_timer_c2_ok 1 +static inline void init_apic_mappings(void) { } +static inline void disable_local_APIC(void) { } +# define setup_boot_APIC_clock x86_init_noop +# define setup_secondary_APIC_clock x86_init_noop +#endif /* !CONFIG_X86_LOCAL_APIC */ + #ifdef CONFIG_X86_X2APIC /* * Make previous memory operations globally visible before @@ -186,67 +239,14 @@ static inline int x2apic_enabled(void) } #define x2apic_supported() (cpu_has_x2apic) -#else +#else /* !CONFIG_X86_X2APIC */ static inline void check_x2apic(void) { } static inline void x2apic_setup(void) { } static inline int x2apic_enabled(void) { return 0; } #define x2apic_mode (0) #define x2apic_supported() (0) -#endif - -extern void enable_IR_x2apic(void); - -extern int get_physical_broadcast(void); - -extern int lapic_get_maxlvt(void); -extern void clear_local_APIC(void); -extern void disconnect_bsp_APIC(int virt_wire_setup); -extern void disable_local_APIC(void); -extern void lapic_shutdown(void); -extern void sync_Arb_IDs(void); -extern void init_bsp_APIC(void); -extern void setup_local_APIC(void); -extern void init_apic_mappings(void); -void register_lapic_address(unsigned long address); -extern void setup_boot_APIC_clock(void); -extern void setup_secondary_APIC_clock(void); -extern int APIC_init_uniprocessor(void); - -#ifdef CONFIG_X86_64 -static inline int apic_force_enable(unsigned long addr) -{ - return -1; -} -#else -extern int apic_force_enable(unsigned long addr); -#endif - -extern int apic_bsp_setup(bool upmode); -extern void apic_ap_setup(void); - -/* - * On 32bit this is mach-xxx local - */ -#ifdef CONFIG_X86_64 -extern int apic_is_clustered_box(void); -#else -static inline int apic_is_clustered_box(void) -{ - return 0; -} -#endif - -extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); - -#else /* !CONFIG_X86_LOCAL_APIC */ -static inline void lapic_shutdown(void) { } -#define local_apic_timer_c2_ok 1 -static inline void init_apic_mappings(void) { } -static inline void disable_local_APIC(void) { } -# define setup_boot_APIC_clock x86_init_noop -# define setup_secondary_APIC_clock x86_init_noop -#endif /* !CONFIG_X86_LOCAL_APIC */ +#endif /* !CONFIG_X86_X2APIC */ #ifdef CONFIG_X86_64 #define SET_APIC_ID(x) (apic->set_apic_id(x)) diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index e9168955c42f..ae5fb83e6d91 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -24,7 +24,7 @@ */ static __always_inline int atomic_read(const atomic_t *v) { - return ACCESS_ONCE((v)->counter); + return READ_ONCE((v)->counter); } /** @@ -36,7 +36,7 @@ static __always_inline int atomic_read(const atomic_t *v) */ static __always_inline void atomic_set(atomic_t *v, int i) { - v->counter = i; + WRITE_ONCE(v->counter, i); } /** @@ -182,6 +182,21 @@ static inline int atomic_xchg(atomic_t *v, int new) return xchg(&v->counter, new); } +#define ATOMIC_OP(op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + asm volatile(LOCK_PREFIX #op"l %1,%0" \ + : "+m" (v->counter) \ + : "ir" (i) \ + : "memory"); \ +} + +ATOMIC_OP(and) +ATOMIC_OP(or) +ATOMIC_OP(xor) + +#undef ATOMIC_OP + /** * __atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t @@ -219,16 +234,6 @@ static __always_inline short int atomic_inc_short(short int *v) return *v; } -/* These are x86-specific, used by some header files */ -#define atomic_clear_mask(mask, addr) \ - asm volatile(LOCK_PREFIX "andl %0,%1" \ - : : "r" (~(mask)), "m" (*(addr)) : "memory") - -#define atomic_set_mask(mask, addr) \ - asm volatile(LOCK_PREFIX "orl %0,%1" \ - : : "r" ((unsigned)(mask)), "m" (*(addr)) \ - : "memory") - #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> #else diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index b154de75c90c..a11c30b77fb5 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -313,4 +313,18 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) #undef alternative_atomic64 #undef __alternative_atomic64 +#define ATOMIC64_OP(op, c_op) \ +static inline void atomic64_##op(long long i, atomic64_t *v) \ +{ \ + long long old, c = 0; \ + while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ + c = old; \ +} + +ATOMIC64_OP(and, &) +ATOMIC64_OP(or, |) +ATOMIC64_OP(xor, ^) + +#undef ATOMIC64_OP + #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index b965f9e03f2a..037351022f54 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -18,7 +18,7 @@ */ static inline long atomic64_read(const atomic64_t *v) { - return ACCESS_ONCE((v)->counter); + return READ_ONCE((v)->counter); } /** @@ -30,7 +30,7 @@ static inline long atomic64_read(const atomic64_t *v) */ static inline void atomic64_set(atomic64_t *v, long i) { - v->counter = i; + WRITE_ONCE(v->counter, i); } /** @@ -220,4 +220,19 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) return dec; } +#define ATOMIC64_OP(op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + asm volatile(LOCK_PREFIX #op"q %1,%0" \ + : "+m" (v->counter) \ + : "er" (i) \ + : "memory"); \ +} + +ATOMIC64_OP(and) +ATOMIC64_OP(or) +ATOMIC64_OP(xor) + +#undef ATOMIC64_OP + #endif /* _ASM_X86_ATOMIC64_64_H */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 818cb8788225..0681d2532527 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -57,12 +57,12 @@ do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ @@ -74,12 +74,12 @@ do { \ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ barrier(); \ ___p1; \ diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 9bf3ea14b9f0..e63aa38e85fb 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -89,6 +89,8 @@ int set_pages_rw(struct page *page, int numpages); void clflush_cache_range(void *addr, unsigned int size); +#define mmio_flush_range(addr, size) clflush_cache_range(addr, size) + #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void); extern const int rodata_test_data; @@ -109,75 +111,4 @@ static inline int rodata_test(void) } #endif -#ifdef ARCH_HAS_NOCACHE_UACCESS - -/** - * arch_memcpy_to_pmem - copy data to persistent memory - * @dst: destination buffer for the copy - * @src: source buffer for the copy - * @n: length of the copy in bytes - * - * Copy data to persistent memory media via non-temporal stores so that - * a subsequent arch_wmb_pmem() can flush cpu and memory controller - * write buffers to guarantee durability. - */ -static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, - size_t n) -{ - int unwritten; - - /* - * We are copying between two kernel buffers, if - * __copy_from_user_inatomic_nocache() returns an error (page - * fault) we would have already reported a general protection fault - * before the WARN+BUG. - */ - unwritten = __copy_from_user_inatomic_nocache((void __force *) dst, - (void __user *) src, n); - if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n", - __func__, dst, src, unwritten)) - BUG(); -} - -/** - * arch_wmb_pmem - synchronize writes to persistent memory - * - * After a series of arch_memcpy_to_pmem() operations this drains data - * from cpu write buffers and any platform (memory controller) buffers - * to ensure that written data is durable on persistent memory media. - */ -static inline void arch_wmb_pmem(void) -{ - /* - * wmb() to 'sfence' all previous writes such that they are - * architecturally visible to 'pcommit'. Note, that we've - * already arranged for pmem writes to avoid the cache via - * arch_memcpy_to_pmem(). - */ - wmb(); - pcommit_sfence(); -} - -static inline bool __arch_has_wmb_pmem(void) -{ -#ifdef CONFIG_X86_64 - /* - * We require that wmb() be an 'sfence', that is only guaranteed on - * 64-bit builds - */ - return static_cpu_has(X86_FEATURE_PCOMMIT); -#else - return false; -#endif -} -#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */ -extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n); -extern void arch_wmb_pmem(void); - -static inline bool __arch_has_wmb_pmem(void) -{ - return false; -} -#endif - #endif /* _ASM_X86_CACHEFLUSH_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 477fc28050e4..e4f8010f22e0 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -12,7 +12,7 @@ #include <asm/disabled-features.h> #endif -#define NCAPINTS 13 /* N 32-bit words worth of info */ +#define NCAPINTS 14 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -193,7 +193,7 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_HWP ( 7*32+ 10) /* "hwp" Intel HWP */ -#define X86_FEATURE_HWP_NOITFY ( 7*32+ 11) /* Intel HWP_NOTIFY */ +#define X86_FEATURE_HWP_NOTIFY ( 7*32+ 11) /* Intel HWP_NOTIFY */ #define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */ #define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */ #define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */ @@ -241,6 +241,7 @@ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ #define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ @@ -254,6 +255,9 @@ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ +/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ +#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 1f5b7287d1ad..953b7263f844 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -12,7 +12,6 @@ #include <linux/dma-attrs.h> #include <asm/io.h> #include <asm/swiotlb.h> -#include <asm-generic/dma-coherent.h> #include <linux/dma-contiguous.h> #ifdef CONFIG_ISA @@ -41,24 +40,13 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) #endif } -#include <asm-generic/dma-mapping-common.h> - -/* Make sure we keep the same behaviour */ -static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - debug_dma_mapping_error(dev, dma_addr); - if (ops->mapping_error) - return ops->mapping_error(dev, dma_addr); - - return (dma_addr == DMA_ERROR_CODE); -} - -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) +bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); +#define arch_dma_alloc_attrs arch_dma_alloc_attrs +#define HAVE_ARCH_DMA_SUPPORTED 1 extern int dma_supported(struct device *hwdev, u64 mask); -extern int dma_set_mask(struct device *dev, u64 mask); + +#include <asm-generic/dma-mapping-common.h> extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, @@ -125,16 +113,4 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) return gfp; } -#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) - -void * -dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, struct dma_attrs *attrs); - -#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) - -void dma_free_attrs(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus, - struct dma_attrs *attrs); - #endif diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h new file mode 100644 index 000000000000..b7a1ab865d68 --- /dev/null +++ b/arch/x86/include/asm/dwarf2.h @@ -0,0 +1,84 @@ +#ifndef _ASM_X86_DWARF2_H +#define _ASM_X86_DWARF2_H + +#ifndef __ASSEMBLY__ +#warning "asm/dwarf2.h should be only included in pure assembly files" +#endif + +/* + * Macros for dwarf2 CFI unwind table entries. + * See "as.info" for details on these pseudo ops. Unfortunately + * they are only supported in very new binutils, so define them + * away for older version. + */ + +#ifdef CONFIG_AS_CFI + +#define CFI_STARTPROC .cfi_startproc +#define CFI_ENDPROC .cfi_endproc +#define CFI_DEF_CFA .cfi_def_cfa +#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register +#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset +#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset +#define CFI_OFFSET .cfi_offset +#define CFI_REL_OFFSET .cfi_rel_offset +#define CFI_REGISTER .cfi_register +#define CFI_RESTORE .cfi_restore +#define CFI_REMEMBER_STATE .cfi_remember_state +#define CFI_RESTORE_STATE .cfi_restore_state +#define CFI_UNDEFINED .cfi_undefined +#define CFI_ESCAPE .cfi_escape + +#ifdef CONFIG_AS_CFI_SIGNAL_FRAME +#define CFI_SIGNAL_FRAME .cfi_signal_frame +#else +#define CFI_SIGNAL_FRAME +#endif + +#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) +#ifndef BUILD_VDSO + /* + * Emit CFI data in .debug_frame sections, not .eh_frame sections. + * The latter we currently just discard since we don't do DWARF + * unwinding at runtime. So only the offline DWARF information is + * useful to anyone. Note we should not use this directive if + * vmlinux.lds.S gets changed so it doesn't discard .eh_frame. + */ + .cfi_sections .debug_frame +#else + /* + * For the vDSO, emit both runtime unwind information and debug + * symbols for the .dbg file. + */ + .cfi_sections .eh_frame, .debug_frame +#endif +#endif + +#else + +/* + * Due to the structure of pre-exisiting code, don't use assembler line + * comment character # to ignore the arguments. Instead, use a dummy macro. + */ +.macro cfi_ignore a=0, b=0, c=0, d=0 +.endm + +#define CFI_STARTPROC cfi_ignore +#define CFI_ENDPROC cfi_ignore +#define CFI_DEF_CFA cfi_ignore +#define CFI_DEF_CFA_REGISTER cfi_ignore +#define CFI_DEF_CFA_OFFSET cfi_ignore +#define CFI_ADJUST_CFA_OFFSET cfi_ignore +#define CFI_OFFSET cfi_ignore +#define CFI_REL_OFFSET cfi_ignore +#define CFI_REGISTER cfi_ignore +#define CFI_RESTORE cfi_ignore +#define CFI_REMEMBER_STATE cfi_ignore +#define CFI_RESTORE_STATE cfi_ignore +#define CFI_UNDEFINED cfi_ignore +#define CFI_ESCAPE cfi_ignore +#define CFI_SIGNAL_FRAME cfi_ignore + +#endif + +#endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 155162ea0e00..0010c78c4998 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -86,6 +86,18 @@ extern u64 asmlinkage efi_call(void *fp, ...); extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, u32 type, u64 attribute); +#ifdef CONFIG_KASAN +/* + * CONFIG_KASAN may redefine memset to __memset. __memset function is present + * only in kernel binary. Since the EFI stub linked into a separate binary it + * doesn't have __memset(). So we should use standard memset from + * arch/x86/boot/compressed/string.c. The same applies to memcpy and memmove. + */ +#undef memcpy +#undef memset +#undef memmove +#endif + #endif /* CONFIG_X86_32 */ extern struct efi_scratch efi_scratch; @@ -93,6 +105,7 @@ extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); extern int __init efi_memblock_x86_reserve_range(void); extern pgd_t * __init efi_call_phys_prolog(void); extern void __init efi_call_phys_epilog(pgd_t *save_pgd); +extern void __init efi_print_memmap(void); extern void __init efi_unmap_memmap(void); extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 141c561f4664..1514753fd435 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -171,11 +171,11 @@ do { \ static inline void elf_common_init(struct thread_struct *t, struct pt_regs *regs, const u16 ds) { - /* Commented-out registers are cleared in stub_execve */ - /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0; - regs->si = regs->di /*= regs->bp*/ = 0; + /* ax gets execve's return value. */ + /*regs->ax = */ regs->bx = regs->cx = regs->dx = 0; + regs->si = regs->di = regs->bp = 0; regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; - /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/ + regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; t->fs = t->gs = 0; t->fsindex = t->gsindex = 0; t->ds = t->es = ds; @@ -328,7 +328,7 @@ else \ #define VDSO_ENTRY \ ((unsigned long)current->mm->context.vdso + \ - selected_vdso32->sym___kernel_vsyscall) + vdso_image_32.sym___kernel_vsyscall) struct linux_binprm; diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index c49c5173158e..1c6f6ac52ad0 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -95,63 +95,122 @@ struct swregs_state { /* * List of XSAVE features Linux knows about: */ -enum xfeature_bit { - XSTATE_BIT_FP, - XSTATE_BIT_SSE, - XSTATE_BIT_YMM, - XSTATE_BIT_BNDREGS, - XSTATE_BIT_BNDCSR, - XSTATE_BIT_OPMASK, - XSTATE_BIT_ZMM_Hi256, - XSTATE_BIT_Hi16_ZMM, - - XFEATURES_NR_MAX, +enum xfeature { + XFEATURE_FP, + XFEATURE_SSE, + /* + * Values above here are "legacy states". + * Those below are "extended states". + */ + XFEATURE_YMM, + XFEATURE_BNDREGS, + XFEATURE_BNDCSR, + XFEATURE_OPMASK, + XFEATURE_ZMM_Hi256, + XFEATURE_Hi16_ZMM, + + XFEATURE_MAX, }; -#define XSTATE_FP (1 << XSTATE_BIT_FP) -#define XSTATE_SSE (1 << XSTATE_BIT_SSE) -#define XSTATE_YMM (1 << XSTATE_BIT_YMM) -#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) -#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) -#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) -#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) -#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) +#define XFEATURE_MASK_FP (1 << XFEATURE_FP) +#define XFEATURE_MASK_SSE (1 << XFEATURE_SSE) +#define XFEATURE_MASK_YMM (1 << XFEATURE_YMM) +#define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS) +#define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR) +#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK) +#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256) +#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM) + +#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) +#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \ + | XFEATURE_MASK_ZMM_Hi256 \ + | XFEATURE_MASK_Hi16_ZMM) + +#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM -#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) -#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) +struct reg_128_bit { + u8 regbytes[128/8]; +}; +struct reg_256_bit { + u8 regbytes[256/8]; +}; +struct reg_512_bit { + u8 regbytes[512/8]; +}; /* + * State component 2: + * * There are 16x 256-bit AVX registers named YMM0-YMM15. * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) - * and are stored in 'struct fxregs_state::xmm_space[]'. + * and are stored in 'struct fxregs_state::xmm_space[]' in the + * "legacy" area. * - * The high 128 bits are stored here: - * 16x 128 bits == 256 bytes. + * The high 128 bits are stored here. */ struct ymmh_struct { - u8 ymmh_space[256]; -}; - -/* We don't support LWP yet: */ -struct lwp_struct { - u8 reserved[128]; -}; + struct reg_128_bit hi_ymm[16]; +} __packed; /* Intel MPX support: */ -struct bndreg { + +struct mpx_bndreg { u64 lower_bound; u64 upper_bound; } __packed; +/* + * State component 3 is used for the 4 128-bit bounds registers + */ +struct mpx_bndreg_state { + struct mpx_bndreg bndreg[4]; +} __packed; -struct bndcsr { +/* + * State component 4 is used for the 64-bit user-mode MPX + * configuration register BNDCFGU and the 64-bit MPX status + * register BNDSTATUS. We call the pair "BNDCSR". + */ +struct mpx_bndcsr { u64 bndcfgu; u64 bndstatus; } __packed; -struct mpx_struct { - struct bndreg bndreg[4]; - struct bndcsr bndcsr; -}; +/* + * The BNDCSR state is padded out to be 64-bytes in size. + */ +struct mpx_bndcsr_state { + union { + struct mpx_bndcsr bndcsr; + u8 pad_to_64_bytes[64]; + }; +} __packed; + +/* AVX-512 Components: */ + +/* + * State component 5 is used for the 8 64-bit opmask registers + * k0-k7 (opmask state). + */ +struct avx_512_opmask_state { + u64 opmask_reg[8]; +} __packed; + +/* + * State component 6 is used for the upper 256 bits of the + * registers ZMM0-ZMM15. These 16 256-bit values are denoted + * ZMM0_H-ZMM15_H (ZMM_Hi256 state). + */ +struct avx_512_zmm_uppers_state { + struct reg_256_bit zmm_upper[16]; +} __packed; + +/* + * State component 7 is used for the 16 512-bit registers + * ZMM16-ZMM31 (Hi16_ZMM state). + */ +struct avx_512_hi16_state { + struct reg_512_bit hi16_zmm[16]; +} __packed; struct xstate_header { u64 xfeatures; @@ -159,22 +218,19 @@ struct xstate_header { u64 reserved[6]; } __attribute__((packed)); -/* New processor state extensions should be added here: */ -#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \ - sizeof(struct lwp_struct) + \ - sizeof(struct mpx_struct) ) /* * This is our most modern FPU state format, as saved by the XSAVE * and restored by the XRSTOR instructions. * * It consists of a legacy fxregs portion, an xstate header and - * subsequent fixed size areas as defined by the xstate header. - * Not all CPUs support all the extensions. + * subsequent areas as defined by the xstate header. Not all CPUs + * support all the extensions, so the size of the extended area + * can vary quite a bit between CPUs. */ struct xregs_state { struct fxregs_state i387; struct xstate_header header; - u8 __reserved[XSTATE_RESERVE]; + u8 extended_state_area[0]; } __attribute__ ((packed, aligned (64))); /* @@ -182,7 +238,9 @@ struct xregs_state { * put together, so that we can pick the right one runtime. * * The size of the structure is determined by the largest - * member - which is the xsave area: + * member - which is the xsave area. The padding is there + * to ensure that statically-allocated task_structs (just + * the init_task today) have enough space. */ union fpregs_state { struct fregs_state fsave; diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 4656b25bb9a7..3a6c89b70307 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -6,7 +6,7 @@ #include <linux/uaccess.h> /* Bit 63 of XCR0 is reserved for future expansion */ -#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) +#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63))) #define XSTATE_CPUID 0x0000000d @@ -19,14 +19,18 @@ #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) /* Supported features which support lazy state saving */ -#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ - | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) +#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM) /* Supported features which require eager state saving */ -#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) +#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) /* All currently supported features */ -#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) +#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) #ifdef CONFIG_X86_64 #define REX_PREFIX "0x48, " @@ -40,6 +44,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); +void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index f45acad3c4b6..24938852db30 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -3,9 +3,9 @@ #ifdef CONFIG_FUNCTION_TRACER #ifdef CC_USING_FENTRY -# define MCOUNT_ADDR ((long)(__fentry__)) +# define MCOUNT_ADDR ((unsigned long)(__fentry__)) #else -# define MCOUNT_ADDR ((long)(mcount)) +# define MCOUNT_ADDR ((unsigned long)(mcount)) #endif #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 5fa9fb0f8809..cc285ec4b2c1 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -63,10 +63,10 @@ /* hpet memory map physical address */ extern unsigned long hpet_address; extern unsigned long force_hpet_address; -extern int boot_hpet_disable; +extern bool boot_hpet_disable; extern u8 hpet_blockid; -extern int hpet_force_user; -extern u8 hpet_msi_disable; +extern bool hpet_force_user; +extern bool hpet_msi_disable; extern int is_hpet_enabled(void); extern int hpet_enable(void); extern void hpet_disable(void); diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 7cfc085b6879..de25aad07853 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -250,12 +250,6 @@ static inline void flush_write_buffers(void) #endif } -static inline void __pmem *arch_memremap_pmem(resource_size_t offset, - unsigned long size) -{ - return (void __force __pmem *) ioremap_cache(offset, size); -} - #endif /* __KERNEL__ */ extern void native_io_delay(void); diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index a4c1cf7e93f8..5daeca3d0f9e 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -16,15 +16,32 @@ # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC #endif -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:" ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" - _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t" ".popsection \n\t" - : : "i" (key) : : l_yes); + : : "i" (key), "i" (branch) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:" + ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" + "2:\n\t" + ".pushsection __jump_table, \"aw\" \n\t" + _ASM_ALIGN "\n\t" + _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t" + ".popsection \n\t" + : : "i" (key), "i" (branch) : : l_yes); + return false; l_yes: return true; diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 32ce71375b21..e5f5dc9787d5 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -29,11 +29,5 @@ extern void show_trace(struct task_struct *t, struct pt_regs *regs, extern void __show_regs(struct pt_regs *regs, int all); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); -#ifdef CONFIG_KEXEC -extern int in_crash_kexec; -#else -/* no crash dump is ever in progress if no crash kernel can be kexec'd */ -#define in_crash_kexec 0 -#endif #endif /* _ASM_X86_KDEBUG_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c12e845f59e6..3a36ee704c30 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -40,6 +40,7 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS @@ -711,6 +712,7 @@ struct kvm_vcpu_stat { u32 nmi_window_exits; u32 halt_exits; u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; u32 request_irq_exits; u32 irq_exits; @@ -1224,10 +1226,8 @@ void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); int kvm_is_in_guest(void); -int __x86_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem); -int x86_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem); +int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); +int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 2dbc0bf2b9f3..2ea4527e462f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -123,19 +123,27 @@ struct mca_config { }; struct mce_vendor_flags { - /* - * overflow recovery cpuid bit indicates that overflow - * conditions are not fatal - */ - __u64 overflow_recov : 1, - - /* - * SUCCOR stands for S/W UnCorrectable error COntainment - * and Recovery. It indicates support for data poisoning - * in HW and deferred error interrupts. - */ - succor : 1, - __reserved_0 : 62; + /* + * Indicates that overflow conditions are not fatal, when set. + */ + __u64 overflow_recov : 1, + + /* + * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and + * Recovery. It indicates support for data poisoning in HW and deferred + * error interrupts. + */ + succor : 1, + + /* + * (AMD) SMCA: This bit indicates support for Scalable MCA which expands + * the register space for each MCA bank and also increases number of + * banks. Also, to accommodate the new banks and registers, the MCA + * register space is moved to a new MSR range. + */ + smca : 1, + + __reserved_0 : 61; }; extern struct mce_vendor_flags mce_flags; diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 9e6278c7140e..34e62b1dcfce 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -27,7 +27,6 @@ struct cpu_signature { struct device; enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; -extern bool dis_ucode_ldr; struct microcode_ops { enum ucode_state (*request_microcode_user) (int cpu, @@ -55,6 +54,12 @@ struct ucode_cpu_info { }; extern struct ucode_cpu_info ucode_cpu_info[]; +#ifdef CONFIG_MICROCODE +int __init microcode_init(void); +#else +static inline int __init microcode_init(void) { return 0; }; +#endif + #ifdef CONFIG_MICROCODE_INTEL extern struct microcode_ops * __init init_intel_microcode(void); #else @@ -75,7 +80,6 @@ static inline struct microcode_ops * __init init_amd_microcode(void) static inline void __exit exit_amd_microcode(void) {} #endif -#ifdef CONFIG_MICROCODE_EARLY #define MAX_UCODE_COUNT 128 #define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) @@ -150,22 +154,18 @@ static inline unsigned int x86_model(unsigned int sig) return model; } +#ifdef CONFIG_MICROCODE extern void __init load_ucode_bsp(void); extern void load_ucode_ap(void); extern int __init save_microcode_in_initrd(void); void reload_early_microcode(void); extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); #else -static inline void __init load_ucode_bsp(void) {} -static inline void load_ucode_ap(void) {} -static inline int __init save_microcode_in_initrd(void) -{ - return 0; -} -static inline void reload_early_microcode(void) {} -static inline bool get_builtin_firmware(struct cpio_data *cd, const char *name) -{ - return false; -} +static inline void __init load_ucode_bsp(void) { } +static inline void load_ucode_ap(void) { } +static inline int __init save_microcode_in_initrd(void) { return 0; } +static inline void reload_early_microcode(void) { } +static inline bool +get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } #endif #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index ac6d328977a6..adfc847a395e 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -64,7 +64,7 @@ extern enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, s #define PATCH_MAX_SIZE PAGE_SIZE extern u8 amd_ucode_patch[PATCH_MAX_SIZE]; -#ifdef CONFIG_MICROCODE_AMD_EARLY +#ifdef CONFIG_MICROCODE_AMD extern void __init load_ucode_amd_bsp(unsigned int family); extern void load_ucode_amd_ap(void); extern int __init save_microcode_in_initrd_amd(void); @@ -76,4 +76,5 @@ static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; } void reload_ucode_amd(void) {} #endif +extern bool check_current_patch_level(u32 *rev, bool early); #endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 7991c606125d..8559b0102ea1 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h @@ -57,7 +57,7 @@ extern int has_newer_microcode(void *mc, unsigned int csig, int cpf, int rev); extern int microcode_sanity_check(void *mc, int print_err); extern int find_matching_signature(void *mc, unsigned int csig, int cpf); -#ifdef CONFIG_MICROCODE_INTEL_EARLY +#ifdef CONFIG_MICROCODE_INTEL extern void __init load_ucode_intel_bsp(void); extern void load_ucode_intel_ap(void); extern void show_ucode_info_early(void); @@ -71,13 +71,9 @@ static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; static inline void reload_ucode_intel(void) {} #endif -#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) +#ifdef CONFIG_HOTPLUG_CPU extern int save_mc_for_early(u8 *mc); #else -static inline int save_mc_for_early(u8 *mc) -{ - return 0; -} +static inline int save_mc_for_early(u8 *mc) { return 0; } #endif - #endif /* _ASM_X86_MICROCODE_INTEL_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index fcd17c1fc0c6..b8c14bb7fc8f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -141,6 +141,8 @@ #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define MSR_PEBS_FRONTEND 0x000003f7 + #define MSR_IA32_POWER_CTL 0x000001fc #define MSR_IA32_MC0_CTL 0x00000400 @@ -184,6 +186,12 @@ #define MSR_PP1_ENERGY_STATUS 0x00000641 #define MSR_PP1_POLICY 0x00000642 +#define MSR_CONFIG_TDP_NOMINAL 0x00000648 +#define MSR_CONFIG_TDP_LEVEL_1 0x00000649 +#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A +#define MSR_CONFIG_TDP_CONTROL 0x0000064B +#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C + #define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 #define MSR_PKG_ANY_CORE_C0_RES 0x00000659 #define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A @@ -325,6 +333,7 @@ /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 #define MSR_K8_TSEG_ADDR 0xc0010112 +#define MSR_K8_TSEG_MASK 0xc0010113 #define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ #define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ #define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h index 1c6f7f6212c1..c64373a2d731 100644 --- a/arch/x86/include/asm/numachip/numachip.h +++ b/arch/x86/include/asm/numachip/numachip.h @@ -14,6 +14,7 @@ #ifndef _ASM_X86_NUMACHIP_NUMACHIP_H #define _ASM_X86_NUMACHIP_NUMACHIP_H +extern u8 numachip_system; extern int __init pci_numachip_init(void); #endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */ diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h index 660f843df928..29719eecdc2e 100644 --- a/arch/x86/include/asm/numachip/numachip_csr.h +++ b/arch/x86/include/asm/numachip/numachip_csr.h @@ -14,12 +14,8 @@ #ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H #define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H -#include <linux/numa.h> -#include <linux/percpu.h> +#include <linux/smp.h> #include <linux/io.h> -#include <linux/swab.h> -#include <asm/types.h> -#include <asm/processor.h> #define CSR_NODE_SHIFT 16 #define CSR_NODE_BITS(p) (((unsigned long)(p)) << CSR_NODE_SHIFT) @@ -27,11 +23,8 @@ /* 32K CSR space, b15 indicates geo/non-geo */ #define CSR_OFFSET_MASK 0x7fffUL - -/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */ -#define NUMACHIP_GCSR_BASE 0x3fff00000000ULL -#define NUMACHIP_GCSR_LIM 0x3fff0fffffffULL -#define NUMACHIP_GCSR_SIZE (NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1) +#define CSR_G0_NODE_IDS (0x008 + (0 << 12)) +#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12)) /* * Local CSR space starts in global CSR space with "nodeid" = 0xfff0, however @@ -41,12 +34,7 @@ #define NUMACHIP_LCSR_BASE 0x3ffffe000000ULL #define NUMACHIP_LCSR_LIM 0x3fffffffffffULL #define NUMACHIP_LCSR_SIZE (NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1) - -static inline void *gcsr_address(int node, unsigned long offset) -{ - return __va(NUMACHIP_GCSR_BASE | (1UL << 15) | - CSR_NODE_BITS(node & CSR_NODE_MASK) | (offset & CSR_OFFSET_MASK)); -} +#define NUMACHIP_LAPIC_BITS 8 static inline void *lcsr_address(unsigned long offset) { @@ -54,114 +42,57 @@ static inline void *lcsr_address(unsigned long offset) CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK)); } -static inline unsigned int read_gcsr(int node, unsigned long offset) +static inline unsigned int read_lcsr(unsigned long offset) { - return swab32(readl(gcsr_address(node, offset))); + return swab32(readl(lcsr_address(offset))); } -static inline void write_gcsr(int node, unsigned long offset, unsigned int val) +static inline void write_lcsr(unsigned long offset, unsigned int val) { - writel(swab32(val), gcsr_address(node, offset)); + writel(swab32(val), lcsr_address(offset)); } -static inline unsigned int read_lcsr(unsigned long offset) +/* + * On NumaChip2, local CSR space is 16MB and starts at fixed offset below 4G + */ + +#define NUMACHIP2_LCSR_BASE 0xf0000000UL +#define NUMACHIP2_LCSR_SIZE 0x1000000UL +#define NUMACHIP2_APIC_ICR 0x100000 +#define NUMACHIP2_TIMER_DEADLINE 0x200000 +#define NUMACHIP2_TIMER_INT 0x200008 +#define NUMACHIP2_TIMER_NOW 0x200018 +#define NUMACHIP2_TIMER_RESET 0x200020 + +static inline void __iomem *numachip2_lcsr_address(unsigned long offset) { - return swab32(readl(lcsr_address(offset))); + return (void __iomem *)__va(NUMACHIP2_LCSR_BASE | + (offset & (NUMACHIP2_LCSR_SIZE - 1))); } -static inline void write_lcsr(unsigned long offset, unsigned int val) +static inline u32 numachip2_read32_lcsr(unsigned long offset) { - writel(swab32(val), lcsr_address(offset)); + return readl(numachip2_lcsr_address(offset)); } -/* ========================================================================= */ -/* CSR_G0_STATE_CLEAR */ -/* ========================================================================= */ - -#define CSR_G0_STATE_CLEAR (0x000 + (0 << 12)) -union numachip_csr_g0_state_clear { - unsigned int v; - struct numachip_csr_g0_state_clear_s { - unsigned int _state:2; - unsigned int _rsvd_2_6:5; - unsigned int _lost:1; - unsigned int _rsvd_8_31:24; - } s; -}; - -/* ========================================================================= */ -/* CSR_G0_NODE_IDS */ -/* ========================================================================= */ +static inline u64 numachip2_read64_lcsr(unsigned long offset) +{ + return readq(numachip2_lcsr_address(offset)); +} -#define CSR_G0_NODE_IDS (0x008 + (0 << 12)) -union numachip_csr_g0_node_ids { - unsigned int v; - struct numachip_csr_g0_node_ids_s { - unsigned int _initialid:16; - unsigned int _nodeid:12; - unsigned int _rsvd_28_31:4; - } s; -}; - -/* ========================================================================= */ -/* CSR_G3_EXT_IRQ_GEN */ -/* ========================================================================= */ +static inline void numachip2_write32_lcsr(unsigned long offset, u32 val) +{ + writel(val, numachip2_lcsr_address(offset)); +} -#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12)) -union numachip_csr_g3_ext_irq_gen { - unsigned int v; - struct numachip_csr_g3_ext_irq_gen_s { - unsigned int _vector:8; - unsigned int _msgtype:3; - unsigned int _index:5; - unsigned int _destination_apic_id:16; - } s; -}; - -/* ========================================================================= */ -/* CSR_G3_EXT_IRQ_STATUS */ -/* ========================================================================= */ - -#define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12)) -union numachip_csr_g3_ext_irq_status { - unsigned int v; - struct numachip_csr_g3_ext_irq_status_s { - unsigned int _result:32; - } s; -}; - -/* ========================================================================= */ -/* CSR_G3_EXT_IRQ_DEST */ -/* ========================================================================= */ - -#define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12)) -union numachip_csr_g3_ext_irq_dest { - unsigned int v; - struct numachip_csr_g3_ext_irq_dest_s { - unsigned int _irq:8; - unsigned int _rsvd_8_31:24; - } s; -}; - -/* ========================================================================= */ -/* CSR_G3_NC_ATT_MAP_SELECT */ -/* ========================================================================= */ - -#define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12)) -union numachip_csr_g3_nc_att_map_select { - unsigned int v; - struct numachip_csr_g3_nc_att_map_select_s { - unsigned int _upper_address_bits:4; - unsigned int _select_ram:4; - unsigned int _rsvd_8_31:24; - } s; -}; - -/* ========================================================================= */ -/* CSR_G3_NC_ATT_MAP_SELECT_0-255 */ -/* ========================================================================= */ - -#define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12)) +static inline void numachip2_write64_lcsr(unsigned long offset, u64 val) +{ + writeq(val, numachip2_lcsr_address(offset)); +} -#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */ +static inline unsigned int numachip2_timer(void) +{ + return (smp_processor_id() % 48) << 6; +} +#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index ce029e4fa7c6..31247b5bff7c 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -97,7 +97,6 @@ struct pv_lazy_ops { struct pv_time_ops { unsigned long long (*sched_clock)(void); unsigned long long (*steal_clock)(int cpu); - unsigned long (*get_tsc_khz)(void); }; struct pv_cpu_ops { diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h new file mode 100644 index 000000000000..d8ce3ec816ab --- /dev/null +++ b/arch/x86/include/asm/pmem.h @@ -0,0 +1,153 @@ +/* + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __ASM_X86_PMEM_H__ +#define __ASM_X86_PMEM_H__ + +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/cpufeature.h> +#include <asm/special_insns.h> + +#ifdef CONFIG_ARCH_HAS_PMEM_API +/** + * arch_memcpy_to_pmem - copy data to persistent memory + * @dst: destination buffer for the copy + * @src: source buffer for the copy + * @n: length of the copy in bytes + * + * Copy data to persistent memory media via non-temporal stores so that + * a subsequent arch_wmb_pmem() can flush cpu and memory controller + * write buffers to guarantee durability. + */ +static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, + size_t n) +{ + int unwritten; + + /* + * We are copying between two kernel buffers, if + * __copy_from_user_inatomic_nocache() returns an error (page + * fault) we would have already reported a general protection fault + * before the WARN+BUG. + */ + unwritten = __copy_from_user_inatomic_nocache((void __force *) dst, + (void __user *) src, n); + if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n", + __func__, dst, src, unwritten)) + BUG(); +} + +/** + * arch_wmb_pmem - synchronize writes to persistent memory + * + * After a series of arch_memcpy_to_pmem() operations this drains data + * from cpu write buffers and any platform (memory controller) buffers + * to ensure that written data is durable on persistent memory media. + */ +static inline void arch_wmb_pmem(void) +{ + /* + * wmb() to 'sfence' all previous writes such that they are + * architecturally visible to 'pcommit'. Note, that we've + * already arranged for pmem writes to avoid the cache via + * arch_memcpy_to_pmem(). + */ + wmb(); + pcommit_sfence(); +} + +/** + * __arch_wb_cache_pmem - write back a cache range with CLWB + * @vaddr: virtual start address + * @size: number of bytes to write back + * + * Write back a cache range using the CLWB (cache line write back) + * instruction. This function requires explicit ordering with an + * arch_wmb_pmem() call. This API is internal to the x86 PMEM implementation. + */ +static inline void __arch_wb_cache_pmem(void *vaddr, size_t size) +{ + u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; + unsigned long clflush_mask = x86_clflush_size - 1; + void *vend = vaddr + size; + void *p; + + for (p = (void *)((unsigned long)vaddr & ~clflush_mask); + p < vend; p += x86_clflush_size) + clwb(p); +} + +/* + * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec + * iterators, so for other types (bvec & kvec) we must do a cache write-back. + */ +static inline bool __iter_needs_pmem_wb(struct iov_iter *i) +{ + return iter_is_iovec(i) == false; +} + +/** + * arch_copy_from_iter_pmem - copy data from an iterator to PMEM + * @addr: PMEM destination address + * @bytes: number of bytes to copy + * @i: iterator with source data + * + * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. + * This function requires explicit ordering with an arch_wmb_pmem() call. + */ +static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, + struct iov_iter *i) +{ + void *vaddr = (void __force *)addr; + size_t len; + + /* TODO: skip the write-back by always using non-temporal stores */ + len = copy_from_iter_nocache(vaddr, bytes, i); + + if (__iter_needs_pmem_wb(i)) + __arch_wb_cache_pmem(vaddr, bytes); + + return len; +} + +/** + * arch_clear_pmem - zero a PMEM memory range + * @addr: virtual start address + * @size: number of bytes to zero + * + * Write zeros into the memory range starting at 'addr' for 'size' bytes. + * This function requires explicit ordering with an arch_wmb_pmem() call. + */ +static inline void arch_clear_pmem(void __pmem *addr, size_t size) +{ + void *vaddr = (void __force *)addr; + + /* TODO: implement the zeroing via non-temporal writes */ + if (size == PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0) + clear_page(vaddr); + else + memset(vaddr, 0, size); + + __arch_wb_cache_pmem(vaddr, size); +} + +static inline bool __arch_has_wmb_pmem(void) +{ + /* + * We require that wmb() be an 'sfence', that is only guaranteed on + * 64-bit builds + */ + return static_cpu_has(X86_FEATURE_PCOMMIT); +} +#endif /* CONFIG_ARCH_HAS_PMEM_API */ +#endif /* __ASM_X86_PMEM_H__ */ diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index b12f81022a6b..01bcde84d3e4 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -30,12 +30,9 @@ static __always_inline void preempt_count_set(int pc) /* * must be macros to avoid header recursion hell */ -#define init_task_preempt_count(p) do { \ - task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \ -} while (0) +#define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \ per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ } while (0) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bb911e718330..67522256c7ff 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -556,12 +556,12 @@ static inline unsigned int cpuid_edx(unsigned int op) } /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) +static __always_inline void rep_nop(void) { asm volatile("rep; nop" ::: "memory"); } -static inline void cpu_relax(void) +static __always_inline void cpu_relax(void) { rep_nop(); } diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 655e07a48f6c..67f08230103a 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h @@ -41,6 +41,7 @@ struct pvclock_wall_clock { #define PVCLOCK_TSC_STABLE_BIT (1 << 0) #define PVCLOCK_GUEST_STOPPED (1 << 1) +/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */ #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PVCLOCK_ABI_H */ diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h index ae0e241e228b..c537cbb038a7 100644 --- a/arch/x86/include/asm/qrwlock.h +++ b/arch/x86/include/asm/qrwlock.h @@ -2,16 +2,6 @@ #define _ASM_X86_QRWLOCK_H #include <asm-generic/qrwlock_types.h> - -#ifndef CONFIG_X86_PPRO_FENCE -#define queue_write_unlock queue_write_unlock -static inline void queue_write_unlock(struct qrwlock *lock) -{ - barrier(); - ACCESS_ONCE(*(u8 *)&lock->cnts) = 0; -} -#endif - #include <asm-generic/qrwlock.h> #endif /* _ASM_X86_QRWLOCK_H */ diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 9d51fae1cba3..eaba08076030 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -39,18 +39,27 @@ static inline void queued_spin_unlock(struct qspinlock *lock) } #endif -#define virt_queued_spin_lock virt_queued_spin_lock - -static inline bool virt_queued_spin_lock(struct qspinlock *lock) +#ifdef CONFIG_PARAVIRT +#define virt_spin_lock virt_spin_lock +static inline bool virt_spin_lock(struct qspinlock *lock) { if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return false; - while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0) - cpu_relax(); + /* + * On hypervisors without PARAVIRT_SPINLOCKS support we fall + * back to a Test-and-Set spinlock, because fair locks have + * horrible lock 'holder' preemption issues. + */ + + do { + while (atomic_read(&lock->val) != 0) + cpu_relax(); + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); return true; } +#endif /* CONFIG_PARAVIRT */ #include <asm-generic/qspinlock.h> diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index e4661196994e..ff8b9a17dc4b 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -27,12 +27,11 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t function. */ #define __HAVE_ARCH_MEMCPY 1 +extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len); #ifndef CONFIG_KMEMCHECK -#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 -extern void *memcpy(void *to, const void *from, size_t len); -#else +#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 #define memcpy(dst, src, len) \ ({ \ size_t __len = (len); \ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index d7f3b3b78ac3..751bf4b7bf11 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -79,12 +79,12 @@ do { \ #else /* CONFIG_X86_32 */ /* frame pointer must be last for get_wchan */ -#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" -#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" +#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t" +#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t" #define __EXTRA_CLOBBER \ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ - "r12", "r13", "r14", "r15" + "r12", "r13", "r14", "r15", "flags" #ifdef CONFIG_CC_STACKPROTECTOR #define __switch_canary \ @@ -100,7 +100,11 @@ do { \ #define __switch_canary_iparam #endif /* CC_STACKPROTECTOR */ -/* Save restore flags to clear handle leaking NT */ +/* + * There is no need to save or restore flags, because flags are always + * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL + * has no effect. + */ #define switch_to(prev, next, last) \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d6a756ae04c8..999b7cd2e78c 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -20,9 +20,21 @@ #include <asm/thread_info.h> /* for TS_COMPAT */ #include <asm/unistd.h> -typedef void (*sys_call_ptr_t)(void); +typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, + unsigned long, unsigned long, + unsigned long, unsigned long); extern const sys_call_ptr_t sys_call_table[]; +#if defined(CONFIG_X86_32) +#define ia32_sys_call_table sys_call_table +#define __NR_syscall_compat_max __NR_syscall_max +#define IA32_NR_syscalls NR_syscalls +#endif + +#if defined(CONFIG_IA32_EMULATION) +extern const sys_call_ptr_t ia32_sys_call_table[]; +#endif + /* * Only the low 32 bits of orig_ax are meaningful, so we return int. * This importantly ignores the high bits on 64-bit, so comparisons diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8afdc3e44247..c7b551028740 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -57,9 +57,7 @@ struct thread_info { __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - int saved_preempt_count; mm_segment_t addr_limit; - void __user *sysenter_return; unsigned int sig_on_uaccess_error:1; unsigned int uaccess_err:1; /* uaccess failed */ }; @@ -69,7 +67,6 @@ struct thread_info { .task = &tsk, \ .flags = 0, \ .cpu = 0, \ - .saved_preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ } diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index cd791948b286..6df2029405a3 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -261,6 +261,12 @@ static inline void reset_lazy_tlbstate(void) #endif /* SMP */ +/* Not inlined due to inc_irq_stat not being defined yet */ +#define flush_tlb_local() { \ + inc_irq_stat(irq_tlb_count); \ + local_flush_tlb(); \ +} + #ifndef CONFIG_PARAVIRT #define flush_tlb_others(mask, mm, start, end) \ native_flush_tlb_others(mask, mm, start, end) diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h index 173dd3ba108c..0f492fc50bce 100644 --- a/arch/x86/include/asm/trace/mpx.h +++ b/arch/x86/include/asm/trace/mpx.h @@ -11,7 +11,7 @@ TRACE_EVENT(mpx_bounds_register_exception, TP_PROTO(void *addr_referenced, - const struct bndreg *bndreg), + const struct mpx_bndreg *bndreg), TP_ARGS(addr_referenced, bndreg), TP_STRUCT__entry( @@ -44,7 +44,7 @@ TRACE_EVENT(mpx_bounds_register_exception, TRACE_EVENT(bounds_exception_mpx, - TP_PROTO(const struct bndcsr *bndcsr), + TP_PROTO(const struct mpx_bndcsr *bndcsr), TP_ARGS(bndcsr), TP_STRUCT__entry( @@ -116,7 +116,8 @@ TRACE_EVENT(mpx_new_bounds_table, /* * This gets used outside of MPX-specific code, so we need a stub. */ -static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr) +static inline +void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr) { } diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a8df874f3e88..09b1b0ab94b7 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -51,13 +51,13 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un * limit, not add it to the address). */ if (__builtin_constant_p(size)) - return addr > limit - size; + return unlikely(addr > limit - size); /* Arbitrary sizes? Be careful about overflow */ addr += size; - if (addr < size) + if (unlikely(addr < size)) return true; - return addr > limit; + return unlikely(addr > limit); } #define __range_not_ok(addr, size, limit) \ @@ -182,7 +182,7 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) : "=a" (__ret_gu), "=r" (__val_gu) \ : "0" (ptr), "i" (sizeof(*(ptr)))); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ - __ret_gu; \ + __builtin_expect(__ret_gu, 0); \ }) #define __put_user_x(size, x, ptr, __ret_pu) \ @@ -278,7 +278,7 @@ extern void __put_user_8(void); __put_user_x(X, __pu_val, ptr, __ret_pu); \ break; \ } \ - __ret_pu; \ + __builtin_expect(__ret_pu, 0); \ }) #define __put_user_size(x, ptr, size, retval, errret) \ @@ -401,7 +401,7 @@ do { \ ({ \ int __pu_err; \ __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ - __pu_err; \ + __builtin_expect(__pu_err, 0); \ }) #define __get_user_nocheck(x, ptr, size) \ @@ -410,7 +410,7 @@ do { \ unsigned long __gu_val; \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ - __gu_err; \ + __builtin_expect(__gu_err, 0); \ }) /* FIXME: this hack is definitely wrong -AK */ diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 8021bd28c0f1..756de9190aec 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -26,7 +26,7 @@ struct vdso_image { long sym___kernel_sigreturn; long sym___kernel_rt_sigreturn; long sym___kernel_vsyscall; - long sym_VDSO32_SYSENTER_RETURN; + long sym_int80_landing_pad; }; #ifdef CONFIG_X86_64 @@ -38,13 +38,7 @@ extern const struct vdso_image vdso_image_x32; #endif #if defined CONFIG_X86_32 || defined CONFIG_COMPAT -extern const struct vdso_image vdso_image_32_int80; -#ifdef CONFIG_COMPAT -extern const struct vdso_image vdso_image_32_syscall; -#endif -extern const struct vdso_image vdso_image_32_sysenter; - -extern const struct vdso_image *selected_vdso32; +extern const struct vdso_image vdso_image_32; #endif extern void __init init_vdso_image(const struct vdso_image *image); diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index 608a79d5a466..e6911caf5bbf 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -20,4 +20,15 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) /* No need for a barrier -- XCHG is a barrier on x86. */ #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) +extern int xen_have_vector_callback; + +/* + * Events delivered via platform PCI interrupts are always + * routed to vcpu 0 and hence cannot be rebound. + */ +static inline bool xen_support_evtchn_rebind(void) +{ + return (!xen_hvm_domain() || xen_have_vector_callback); +} + #endif /* _ASM_X86_XEN_EVENTS_H */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index ca08a27b90b3..4c20dd333412 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -336,10 +336,10 @@ HYPERVISOR_update_descriptor(u64 ma, u64 desc) return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); } -static inline int +static inline long HYPERVISOR_memory_op(unsigned int cmd, void *arg) { - return _hypercall2(int, memory_op, cmd, arg); + return _hypercall2(long, memory_op, cmd, arg); } static inline int @@ -465,6 +465,12 @@ HYPERVISOR_tmem_op( return _hypercall1(int, tmem_op, op); } +static inline int +HYPERVISOR_xenpmu_op(unsigned int op, void *arg) +{ + return _hypercall2(int, xenpmu_op, op, arg); +} + static inline void MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) { diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 3400dbaec3c3..62ca03ef5c65 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -3,12 +3,38 @@ * * Guest OS interface to x86 Xen. * - * Copyright (c) 2004, K A Fraser + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser */ #ifndef _ASM_X86_XEN_INTERFACE_H #define _ASM_X86_XEN_INTERFACE_H +/* + * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field + * in a struct in memory. + * XEN_GUEST_HANDLE_PARAM represent a guest pointer, when passed as an + * hypercall argument. + * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but + * they might not be on other architectures. + */ #ifdef __XEN__ #define __DEFINE_GUEST_HANDLE(name, type) \ typedef struct { type *p; } __guest_handle_ ## name @@ -88,13 +114,16 @@ DEFINE_GUEST_HANDLE(xen_ulong_t); * start of the GDT because some stupid OSes export hard-coded selector values * in their ABI. These hard-coded values are always near the start of the GDT, * so Xen places itself out of the way, at the far end of the GDT. + * + * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op */ #define FIRST_RESERVED_GDT_PAGE 14 #define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) /* - * Send an array of these to HYPERVISOR_set_trap_table() + * Send an array of these to HYPERVISOR_set_trap_table(). + * Terminate the array with a sentinel entry, with traps[].address==0. * The privilege level specifies which modes may enter a trap via a software * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate * privilege levels as follows: @@ -118,10 +147,41 @@ struct trap_info { DEFINE_GUEST_HANDLE_STRUCT(trap_info); struct arch_shared_info { - unsigned long max_pfn; /* max pfn that appears in table */ - /* Frame containing list of mfns containing list of mfns containing p2m. */ - unsigned long pfn_to_mfn_frame_list_list; - unsigned long nmi_reason; + /* + * Number of valid entries in the p2m table(s) anchored at + * pfn_to_mfn_frame_list_list and/or p2m_vaddr. + */ + unsigned long max_pfn; + /* + * Frame containing list of mfns containing list of mfns containing p2m. + * A value of 0 indicates it has not yet been set up, ~0 indicates it + * has been set to invalid e.g. due to the p2m being too large for the + * 3-level p2m tree. In this case the linear mapper p2m list anchored + * at p2m_vaddr is to be used. + */ + xen_pfn_t pfn_to_mfn_frame_list_list; + unsigned long nmi_reason; + /* + * Following three fields are valid if p2m_cr3 contains a value + * different from 0. + * p2m_cr3 is the root of the address space where p2m_vaddr is valid. + * p2m_cr3 is in the same format as a cr3 value in the vcpu register + * state and holds the folded machine frame number (via xen_pfn_to_cr3) + * of a L3 or L4 page table. + * p2m_vaddr holds the virtual address of the linear p2m list. All + * entries in the range [0...max_pfn[ are accessible via this pointer. + * p2m_generation will be incremented by the guest before and after each + * change of the mappings of the p2m list. p2m_generation starts at 0 + * and a value with the least significant bit set indicates that a + * mapping update is in progress. This allows guest external software + * (e.g. in Dom0) to verify that read mappings are consistent and + * whether they have changed since the last check. + * Modifying a p2m element in the linear p2m list is allowed via an + * atomic write only. + */ + unsigned long p2m_cr3; /* cr3 value of the p2m address space */ + unsigned long p2m_vaddr; /* virtual address of the p2m list */ + unsigned long p2m_generation; /* generation count of p2m mapping */ }; #endif /* !__ASSEMBLY__ */ @@ -137,13 +197,31 @@ struct arch_shared_info { /* * The following is all CPU context. Note that the fpu_ctxt block is filled * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. + * + * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise + * for HVM and PVH guests, not all information in this structure is updated: + * + * - For HVM guests, the structures read include: fpu_ctxt (if + * VGCT_I387_VALID is set), flags, user_regs, debugreg[*] + * + * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to + * set cr3. All other fields not used should be set to 0. */ struct vcpu_guest_context { /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ -#define VGCF_I387_VALID (1<<0) -#define VGCF_HVM_GUEST (1<<1) -#define VGCF_IN_KERNEL (1<<2) +#define VGCF_I387_VALID (1<<0) +#define VGCF_IN_KERNEL (1<<2) +#define _VGCF_i387_valid 0 +#define VGCF_i387_valid (1<<_VGCF_i387_valid) +#define _VGCF_in_kernel 2 +#define VGCF_in_kernel (1<<_VGCF_in_kernel) +#define _VGCF_failsafe_disables_events 3 +#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) +#define _VGCF_syscall_disables_events 4 +#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) +#define _VGCF_online 5 +#define VGCF_online (1<<_VGCF_online) unsigned long flags; /* VGCF_* flags */ struct cpu_user_regs user_regs; /* User-level CPU registers */ struct trap_info trap_ctxt[256]; /* Virtual IDT */ @@ -172,6 +250,129 @@ struct vcpu_guest_context { #endif }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context); + +/* AMD PMU registers and structures */ +struct xen_pmu_amd_ctxt { + /* + * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd). + * For PV(H) guests these fields are RO. + */ + uint32_t counters; + uint32_t ctrls; + + /* Counter MSRs */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint64_t regs[]; +#elif defined(__GNUC__) + uint64_t regs[0]; +#endif +}; + +/* Intel PMU registers and structures */ +struct xen_pmu_cntr_pair { + uint64_t counter; + uint64_t control; +}; + +struct xen_pmu_intel_ctxt { + /* + * Offsets to fixed and architectural counter MSRs (relative to + * xen_pmu_arch.c.intel). + * For PV(H) guests these fields are RO. + */ + uint32_t fixed_counters; + uint32_t arch_counters; + + /* PMU registers */ + uint64_t global_ctrl; + uint64_t global_ovf_ctrl; + uint64_t global_status; + uint64_t fixed_ctrl; + uint64_t ds_area; + uint64_t pebs_enable; + uint64_t debugctl; + + /* Fixed and architectural counter MSRs */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint64_t regs[]; +#elif defined(__GNUC__) + uint64_t regs[0]; +#endif +}; + +/* Sampled domain's registers */ +struct xen_pmu_regs { + uint64_t ip; + uint64_t sp; + uint64_t flags; + uint16_t cs; + uint16_t ss; + uint8_t cpl; + uint8_t pad[3]; +}; + +/* PMU flags */ +#define PMU_CACHED (1<<0) /* PMU MSRs are cached in the context */ +#define PMU_SAMPLE_USER (1<<1) /* Sample is from user or kernel mode */ +#define PMU_SAMPLE_REAL (1<<2) /* Sample is from realmode */ +#define PMU_SAMPLE_PV (1<<3) /* Sample from a PV guest */ + +/* + * Architecture-specific information describing state of the processor at + * the time of PMU interrupt. + * Fields of this structure marked as RW for guest should only be written by + * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the + * hypervisor during PMU interrupt). Hypervisor will read updated data in + * XENPMU_flush hypercall and clear PMU_CACHED bit. + */ +struct xen_pmu_arch { + union { + /* + * Processor's registers at the time of interrupt. + * WO for hypervisor, RO for guests. + */ + struct xen_pmu_regs regs; + /* + * Padding for adding new registers to xen_pmu_regs in + * the future + */ +#define XENPMU_REGS_PAD_SZ 64 + uint8_t pad[XENPMU_REGS_PAD_SZ]; + } r; + + /* WO for hypervisor, RO for guest */ + uint64_t pmu_flags; + + /* + * APIC LVTPC register. + * RW for both hypervisor and guest. + * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware + * during XENPMU_flush or XENPMU_lvtpc_set. + */ + union { + uint32_t lapic_lvtpc; + uint64_t pad; + } l; + + /* + * Vendor-specific PMU registers. + * RW for both hypervisor and guest (see exceptions above). + * Guest's updates to this field are verified and then loaded by the + * hypervisor into hardware during XENPMU_flush + */ + union { + struct xen_pmu_amd_ctxt amd; + struct xen_pmu_intel_ctxt intel; + + /* + * Padding for contexts (fixed parts only, does not include + * MSR banks that are specified by offsets) + */ +#define XENPMU_CTXT_PAD_SZ 128 + uint8_t pad[XENPMU_CTXT_PAD_SZ]; + } c; +}; + #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index c44a5d53e464..0679e11d2cf7 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -35,9 +35,7 @@ typedef struct xpaddr { #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) #define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT) -/* Maximum amount of memory we can handle in a domain in pages */ -#define MAX_DOMAIN_PAGES \ - ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) +#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) extern unsigned long *machine_to_phys_mapping; extern unsigned long machine_to_phys_nr; @@ -48,8 +46,8 @@ extern unsigned long xen_max_p2m_pfn; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); -extern unsigned long set_phys_range_identity(unsigned long pfn_s, - unsigned long pfn_e); +extern unsigned long __init set_phys_range_identity(unsigned long pfn_s, + unsigned long pfn_e); extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, @@ -103,6 +101,11 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) { unsigned long mfn; + /* + * Some x86 code are still using pfn_to_mfn instead of + * pfn_to_mfn. This will have to be removed when we figured + * out which call. + */ if (xen_feature(XENFEAT_auto_translated_physmap)) return pfn; @@ -149,6 +152,11 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) { unsigned long pfn; + /* + * Some x86 code are still using mfn_to_pfn instead of + * gfn_to_pfn. This will have to be removed when we figure + * out which call. + */ if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; @@ -178,6 +186,27 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) return XPADDR(PFN_PHYS(mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset); } +/* Pseudo-physical <-> Guest conversion */ +static inline unsigned long pfn_to_gfn(unsigned long pfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return pfn; + else + return pfn_to_mfn(pfn); +} + +static inline unsigned long gfn_to_pfn(unsigned long gfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return gfn; + else + return mfn_to_pfn(gfn); +} + +/* Pseudo-physical <-> Bus conversion */ +#define pfn_to_bfn(pfn) pfn_to_gfn(pfn) +#define bfn_to_pfn(bfn) gfn_to_pfn(bfn) + /* * We detect special mappings in one of two ways: * 1. If the MFN is an I/O page then Xen will set the m2p entry @@ -198,7 +227,7 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) * require. In all the cases we care about, the FOREIGN_FRAME bit is * masked (e.g., pfn_to_mfn()) so behaviour there is correct. */ -static inline unsigned long mfn_to_local_pfn(unsigned long mfn) +static inline unsigned long bfn_to_local_pfn(unsigned long mfn) { unsigned long pfn; @@ -217,6 +246,10 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) #define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) +/* VIRT <-> GUEST conversion */ +#define virt_to_gfn(v) (pfn_to_gfn(virt_to_pfn(v))) +#define gfn_to_virt(g) (__va(gfn_to_pfn(g) << PAGE_SHIFT)) + static inline unsigned long pte_mfn(pte_t pte) { return (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT; @@ -264,7 +297,7 @@ void make_lowmem_page_readwrite(void *vaddr); static inline bool xen_arch_need_swiotlb(struct device *dev, unsigned long pfn, - unsigned long mfn) + unsigned long bfn) { return false; } |