diff options
Diffstat (limited to 'arch/x86/include/asm')
61 files changed, 940 insertions, 1435 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index ea34464d6221..b19ec8282d50 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -10,5 +10,3 @@ generated-y += xen-hypercalls.h generic-y += early_ioremap.h generic-y += export.h generic-y += mcs_spinlock.h -generic-y += mm-arch-hooks.h -generic-y += mmiowb.h diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 1ae4e5791afa..c7df20e78b09 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -12,7 +12,6 @@ struct amd_nb_bus_dev_range { u8 dev_limit; }; -extern const struct pci_device_id amd_nb_misc_ids[]; extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; extern bool early_is_amd_nb(u32 value); diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index cd339b88d5d4..0f63585edf5f 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -138,9 +138,6 @@ # define _ASM_EXTABLE_FAULT(from, to) \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) -# define _ASM_EXTABLE_EX(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) - # define _ASM_NOKPROBE(entry) \ .pushsection "_kprobe_blacklist","aw" ; \ _ASM_ALIGN ; \ @@ -166,9 +163,6 @@ # define _ASM_EXTABLE_FAULT(from, to) \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) -# define _ASM_EXTABLE_EX(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) - /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 062cdecb2f24..53f246e9df5a 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -54,7 +54,7 @@ arch_set_bit(long nr, volatile unsigned long *addr) if (__builtin_constant_p(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" : CONST_MASK_ADDR(nr, addr) - : "iq" ((u8)CONST_MASK(nr)) + : "iq" (CONST_MASK(nr) & 0xff) : "memory"); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" @@ -74,7 +74,7 @@ arch_clear_bit(long nr, volatile unsigned long *addr) if (__builtin_constant_p(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" : CONST_MASK_ADDR(nr, addr) - : "iq" ((u8)~CONST_MASK(nr))); + : "iq" (CONST_MASK(nr) ^ 0xff)); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index dc4cfc888d6d..dc9dc7b3911a 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h @@ -4,14 +4,18 @@ #ifndef _ASM_X86_CLOCKSOURCE_H #define _ASM_X86_CLOCKSOURCE_H -#define VCLOCK_NONE 0 /* No vDSO clock available. */ -#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ -#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */ -#define VCLOCK_HVCLOCK 3 /* vDSO should use vread_hvclock. */ -#define VCLOCK_MAX 3 +#include <asm/vdso/clocksource.h> -struct arch_clocksource_data { - int vclock_mode; -}; +extern unsigned int vclocks_used; + +static inline bool vclock_was_used(int vclock) +{ + return READ_ONCE(vclocks_used) & (1U << vclock); +} + +static inline void vclocks_set_used(unsigned int which) +{ + WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << which)); +} #endif /* _ASM_X86_CLOCKSOURCE_H */ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index adc6cc86b062..dd17c2da1af5 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -40,4 +40,22 @@ int mwait_usable(const struct cpuinfo_x86 *); unsigned int x86_family(unsigned int sig); unsigned int x86_model(unsigned int sig); unsigned int x86_stepping(unsigned int sig); +#ifdef CONFIG_CPU_SUP_INTEL +extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c); +extern void switch_to_sld(unsigned long tifn); +extern bool handle_user_split_lock(struct pt_regs *regs, long error_code); +extern bool handle_guest_split_lock(unsigned long ip); +#else +static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {} +static inline void switch_to_sld(unsigned long tifn) {} +static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) +{ + return false; +} + +static inline bool handle_guest_split_lock(unsigned long ip) +{ + return false; +} +#endif #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index 31c379c1da41..cf3d621c6892 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -5,9 +5,139 @@ /* * Declare drivers belonging to specific x86 CPUs * Similar in spirit to pci_device_id and related PCI functions + * + * The wildcard initializers are in mod_devicetable.h because + * file2alias needs them. Sigh. */ - #include <linux/mod_devicetable.h> +/* Get the INTEL_FAM* model defines */ +#include <asm/intel-family.h> +/* And the X86_VENDOR_* ones */ +#include <asm/processor.h> + +/* Centaur FAM6 models */ +#define X86_CENTAUR_FAM6_C7_A 0xa +#define X86_CENTAUR_FAM6_C7_D 0xd +#define X86_CENTAUR_FAM6_NANO 0xf + +/** + * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Base macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * Use only if you need all selectors. Otherwise use one of the shorter + * macros of the X86_MATCH_* family. If there is no matching shorthand + * macro, consider to add one. If you really need to wrap one of the macros + * into another macro at the usage site for good reasons, then please + * start this local macro with X86_MATCH to allow easy grepping. + */ +#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(_vendor, _family, _model, \ + _feature, _data) { \ + .vendor = X86_VENDOR_##_vendor, \ + .family = _family, \ + .model = _model, \ + .feature = _feature, \ + .driver_data = (unsigned long) _data \ +} + +/** + * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \ + X86_MODEL_ANY, feature, data) + +/** + * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \ + X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data) + +/** + * X86_MATCH_FEATURE - Macro for matching a CPU feature + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_FEATURE(feature, data) \ + X86_MATCH_VENDOR_FEATURE(ANY, feature, data) + +/** + * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @model: The model number, model constant or X86_MODEL_ANY + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \ + X86_FEATURE_ANY, data) + +/** + * X86_MATCH_VENDOR_FAM - Match vendor and family + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set of wildcards. + */ +#define X86_MATCH_VENDOR_FAM(vendor, family, data) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data) + +/** + * X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model + * @model: The model name without the INTEL_FAM6_ prefix or ANY + * The model name is expanded to INTEL_FAM6_@model internally + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * The vendor is set to INTEL, the family to 6 and all other missing + * arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards. + * + * See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information. + */ +#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \ + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data) /* * Match specific microcode revisions. diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f3327cb56edf..db189945e9b0 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -217,7 +217,7 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 or above (Zen) */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ @@ -285,6 +285,7 @@ #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -299,6 +300,7 @@ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ @@ -367,6 +369,7 @@ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ /* diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index ae391f609840..430fca13bb56 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -6,15 +6,6 @@ #warning "asm/dwarf2.h should be only included in pure assembly files" #endif -/* - * Macros for dwarf2 CFI unwind table entries. - * See "as.info" for details on these pseudo ops. Unfortunately - * they are only supported in very new binutils, so define them - * away for older version. - */ - -#ifdef CONFIG_AS_CFI - #define CFI_STARTPROC .cfi_startproc #define CFI_ENDPROC .cfi_endproc #define CFI_DEF_CFA .cfi_def_cfa @@ -30,20 +21,13 @@ #define CFI_UNDEFINED .cfi_undefined #define CFI_ESCAPE .cfi_escape -#ifdef CONFIG_AS_CFI_SIGNAL_FRAME -#define CFI_SIGNAL_FRAME .cfi_signal_frame -#else -#define CFI_SIGNAL_FRAME -#endif - -#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) #ifndef BUILD_VDSO /* * Emit CFI data in .debug_frame sections, not .eh_frame sections. * The latter we currently just discard since we don't do DWARF * unwinding at runtime. So only the offline DWARF information is - * useful to anyone. Note we should not use this directive if - * vmlinux.lds.S gets changed so it doesn't discard .eh_frame. + * useful to anyone. Note we should not use this directive if we + * ever decide to enable DWARF unwinding at runtime. */ .cfi_sections .debug_frame #else @@ -53,33 +37,5 @@ */ .cfi_sections .eh_frame, .debug_frame #endif -#endif - -#else - -/* - * Due to the structure of pre-exisiting code, don't use assembler line - * comment character # to ignore the arguments. Instead, use a dummy macro. - */ -.macro cfi_ignore a=0, b=0, c=0, d=0 -.endm - -#define CFI_STARTPROC cfi_ignore -#define CFI_ENDPROC cfi_ignore -#define CFI_DEF_CFA cfi_ignore -#define CFI_DEF_CFA_REGISTER cfi_ignore -#define CFI_DEF_CFA_OFFSET cfi_ignore -#define CFI_ADJUST_CFA_OFFSET cfi_ignore -#define CFI_OFFSET cfi_ignore -#define CFI_REL_OFFSET cfi_ignore -#define CFI_REGISTER cfi_ignore -#define CFI_RESTORE cfi_ignore -#define CFI_REMEMBER_STATE cfi_ignore -#define CFI_RESTORE_STATE cfi_ignore -#define CFI_UNDEFINED cfi_ignore -#define CFI_ESCAPE cfi_ignore -#define CFI_SIGNAL_FRAME cfi_ignore - -#endif #endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 86169a24b0d8..8391c115c0ec 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -10,6 +10,8 @@ #include <asm/mmu_context.h> #include <linux/build_bug.h> +extern unsigned long efi_fw_vendor, efi_config_table; + /* * We map the EFI regions needed for runtime services non-contiguously, * with preserved alignment on virtual addresses starting from -4G down @@ -34,8 +36,6 @@ static inline bool efi_have_uv1_memmap(void) #define EFI32_LOADER_SIGNATURE "EL32" #define EFI64_LOADER_SIGNATURE "EL64" -#define MAX_CMDLINE_ADDRESS UINT_MAX - #define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF /* @@ -178,9 +178,10 @@ extern void efi_free_boot_services(void); extern pgd_t * __init efi_uv1_memmap_phys_prolog(void); extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd); +/* kexec external ABI */ struct efi_setup_data { u64 fw_vendor; - u64 runtime; + u64 __unused; u64 tables; u64 smbios; u64 reserved[8]; @@ -219,7 +220,8 @@ extern void efi_thunk_runtime_setup(void); efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, unsigned long descriptor_size, u32 descriptor_version, - efi_memory_desc_t *virtual_map); + efi_memory_desc_t *virtual_map, + unsigned long systab_phys); /* arch specific definitions used by the stub code */ @@ -270,6 +272,11 @@ static inline void *efi64_zero_upper(void *p) return p; } +static inline u32 efi64_convert_status(efi_status_t status) +{ + return (u32)(status | (u64)status >> 32); +} + #define __efi64_argmap_free_pages(addr, size) \ ((addr), 0, (size)) @@ -285,11 +292,21 @@ static inline void *efi64_zero_upper(void *p) #define __efi64_argmap_locate_protocol(protocol, reg, interface) \ ((protocol), (reg), efi64_zero_upper(interface)) +#define __efi64_argmap_locate_device_path(protocol, path, handle) \ + ((protocol), (path), efi64_zero_upper(handle)) + +#define __efi64_argmap_exit(handle, status, size, data) \ + ((handle), efi64_convert_status(status), (size), (data)) + /* PCI I/O */ #define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \ ((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \ efi64_zero_upper(dev), efi64_zero_upper(func)) +/* LoadFile */ +#define __efi64_argmap_load_file(protocol, path, policy, bufsize, buf) \ + ((protocol), (path), (policy), efi64_zero_upper(bufsize), (buf)) + /* * The macros below handle the plumbing for the argument mapping. To add a * mapping for a specific EFI method, simply define a macro diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 13c83fe97988..f9c00110a69a 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -12,76 +12,103 @@ #include <asm/processor.h> #include <asm/smap.h> -#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ - asm volatile("\t" ASM_STAC "\n" \ - "1:\t" insn "\n" \ - "2:\t" ASM_CLAC "\n" \ +#define unsafe_atomic_op1(insn, oval, uaddr, oparg, label) \ +do { \ + int oldval = 0, ret; \ + asm volatile("1:\t" insn "\n" \ + "2:\n" \ "\t.section .fixup,\"ax\"\n" \ "3:\tmov\t%3, %1\n" \ "\tjmp\t2b\n" \ "\t.previous\n" \ _ASM_EXTABLE_UA(1b, 3b) \ : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ - : "i" (-EFAULT), "0" (oparg), "1" (0)) + : "i" (-EFAULT), "0" (oparg), "1" (0)); \ + if (ret) \ + goto label; \ + *oval = oldval; \ +} while(0) -#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ - asm volatile("\t" ASM_STAC "\n" \ - "1:\tmovl %2, %0\n" \ - "\tmovl\t%0, %3\n" \ + +#define unsafe_atomic_op2(insn, oval, uaddr, oparg, label) \ +do { \ + int oldval = 0, ret, tem; \ + asm volatile("1:\tmovl %2, %0\n" \ + "2:\tmovl\t%0, %3\n" \ "\t" insn "\n" \ - "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ - "\tjnz\t1b\n" \ - "3:\t" ASM_CLAC "\n" \ + "3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ + "\tjnz\t2b\n" \ + "4:\n" \ "\t.section .fixup,\"ax\"\n" \ - "4:\tmov\t%5, %1\n" \ - "\tjmp\t3b\n" \ + "5:\tmov\t%5, %1\n" \ + "\tjmp\t4b\n" \ "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 4b) \ - _ASM_EXTABLE_UA(2b, 4b) \ + _ASM_EXTABLE_UA(1b, 5b) \ + _ASM_EXTABLE_UA(3b, 5b) \ : "=&a" (oldval), "=&r" (ret), \ "+m" (*uaddr), "=&r" (tem) \ - : "r" (oparg), "i" (-EFAULT), "1" (0)) + : "r" (oparg), "i" (-EFAULT), "1" (0)); \ + if (ret) \ + goto label; \ + *oval = oldval; \ +} while(0) -static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, +static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { - int oldval = 0, ret, tem; - - pagefault_disable(); + if (!user_access_begin(uaddr, sizeof(u32))) + return -EFAULT; switch (op) { case FUTEX_OP_SET: - __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); + unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault); break; case FUTEX_OP_ADD: - __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, - uaddr, oparg); + unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval, + uaddr, oparg, Efault); break; case FUTEX_OP_OR: - __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); + unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault); break; case FUTEX_OP_ANDN: - __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); + unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault); break; case FUTEX_OP_XOR: - __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); + unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault); break; default: - ret = -ENOSYS; + user_access_end(); + return -ENOSYS; } - - pagefault_enable(); - - if (!ret) - *oval = oldval; - - return ret; + user_access_end(); + return 0; +Efault: + user_access_end(); + return -EFAULT; } static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { - return user_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval); + int ret = 0; + + if (!user_access_begin(uaddr, sizeof(u32))) + return -EFAULT; + asm volatile("\n" + "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" + "2:\n" + "\t.section .fixup, \"ax\"\n" + "3:\tmov %3, %0\n" + "\tjmp 2b\n" + "\t.previous\n" + _ASM_EXTABLE_UA(1b, 3b) + : "+r" (ret), "=a" (oldval), "+m" (*uaddr) + : "i" (-EFAULT), "r" (newval), "1" (oldval) + : "memory" + ); + user_access_end(); + *uval = oldval; + return ret; } #endif diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 92abc1e42bfc..29336574d0bc 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -376,6 +376,7 @@ struct hv_tsc_emulation_status { #define HVCALL_SEND_IPI_EX 0x0015 #define HVCALL_POST_MESSAGE 0x005c #define HVCALL_SIGNAL_EVENT 0x005d +#define HVCALL_RETARGET_INTERRUPT 0x007e #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 @@ -405,6 +406,8 @@ enum HV_GENERIC_SET_FORMAT { HV_GENERIC_SET_ALL, }; +#define HV_PARTITION_ID_SELF ((u64)-1) + #define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) #define HV_HYPERCALL_FAST_BIT BIT(16) #define HV_HYPERCALL_VARHEAD_OFFSET 17 @@ -909,4 +912,42 @@ struct hv_tlb_flush_ex { struct hv_partition_assist_pg { u32 tlb_lock_count; }; + +union hv_msi_entry { + u64 as_uint64; + struct { + u32 address; + u32 data; + } __packed; +}; + +struct hv_interrupt_entry { + u32 source; /* 1 for MSI(-X) */ + u32 reserved1; + union hv_msi_entry msi_entry; +} __packed; + +/* + * flags for hv_device_interrupt_target.flags + */ +#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1 +#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2 + +struct hv_device_interrupt_target { + u32 vector; + u32 flags; + union { + u64 vp_mask; + struct hv_vpset vp_set; + }; +} __packed; + +/* HvRetargetDeviceInterrupt hypercall */ +struct hv_retarget_device_interrupt { + u64 partition_id; /* use "self" */ + u64 device_id; + struct hv_interrupt_entry int_entry; + u64 reserved2; + struct hv_device_interrupt_target int_target; +} __packed __aligned(8); #endif diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 4981c293f926..8f1e94f29a16 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -35,6 +35,9 @@ * The #define line may optionally include a comment including platform names. */ +/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ +#define INTEL_FAM6_ANY X86_MODEL_ANY + #define INTEL_FAM6_CORE_YONAH 0x0E #define INTEL_FAM6_CORE2_MEROM 0x0F @@ -118,17 +121,7 @@ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ -/* Useful macros */ -#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \ -{ \ - .vendor = X86_VENDOR_INTEL, \ - .family = _family, \ - .model = _model, \ - .feature = X86_FEATURE_ANY, \ - .driver_data = (kernel_ulong_t)&_driver_data \ -} - -#define INTEL_CPU_FAM6(_model, _driver_data) \ - INTEL_CPU_FAM_ANY(6, INTEL_FAM6_##_model, _driver_data) +/* Family 5 */ +#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ #endif /* _ASM_X86_INTEL_FAMILY_H */ diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h index 02c6ef8f7667..07344d82e88e 100644 --- a/arch/x86/include/asm/io_bitmap.h +++ b/arch/x86/include/asm/io_bitmap.h @@ -19,7 +19,14 @@ struct task_struct; void io_bitmap_share(struct task_struct *tsk); void io_bitmap_exit(void); -void tss_update_io_bitmap(void); +void native_tss_update_io_bitmap(void); + +#ifdef CONFIG_PARAVIRT_XXL +#include <asm/paravirt.h> +#else +#define tss_update_io_bitmap native_tss_update_io_bitmap +#endif + #else static inline void io_bitmap_share(struct task_struct *tsk) { } static inline void io_bitmap_exit(void) { } diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index a176f6165d85..72fba0eeeb30 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -36,7 +36,7 @@ extern void native_init_IRQ(void); extern void handle_irq(struct irq_desc *desc, struct pt_regs *regs); -extern __visible unsigned int do_IRQ(struct pt_regs *regs); +extern __visible void do_IRQ(struct pt_regs *regs); extern void init_ISA_irqs(void); diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 95b1f053bd96..073eb7ad2f56 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -36,6 +36,7 @@ typedef u8 kprobe_opcode_t; /* optinsn template addresses */ extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_clac[]; extern __visible kprobe_opcode_t optprobe_template_val[]; extern __visible kprobe_opcode_t optprobe_template_call[]; extern __visible kprobe_opcode_t optprobe_template_end[]; diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h deleted file mode 100644 index 03946eb3e2b9..000000000000 --- a/arch/x86/include/asm/kvm_emulate.h +++ /dev/null @@ -1,464 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/****************************************************************************** - * x86_emulate.h - * - * Generic x86 (32-bit and 64-bit) instruction decoder and emulator. - * - * Copyright (c) 2005 Keir Fraser - * - * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 - */ - -#ifndef _ASM_X86_KVM_X86_EMULATE_H -#define _ASM_X86_KVM_X86_EMULATE_H - -#include <asm/desc_defs.h> - -struct x86_emulate_ctxt; -enum x86_intercept; -enum x86_intercept_stage; - -struct x86_exception { - u8 vector; - bool error_code_valid; - u16 error_code; - bool nested_page_fault; - u64 address; /* cr2 or nested page fault gpa */ - u8 async_page_fault; -}; - -/* - * This struct is used to carry enough information from the instruction - * decoder to main KVM so that a decision can be made whether the - * instruction needs to be intercepted or not. - */ -struct x86_instruction_info { - u8 intercept; /* which intercept */ - u8 rep_prefix; /* rep prefix? */ - u8 modrm_mod; /* mod part of modrm */ - u8 modrm_reg; /* index of register used */ - u8 modrm_rm; /* rm part of modrm */ - u64 src_val; /* value of source operand */ - u64 dst_val; /* value of destination operand */ - u8 src_bytes; /* size of source operand */ - u8 dst_bytes; /* size of destination operand */ - u8 ad_bytes; /* size of src/dst address */ - u64 next_rip; /* rip following the instruction */ -}; - -/* - * x86_emulate_ops: - * - * These operations represent the instruction emulator's interface to memory. - * There are two categories of operation: those that act on ordinary memory - * regions (*_std), and those that act on memory regions known to require - * special treatment or emulation (*_emulated). - * - * The emulator assumes that an instruction accesses only one 'emulated memory' - * location, that this location is the given linear faulting address (cr2), and - * that this is one of the instruction's data operands. Instruction fetches and - * stack operations are assumed never to access emulated memory. The emulator - * automatically deduces which operand of a string-move operation is accessing - * emulated memory, and assumes that the other operand accesses normal memory. - * - * NOTES: - * 1. The emulator isn't very smart about emulated vs. standard memory. - * 'Emulated memory' access addresses should be checked for sanity. - * 'Normal memory' accesses may fault, and the caller must arrange to - * detect and handle reentrancy into the emulator via recursive faults. - * Accesses may be unaligned and may cross page boundaries. - * 2. If the access fails (cannot emulate, or a standard access faults) then - * it is up to the memop to propagate the fault to the guest VM via - * some out-of-band mechanism, unknown to the emulator. The memop signals - * failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will - * then immediately bail. - * 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only - * cmpxchg8b_emulated need support 8-byte accesses. - * 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system. - */ -/* Access completed successfully: continue emulation as normal. */ -#define X86EMUL_CONTINUE 0 -/* Access is unhandleable: bail from emulation and return error to caller. */ -#define X86EMUL_UNHANDLEABLE 1 -/* Terminate emulation but return success to the caller. */ -#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ -#define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ -#define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ -#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ -#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */ - -struct x86_emulate_ops { - /* - * read_gpr: read a general purpose register (rax - r15) - * - * @reg: gpr number. - */ - ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg); - /* - * write_gpr: write a general purpose register (rax - r15) - * - * @reg: gpr number. - * @val: value to write. - */ - void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val); - /* - * read_std: Read bytes of standard (non-emulated/special) memory. - * Used for descriptor reading. - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. - * @system:[IN ] Whether the access is forced to be at CPL0. - */ - int (*read_std)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, - unsigned int bytes, - struct x86_exception *fault, bool system); - - /* - * read_phys: Read bytes of standard (non-emulated/special) memory. - * Used for descriptor reading. - * @addr: [IN ] Physical address from which to read. - * @val: [OUT] Value read from memory. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr, - void *val, unsigned int bytes); - - /* - * write_std: Write bytes of standard (non-emulated/special) memory. - * Used for descriptor writing. - * @addr: [IN ] Linear address to which to write. - * @val: [OUT] Value write to memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to write to memory. - * @system:[IN ] Whether the access is forced to be at CPL0. - */ - int (*write_std)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault, bool system); - /* - * fetch: Read bytes of standard (non-emulated/special) memory. - * Used for instruction fetch. - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*fetch)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault); - - /* - * read_emulated: Read bytes from emulated/special memory area. - * @addr: [IN ] Linear address from which to read. - * @val: [OUT] Value read from memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*read_emulated)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault); - - /* - * write_emulated: Write bytes to emulated/special memory area. - * @addr: [IN ] Linear address to which to write. - * @val: [IN ] Value to write to memory (low-order bytes used as - * required). - * @bytes: [IN ] Number of bytes to write to memory. - */ - int (*write_emulated)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, const void *val, - unsigned int bytes, - struct x86_exception *fault); - - /* - * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an - * emulated/special memory area. - * @addr: [IN ] Linear address to access. - * @old: [IN ] Value expected to be current at @addr. - * @new: [IN ] Value to write to @addr. - * @bytes: [IN ] Number of bytes to access using CMPXCHG. - */ - int (*cmpxchg_emulated)(struct x86_emulate_ctxt *ctxt, - unsigned long addr, - const void *old, - const void *new, - unsigned int bytes, - struct x86_exception *fault); - void (*invlpg)(struct x86_emulate_ctxt *ctxt, ulong addr); - - int (*pio_in_emulated)(struct x86_emulate_ctxt *ctxt, - int size, unsigned short port, void *val, - unsigned int count); - - int (*pio_out_emulated)(struct x86_emulate_ctxt *ctxt, - int size, unsigned short port, const void *val, - unsigned int count); - - bool (*get_segment)(struct x86_emulate_ctxt *ctxt, u16 *selector, - struct desc_struct *desc, u32 *base3, int seg); - void (*set_segment)(struct x86_emulate_ctxt *ctxt, u16 selector, - struct desc_struct *desc, u32 base3, int seg); - unsigned long (*get_cached_segment_base)(struct x86_emulate_ctxt *ctxt, - int seg); - void (*get_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - void (*get_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - void (*set_gdt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt); - ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr); - int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val); - int (*cpl)(struct x86_emulate_ctxt *ctxt); - int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); - int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); - u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt); - void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase); - int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); - int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); - int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc); - int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); - void (*halt)(struct x86_emulate_ctxt *ctxt); - void (*wbinvd)(struct x86_emulate_ctxt *ctxt); - int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); - int (*intercept)(struct x86_emulate_ctxt *ctxt, - struct x86_instruction_info *info, - enum x86_intercept_stage stage); - - bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, - u32 *ecx, u32 *edx, bool check_limit); - bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt); - bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt); - bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt); - - void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); - - unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); - void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags); - int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, - const char *smstate); - void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt); - int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr); -}; - -typedef u32 __attribute__((vector_size(16))) sse128_t; - -/* Type, address-of, and value of an instruction's operand. */ -struct operand { - enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; - unsigned int bytes; - unsigned int count; - union { - unsigned long orig_val; - u64 orig_val64; - }; - union { - unsigned long *reg; - struct segmented_address { - ulong ea; - unsigned seg; - } mem; - unsigned xmm; - unsigned mm; - } addr; - union { - unsigned long val; - u64 val64; - char valptr[sizeof(sse128_t)]; - sse128_t vec_val; - u64 mm_val; - void *data; - }; -}; - -struct fetch_cache { - u8 data[15]; - u8 *ptr; - u8 *end; -}; - -struct read_cache { - u8 data[1024]; - unsigned long pos; - unsigned long end; -}; - -/* Execution mode, passed to the emulator. */ -enum x86emul_mode { - X86EMUL_MODE_REAL, /* Real mode. */ - X86EMUL_MODE_VM86, /* Virtual 8086 mode. */ - X86EMUL_MODE_PROT16, /* 16-bit protected mode. */ - X86EMUL_MODE_PROT32, /* 32-bit protected mode. */ - X86EMUL_MODE_PROT64, /* 64-bit (long) mode. */ -}; - -/* These match some of the HF_* flags defined in kvm_host.h */ -#define X86EMUL_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ -#define X86EMUL_SMM_MASK (1 << 6) -#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7) - -struct x86_emulate_ctxt { - const struct x86_emulate_ops *ops; - - /* Register state before/after emulation. */ - unsigned long eflags; - unsigned long eip; /* eip before instruction emulation */ - /* Emulated execution mode, represented by an X86EMUL_MODE value. */ - enum x86emul_mode mode; - - /* interruptibility state, as a result of execution of STI or MOV SS */ - int interruptibility; - - bool perm_ok; /* do not check permissions if true */ - bool ud; /* inject an #UD if host doesn't support insn */ - bool tf; /* TF value before instruction (after for syscall/sysret) */ - - bool have_exception; - struct x86_exception exception; - - /* - * decode cache - */ - - /* current opcode length in bytes */ - u8 opcode_len; - u8 b; - u8 intercept; - u8 op_bytes; - u8 ad_bytes; - struct operand src; - struct operand src2; - struct operand dst; - int (*execute)(struct x86_emulate_ctxt *ctxt); - int (*check_perm)(struct x86_emulate_ctxt *ctxt); - /* - * The following six fields are cleared together, - * the rest are initialized unconditionally in x86_decode_insn - * or elsewhere - */ - bool rip_relative; - u8 rex_prefix; - u8 lock_prefix; - u8 rep_prefix; - /* bitmaps of registers in _regs[] that can be read */ - u32 regs_valid; - /* bitmaps of registers in _regs[] that have been written */ - u32 regs_dirty; - /* modrm */ - u8 modrm; - u8 modrm_mod; - u8 modrm_reg; - u8 modrm_rm; - u8 modrm_seg; - u8 seg_override; - u64 d; - unsigned long _eip; - struct operand memop; - /* Fields above regs are cleared together. */ - unsigned long _regs[NR_VCPU_REGS]; - struct operand *memopp; - struct fetch_cache fetch; - struct read_cache io_read; - struct read_cache mem_read; -}; - -/* Repeat String Operation Prefix */ -#define REPE_PREFIX 0xf3 -#define REPNE_PREFIX 0xf2 - -/* CPUID vendors */ -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 - -#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41 -#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574 -#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273 - -#define X86EMUL_CPUID_VENDOR_HygonGenuine_ebx 0x6f677948 -#define X86EMUL_CPUID_VENDOR_HygonGenuine_ecx 0x656e6975 -#define X86EMUL_CPUID_VENDOR_HygonGenuine_edx 0x6e65476e - -#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547 -#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e -#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69 - -enum x86_intercept_stage { - X86_ICTP_NONE = 0, /* Allow zero-init to not match anything */ - X86_ICPT_PRE_EXCEPT, - X86_ICPT_POST_EXCEPT, - X86_ICPT_POST_MEMACCESS, -}; - -enum x86_intercept { - x86_intercept_none, - x86_intercept_cr_read, - x86_intercept_cr_write, - x86_intercept_clts, - x86_intercept_lmsw, - x86_intercept_smsw, - x86_intercept_dr_read, - x86_intercept_dr_write, - x86_intercept_lidt, - x86_intercept_sidt, - x86_intercept_lgdt, - x86_intercept_sgdt, - x86_intercept_lldt, - x86_intercept_sldt, - x86_intercept_ltr, - x86_intercept_str, - x86_intercept_rdtsc, - x86_intercept_rdpmc, - x86_intercept_pushf, - x86_intercept_popf, - x86_intercept_cpuid, - x86_intercept_rsm, - x86_intercept_iret, - x86_intercept_intn, - x86_intercept_invd, - x86_intercept_pause, - x86_intercept_hlt, - x86_intercept_invlpg, - x86_intercept_invlpga, - x86_intercept_vmrun, - x86_intercept_vmload, - x86_intercept_vmsave, - x86_intercept_vmmcall, - x86_intercept_stgi, - x86_intercept_clgi, - x86_intercept_skinit, - x86_intercept_rdtscp, - x86_intercept_icebp, - x86_intercept_wbinvd, - x86_intercept_monitor, - x86_intercept_mwait, - x86_intercept_rdmsr, - x86_intercept_wrmsr, - x86_intercept_in, - x86_intercept_ins, - x86_intercept_out, - x86_intercept_outs, - x86_intercept_xsetbv, - - nr_x86_intercepts -}; - -/* Host execution mode. */ -#if defined(CONFIG_X86_32) -#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 -#elif defined(CONFIG_X86_64) -#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 -#endif - -int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); -bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt); -#define EMULATION_FAILED -1 -#define EMULATION_OK 0 -#define EMULATION_RESTART 1 -#define EMULATION_INTERCEPTED 2 -void init_decode_cache(struct x86_emulate_ctxt *ctxt); -int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); -int emulator_task_switch(struct x86_emulate_ctxt *ctxt, - u16 tss_selector, int idt_index, int reason, - bool has_error_code, u32 error_code); -int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); -void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt); -void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt); -bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt); - -#endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4dffbc10d3f8..42a2d0d3984a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -49,13 +49,16 @@ #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS +#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ + KVM_DIRTY_LOG_INITIALLY_SET) + /* x86-specific vcpu->requests bit members */ #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) -#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5) +#define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5) #define KVM_REQ_EVENT KVM_ARCH_REQ(6) #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) @@ -182,7 +185,10 @@ enum exit_fastpath_completion { EXIT_FASTPATH_SKIP_EMUL_INS, }; -#include <asm/kvm_emulate.h> +struct x86_emulate_ctxt; +struct x86_exception; +enum x86_intercept; +enum x86_intercept_stage; #define KVM_NR_MEM_OBJS 40 @@ -297,7 +303,6 @@ union kvm_mmu_extended_role { unsigned int cr4_pke:1; unsigned int cr4_smap:1; unsigned int cr4_smep:1; - unsigned int cr4_la57:1; unsigned int maxphyaddr:6; }; }; @@ -382,8 +387,7 @@ struct kvm_mmu_root_info { * current mmu mode. */ struct kvm_mmu { - void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); - unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); + unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, bool prefault); @@ -678,7 +682,7 @@ struct kvm_vcpu_arch { /* emulate context */ - struct x86_emulate_ctxt emulate_ctxt; + struct x86_emulate_ctxt *emulate_ctxt; bool emulate_regs_need_sync_to_vcpu; bool emulate_regs_need_sync_from_vcpu; int (*complete_userspace_io)(struct kvm_vcpu *vcpu); @@ -781,9 +785,19 @@ struct kvm_vcpu_arch { u64 msr_kvm_poll_control; /* - * Indicate whether the access faults on its page table in guest - * which is set when fix page fault and used to detect unhandeable - * instruction. + * Indicates the guest is trying to write a gfn that contains one or + * more of the PTEs used to translate the write itself, i.e. the access + * is changing its own translation in the guest page tables. KVM exits + * to userspace if emulation of the faulting instruction fails and this + * flag is set, as KVM cannot make forward progress. + * + * If emulation fails for a write to guest page tables, KVM unprotects + * (zaps) the shadow page for the target gfn and resumes the guest to + * retry the non-emulatable instruction (on hardware). Unprotecting the + * gfn doesn't allow forward progress for a self-changing access because + * doing so also zaps the translation for the gfn, i.e. retrying the + * instruction will hit a !PRESENT fault, which results in a new shadow + * page and sends KVM back to square one. */ bool write_fault_to_shadow_pgtable; @@ -798,10 +812,6 @@ struct kvm_vcpu_arch { int pending_ioapic_eoi; int pending_external_vector; - /* GPA available */ - bool gpa_available; - gpa_t gpa_val; - /* be preempted when it's in kernel-mode(cpl=0) */ bool preempted_in_kernel; @@ -880,6 +890,7 @@ enum kvm_irqchip_mode { #define APICV_INHIBIT_REASON_NESTED 2 #define APICV_INHIBIT_REASON_IRQWIN 3 #define APICV_INHIBIT_REASON_PIT_REINJ 4 +#define APICV_INHIBIT_REASON_X2APIC 5 struct kvm_arch { unsigned long n_used_mmu_pages; @@ -910,6 +921,7 @@ struct kvm_arch { atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map *apic_map; + bool apic_map_dirty; bool apic_access_page_done; unsigned long apicv_inhibit_reasons; @@ -1042,19 +1054,14 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) } struct kvm_x86_ops { - int (*cpu_has_kvm_support)(void); /* __init */ - int (*disabled_by_bios)(void); /* __init */ int (*hardware_enable)(void); void (*hardware_disable)(void); - int (*check_processor_compatibility)(void);/* __init */ - int (*hardware_setup)(void); /* __init */ - void (*hardware_unsetup)(void); /* __exit */ + void (*hardware_unsetup)(void); bool (*cpu_has_accelerated_tpr)(void); bool (*has_emulated_msr)(int index); void (*cpuid_update)(struct kvm_vcpu *vcpu); - struct kvm *(*vm_alloc)(void); - void (*vm_free)(struct kvm *); + unsigned int vm_size; int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); @@ -1080,7 +1087,6 @@ struct kvm_x86_ops { void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); - void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); @@ -1112,6 +1118,7 @@ struct kvm_x86_ops { int (*handle_exit)(struct kvm_vcpu *vcpu, enum exit_fastpath_completion exit_fastpath); int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*update_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, @@ -1136,19 +1143,14 @@ struct kvm_x86_ops { void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); - void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); int (*get_tdp_level)(struct kvm_vcpu *vcpu); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); - int (*get_lpage_level)(void); - bool (*rdtscp_supported)(void); - bool (*invpcid_supported)(void); - - void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); - void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); + void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long cr3); bool (*has_wbinvd_exit)(void); @@ -1160,16 +1162,12 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, - enum x86_intercept_stage stage); + enum x86_intercept_stage stage, + struct x86_exception *exception); void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu, enum exit_fastpath_completion *exit_fastpath); - bool (*mpx_supported)(void); - bool (*xsaves_supported)(void); - bool (*umip_emulated)(void); - bool (*pt_supported)(void); - bool (*pku_supported)(void); - int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + int (*check_nested_events)(struct kvm_vcpu *vcpu); void (*request_immediate_exit)(struct kvm_vcpu *vcpu); void (*sched_in)(struct kvm_vcpu *kvm, int cpu); @@ -1258,6 +1256,15 @@ struct kvm_x86_ops { int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); }; +struct kvm_x86_init_ops { + int (*cpu_has_kvm_support)(void); + int (*disabled_by_bios)(void); + int (*check_processor_compatibility)(void); + int (*hardware_setup)(void); + + struct kvm_x86_ops *runtime_ops; +}; + struct kvm_arch_async_pf { u32 token; gfn_t gfn; @@ -1265,25 +1272,24 @@ struct kvm_arch_async_pf { bool direct_map; }; -extern struct kvm_x86_ops *kvm_x86_ops; +extern u64 __read_mostly host_efer; + +extern struct kvm_x86_ops kvm_x86_ops; extern struct kmem_cache *x86_fpu_cache; #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { - return kvm_x86_ops->vm_alloc(); -} - -static inline void kvm_arch_free_vm(struct kvm *kvm) -{ - return kvm_x86_ops->vm_free(kvm); + return __vmalloc(kvm_x86_ops.vm_size, + GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL); } +void kvm_arch_free_vm(struct kvm *kvm); #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) { - if (kvm_x86_ops->tlb_remote_flush && - !kvm_x86_ops->tlb_remote_flush(kvm)) + if (kvm_x86_ops.tlb_remote_flush && + !kvm_x86_ops.tlb_remote_flush(kvm)) return 0; else return -ENOTSUPP; @@ -1302,7 +1308,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot); + struct kvm_memory_slot *memslot, + int start_level); void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *memslot); void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, @@ -1368,10 +1375,11 @@ extern u64 kvm_mce_cap_supported; * * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to * decode the instruction length. For use *only* by - * kvm_x86_ops->skip_emulated_instruction() implementations. + * kvm_x86_ops.skip_emulated_instruction() implementations. * - * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to - * retry native execution under certain conditions. + * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to + * retry native execution under certain conditions, + * Can only be set in conjunction with EMULTYPE_PF. * * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was * triggered by KVM's magic "force emulation" prefix, @@ -1384,13 +1392,18 @@ extern u64 kvm_mce_cap_supported; * backdoor emulation, which is opt in via module param. * VMware backoor emulation handles select instructions * and reinjects the #GP for all other cases. + * + * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which + * case the CR2/GPA value pass on the stack is valid. */ #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -#define EMULTYPE_ALLOW_RETRY (1 << 3) +#define EMULTYPE_ALLOW_RETRY_PF (1 << 3) #define EMULTYPE_TRAP_UD_FORCED (1 << 4) #define EMULTYPE_VMWARE_GP (1 << 5) +#define EMULTYPE_PF (1 << 6) + int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); @@ -1403,8 +1416,6 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); -struct x86_emulate_ctxt; - int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); @@ -1501,8 +1512,7 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush); -void kvm_enable_tdp(void); -void kvm_disable_tdp(void); +void kvm_configure_mmu(bool enable_tdp, int tdp_page_level); static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception) @@ -1659,14 +1669,14 @@ static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { - if (kvm_x86_ops->vcpu_blocking) - kvm_x86_ops->vcpu_blocking(vcpu); + if (kvm_x86_ops.vcpu_blocking) + kvm_x86_ops.vcpu_blocking(vcpu); } static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) { - if (kvm_x86_ops->vcpu_unblocking) - kvm_x86_ops->vcpu_unblocking(vcpu); + if (kvm_x86_ops.vcpu_unblocking) + kvm_x86_ops.vcpu_unblocking(vcpu); } static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index 172f9749dbb2..87bd6025d91d 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -49,8 +49,7 @@ struct kvm_page_track_notifier_node { void kvm_page_track_init(struct kvm *kvm); void kvm_page_track_cleanup(struct kvm *kvm); -void kvm_page_track_free_memslot(struct kvm_memory_slot *free, - struct kvm_memory_slot *dont); +void kvm_page_track_free_memslot(struct kvm_memory_slot *slot); int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4359b955e0b7..f9cea081c05b 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -102,7 +102,7 @@ #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ -#define MCE_LOG_LEN 32 +#define MCE_LOG_MIN_LEN 32U #define MCE_LOG_SIGNATURE "MACHINECHECK" /* AMD Scalable MCA */ @@ -135,11 +135,11 @@ */ struct mce_log_buffer { char signature[12]; /* "MACHINECHECK" */ - unsigned len; /* = MCE_LOG_LEN */ + unsigned len; /* = elements in .mce_entry[] */ unsigned next; unsigned flags; unsigned recordlen; /* length of struct mce */ - struct mce entry[MCE_LOG_LEN]; + struct mce entry[]; }; enum mce_notifier_prios { @@ -238,9 +238,6 @@ extern void mce_disable_bank(int bank); /* * Exception handler */ - -/* Call the installed machine check handler for this CPU setup. */ -extern void (*machine_check_vector)(struct pt_regs *, long error_code); void do_machine_check(struct pt_regs *, long); /* diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 6685e1218959..7063b5a43220 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -41,7 +41,7 @@ struct microcode_amd { unsigned int mpb[0]; }; -#define PATCH_MAX_SIZE PAGE_SIZE +#define PATCH_MAX_SIZE (3 * PAGE_SIZE) #ifdef CONFIG_MICROCODE_AMD extern void __init load_ucode_amd_bsp(unsigned int family); diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index b538d9ddee9c..4e55370e48e8 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -213,21 +213,6 @@ static inline void arch_unmap(struct mm_struct *mm, unsigned long start, * So do not enforce things if the VMA is not from the current * mm, or if we are in a kernel thread. */ -static inline bool vma_is_foreign(struct vm_area_struct *vma) -{ - if (!current->mm) - return true; - /* - * Should PKRU be enforced on the access to this VMA? If - * the VMA is from another process, then PKRU has no - * relevance and should not be enforced. - */ - if (current->mm != vma->vm_mm) - return true; - - return false; -} - static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index c215d2762488..e988bac0a4a1 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -13,64 +13,4 @@ struct mod_arch_specific { #endif }; -#ifdef CONFIG_X86_64 -/* X86_64 does not define MODULE_PROC_FAMILY */ -#elif defined CONFIG_M486SX -#define MODULE_PROC_FAMILY "486SX " -#elif defined CONFIG_M486 -#define MODULE_PROC_FAMILY "486 " -#elif defined CONFIG_M586 -#define MODULE_PROC_FAMILY "586 " -#elif defined CONFIG_M586TSC -#define MODULE_PROC_FAMILY "586TSC " -#elif defined CONFIG_M586MMX -#define MODULE_PROC_FAMILY "586MMX " -#elif defined CONFIG_MCORE2 -#define MODULE_PROC_FAMILY "CORE2 " -#elif defined CONFIG_MATOM -#define MODULE_PROC_FAMILY "ATOM " -#elif defined CONFIG_M686 -#define MODULE_PROC_FAMILY "686 " -#elif defined CONFIG_MPENTIUMII -#define MODULE_PROC_FAMILY "PENTIUMII " -#elif defined CONFIG_MPENTIUMIII -#define MODULE_PROC_FAMILY "PENTIUMIII " -#elif defined CONFIG_MPENTIUMM -#define MODULE_PROC_FAMILY "PENTIUMM " -#elif defined CONFIG_MPENTIUM4 -#define MODULE_PROC_FAMILY "PENTIUM4 " -#elif defined CONFIG_MK6 -#define MODULE_PROC_FAMILY "K6 " -#elif defined CONFIG_MK7 -#define MODULE_PROC_FAMILY "K7 " -#elif defined CONFIG_MK8 -#define MODULE_PROC_FAMILY "K8 " -#elif defined CONFIG_MELAN -#define MODULE_PROC_FAMILY "ELAN " -#elif defined CONFIG_MCRUSOE -#define MODULE_PROC_FAMILY "CRUSOE " -#elif defined CONFIG_MEFFICEON -#define MODULE_PROC_FAMILY "EFFICEON " -#elif defined CONFIG_MWINCHIPC6 -#define MODULE_PROC_FAMILY "WINCHIPC6 " -#elif defined CONFIG_MWINCHIP3D -#define MODULE_PROC_FAMILY "WINCHIP3D " -#elif defined CONFIG_MCYRIXIII -#define MODULE_PROC_FAMILY "CYRIXIII " -#elif defined CONFIG_MVIAC3_2 -#define MODULE_PROC_FAMILY "VIAC3-2 " -#elif defined CONFIG_MVIAC7 -#define MODULE_PROC_FAMILY "VIAC7 " -#elif defined CONFIG_MGEODEGX1 -#define MODULE_PROC_FAMILY "GEODEGX1 " -#elif defined CONFIG_MGEODE_LX -#define MODULE_PROC_FAMILY "GEODE " -#else -#error unknown processor family -#endif - -#ifdef CONFIG_X86_32 -# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY -#endif - #endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 6b79515abb82..d30805ed323e 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/nmi.h> +#include <linux/msi.h> #include <asm/io.h> #include <asm/hyperv-tlfs.h> #include <asm/nospec-branch.h> @@ -34,6 +35,8 @@ typedef int (*hyperv_fill_flush_list_func)( rdmsrl(HV_X64_MSR_SINT0 + int_num, val) #define hv_set_synint_state(int_num, val) \ wrmsrl(HV_X64_MSR_SINT0 + int_num, val) +#define hv_recommend_using_aeoi() \ + (!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)) #define hv_get_crash_ctl(val) \ rdmsrl(HV_X64_MSR_CRASH_CTL, val) @@ -46,7 +49,9 @@ typedef int (*hyperv_fill_flush_list_func)( #define hv_set_reference_tsc(val) \ wrmsrl(HV_X64_MSR_REFERENCE_TSC, val) #define hv_set_clocksource_vdso(val) \ - ((val).archdata.vclock_mode = VCLOCK_HVCLOCK) + ((val).vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK) +#define hv_enable_vdso_clocksource() \ + vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); #define hv_get_raw_timer() rdtsc_ordered() void hyperv_callback_vector(void); @@ -240,6 +245,13 @@ bool hv_vcpu_is_preempted(int vcpu); static inline void hv_apic_init(void) {} #endif +static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, + struct msi_desc *msi_desc) +{ + msi_entry->address = msi_desc->msg.address_lo; + msi_entry->data = msi_desc->msg.data; +} + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ebe1685e92dd..12c9684d59ba 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,6 +41,10 @@ /* Intel MSRs. Some also available on other CPUs */ +#define MSR_TEST_CTRL 0x00000033 +#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29 +#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT) + #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ @@ -70,6 +74,11 @@ */ #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) +/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ +#define MSR_IA32_CORE_CAPS 0x000000cf +#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5 +#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT) + #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) @@ -512,6 +521,8 @@ #define MSR_K7_HWCR 0xc0010015 #define MSR_K7_HWCR_SMMLOCK_BIT 0 #define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) +#define MSR_K7_HWCR_IRPERF_EN_BIT 30 +#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 9d5252c9685c..b809f117f3f4 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -23,6 +23,8 @@ #define MWAITX_MAX_LOOPS ((u32)-1) #define MWAITX_DISABLE_CSTATES 0xf0 +u32 get_umwait_control_msr(void); + static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 07e95dcb40ad..7e9a281e2660 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -237,27 +237,6 @@ enum ssb_mitigation { extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; -/* - * On VMEXIT we must ensure that no RSB predictions learned in the guest - * can be followed in the host, by overwriting the RSB completely. Both - * retpoline and IBRS mitigations for Spectre v2 need this; only on future - * CPUs with IBRS_ALL *might* it be avoided. - */ -static inline void vmexit_fill_RSB(void) -{ -#ifdef CONFIG_RETPOLINE - unsigned long loops; - - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE("jmp 910f", - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), - X86_FEATURE_RETPOLINE) - "910:" - : "=r" (loops), ASM_CALL_CONSTRAINT - : : "memory" ); -#endif -} - static __always_inline void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) { diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c85e15010f48..a506a411474d 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -35,9 +35,7 @@ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ CONFIG_PHYSICAL_ALIGN) @@ -73,9 +71,6 @@ static inline phys_addr_t get_max_mapped(void) bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - extern void initmem_init(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 86e7317eb31f..694d8daf4983 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -295,6 +295,13 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); } +#ifdef CONFIG_X86_IOPL_IOPERM +static inline void tss_update_io_bitmap(void) +{ + PVOP_VCALL0(cpu.update_io_bitmap); +} +#endif + static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 84812964d3dd..732f62e04ddb 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -140,6 +140,10 @@ struct pv_cpu_ops { void (*load_sp0)(unsigned long sp0); +#ifdef CONFIG_X86_IOPL_IOPERM + void (*update_io_bitmap)(void); +#endif + void (*wbinvd)(void); /* cpuid emulation, mostly so that caps bits can be disabled */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 29964b0e1075..e855e9cf2c37 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -50,11 +50,22 @@ #define AMD64_L3_SLICE_SHIFT 48 #define AMD64_L3_SLICE_MASK \ - ((0xFULL) << AMD64_L3_SLICE_SHIFT) + (0xFULL << AMD64_L3_SLICE_SHIFT) +#define AMD64_L3_SLICEID_MASK \ + (0x7ULL << AMD64_L3_SLICE_SHIFT) #define AMD64_L3_THREAD_SHIFT 56 #define AMD64_L3_THREAD_MASK \ - ((0xFFULL) << AMD64_L3_THREAD_SHIFT) + (0xFFULL << AMD64_L3_THREAD_SHIFT) +#define AMD64_L3_F19H_THREAD_MASK \ + (0x3ULL << AMD64_L3_THREAD_SHIFT) + +#define AMD64_L3_EN_ALL_CORES BIT_ULL(47) +#define AMD64_L3_EN_ALL_SLICES BIT_ULL(46) + +#define AMD64_L3_COREID_SHIFT 42 +#define AMD64_L3_COREID_MASK \ + (0x7ULL << AMD64_L3_COREID_SHIFT) #define X86_RAW_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_EVENT | \ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 7e118660bbd9..4d02e64af1b3 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -25,6 +25,7 @@ #include <asm/x86_init.h> #include <asm/fpu/xstate.h> #include <asm/fpu/api.h> +#include <asm-generic/pgtable_uffd.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); @@ -313,6 +314,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) return native_make_pte(v & ~clear); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_UFFD_WP; +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_UFFD_WP); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline pte_t pte_mkclean(pte_t pte) { return pte_clear_flags(pte, _PAGE_DIRTY); @@ -392,6 +410,23 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) return native_make_pmd(v & ~clear); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pmd_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_UFFD_WP; +} + +static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_UFFD_WP); +} + +static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline pmd_t pmd_mkold(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_ACCESSED); @@ -595,12 +630,6 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd) __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); } -static inline pud_t pud_mknotpresent(pud_t pud) -{ - return pfn_pud(pud_pfn(pud), - __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); -} - static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) @@ -627,12 +656,15 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return __pmd(val); } -/* mprotect needs to preserve PAT bits when updating vm_page_prot */ +/* + * mprotect needs to preserve PAT and encryption bits when updating + * vm_page_prot + */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; - pgprotval_t addbits = pgprot_val(newprot); + pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; return __pgprot(preservebits | addbits); } @@ -828,7 +860,10 @@ static inline unsigned long pmd_index(unsigned long address) * * this function returns the index of the entry in the pte page which would * control the given virtual address + * + * Also define macro so we can test if pte_index is defined for arch. */ +#define pte_index pte_index static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); @@ -1046,6 +1081,9 @@ static inline void __meminit init_trampoline_default(void) void __init poking_init(void); +unsigned long init_memory_mapping(unsigned long start, + unsigned long end, pgprot_t prot); + # ifdef CONFIG_RANDOMIZE_MEMORY void __meminit init_trampoline(void); # else @@ -1377,6 +1415,38 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #endif #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_UFFD_WP); +} + +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_UFFD_WP; +} + +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); +} + +static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP); +} + +static inline int pmd_swp_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP; +} + +static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + #define PKRU_AD_BIT 0x1 #define PKRU_WD_BIT 0x2 #define PKRU_BITS_PER_PKEY 2 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 0b6c4042942a..df1373415f11 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -189,7 +189,7 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names - * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry + * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|F|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -197,9 +197,15 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); * erratum where they can be incorrectly set by hardware on * non-present PTEs. * + * SD Bits 1-4 are not used in non-present format and available for + * special use described below: + * * SD (1) in swp entry is used to store soft dirty bit, which helps us * remember soft dirty over page migration * + * F (2) in swp entry is used to record when a pagetable is + * writeprotected by userfaultfd WP support. + * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. * diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 0239998d8cdc..b6606fe6cfdf 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -32,6 +32,7 @@ #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 +#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 @@ -100,6 +101,14 @@ #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define _PAGE_UFFD_WP (_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP) +#define _PAGE_SWP_UFFD_WP _PAGE_USER +#else +#define _PAGE_UFFD_WP (_AT(pteval_t, 0)) +#define _PAGE_SWP_UFFD_WP (_AT(pteval_t, 0)) +#endif + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) @@ -118,7 +127,8 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \ + _PAGE_UFFD_WP) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 19b137f1b3be..2ff9b98812b7 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -4,6 +4,11 @@ #define ARCH_DEFAULT_PKEY 0 +/* + * If more than 16 keys are ever supported, a thorough audit + * will be necessary to ensure that the types that store key + * numbers and masks have sufficient capacity. + */ #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 3d4cb83a8828..69485ca13665 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -103,14 +103,14 @@ static __always_inline bool should_resched(int preempt_offset) } #ifdef CONFIG_PREEMPTION - extern asmlinkage void ___preempt_schedule(void); + extern asmlinkage void preempt_schedule_thunk(void); # define __preempt_schedule() \ - asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT) + asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT) extern asmlinkage void preempt_schedule(void); - extern asmlinkage void ___preempt_schedule_notrace(void); + extern asmlinkage void preempt_schedule_notrace_thunk(void); # define __preempt_schedule_notrace() \ - asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT) + asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT) extern asmlinkage void preempt_schedule_notrace(void); #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 09705ccc393c..3bcf27caf6c9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -26,6 +26,7 @@ struct vm86; #include <asm/fpu/types.h> #include <asm/unwind_hints.h> #include <asm/vmxfeatures.h> +#include <asm/vdso/processor.h> #include <linux/personality.h> #include <linux/cache.h> @@ -541,7 +542,6 @@ struct thread_struct { mm_segment_t addr_limit; unsigned int sig_on_uaccess_err:1; - unsigned int uaccess_err:1; /* uaccess failed */ /* Floating point and extended processor state */ struct fpu fpu; @@ -677,17 +677,6 @@ static inline unsigned int cpuid_edx(unsigned int op) return edx; } -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static __always_inline void rep_nop(void) -{ - asm volatile("rep; nop" ::: "memory"); -} - -static __always_inline void cpu_relax(void) -{ - rep_nop(); -} - /* * This function forces the icache and prefetched instruction stream to * catch up with reality in two very specific cases: diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 036c360910c5..a6e8373a5170 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_SECTIONS_H #define _ASM_X86_SECTIONS_H +#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed + #include <asm-generic/sections.h> #include <asm/extable.h> @@ -14,4 +16,22 @@ extern char __end_rodata_hpage_align[]; extern char __end_of_kernel_reserve[]; +extern unsigned long _brk_start, _brk_end; + +static inline bool arch_is_kernel_initmem_freed(unsigned long addr) +{ + /* + * If _brk_start has not been cleared, brk allocation is incomplete, + * and we can not make assumptions about its use. + */ + if (_brk_start) + return 0; + + /* + * After brk allocation is complete, space between _brk_end and _end + * is available for allocation. + */ + return addr >= _brk_end && addr < (unsigned long)&_end; +} + #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 64c3dce374e5..ec2c0a094b5d 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -34,6 +34,7 @@ * The caller is required to take care of these. */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot); int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wt(unsigned long addr, int numpages); @@ -46,6 +47,8 @@ int set_memory_4k(unsigned long addr, int numpages); int set_memory_encrypted(unsigned long addr, int numpages); int set_memory_decrypted(unsigned long addr, int numpages); int set_memory_np_noalias(unsigned long addr, int numpages); +int set_memory_nonglobal(unsigned long addr, int numpages); +int set_memory_global(unsigned long addr, int numpages); int set_pages_array_uc(struct page **pages, int addrinarray); int set_pages_array_wc(struct page **pages, int addrinarray); diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index f176114c04d4..84eab2724875 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -33,11 +33,7 @@ struct sigframe_ia32 { * legacy application accessing/modifying it. */ struct _fpstate_32 fpstate_unused; -#ifdef CONFIG_IA32_EMULATION - unsigned int extramask[_COMPAT_NSIG_WORDS-1]; -#else /* !CONFIG_IA32_EMULATION */ - unsigned long extramask[_NSIG_WORDS-1]; -#endif /* CONFIG_IA32_EMULATION */ + unsigned int extramask[1]; char retcode[8]; /* fp state follows here */ }; diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index 2fcbd6f33ef7..65e667279e0f 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -14,12 +14,5 @@ X86_EFLAGS_CF | X86_EFLAGS_RF) void signal_fault(struct pt_regs *regs, void __user *frame, char *where); -int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, - struct pt_regs *regs, unsigned long mask); - - -#ifdef CONFIG_X86_X32_ABI -asmlinkage long sys32_x32_rt_sigreturn(void); -#endif #endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 8db3fdb6102e..7cbf733d11af 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -13,23 +13,14 @@ #include <uapi/linux/audit.h> #include <linux/sched.h> #include <linux/err.h> -#include <asm/asm-offsets.h> /* For NR_syscalls */ #include <asm/thread_info.h> /* for TS_COMPAT */ #include <asm/unistd.h> -#ifdef CONFIG_X86_64 -typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *); -#else -typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, - unsigned long, unsigned long, - unsigned long, unsigned long); -#endif /* CONFIG_X86_64 */ +typedef long (*sys_call_ptr_t)(const struct pt_regs *); extern const sys_call_ptr_t sys_call_table[]; #if defined(CONFIG_X86_32) #define ia32_sys_call_table sys_call_table -#define __NR_syscall_compat_max __NR_syscall_max -#define IA32_NR_syscalls NR_syscalls #endif #if defined(CONFIG_IA32_EMULATION) @@ -168,6 +159,11 @@ static inline int syscall_get_arch(struct task_struct *task) task->thread_info.status & TS_COMPAT) ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } + +void do_syscall_64(unsigned long nr, struct pt_regs *regs); +void do_int80_syscall_32(struct pt_regs *regs); +long do_fast_syscall_32(struct pt_regs *regs); + #endif /* CONFIG_X86_32 */ #endif /* _ASM_X86_SYSCALL_H */ diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index e2389ce9bf58..a84333adeef2 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -8,6 +8,50 @@ struct pt_regs; +extern long __x64_sys_ni_syscall(const struct pt_regs *regs); +extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); + +/* + * Instead of the generic __SYSCALL_DEFINEx() definition, the x86 version takes + * struct pt_regs *regs as the only argument of the syscall stub(s) named as: + * __x64_sys_*() - 64-bit native syscall + * __ia32_sys_*() - 32-bit native syscall or common compat syscall + * __ia32_compat_sys_*() - 32-bit compat syscall + * __x32_compat_sys_*() - 64-bit X32 compat syscall + * + * The registers are decoded according to the ABI: + * 64-bit: RDI, RSI, RDX, R10, R8, R9 + * 32-bit: EBX, ECX, EDX, ESI, EDI, EBP + * + * The stub then passes the decoded arguments to the __se_sys_*() wrapper to + * perform sign-extension (omitted for zero-argument syscalls). Finally the + * arguments are passed to the __do_sys_*() function which is the actual + * syscall. These wrappers are marked as inline so the compiler can optimize + * the functions where appropriate. + * + * Example assembly (slightly re-ordered for better readability): + * + * <__x64_sys_recv>: <-- syscall with 4 parameters + * callq <__fentry__> + * + * mov 0x70(%rdi),%rdi <-- decode regs->di + * mov 0x68(%rdi),%rsi <-- decode regs->si + * mov 0x60(%rdi),%rdx <-- decode regs->dx + * mov 0x38(%rdi),%rcx <-- decode regs->r10 + * + * xor %r9d,%r9d <-- clear %r9 + * xor %r8d,%r8d <-- clear %r8 + * + * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom() + * which takes 6 arguments + * + * cltq <-- extend return value to 64-bit + * retq <-- return + * + * This approach avoids leaking random user-provided register content down + * the call chain. + */ + /* Mapping of registers to parameters for syscalls on x86-64 and x32 */ #define SC_X86_64_REGS_TO_ARGS(x, ...) \ __MAP(x,__SC_ARGS \ @@ -21,68 +65,96 @@ struct pt_regs; ,,(unsigned int)regs->dx,,(unsigned int)regs->si \ ,,(unsigned int)regs->di,,(unsigned int)regs->bp) -#ifdef CONFIG_IA32_EMULATION -/* - * For IA32 emulation, we need to handle "compat" syscalls *and* create - * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the - * ia32 regs in the proper order for shared or "common" syscalls. As some - * syscalls may not be implemented, we need to expand COND_SYSCALL in - * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this - * case as well. - */ -#define __IA32_COMPAT_SYS_STUB0(x, name) \ - asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs);\ - ALLOW_ERROR_INJECTION(__ia32_compat_sys_##name, ERRNO); \ - asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs)\ +#define __SYS_STUB0(abi, name) \ + long __##abi##_##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \ + long __##abi##_##name(const struct pt_regs *regs) \ + __alias(__do_##name); + +#define __SYS_STUBx(abi, name, ...) \ + long __##abi##_##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \ + long __##abi##_##name(const struct pt_regs *regs) \ { \ - return __se_compat_sys_##name(); \ + return __se_##name(__VA_ARGS__); \ } -#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \ - asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\ - ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \ - asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\ +#define __COND_SYSCALL(abi, name) \ + __weak long __##abi##_##name(const struct pt_regs *__unused) \ { \ - return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ + return sys_ni_syscall(); \ } +#define __SYS_NI(abi, name) \ + SYSCALL_ALIAS(__##abi##_##name, sys_ni_posix_timers); + +#ifdef CONFIG_X86_64 +#define __X64_SYS_STUB0(name) \ + __SYS_STUB0(x64, sys_##name) + +#define __X64_SYS_STUBx(x, name, ...) \ + __SYS_STUBx(x64, sys##name, \ + SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __X64_COND_SYSCALL(name) \ + __COND_SYSCALL(x64, sys_##name) + +#define __X64_SYS_NI(name) \ + __SYS_NI(x64, sys_##name) +#else /* CONFIG_X86_64 */ +#define __X64_SYS_STUB0(name) +#define __X64_SYS_STUBx(x, name, ...) +#define __X64_COND_SYSCALL(name) +#define __X64_SYS_NI(name) +#endif /* CONFIG_X86_64 */ + +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +#define __IA32_SYS_STUB0(name) \ + __SYS_STUB0(ia32, sys_##name) + #define __IA32_SYS_STUBx(x, name, ...) \ - asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \ - ALLOW_ERROR_INJECTION(__ia32_sys##name, ERRNO); \ - asmlinkage long __ia32_sys##name(const struct pt_regs *regs) \ - { \ - return __se_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ - } + __SYS_STUBx(ia32, sys##name, \ + SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __IA32_COND_SYSCALL(name) \ + __COND_SYSCALL(ia32, sys_##name) +#define __IA32_SYS_NI(name) \ + __SYS_NI(ia32, sys_##name) +#else /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ +#define __IA32_SYS_STUB0(name) +#define __IA32_SYS_STUBx(x, name, ...) +#define __IA32_COND_SYSCALL(name) +#define __IA32_SYS_NI(name) +#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ + +#ifdef CONFIG_IA32_EMULATION /* - * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias - * named __ia32_sys_*() + * For IA32 emulation, we need to handle "compat" syscalls *and* create + * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the + * ia32 regs in the proper order for shared or "common" syscalls. As some + * syscalls may not be implemented, we need to expand COND_SYSCALL in + * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this + * case as well. */ +#define __IA32_COMPAT_SYS_STUB0(name) \ + __SYS_STUB0(ia32, compat_sys_##name) -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ - asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) +#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \ + __SYS_STUBx(ia32, compat_sys##name, \ + SC_IA32_REGS_TO_ARGS(x, __VA_ARGS__)) -#define COND_SYSCALL(name) \ - asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \ - { \ - return sys_ni_syscall(); \ - } \ - asmlinkage __weak long __ia32_sys_##name(const struct pt_regs *__unused)\ - { \ - return sys_ni_syscall(); \ - } +#define __IA32_COMPAT_COND_SYSCALL(name) \ + __COND_SYSCALL(ia32, compat_sys_##name) -#define SYS_NI(name) \ - SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \ - SYSCALL_ALIAS(__ia32_sys_##name, sys_ni_posix_timers) +#define __IA32_COMPAT_SYS_NI(name) \ + __SYS_NI(ia32, compat_sys_##name) #else /* CONFIG_IA32_EMULATION */ +#define __IA32_COMPAT_SYS_STUB0(name) #define __IA32_COMPAT_SYS_STUBx(x, name, ...) -#define __IA32_SYS_STUBx(x, fullname, name, ...) +#define __IA32_COMPAT_COND_SYSCALL(name) +#define __IA32_COMPAT_SYS_NI(name) #endif /* CONFIG_IA32_EMULATION */ @@ -92,25 +164,23 @@ struct pt_regs; * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common * with x86_64 obviously do not need such care. */ -#define __X32_COMPAT_SYS_STUB0(x, name, ...) \ - asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs);\ - ALLOW_ERROR_INJECTION(__x32_compat_sys_##name, ERRNO); \ - asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs)\ - { \ - return __se_compat_sys_##name();\ - } +#define __X32_COMPAT_SYS_STUB0(name) \ + __SYS_STUB0(x32, compat_sys_##name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) \ - asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\ - ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \ - asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\ - { \ - return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ - } + __SYS_STUBx(x32, compat_sys##name, \ + SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__)) + +#define __X32_COMPAT_COND_SYSCALL(name) \ + __COND_SYSCALL(x32, compat_sys_##name) +#define __X32_COMPAT_SYS_NI(name) \ + __SYS_NI(x32, compat_sys_##name) #else /* CONFIG_X86_X32 */ -#define __X32_COMPAT_SYS_STUB0(x, name) +#define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) +#define __X32_COMPAT_COND_SYSCALL(name) +#define __X32_COMPAT_SYS_NI(name) #endif /* CONFIG_X86_X32 */ @@ -121,15 +191,12 @@ struct pt_regs; * of them. */ #define COMPAT_SYSCALL_DEFINE0(name) \ - static long __se_compat_sys_##name(void); \ - static inline long __do_compat_sys_##name(void); \ - __IA32_COMPAT_SYS_STUB0(x, name) \ - __X32_COMPAT_SYS_STUB0(x, name) \ - static long __se_compat_sys_##name(void) \ - { \ - return __do_compat_sys_##name(); \ - } \ - static inline long __do_compat_sys_##name(void) + static long \ + __do_compat_sys_##name(const struct pt_regs *__unused); \ + __IA32_COMPAT_SYS_STUB0(name) \ + __X32_COMPAT_SYS_STUB0(name) \ + static long \ + __do_compat_sys_##name(const struct pt_regs *__unused) #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ @@ -148,58 +215,19 @@ struct pt_regs; * kernel/time/posix-stubs.c to cover this case as well. */ #define COND_SYSCALL_COMPAT(name) \ - cond_syscall(__ia32_compat_sys_##name); \ - cond_syscall(__x32_compat_sys_##name) + __IA32_COMPAT_COND_SYSCALL(name) \ + __X32_COMPAT_COND_SYSCALL(name) #define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__ia32_compat_sys_##name, sys_ni_posix_timers); \ - SYSCALL_ALIAS(__x32_compat_sys_##name, sys_ni_posix_timers) + __IA32_COMPAT_SYS_NI(name) \ + __X32_COMPAT_SYS_NI(name) #endif /* CONFIG_COMPAT */ - -/* - * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes - * struct pt_regs *regs as the only argument of the syscall stub named - * __x64_sys_*(). It decodes just the registers it needs and passes them on to - * the __se_sys_*() wrapper performing sign extension and then to the - * __do_sys_*() function doing the actual job. These wrappers and functions - * are inlined (at least in very most cases), meaning that the assembly looks - * as follows (slightly re-ordered for better readability): - * - * <__x64_sys_recv>: <-- syscall with 4 parameters - * callq <__fentry__> - * - * mov 0x70(%rdi),%rdi <-- decode regs->di - * mov 0x68(%rdi),%rsi <-- decode regs->si - * mov 0x60(%rdi),%rdx <-- decode regs->dx - * mov 0x38(%rdi),%rcx <-- decode regs->r10 - * - * xor %r9d,%r9d <-- clear %r9 - * xor %r8d,%r8d <-- clear %r8 - * - * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom() - * which takes 6 arguments - * - * cltq <-- extend return value to 64-bit - * retq <-- return - * - * This approach avoids leaking random user-provided register content down - * the call chain. - * - * If IA32_EMULATION is enabled, this macro generates an additional wrapper - * named __ia32_sys_*() which decodes the struct pt_regs *regs according - * to the i386 calling convention (bx, cx, dx, si, di, bp). - */ #define __SYSCALL_DEFINEx(x, name, ...) \ - asmlinkage long __x64_sys##name(const struct pt_regs *regs); \ - ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO); \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ - asmlinkage long __x64_sys##name(const struct pt_regs *regs) \ - { \ - return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + __X64_SYS_STUBx(x, name, __VA_ARGS__) \ __IA32_SYS_STUBx(x, name, __VA_ARGS__) \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ @@ -217,33 +245,28 @@ struct pt_regs; * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI() * macros to work correctly. */ -#ifndef SYSCALL_DEFINE0 #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) -#endif - -#ifndef COND_SYSCALL -#define COND_SYSCALL(name) \ - asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \ - { \ - return sys_ni_syscall(); \ - } -#endif + static long __do_sys_##sname(const struct pt_regs *__unused); \ + __X64_SYS_STUB0(sname) \ + __IA32_SYS_STUB0(sname) \ + static long __do_sys_##sname(const struct pt_regs *__unused) + +#define COND_SYSCALL(name) \ + __X64_COND_SYSCALL(name) \ + __IA32_COND_SYSCALL(name) -#ifndef SYS_NI -#define SYS_NI(name) SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); -#endif +#define SYS_NI(name) \ + __X64_SYS_NI(name) \ + __IA32_SYS_NI(name) /* * For VSYSCALLS, we need to declare these three syscalls with the new * pt_regs-based calling convention for in-kernel use. */ -asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs); -asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs); -asmlinkage long __x64_sys_time(const struct pt_regs *regs); +long __x64_sys_getcpu(const struct pt_regs *regs); +long __x64_sys_gettimeofday(const struct pt_regs *regs); +long __x64_sys_time(const struct pt_regs *regs); #endif /* _ASM_X86_SYSCALL_WRAPPER_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 91b7b6e1a115..6714a358235d 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -8,42 +8,8 @@ #ifndef _ASM_X86_SYSCALLS_H #define _ASM_X86_SYSCALLS_H -#include <linux/compiler.h> -#include <linux/linkage.h> -#include <linux/signal.h> -#include <linux/types.h> - /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); -#ifdef CONFIG_X86_32 -/* - * These definitions are only valid on pure 32-bit systems; x86-64 uses a - * different syscall calling convention - */ -asmlinkage long sys_ioperm(unsigned long, unsigned long, int); -asmlinkage long sys_iopl(unsigned int); - -/* kernel/ldt.c */ -asmlinkage long sys_modify_ldt(int, void __user *, unsigned long); - -/* kernel/signal.c */ -asmlinkage long sys_rt_sigreturn(void); - -/* kernel/tls.c */ -asmlinkage long sys_set_thread_area(struct user_desc __user *); -asmlinkage long sys_get_thread_area(struct user_desc __user *); - -/* X86_32 only */ - -/* kernel/signal.c */ -asmlinkage long sys_sigreturn(void); - -/* kernel/vm86_32.c */ -struct vm86_struct; -asmlinkage long sys_vm86old(struct vm86_struct __user *); -asmlinkage long sys_vm86(unsigned long, unsigned long); - -#endif /* CONFIG_X86_32 */ #endif /* _ASM_X86_SYSCALLS_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index cf4327986e98..8de8ceccb8bc 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -92,7 +92,7 @@ struct thread_info { #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ -#define TIF_NOHZ 19 /* in adaptive nohz mode */ +#define TIF_SLD 18 /* Restore split lock detection on context switch */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -122,7 +122,7 @@ struct thread_info { #define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) -#define _TIF_NOHZ (1 << TIF_NOHZ) +#define _TIF_SLD (1 << TIF_SLD) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) @@ -133,19 +133,15 @@ struct thread_info { #define _TIF_X32 (1 << TIF_X32) #define _TIF_FSCHECK (1 << TIF_FSCHECK) -/* - * work to do in syscall_trace_enter(). Also includes TIF_NOHZ for - * enter_from_user_mode() - */ +/* Work to do before invoking the actual syscall. */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ - _TIF_NOHZ) + _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW_BASE \ (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \ - _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) + _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD) /* * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4b14d2318251..79d8d5496330 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -193,4 +193,29 @@ static inline void sched_clear_itmt_support(void) } #endif /* CONFIG_SCHED_MC_PRIO */ +#ifdef CONFIG_SMP +#include <asm/cpufeature.h> + +DECLARE_STATIC_KEY_FALSE(arch_scale_freq_key); + +#define arch_scale_freq_invariant() static_branch_likely(&arch_scale_freq_key) + +DECLARE_PER_CPU(unsigned long, arch_freq_scale); + +static inline long arch_scale_freq_capacity(int cpu) +{ + return per_cpu(arch_freq_scale, cpu); +} +#define arch_scale_freq_capacity arch_scale_freq_capacity + +extern void arch_scale_freq_tick(void); +#define arch_scale_freq_tick arch_scale_freq_tick + +extern void arch_set_max_freq_ratio(bool turbo_disabled); +#else +static inline void arch_set_max_freq_ratio(bool turbo_disabled) +{ +} +#endif + #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index ffa0dc8a535e..c26a7e1d8a2c 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -76,27 +76,24 @@ dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long err dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code); -#ifdef CONFIG_X86_64 -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long address); -asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); -asmlinkage __visible notrace -struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); -void __init trap_init(void); -#endif dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code); dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code); -#ifdef CONFIG_X86_MCE -dotraplinkage void do_machine_check(struct pt_regs *regs, long error_code); -#endif dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code); #endif dotraplinkage void do_mce(struct pt_regs *regs, long error_code); +#ifdef CONFIG_X86_64 +asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); +asmlinkage __visible notrace +struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); +void __init trap_init(void); +#endif + static inline int get_si_code(unsigned long condition) { if (condition & DR_STEP) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 61d93f062a36..d8f283b9a569 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -126,11 +126,17 @@ extern int __get_user_bad(void); }) /* - * This is a type: either unsigned long, if the argument fits into - * that type, or otherwise unsigned long long. + * This is the smallest unsigned integer type that can fit a value + * (up to 'long long') */ -#define __inttype(x) \ -__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) +#define __inttype(x) __typeof__( \ + __typefits(x,char, \ + __typefits(x,short, \ + __typefits(x,int, \ + __typefits(x,long,0ULL))))) + +#define __typefits(x,type,not) \ + __builtin_choose_expr(sizeof(x)<=sizeof(type),(unsigned type)0,not) /** * get_user - Get a simple variable from user space. @@ -193,23 +199,12 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) : : "A" (x), "r" (addr) \ : : label) -#define __put_user_asm_ex_u64(x, addr) \ - asm volatile("\n" \ - "1: movl %%eax,0(%1)\n" \ - "2: movl %%edx,4(%1)\n" \ - "3:" \ - _ASM_EXTABLE_EX(1b, 2b) \ - _ASM_EXTABLE_EX(2b, 3b) \ - : : "A" (x), "r" (addr)) - #define __put_user_x8(x, ptr, __ret_pu) \ asm volatile("call __put_user_8" : "=a" (__ret_pu) \ : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #else #define __put_user_goto_u64(x, ptr, label) \ - __put_user_goto(x, ptr, "q", "", "er", label) -#define __put_user_asm_ex_u64(x, addr) \ - __put_user_asm_ex(x, addr, "q", "", "er") + __put_user_goto(x, ptr, "q", "er", label) #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) #endif @@ -273,13 +268,13 @@ do { \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ - __put_user_goto(x, ptr, "b", "b", "iq", label); \ + __put_user_goto(x, ptr, "b", "iq", label); \ break; \ case 2: \ - __put_user_goto(x, ptr, "w", "w", "ir", label); \ + __put_user_goto(x, ptr, "w", "ir", label); \ break; \ case 4: \ - __put_user_goto(x, ptr, "l", "k", "ir", label); \ + __put_user_goto(x, ptr, "l", "ir", label); \ break; \ case 8: \ __put_user_goto_u64(x, ptr, label); \ @@ -289,141 +284,70 @@ do { \ } \ } while (0) -/* - * This doesn't do __uaccess_begin/end - the exception handling - * around it must do that. - */ -#define __put_user_size_ex(x, ptr, size) \ -do { \ - __chk_user_ptr(ptr); \ - switch (size) { \ - case 1: \ - __put_user_asm_ex(x, ptr, "b", "b", "iq"); \ - break; \ - case 2: \ - __put_user_asm_ex(x, ptr, "w", "w", "ir"); \ - break; \ - case 4: \ - __put_user_asm_ex(x, ptr, "l", "k", "ir"); \ - break; \ - case 8: \ - __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \ - break; \ - default: \ - __put_user_bad(); \ - } \ -} while (0) - #ifdef CONFIG_X86_32 -#define __get_user_asm_u64(x, ptr, retval, errret) \ +#define __get_user_asm_u64(x, ptr, retval) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ - asm volatile("\n" \ - "1: movl %2,%%eax\n" \ - "2: movl %3,%%edx\n" \ - "3:\n" \ + asm volatile("\n" \ + "1: movl %[lowbits],%%eax\n" \ + "2: movl %[highbits],%%edx\n" \ + "3:\n" \ ".section .fixup,\"ax\"\n" \ - "4: mov %4,%0\n" \ + "4: mov %[efault],%[errout]\n" \ " xorl %%eax,%%eax\n" \ " xorl %%edx,%%edx\n" \ " jmp 3b\n" \ ".previous\n" \ _ASM_EXTABLE_UA(1b, 4b) \ _ASM_EXTABLE_UA(2b, 4b) \ - : "=r" (retval), "=&A"(x) \ - : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \ - "i" (errret), "0" (retval)); \ + : [errout] "=r" (retval), \ + [output] "=&A"(x) \ + : [lowbits] "m" (__m(__ptr)), \ + [highbits] "m" __m(((u32 __user *)(__ptr)) + 1), \ + [efault] "i" (-EFAULT), "0" (retval)); \ }) -#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() #else -#define __get_user_asm_u64(x, ptr, retval, errret) \ - __get_user_asm(x, ptr, retval, "q", "", "=r", errret) -#define __get_user_asm_ex_u64(x, ptr) \ - __get_user_asm_ex(x, ptr, "q", "", "=r") +#define __get_user_asm_u64(x, ptr, retval) \ + __get_user_asm(x, ptr, retval, "q", "=r") #endif -#define __get_user_size(x, ptr, size, retval, errret) \ +#define __get_user_size(x, ptr, size, retval) \ do { \ retval = 0; \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ - __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); \ + __get_user_asm(x, ptr, retval, "b", "=q"); \ break; \ case 2: \ - __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); \ + __get_user_asm(x, ptr, retval, "w", "=r"); \ break; \ case 4: \ - __get_user_asm(x, ptr, retval, "l", "k", "=r", errret); \ + __get_user_asm(x, ptr, retval, "l", "=r"); \ break; \ case 8: \ - __get_user_asm_u64(x, ptr, retval, errret); \ + __get_user_asm_u64(x, ptr, retval); \ break; \ default: \ (x) = __get_user_bad(); \ } \ } while (0) -#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("\n" \ - "1: mov"itype" %2,%"rtype"1\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov %3,%0\n" \ - " xor"itype" %"rtype"1,%"rtype"1\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "=r" (err), ltype(x) \ - : "m" (__m(addr)), "i" (errret), "0" (err)) - -#define __get_user_asm_nozero(x, addr, err, itype, rtype, ltype, errret) \ +#define __get_user_asm(x, addr, err, itype, ltype) \ asm volatile("\n" \ - "1: mov"itype" %2,%"rtype"1\n" \ + "1: mov"itype" %[umem],%[output]\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ - "3: mov %3,%0\n" \ + "3: mov %[efault],%[errout]\n" \ + " xor"itype" %[output],%[output]\n" \ " jmp 2b\n" \ ".previous\n" \ _ASM_EXTABLE_UA(1b, 3b) \ - : "=r" (err), ltype(x) \ - : "m" (__m(addr)), "i" (errret), "0" (err)) - -/* - * This doesn't do __uaccess_begin/end - the exception handling - * around it must do that. - */ -#define __get_user_size_ex(x, ptr, size) \ -do { \ - __chk_user_ptr(ptr); \ - switch (size) { \ - case 1: \ - __get_user_asm_ex(x, ptr, "b", "b", "=q"); \ - break; \ - case 2: \ - __get_user_asm_ex(x, ptr, "w", "w", "=r"); \ - break; \ - case 4: \ - __get_user_asm_ex(x, ptr, "l", "k", "=r"); \ - break; \ - case 8: \ - __get_user_asm_ex_u64(x, ptr); \ - break; \ - default: \ - (x) = __get_user_bad(); \ - } \ -} while (0) - -#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile("1: mov"itype" %1,%"rtype"0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3:xor"itype" %"rtype"0,%"rtype"0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_EX(1b, 3b) \ - : ltype(x) : "m" (__m(addr))) + : [errout] "=r" (err), \ + [output] ltype(x) \ + : [umem] "m" (__m(addr)), \ + [efault] "i" (-EFAULT), "0" (err)) #define __put_user_nocheck(x, ptr, size) \ ({ \ @@ -447,7 +371,7 @@ __pu_label: \ __typeof__(ptr) __gu_ptr = (ptr); \ __typeof__(size) __gu_size = (size); \ __uaccess_begin_nospec(); \ - __get_user_size(__gu_val, __gu_ptr, __gu_size, __gu_err, -EFAULT); \ + __get_user_size(__gu_val, __gu_ptr, __gu_size, __gu_err); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ __builtin_expect(__gu_err, 0); \ @@ -462,47 +386,13 @@ struct __large_struct { unsigned long buf[100]; }; * we do not write to any memory gcc knows about, so there are no * aliasing issues. */ -#define __put_user_goto(x, addr, itype, rtype, ltype, label) \ +#define __put_user_goto(x, addr, itype, ltype, label) \ asm_volatile_goto("\n" \ - "1: mov"itype" %"rtype"0,%1\n" \ - _ASM_EXTABLE_UA(1b, %l2) \ + "1: mov"itype" %0,%1\n" \ + _ASM_EXTABLE_UA(1b, %l2) \ : : ltype(x), "m" (__m(addr)) \ : : label) -#define __put_user_failed(x, addr, itype, rtype, ltype, errret) \ - ({ __label__ __puflab; \ - int __pufret = errret; \ - __put_user_goto(x,addr,itype,rtype,ltype,__puflab); \ - __pufret = 0; \ - __puflab: __pufret; }) - -#define __put_user_asm(x, addr, retval, itype, rtype, ltype, errret) do { \ - retval = __put_user_failed(x, addr, itype, rtype, ltype, errret); \ -} while (0) - -#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile("1: mov"itype" %"rtype"0,%1\n" \ - "2:\n" \ - _ASM_EXTABLE_EX(1b, 2b) \ - : : ltype(x), "m" (__m(addr))) - -/* - * uaccess_try and catch - */ -#define uaccess_try do { \ - current->thread.uaccess_err = 0; \ - __uaccess_begin(); \ - barrier(); - -#define uaccess_try_nospec do { \ - current->thread.uaccess_err = 0; \ - __uaccess_begin_nospec(); \ - -#define uaccess_catch(err) \ - __uaccess_end(); \ - (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ -} while (0) - /** * __get_user - Get a simple variable from user space, with less checking. * @x: Variable to store result. @@ -552,28 +442,6 @@ struct __large_struct { unsigned long buf[100]; }; #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) -/* - * {get|put}_user_try and catch - * - * get_user_try { - * get_user_ex(...); - * } get_user_catch(err) - */ -#define get_user_try uaccess_try_nospec -#define get_user_catch(err) uaccess_catch(err) - -#define get_user_ex(x, ptr) do { \ - unsigned long __gue_val; \ - __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ - (x) = (__force __typeof__(*(ptr)))__gue_val; \ -} while (0) - -#define put_user_try uaccess_try -#define put_user_catch(err) uaccess_catch(err) - -#define put_user_ex(x, ptr) \ - __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - extern unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n); extern __must_check long @@ -584,99 +452,6 @@ extern __must_check long strnlen_user(const char __user *str, long n); unsigned long __must_check clear_user(void __user *mem, unsigned long len); unsigned long __must_check __clear_user(void __user *mem, unsigned long len); -extern void __cmpxchg_wrong_size(void) - __compiletime_error("Bad argument size for cmpxchg"); - -#define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size) \ -({ \ - int __ret = 0; \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - __uaccess_begin_nospec(); \ - switch (size) { \ - case 1: \ - { \ - asm volatile("\n" \ - "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \ - "2:\n" \ - "\t.section .fixup, \"ax\"\n" \ - "3:\tmov %3, %0\n" \ - "\tjmp 2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ - : "i" (-EFAULT), "q" (__new), "1" (__old) \ - : "memory" \ - ); \ - break; \ - } \ - case 2: \ - { \ - asm volatile("\n" \ - "1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \ - "2:\n" \ - "\t.section .fixup, \"ax\"\n" \ - "3:\tmov %3, %0\n" \ - "\tjmp 2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ - : "i" (-EFAULT), "r" (__new), "1" (__old) \ - : "memory" \ - ); \ - break; \ - } \ - case 4: \ - { \ - asm volatile("\n" \ - "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \ - "2:\n" \ - "\t.section .fixup, \"ax\"\n" \ - "3:\tmov %3, %0\n" \ - "\tjmp 2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ - : "i" (-EFAULT), "r" (__new), "1" (__old) \ - : "memory" \ - ); \ - break; \ - } \ - case 8: \ - { \ - if (!IS_ENABLED(CONFIG_X86_64)) \ - __cmpxchg_wrong_size(); \ - \ - asm volatile("\n" \ - "1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \ - "2:\n" \ - "\t.section .fixup, \"ax\"\n" \ - "3:\tmov %3, %0\n" \ - "\tjmp 2b\n" \ - "\t.previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ - : "i" (-EFAULT), "r" (__new), "1" (__old) \ - : "memory" \ - ); \ - break; \ - } \ - default: \ - __cmpxchg_wrong_size(); \ - } \ - __uaccess_end(); \ - *(uval) = __old; \ - __ret; \ -}) - -#define user_atomic_cmpxchg_inatomic(uval, ptr, old, new) \ -({ \ - access_ok((ptr), sizeof(*(ptr))) ? \ - __user_atomic_cmpxchg_inatomic((uval), (ptr), \ - (old), (new), sizeof(*(ptr))) : \ - -EFAULT; \ -}) - /* * movsl can be slow when source and dest are not both 8-byte aligned */ @@ -695,15 +470,6 @@ extern struct movsl_mask { #endif /* - * We rely on the nested NMI work to allow atomic faults from the NMI path; the - * nested NMI paths are careful to preserve CR2. - * - * Caller must use pagefault_enable/disable, or run in interrupt context, - * and also do a uaccess_ok() check - */ -#define __copy_from_user_nmi __copy_from_user_inatomic - -/* * The "unsafe" user accesses aren't really "unsafe", but the naming * is a big fat warning: you have to not only do the access_ok() * checking before using them, but you have to surround them with the @@ -729,7 +495,7 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt do { \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ - __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err, -EFAULT); \ + __get_user_size(__gu_val, (ptr), sizeof(*(ptr)), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ if (unlikely(__gu_err)) goto err_label; \ } while (0) diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index ba2dc1930630..388a40660c7b 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -23,33 +23,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - if (__builtin_constant_p(n)) { - unsigned long ret; - - switch (n) { - case 1: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u8 *)to, from, ret, - "b", "b", "=q", 1); - __uaccess_end(); - return ret; - case 2: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u16 *)to, from, ret, - "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 4: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u32 *)to, from, ret, - "l", "k", "=r", 4); - __uaccess_end(); - return ret; - } - } return __copy_user_ll(to, (__force const void *)from, n); } diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 5cd1caa8bc65..bc10e3dc64fe 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -65,117 +65,13 @@ copy_to_user_mcsafe(void *to, const void *from, unsigned len) static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { - int ret = 0; - - if (!__builtin_constant_p(size)) - return copy_user_generic(dst, (__force void *)src, size); - switch (size) { - case 1: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src, - ret, "b", "b", "=q", 1); - __uaccess_end(); - return ret; - case 2: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src, - ret, "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 4: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src, - ret, "l", "k", "=r", 4); - __uaccess_end(); - return ret; - case 8: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 8); - __uaccess_end(); - return ret; - case 10: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 10); - if (likely(!ret)) - __get_user_asm_nozero(*(u16 *)(8 + (char *)dst), - (u16 __user *)(8 + (char __user *)src), - ret, "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 16: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 16); - if (likely(!ret)) - __get_user_asm_nozero(*(u64 *)(8 + (char *)dst), - (u64 __user *)(8 + (char __user *)src), - ret, "q", "", "=r", 8); - __uaccess_end(); - return ret; - default: - return copy_user_generic(dst, (__force void *)src, size); - } + return copy_user_generic(dst, (__force void *)src, size); } static __always_inline __must_check unsigned long raw_copy_to_user(void __user *dst, const void *src, unsigned long size) { - int ret = 0; - - if (!__builtin_constant_p(size)) - return copy_user_generic((__force void *)dst, src, size); - switch (size) { - case 1: - __uaccess_begin(); - __put_user_asm(*(u8 *)src, (u8 __user *)dst, - ret, "b", "b", "iq", 1); - __uaccess_end(); - return ret; - case 2: - __uaccess_begin(); - __put_user_asm(*(u16 *)src, (u16 __user *)dst, - ret, "w", "w", "ir", 2); - __uaccess_end(); - return ret; - case 4: - __uaccess_begin(); - __put_user_asm(*(u32 *)src, (u32 __user *)dst, - ret, "l", "k", "ir", 4); - __uaccess_end(); - return ret; - case 8: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 8); - __uaccess_end(); - return ret; - case 10: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 10); - if (likely(!ret)) { - asm("":::"memory"); - __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, - ret, "w", "w", "ir", 2); - } - __uaccess_end(); - return ret; - case 16: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 16); - if (likely(!ret)) { - asm("":::"memory"); - __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, - ret, "q", "", "er", 8); - } - __uaccess_end(); - return ret; - default: - return copy_user_generic((__force void *)dst, src, size); - } + return copy_user_generic((__force void *)dst, src, size); } static __always_inline __must_check diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index a7dd080749ce..c1c3d31b15c0 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -13,10 +13,13 @@ # define __ARCH_WANT_SYS_OLD_MMAP # define __ARCH_WANT_SYS_OLD_SELECT +# define __NR_ia32_syscall_max __NR_syscall_max + # else # include <asm/unistd_64.h> # include <asm/unistd_64_x32.h> +# include <asm/unistd_32_ia32.h> # define __ARCH_WANT_SYS_TIME # define __ARCH_WANT_SYS_UTIME # define __ARCH_WANT_COMPAT_SYS_PREADV64 @@ -26,6 +29,10 @@ # endif +# define NR_syscalls (__NR_syscall_max + 1) +# define X32_NR_syscalls (__NR_x32_syscall_max + 1) +# define IA32_NR_syscalls (__NR_ia32_syscall_max + 1) + # define __ARCH_WANT_NEW_STAT # define __ARCH_WANT_OLD_READDIR # define __ARCH_WANT_OLD_STAT diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 7803114aa140..13687bf0e0a9 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -858,4 +858,6 @@ static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) return 1; } +void uv_bau_message_interrupt(struct pt_regs *regs); + #endif /* _ASM_X86_UV_UV_BAU_H */ diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h new file mode 100644 index 000000000000..119ac8612d89 --- /dev/null +++ b/arch/x86/include/asm/vdso/clocksource.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_CLOCKSOURCE_H +#define __ASM_VDSO_CLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES \ + VDSO_CLOCKMODE_TSC, \ + VDSO_CLOCKMODE_PVCLOCK, \ + VDSO_CLOCKMODE_HVCLOCK + +#endif /* __ASM_VDSO_CLOCKSOURCE_H */ diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 6ee1f7dba34b..9a6dc9b4ec99 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -243,7 +243,7 @@ static u64 vread_hvclock(void) static inline u64 __arch_get_hw_counter(s32 clock_mode) { - if (clock_mode == VCLOCK_TSC) + if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) return (u64)rdtsc_ordered(); /* * For any memory-mapped vclock type, we need to make sure that gcc @@ -252,13 +252,13 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode) * question isn't enabled, which will segfault. Hence the barriers. */ #ifdef CONFIG_PARAVIRT_CLOCK - if (clock_mode == VCLOCK_PVCLOCK) { + if (clock_mode == VDSO_CLOCKMODE_PVCLOCK) { barrier(); return vread_pvclock(); } #endif #ifdef CONFIG_HYPERV_TIMER - if (clock_mode == VCLOCK_HVCLOCK) { + if (clock_mode == VDSO_CLOCKMODE_HVCLOCK) { barrier(); return vread_hvclock(); } diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h new file mode 100644 index 000000000000..57b1a7034c64 --- /dev/null +++ b/arch/x86/include/asm/vdso/processor.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 ARM Ltd. + */ +#ifndef __ASM_VDSO_PROCESSOR_H +#define __ASM_VDSO_PROCESSOR_H + +#ifndef __ASSEMBLY__ + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static __always_inline void rep_nop(void) +{ + asm volatile("rep; nop" ::: "memory"); +} + +static __always_inline void cpu_relax(void) +{ + rep_nop(); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h index 0026ab2123ce..be199a9b2676 100644 --- a/arch/x86/include/asm/vdso/vsyscall.h +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -10,8 +10,6 @@ #include <asm/vgtod.h> #include <asm/vvar.h> -int vclocks_used __read_mostly; - DEFINE_VVAR(struct vdso_data, _vdso_data); /* * Update the vDSO data page to keep in sync with kernel timekeeping. @@ -23,19 +21,6 @@ struct vdso_data *__x86_get_k_vdso_data(void) } #define __arch_get_k_vdso_data __x86_get_k_vdso_data -static __always_inline -int __x86_get_clock_mode(struct timekeeper *tk) -{ - int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; - - /* Mark the new vclock used. */ - BUILD_BUG_ON(VCLOCK_MAX >= 32); - WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); - - return vclock_mode; -} -#define __arch_get_clock_mode __x86_get_clock_mode - /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h new file mode 100644 index 000000000000..75884d2cdec3 --- /dev/null +++ b/arch/x86/include/asm/vermagic.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#ifdef CONFIG_X86_64 +/* X86_64 does not define MODULE_PROC_FAMILY */ +#elif defined CONFIG_M486SX +#define MODULE_PROC_FAMILY "486SX " +#elif defined CONFIG_M486 +#define MODULE_PROC_FAMILY "486 " +#elif defined CONFIG_M586 +#define MODULE_PROC_FAMILY "586 " +#elif defined CONFIG_M586TSC +#define MODULE_PROC_FAMILY "586TSC " +#elif defined CONFIG_M586MMX +#define MODULE_PROC_FAMILY "586MMX " +#elif defined CONFIG_MCORE2 +#define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_MATOM +#define MODULE_PROC_FAMILY "ATOM " +#elif defined CONFIG_M686 +#define MODULE_PROC_FAMILY "686 " +#elif defined CONFIG_MPENTIUMII +#define MODULE_PROC_FAMILY "PENTIUMII " +#elif defined CONFIG_MPENTIUMIII +#define MODULE_PROC_FAMILY "PENTIUMIII " +#elif defined CONFIG_MPENTIUMM +#define MODULE_PROC_FAMILY "PENTIUMM " +#elif defined CONFIG_MPENTIUM4 +#define MODULE_PROC_FAMILY "PENTIUM4 " +#elif defined CONFIG_MK6 +#define MODULE_PROC_FAMILY "K6 " +#elif defined CONFIG_MK7 +#define MODULE_PROC_FAMILY "K7 " +#elif defined CONFIG_MK8 +#define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_MELAN +#define MODULE_PROC_FAMILY "ELAN " +#elif defined CONFIG_MCRUSOE +#define MODULE_PROC_FAMILY "CRUSOE " +#elif defined CONFIG_MEFFICEON +#define MODULE_PROC_FAMILY "EFFICEON " +#elif defined CONFIG_MWINCHIPC6 +#define MODULE_PROC_FAMILY "WINCHIPC6 " +#elif defined CONFIG_MWINCHIP3D +#define MODULE_PROC_FAMILY "WINCHIP3D " +#elif defined CONFIG_MCYRIXIII +#define MODULE_PROC_FAMILY "CYRIXIII " +#elif defined CONFIG_MVIAC3_2 +#define MODULE_PROC_FAMILY "VIAC3-2 " +#elif defined CONFIG_MVIAC7 +#define MODULE_PROC_FAMILY "VIAC7 " +#elif defined CONFIG_MGEODEGX1 +#define MODULE_PROC_FAMILY "GEODEGX1 " +#elif defined CONFIG_MGEODE_LX +#define MODULE_PROC_FAMILY "GEODE " +#else +#error unknown processor family +#endif + +#ifdef CONFIG_X86_32 +# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY +#else +# define MODULE_ARCH_VERMAGIC "" +#endif + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index a2638c6124ed..7aa38b2ad8a9 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -2,6 +2,11 @@ #ifndef _ASM_X86_VGTOD_H #define _ASM_X86_VGTOD_H +/* + * This check is required to prevent ARCH=um to include + * unwanted headers. + */ +#ifdef CONFIG_GENERIC_GETTIMEOFDAY #include <linux/compiler.h> #include <asm/clocksource.h> #include <vdso/datapage.h> @@ -14,11 +19,6 @@ typedef u64 gtod_long_t; #else typedef unsigned long gtod_long_t; #endif - -extern int vclocks_used; -static inline bool vclock_was_used(int vclock) -{ - return READ_ONCE(vclocks_used) & (1 << vclock); -} +#endif /* CONFIG_GENERIC_GETTIMEOFDAY */ #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 2a85287b3685..5e090d1f03f8 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -72,7 +72,7 @@ #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC) #define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA) #define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING) -#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000 +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE) #define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING) #define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING) @@ -500,6 +500,18 @@ enum vmcs_field { VMX_EPT_EXECUTABLE_MASK) #define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT) +static inline u8 vmx_eptp_page_walk_level(u64 eptp) +{ + u64 encoded_level = eptp & VMX_EPTP_PWL_MASK; + + if (encoded_level == VMX_EPTP_PWL_5) + return 5; + + /* @eptp must be pre-validated by the caller. */ + WARN_ON_ONCE(encoded_level != VMX_EPTP_PWL_4); + return 4; +} + /* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */ #define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \ VMX_EPT_EXECUTABLE_MASK) diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index a50e4a0de315..9915990fd8cf 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -81,6 +81,7 @@ #define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */ #define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */ #define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */ +#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */ #define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */ #endif /* _ASM_X86_VMXFEATURES_H */ diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index d61ddf3d052b..0c4e5b5e3852 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -11,8 +11,6 @@ * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines */ -#ifdef CONFIG_AS_AVX - #include <linux/compiler.h> #include <asm/fpu/api.h> @@ -170,11 +168,4 @@ do { \ #define AVX_SELECT(FASTEST) \ (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST) -#else - -#define AVX_XOR_SPEED {} - -#define AVX_SELECT(FASTEST) (FASTEST) - -#endif #endif |