diff options
Diffstat (limited to 'arch/x86/include')
23 files changed, 240 insertions, 168 deletions
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1c129abb7f09..4e3099d9ae62 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -374,12 +374,12 @@ extern struct apic *apic; #define apic_driver(sym) \ static const struct apic *__apicdrivers_##sym __used \ __aligned(sizeof(struct apic *)) \ - __section(.apicdrivers) = { &sym } + __section(".apicdrivers") = { &sym } #define apic_drivers(sym1, sym2) \ static struct apic *__apicdrivers_##sym1##sym2[2] __used \ __aligned(sizeof(struct apic *)) \ - __section(.apicdrivers) = { &sym1, &sym2 } + __section(".apicdrivers") = { &sym1, &sym2 } extern struct apic *__apicdrivers[], *__apicdrivers_end[]; diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index abe08690a887..69404eae9983 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h @@ -8,7 +8,7 @@ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(".data..read_mostly") #define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT #define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT) diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index de58391bdee0..cf0e25f45422 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -43,7 +43,7 @@ struct devs_id { #define sfi_device(i) \ static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ - __attribute__((__section__(".x86_intel_mid_dev.init"))) = &i + __section(".x86_intel_mid_dev.init") = &i /** * struct mid_sd_board_info - template for SD device creation diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 02a0cf547d7b..2dfc8d380dab 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -9,7 +9,7 @@ #include <asm/nospec-branch.h> /* Provide __cpuidle; we can't safely include <linux/cpu.h> */ -#define __cpuidle __attribute__((__section__(".cpuidle.text"))) +#define __cpuidle __section(".cpuidle.text") /* * Interrupt control: diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5303dbc5c9bc..324ddd7fd0aa 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -80,13 +80,14 @@ #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) -#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24) +#define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24) #define KVM_REQ_APICV_UPDATE \ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) #define KVM_REQ_HV_TLB_FLUSH \ KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_APF_READY KVM_ARCH_REQ(28) +#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -132,7 +133,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 -#define KVM_MAX_CPUID_ENTRIES 80 +#define KVM_MAX_CPUID_ENTRIES 256 #define KVM_NR_FIXED_MTRR_REGION 88 #define KVM_NR_VAR_MTRR 8 @@ -636,8 +637,9 @@ struct kvm_vcpu_arch { int halt_request; /* real mode on Intel only */ int cpuid_nent; - struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; + struct kvm_cpuid_entry2 *cpuid_entries; + unsigned long cr3_lm_rsvd_bits; int maxphyaddr; int max_tdp_level; @@ -788,6 +790,21 @@ struct kvm_vcpu_arch { /* AMD MSRC001_0015 Hardware Configuration */ u64 msr_hwcr; + + /* pv related cpuid info */ + struct { + /* + * value of the eax register in the KVM_CPUID_FEATURES CPUID + * leaf. + */ + u32 features; + + /* + * indicates whether pv emulation should be disabled if features + * are not present in the guest's cpuid + */ + bool enforce; + } pv_cpuid; }; struct kvm_lpage_info { @@ -860,6 +877,13 @@ struct kvm_hv { struct kvm_hv_syndbg hv_syndbg; }; +struct msr_bitmap_range { + u32 flags; + u32 nmsrs; + u32 base; + unsigned long *bitmap; +}; + enum kvm_irqchip_mode { KVM_IRQCHIP_NONE, KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ @@ -961,8 +985,31 @@ struct kvm_arch { bool guest_can_read_msr_platform_info; bool exception_payload_enabled; + /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ + u32 user_space_msr_mask; + + struct { + u8 count; + bool default_allow:1; + struct msr_bitmap_range ranges[16]; + } msr_filter; + struct kvm_pmu_event_filter *pmu_event_filter; struct task_struct *nx_lpage_recovery_thread; + + /* + * Whether the TDP MMU is enabled for this VM. This contains a + * snapshot of the TDP MMU module parameter from when the VM was + * created and remains unchanged for the life of the VM. If this is + * true, TDP MMU handler functions will run for various MMU + * operations. + */ + bool tdp_mmu_enabled; + + /* List of struct tdp_mmu_pages being used as roots */ + struct list_head tdp_mmu_roots; + /* List of struct tdp_mmu_pages not being used as roots */ + struct list_head tdp_mmu_pages; }; struct kvm_vm_stat { @@ -1069,7 +1116,7 @@ struct kvm_x86_ops { void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); - void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); + int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); @@ -1143,7 +1190,12 @@ struct kvm_x86_ops { /* Returns actual tsc_offset set in active VMCS */ u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); - void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); + /* + * Retrieve somewhat arbitrary exit information. Intended to be used + * only from within tracepoints to avoid VMREADs when tracing is off. + */ + void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, + u32 *exit_int_info, u32 *exit_int_info_err_code); int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, @@ -1221,12 +1273,13 @@ struct kvm_x86_ops { int (*get_msr_feature)(struct kvm_msr_entry *entry); - bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); + bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); void (*migrate_timers)(struct kvm_vcpu *vcpu); + void (*msr_filter_changed)(struct kvm_vcpu *vcpu); }; struct kvm_x86_nested_ops { @@ -1238,7 +1291,7 @@ struct kvm_x86_nested_ops { int (*set_state)(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, struct kvm_nested_state *kvm_state); - bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); + bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu); int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); int (*enable_evmcs)(struct kvm_vcpu *vcpu, @@ -1612,8 +1665,8 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit); -void kvm_define_shared_msr(unsigned index, u32 msr); -int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +void kvm_define_user_return_msr(unsigned index, u32 msr); +int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c9f5df0a1c10..2f62bbdd9d12 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -54,7 +54,7 @@ bool sme_active(void); bool sev_active(void); bool sev_es_active(void); -#define __bss_decrypted __attribute__((__section__(".bss..decrypted"))) +#define __bss_decrypted __section(".bss..decrypted") #else /* !CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 4f77b8f22e54..ffc289992d1b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -54,6 +54,7 @@ typedef int (*hyperv_fill_flush_list_func)( #define hv_enable_vdso_clocksource() \ vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); #define hv_get_raw_timer() rdtsc_ordered() +#define hv_get_vector() HYPERVISOR_CALLBACK_VECTOR /* * Reference to pv_ops must be inline so objtool diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 86f20d520a07..0b4920a7238e 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -60,22 +60,20 @@ struct saved_msrs { #define EAX_EDX_RET(val, low, high) "=A" (val) #endif -#ifdef CONFIG_TRACEPOINTS /* * Be very careful with includes. This header is prone to include loops. */ #include <asm/atomic.h> #include <linux/tracepoint-defs.h> -extern struct tracepoint __tracepoint_read_msr; -extern struct tracepoint __tracepoint_write_msr; -extern struct tracepoint __tracepoint_rdpmc; -#define msr_tracepoint_active(t) static_key_false(&(t).key) +#ifdef CONFIG_TRACEPOINTS +DECLARE_TRACEPOINT(read_msr); +DECLARE_TRACEPOINT(write_msr); +DECLARE_TRACEPOINT(rdpmc); extern void do_trace_write_msr(unsigned int msr, u64 val, int failed); extern void do_trace_read_msr(unsigned int msr, u64 val, int failed); extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed); #else -#define msr_tracepoint_active(t) false static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {} static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {} static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} @@ -128,7 +126,7 @@ static inline unsigned long long native_read_msr(unsigned int msr) val = __rdmsr(msr); - if (msr_tracepoint_active(__tracepoint_read_msr)) + if (tracepoint_enabled(read_msr)) do_trace_read_msr(msr, val, 0); return val; @@ -150,7 +148,7 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, _ASM_EXTABLE(2b, 3b) : [err] "=r" (*err), EAX_EDX_RET(val, low, high) : "c" (msr), [fault] "i" (-EIO)); - if (msr_tracepoint_active(__tracepoint_read_msr)) + if (tracepoint_enabled(read_msr)) do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), *err); return EAX_EDX_VAL(val, low, high); } @@ -161,7 +159,7 @@ native_write_msr(unsigned int msr, u32 low, u32 high) { __wrmsr(msr, low, high); - if (msr_tracepoint_active(__tracepoint_write_msr)) + if (tracepoint_enabled(write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } @@ -181,7 +179,7 @@ native_write_msr_safe(unsigned int msr, u32 low, u32 high) : "c" (msr), "0" (low), "d" (high), [fault] "i" (-EIO) : "memory"); - if (msr_tracepoint_active(__tracepoint_write_msr)) + if (tracepoint_enabled(write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), err); return err; } @@ -248,7 +246,7 @@ static inline unsigned long long native_read_pmc(int counter) DECLARE_ARGS(val, low, high); asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter)); - if (msr_tracepoint_active(__tracepoint_rdpmc)) + if (tracepoint_enabled(rdpmc)) do_trace_rdpmc(counter, EAX_EDX_VAL(val, low, high), 0); return EAX_EDX_VAL(val, low, high); } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 86651e86289d..cb9ad6b73973 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -314,19 +314,19 @@ static inline void mds_idle_clear_cpu_buffers(void) * lfence * jmp spec_trap * do_rop: - * mov %rax,(%rsp) for x86_64 + * mov %rcx,(%rsp) for x86_64 * mov %edx,(%esp) for x86_32 * retq * * Without retpolines configured: * - * jmp *%rax for x86_64 + * jmp *%rcx for x86_64 * jmp *%edx for x86_32 */ #ifdef CONFIG_RETPOLINE # ifdef CONFIG_X86_64 -# define RETPOLINE_RAX_BPF_JIT_SIZE 17 -# define RETPOLINE_RAX_BPF_JIT() \ +# define RETPOLINE_RCX_BPF_JIT_SIZE 17 +# define RETPOLINE_RCX_BPF_JIT() \ do { \ EMIT1_off32(0xE8, 7); /* callq do_rop */ \ /* spec_trap: */ \ @@ -334,7 +334,7 @@ do { \ EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ /* do_rop: */ \ - EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ + EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \ EMIT1(0xC3); /* retq */ \ } while (0) # else /* !CONFIG_X86_64 */ @@ -352,9 +352,9 @@ do { \ # endif #else /* !CONFIG_RETPOLINE */ # ifdef CONFIG_X86_64 -# define RETPOLINE_RAX_BPF_JIT_SIZE 2 -# define RETPOLINE_RAX_BPF_JIT() \ - EMIT2(0xFF, 0xE0); /* jmp *%rax */ +# define RETPOLINE_RCX_BPF_JIT_SIZE 2 +# define RETPOLINE_RCX_BPF_JIT() \ + EMIT2(0xFF, 0xE1); /* jmp *%rcx */ # else /* !CONFIG_X86_64 */ # define RETPOLINE_EDX_BPF_JIT() \ EMIT2(0xFF, 0xE2) /* jmp *%edx */ diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 565ad755c785..f462895a33e4 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -42,6 +42,17 @@ #endif /* CONFIG_X86_PAE */ /* + * User space process size: 3GB (default). + */ +#define IA32_PAGE_OFFSET __PAGE_OFFSET +#define TASK_SIZE __PAGE_OFFSET +#define TASK_SIZE_LOW TASK_SIZE +#define TASK_SIZE_MAX TASK_SIZE +#define DEFAULT_MAP_WINDOW TASK_SIZE +#define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX STACK_TOP + +/* * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S) */ #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d0c6c10c18a0..3f49dac03617 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -60,6 +60,44 @@ #endif /* + * User space process size. This is the first address outside the user range. + * There are a few constraints that determine this: + * + * On Intel CPUs, if a SYSCALL instruction is at the highest canonical + * address, then that syscall will enter the kernel with a + * non-canonical return address, and SYSRET will explode dangerously. + * We avoid this particular problem by preventing anything executable + * from being mapped at the maximum canonical address. + * + * On AMD CPUs in the Ryzen family, there's a nasty bug in which the + * CPUs malfunction if they execute code from the highest canonical page. + * They'll speculate right off the end of the canonical space, and + * bad things happen. This is worked around in the same way as the + * Intel problem. + * + * With page table isolation enabled, we map the LDT in ... [stay tuned] + */ +#define TASK_SIZE_MAX ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) + +#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ + 0xc0000000 : 0xFFFFe000) + +#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \ + IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW) +#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \ + IA32_PAGE_OFFSET : TASK_SIZE_MAX) +#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \ + IA32_PAGE_OFFSET : TASK_SIZE_MAX) + +#define STACK_TOP TASK_SIZE_LOW +#define STACK_TOP_MAX TASK_SIZE_MAX + +/* * Maximum kernel image size is limited to 1 GiB, due to the fixmap living * in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). * diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 6960cd6d1f23..b9a7fd0a27e2 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -137,7 +137,9 @@ union cpuid10_edx { struct { unsigned int num_counters_fixed:5; unsigned int bit_width_fixed:8; - unsigned int reserved:19; + unsigned int reserved1:2; + unsigned int anythread_deprecated:1; + unsigned int reserved2:16; } split; unsigned int full; }; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5ac507586769..82a08b585818 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -482,10 +482,6 @@ extern unsigned int fpu_user_xstate_size; struct perf_event; -typedef struct { - unsigned long seg; -} mm_segment_t; - struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -538,8 +534,6 @@ struct thread_struct { */ unsigned long iopl_emul; - mm_segment_t addr_limit; - unsigned int sig_on_uaccess_err:1; /* Floating point and extended processor state */ @@ -783,67 +777,15 @@ static inline void spin_lock_prefetch(const void *x) }) #ifdef CONFIG_X86_32 -/* - * User space process size: 3GB (default). - */ -#define IA32_PAGE_OFFSET PAGE_OFFSET -#define TASK_SIZE PAGE_OFFSET -#define TASK_SIZE_LOW TASK_SIZE -#define TASK_SIZE_MAX TASK_SIZE -#define DEFAULT_MAP_WINDOW TASK_SIZE -#define STACK_TOP TASK_SIZE -#define STACK_TOP_MAX STACK_TOP - #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ - .addr_limit = KERNEL_DS, \ } #define KSTK_ESP(task) (task_pt_regs(task)->sp) #else -/* - * User space process size. This is the first address outside the user range. - * There are a few constraints that determine this: - * - * On Intel CPUs, if a SYSCALL instruction is at the highest canonical - * address, then that syscall will enter the kernel with a - * non-canonical return address, and SYSRET will explode dangerously. - * We avoid this particular problem by preventing anything executable - * from being mapped at the maximum canonical address. - * - * On AMD CPUs in the Ryzen family, there's a nasty bug in which the - * CPUs malfunction if they execute code from the highest canonical page. - * They'll speculate right off the end of the canonical space, and - * bad things happen. This is worked around in the same way as the - * Intel problem. - * - * With page table isolation enabled, we map the LDT in ... [stay tuned] - */ -#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) - -#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE) - -/* This decides where the kernel will search for a free chunk of vm - * space during mmap's. - */ -#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ - 0xc0000000 : 0xFFFFe000) - -#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \ - IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW) -#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \ - IA32_PAGE_OFFSET : TASK_SIZE_MAX) -#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \ - IA32_PAGE_OFFSET : TASK_SIZE_MAX) - -#define STACK_TOP TASK_SIZE_LOW -#define STACK_TOP_MAX TASK_SIZE_MAX - -#define INIT_THREAD { \ - .addr_limit = KERNEL_DS, \ -} +#define INIT_THREAD { } extern unsigned long KSTK_ESP(struct task_struct *task); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 7d7a064af6ff..389d851a02c4 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -119,7 +119,7 @@ void *extend_brk(size_t size, size_t align); * executable.) */ #define RESERVE_BRK(name,sz) \ - static void __section(.discard.text) __used notrace \ + static void __section(".discard.text") __used notrace \ __brk_reservation_fn_##name##__(void) { \ asm volatile ( \ ".pushsection .brk_reservation,\"aw\",@nobits;" \ diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 6bfc878f6771..6a9ccc1b2be5 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -28,4 +28,14 @@ #endif #endif /* CONFIG_SPARSEMEM */ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_NUMA_KEEP_MEMINFO +extern int phys_to_target_node(phys_addr_t start); +#define phys_to_target_node phys_to_target_node +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_SPARSEMEM_H */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index cf13f9e78585..71d630bb5e08 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -3,10 +3,54 @@ #define __SVM_H #include <uapi/asm/svm.h> - +#include <uapi/asm/kvm.h> + +/* + * 32-bit intercept words in the VMCB Control Area, starting + * at Byte offset 000h. + */ + +enum intercept_words { + INTERCEPT_CR = 0, + INTERCEPT_DR, + INTERCEPT_EXCEPTION, + INTERCEPT_WORD3, + INTERCEPT_WORD4, + INTERCEPT_WORD5, + MAX_INTERCEPT, +}; enum { - INTERCEPT_INTR, + /* Byte offset 000h (word 0) */ + INTERCEPT_CR0_READ = 0, + INTERCEPT_CR3_READ = 3, + INTERCEPT_CR4_READ = 4, + INTERCEPT_CR8_READ = 8, + INTERCEPT_CR0_WRITE = 16, + INTERCEPT_CR3_WRITE = 16 + 3, + INTERCEPT_CR4_WRITE = 16 + 4, + INTERCEPT_CR8_WRITE = 16 + 8, + /* Byte offset 004h (word 1) */ + INTERCEPT_DR0_READ = 32, + INTERCEPT_DR1_READ, + INTERCEPT_DR2_READ, + INTERCEPT_DR3_READ, + INTERCEPT_DR4_READ, + INTERCEPT_DR5_READ, + INTERCEPT_DR6_READ, + INTERCEPT_DR7_READ, + INTERCEPT_DR0_WRITE = 48, + INTERCEPT_DR1_WRITE, + INTERCEPT_DR2_WRITE, + INTERCEPT_DR3_WRITE, + INTERCEPT_DR4_WRITE, + INTERCEPT_DR5_WRITE, + INTERCEPT_DR6_WRITE, + INTERCEPT_DR7_WRITE, + /* Byte offset 008h (word 2) */ + INTERCEPT_EXCEPTION_OFFSET = 64, + /* Byte offset 00Ch (word 3) */ + INTERCEPT_INTR = 96, INTERCEPT_NMI, INTERCEPT_SMI, INTERCEPT_INIT, @@ -38,7 +82,8 @@ enum { INTERCEPT_TASK_SWITCH, INTERCEPT_FERR_FREEZE, INTERCEPT_SHUTDOWN, - INTERCEPT_VMRUN, + /* Byte offset 010h (word 4) */ + INTERCEPT_VMRUN = 128, INTERCEPT_VMMCALL, INTERCEPT_VMLOAD, INTERCEPT_VMSAVE, @@ -53,15 +98,18 @@ enum { INTERCEPT_MWAIT_COND, INTERCEPT_XSETBV, INTERCEPT_RDPRU, + /* Byte offset 014h (word 5) */ + INTERCEPT_INVLPGB = 160, + INTERCEPT_INVLPGB_ILLEGAL, + INTERCEPT_INVPCID, + INTERCEPT_MCOMMIT, + INTERCEPT_TLBSYNC, }; struct __attribute__ ((__packed__)) vmcb_control_area { - u32 intercept_cr; - u32 intercept_dr; - u32 intercept_exceptions; - u64 intercept; - u8 reserved_1[40]; + u32 intercepts[MAX_INTERCEPT]; + u32 reserved_1[15 - MAX_INTERCEPT]; u16 pause_filter_thresh; u16 pause_filter_count; u64 iopm_base_pa; @@ -287,32 +335,6 @@ struct vmcb { #define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK #define SVM_SELECTOR_CODE_MASK (1 << 3) -#define INTERCEPT_CR0_READ 0 -#define INTERCEPT_CR3_READ 3 -#define INTERCEPT_CR4_READ 4 -#define INTERCEPT_CR8_READ 8 -#define INTERCEPT_CR0_WRITE (16 + 0) -#define INTERCEPT_CR3_WRITE (16 + 3) -#define INTERCEPT_CR4_WRITE (16 + 4) -#define INTERCEPT_CR8_WRITE (16 + 8) - -#define INTERCEPT_DR0_READ 0 -#define INTERCEPT_DR1_READ 1 -#define INTERCEPT_DR2_READ 2 -#define INTERCEPT_DR3_READ 3 -#define INTERCEPT_DR4_READ 4 -#define INTERCEPT_DR5_READ 5 -#define INTERCEPT_DR6_READ 6 -#define INTERCEPT_DR7_READ 7 -#define INTERCEPT_DR0_WRITE (16 + 0) -#define INTERCEPT_DR1_WRITE (16 + 1) -#define INTERCEPT_DR2_WRITE (16 + 2) -#define INTERCEPT_DR3_WRITE (16 + 3) -#define INTERCEPT_DR4_WRITE (16 + 4) -#define INTERCEPT_DR5_WRITE (16 + 5) -#define INTERCEPT_DR6_WRITE (16 + 6) -#define INTERCEPT_DR7_WRITE (16 + 7) - #define SVM_EVTINJ_VEC_MASK 0xff #define SVM_EVTINJ_TYPE_SHIFT 8 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 267701ae3d86..44733a4bfc42 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -102,7 +102,6 @@ struct thread_info { #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ #define TIF_ADDR32 29 /* 32-bit address space on 64 bits */ #define TIF_X32 30 /* 32-bit native x86-64 binary */ -#define TIF_FSCHECK 31 /* Check FS is USER_DS on return */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -131,7 +130,6 @@ struct thread_info { #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_ADDR32 (1 << TIF_ADDR32) #define _TIF_X32 (1 << TIF_X32) -#define _TIF_FSCHECK (1 << TIF_FSCHECK) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW_BASE \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 477c503f2753..c9fa7be3df82 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -13,30 +13,6 @@ #include <asm/extable.h> /* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. - */ - -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - -#define KERNEL_DS MAKE_MM_SEG(-1UL) -#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) - -#define get_fs() (current->thread.addr_limit) -static inline void set_fs(mm_segment_t fs) -{ - current->thread.addr_limit = fs; - /* On user-mode return, check fs is correct */ - set_thread_flag(TIF_FSCHECK); -} - -#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) -#define user_addr_max() (current->thread.addr_limit.seg) - -/* * Test whether a block of memory is a valid user space address. * Returns 0 if the range is valid, nonzero otherwise. */ @@ -93,7 +69,7 @@ static inline bool pagefault_disabled(void); #define access_ok(addr, size) \ ({ \ WARN_ON_IN_IRQ(); \ - likely(!__range_not_ok(addr, size, user_addr_max())); \ + likely(!__range_not_ok(addr, size, TASK_SIZE_MAX)); \ }) extern int __get_user_1(void); @@ -232,16 +208,24 @@ extern void __put_user_nocheck_2(void); extern void __put_user_nocheck_4(void); extern void __put_user_nocheck_8(void); +/* + * ptr must be evaluated and assigned to the temporary __ptr_pu before + * the assignment of x to __val_pu, to avoid any function calls + * involved in the ptr expression (possibly implicitly generated due + * to KASAN) from clobbering %ax. + */ #define do_put_user_call(fn,x,ptr) \ ({ \ int __ret_pu; \ + void __user *__ptr_pu; \ register __typeof__(*(ptr)) __val_pu asm("%"_ASM_AX); \ __chk_user_ptr(ptr); \ + __ptr_pu = (ptr); \ __val_pu = (x); \ asm volatile("call __" #fn "_%P[size]" \ : "=c" (__ret_pu), \ ASM_CALL_CONSTRAINT \ - : "0" (ptr), \ + : "0" (__ptr_pu), \ "r" (__val_pu), \ [size] "i" (sizeof(*(ptr))) \ :"ebx"); \ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 172d3e4a9e4b..648eb23fe7f0 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -2,14 +2,8 @@ #ifndef _ASM_X86_UV_UV_H #define _ASM_X86_UV_UV_H -#include <asm/tlbflush.h> - enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC}; -struct cpumask; -struct mm_struct; -struct flush_tlb_info; - #ifdef CONFIG_X86_UV #include <linux/efi.h> @@ -44,10 +38,6 @@ static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubbed(int uv) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } -static inline const struct cpumask * -uv_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) -{ return cpumask; } #endif /* X86_UV */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index cd7de4b401fe..f8ba5289ecb0 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -52,7 +52,7 @@ #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES) #define SECONDARY_EXEC_ENABLE_EPT VMCS_CONTROL_BIT(EPT) #define SECONDARY_EXEC_DESC VMCS_CONTROL_BIT(DESC_EXITING) -#define SECONDARY_EXEC_RDTSCP VMCS_CONTROL_BIT(RDTSCP) +#define SECONDARY_EXEC_ENABLE_RDTSCP VMCS_CONTROL_BIT(RDTSCP) #define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE VMCS_CONTROL_BIT(VIRTUAL_X2APIC) #define SECONDARY_EXEC_ENABLE_VPID VMCS_CONTROL_BIT(VPID) #define SECONDARY_EXEC_WBINVD_EXITING VMCS_CONTROL_BIT(WBINVD_EXITING) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 0780f97c1850..89e5f3d1bba8 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -192,6 +192,26 @@ struct kvm_msr_list { __u32 indices[0]; }; +/* Maximum size of any access bitmap in bytes */ +#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600 + +/* for KVM_X86_SET_MSR_FILTER */ +struct kvm_msr_filter_range { +#define KVM_MSR_FILTER_READ (1 << 0) +#define KVM_MSR_FILTER_WRITE (1 << 1) + __u32 flags; + __u32 nmsrs; /* number of msrs in bitmap */ + __u32 base; /* MSR index the bitmap starts at */ + __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */ +}; + +#define KVM_MSR_FILTER_MAX_RANGES 16 +struct kvm_msr_filter { +#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) +#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) + __u32 flags; + struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; +}; struct kvm_cpuid_entry { __u32 function; diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 812e9b4c1114..950afebfba88 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -32,6 +32,7 @@ #define KVM_FEATURE_POLL_CONTROL 12 #define KVM_FEATURE_PV_SCHED_YIELD 13 #define KVM_FEATURE_ASYNC_PF_INT 14 +#define KVM_FEATURE_MSI_EXT_DEST_ID 15 #define KVM_HINTS_REALTIME 0 diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index a7a3403645e5..f1d8307454e0 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -77,6 +77,7 @@ #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 @@ -182,6 +183,7 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ |