diff options
author | Ingo Molnar <mingo@kernel.org> | 2018-06-26 10:02:41 +0300 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2018-06-26 10:02:41 +0300 |
commit | f446474889c06883a3879faa0896e2359e812a6b (patch) | |
tree | 95634685d56dd532d1e9b73fbd07ca389296911b /arch/x86 | |
parent | 01bdee64f9cf8e15f998bf52789ed9d0ebdfa621 (diff) | |
parent | 6f0d349d922ba44e4348a17a78ea51b7135965b1 (diff) | |
download | linux-f446474889c06883a3879faa0896e2359e812a6b.tar.xz |
Merge branch 'linus' into perf/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Makefile | 6 | ||||
-rw-r--r-- | arch/x86/boot/compressed/eboot.c | 2 | ||||
-rw-r--r-- | arch/x86/entry/common.c | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/barrier.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/vmx.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_uv_x.c | 60 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cacheinfo.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-severity.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 44 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/intel.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/head64.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/quirks.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/signal.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/uprobes.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 67 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 9 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 20 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 7 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_pv.c | 1 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_pvh.c | 1 | ||||
-rw-r--r-- | arch/x86/xen/smp_pv.c | 5 |
24 files changed, 234 insertions, 45 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index f0a6ea22429d..a08e82856563 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -258,11 +258,6 @@ archscripts: scripts_basic archheaders: $(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all -archprepare: -ifeq ($(CONFIG_KEXEC_FILE),y) - $(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c -endif - ### # Kernel objects @@ -327,7 +322,6 @@ archclean: $(Q)rm -rf $(objtree)/arch/x86_64 $(Q)$(MAKE) $(clean)=$(boot) $(Q)$(MAKE) $(clean)=arch/x86/tools - $(Q)$(MAKE) $(clean)=arch/x86/purgatory define archhelp echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index a8a8642d2b0b..e57665b4ba1c 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -118,7 +118,7 @@ __setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) void *romimage; status = efi_call_proto(efi_pci_io_protocol, attributes, pci, - EfiPciIoAttributeOperationGet, 0, 0, + EfiPciIoAttributeOperationGet, 0ULL, &attributes); if (status != EFI_SUCCESS) return status; diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 92190879b228..3b2490b81918 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -164,7 +164,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) if (cached_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); - rseq_handle_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } if (cached_flags & _TIF_USER_RETURN_NOTIFY) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 042b5e892ed1..14de0432d288 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -38,7 +38,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, { unsigned long mask; - asm ("cmp %1,%2; sbb %0,%0;" + asm volatile ("cmp %1,%2; sbb %0,%0;" :"=r" (mask) :"g"(size),"r" (index) :"cc"); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 425e6b8b9547..6aa8499e1f62 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -114,6 +114,7 @@ #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 +#define VMX_MISC_ZERO_LEN_INS 0x40000000 /* VMFUNC functions */ #define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 @@ -351,11 +352,13 @@ enum vmcs_field { #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ +#define INTR_TYPE_RESERVED (1 << 8) /* reserved */ #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ #define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ #define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ #define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ +#define INTR_TYPE_OTHER_EVENT (7 << 8) /* other event */ /* GUEST_INTERRUPTIBILITY_INFO flags. */ #define GUEST_INTR_STATE_STI 0x00000001 diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index efaf2d4f9c3c..d492752f79e1 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -26,6 +26,7 @@ #include <linux/delay.h> #include <linux/crash_dump.h> #include <linux/reboot.h> +#include <linux/memory.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> @@ -392,6 +393,51 @@ extern int uv_hub_info_version(void) } EXPORT_SYMBOL(uv_hub_info_version); +/* Default UV memory block size is 2GB */ +static unsigned long mem_block_size = (2UL << 30); + +/* Kernel parameter to specify UV mem block size */ +static int parse_mem_block_size(char *ptr) +{ + unsigned long size = memparse(ptr, NULL); + + /* Size will be rounded down by set_block_size() below */ + mem_block_size = size; + return 0; +} +early_param("uv_memblksize", parse_mem_block_size); + +static __init int adj_blksize(u32 lgre) +{ + unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT; + unsigned long size; + + for (size = mem_block_size; size > MIN_MEMORY_BLOCK_SIZE; size >>= 1) + if (IS_ALIGNED(base, size)) + break; + + if (size >= mem_block_size) + return 0; + + mem_block_size = size; + return 1; +} + +static __init void set_block_size(void) +{ + unsigned int order = ffs(mem_block_size); + + if (order) { + /* adjust for ffs return of 1..64 */ + set_memory_block_size_order(order - 1); + pr_info("UV: mem_block_size set to 0x%lx\n", mem_block_size); + } else { + /* bad or zero value, default to 1UL << 31 (2GB) */ + pr_err("UV: mem_block_size error with 0x%lx\n", mem_block_size); + set_memory_block_size_order(31); + } +} + /* Build GAM range lookup table: */ static __init void build_uv_gr_table(void) { @@ -1180,23 +1226,30 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) << UV_GAM_RANGE_SHFT); int order = 0; char suffix[] = " KMGTPE"; + int flag = ' '; while (size > 9999 && order < sizeof(suffix)) { size /= 1024; order++; } + /* adjust max block size to current range start */ + if (gre->type == 1 || gre->type == 2) + if (adj_blksize(lgre)) + flag = '*'; + if (!index) { pr_info("UV: GAM Range Table...\n"); - pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN"); + pr_info("UV: # %20s %14s %6s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN"); } - pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n", + pr_info("UV: %2d: 0x%014lx-0x%014lx%c %5lu%c %3d %04x %02x %02x\n", index++, (unsigned long)lgre << UV_GAM_RANGE_SHFT, (unsigned long)gre->limit << UV_GAM_RANGE_SHFT, - size, suffix[order], + flag, size, suffix[order], gre->type, gre->nasid, gre->sockid, gre->pnode); + /* update to next range start */ lgre = gre->limit; if (sock_min > gre->sockid) sock_min = gre->sockid; @@ -1427,6 +1480,7 @@ static void __init uv_system_init_hub(void) build_socket_tables(); build_uv_gr_table(); + set_block_size(); uv_init_hub_info(&hub_info); uv_possible_blades = num_possible_nodes(); if (!_node_to_pnode) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cd0fda1fff6d..404df26b7de8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -27,6 +27,7 @@ #include <asm/pgtable.h> #include <asm/set_memory.h> #include <asm/intel-family.h> +#include <asm/hypervisor.h> static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); @@ -664,6 +665,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr if (boot_cpu_has(X86_FEATURE_PTI)) return sprintf(buf, "Mitigation: PTI\n"); + if (hypervisor_is_type(X86_HYPER_XEN_PV)) + return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n"); + break; case X86_BUG_SPECTRE_V1: diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 38354c66df81..0c5fcbd998cf 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -671,7 +671,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) num_sharing_cache = ((eax >> 14) & 0xfff) + 1; if (num_sharing_cache) { - int bits = get_count_order(num_sharing_cache) - 1; + int bits = get_count_order(num_sharing_cache); per_cpu(cpu_llc_id, cpu) = c->apicid >> bits; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0df7151cfef4..eb4cb3efd20e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,3 +1,6 @@ +/* cpu_feature_enabled() cannot be used this early */ +#define USE_EARLY_PGTABLE_L5 + #include <linux/bootmem.h> #include <linux/linkage.h> #include <linux/bitops.h> diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 5bbd06f38ff6..f34d89c01edc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -160,6 +160,11 @@ static struct severity { SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), USER ), + MCESEV( + PANIC, "Data load in unrecoverable area of kernel", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), + KERNEL + ), #endif MCESEV( PANIC, "Action required: unknown MCACOD", diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e4cf6ff1c2e1..c102ad51025e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -772,23 +772,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll); static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, struct pt_regs *regs) { - int i, ret = 0; char *tmp; + int i; for (i = 0; i < mca_cfg.banks; i++) { m->status = mce_rdmsrl(msr_ops.status(i)); - if (m->status & MCI_STATUS_VAL) { - __set_bit(i, validp); - if (quirk_no_way_out) - quirk_no_way_out(i, m, regs); - } + if (!(m->status & MCI_STATUS_VAL)) + continue; + + __set_bit(i, validp); + if (quirk_no_way_out) + quirk_no_way_out(i, m, regs); if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { + mce_read_aux(m, i); *msg = tmp; - ret = 1; + return 1; } } - return ret; + return 0; } /* @@ -1205,13 +1207,18 @@ void do_machine_check(struct pt_regs *regs, long error_code) lmce = m.mcgstatus & MCG_STATUS_LMCES; /* + * Local machine check may already know that we have to panic. + * Broadcast machine check begins rendezvous in mce_start() * Go through all banks in exclusion of the other CPUs. This way we * don't report duplicated events on shared banks because the first one - * to see it will clear it. If this is a Local MCE, then no need to - * perform rendezvous. + * to see it will clear it. */ - if (!lmce) + if (lmce) { + if (no_way_out) + mce_panic("Fatal local machine check", &m, msg); + } else { order = mce_start(&no_way_out); + } for (i = 0; i < cfg->banks; i++) { __clear_bit(i, toclear); @@ -1287,12 +1294,17 @@ void do_machine_check(struct pt_regs *regs, long error_code) no_way_out = worst >= MCE_PANIC_SEVERITY; } else { /* - * Local MCE skipped calling mce_reign() - * If we found a fatal error, we need to panic here. + * If there was a fatal machine check we should have + * already called mce_panic earlier in this function. + * Since we re-read the banks, we might have found + * something new. Check again to see if we found a + * fatal error. We call "mce_severity()" again to + * make sure we have the right "msg". */ - if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) - mce_panic("Machine check from unknown source", - NULL, NULL); + if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) { + mce_severity(&m, cfg->tolerant, &msg, true); + mce_panic("Local fatal machine check!", &m, msg); + } } /* diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 1c2cfa0644aa..97ccf4c3b45b 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -190,8 +190,11 @@ static void save_microcode_patch(void *data, unsigned int size) p = memdup_patch(data, size); if (!p) pr_err("Error allocating buffer %p\n", data); - else + else { list_replace(&iter->plist, &p->plist); + kfree(iter->data); + kfree(iter); + } } } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index a21d6ace648e..8047379e575a 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -44,7 +44,7 @@ static unsigned int __initdata next_early_pgt; pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); #ifdef CONFIG_X86_5LEVEL -unsigned int __pgtable_l5_enabled __initdata; +unsigned int __pgtable_l5_enabled __ro_after_init; unsigned int pgdir_shift __ro_after_init = 39; EXPORT_SYMBOL(pgdir_shift); unsigned int ptrs_per_p4d __ro_after_init = 1; diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 697a4ce04308..736348ead421 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -645,12 +645,19 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev) /* Skylake */ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev) { - u32 capid0; + u32 capid0, capid5; pci_read_config_dword(pdev, 0x84, &capid0); + pci_read_config_dword(pdev, 0x98, &capid5); - if ((capid0 & 0xc0) == 0xc0) + /* + * CAPID0{7:6} indicate whether this is an advanced RAS SKU + * CAPID5{8:5} indicate that various NVDIMM usage modes are + * enabled, so memory machine check recovery is also enabled. + */ + if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0)) static_branch_inc(&mcsafe_key); + } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 445ca11ff863..92a3b312a53c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -692,7 +692,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) * Increment event counter and perform fixup for the pre-signal * frame. */ - rseq_signal_deliver(regs); + rseq_signal_deliver(ksig, regs); /* Set up the stack frame */ if (is_ia32_frame(ksig)) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a535dd64de63..e6db475164ed 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -835,16 +835,18 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : "simd exception"; - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) - return; cond_local_irq_enable(regs); if (!user_mode(regs)) { - if (!fixup_exception(regs, trapnr)) { - task->thread.error_code = error_code; - task->thread.trap_nr = trapnr; + if (fixup_exception(regs, trapnr)) + return; + + task->thread.error_code = error_code; + task->thread.trap_nr = trapnr; + + if (notify_die(DIE_TRAP, str, regs, error_code, + trapnr, SIGFPE) != NOTIFY_STOP) die(str, regs, error_code); - } return; } diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 58d8d800875d..deb576b23b7c 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -293,7 +293,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64); /* has the side-effect of processing the entire instruction */ insn_get_length(insn); - if (WARN_ON_ONCE(!insn_complete(insn))) + if (!insn_complete(insn)) return -ENOEXEC; if (is_prefix_bad(insn)) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 559a12b6184d..1689f433f3a0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1705,6 +1705,17 @@ static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu) MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; } +static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu) +{ + return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS; +} + +static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu) +{ + return to_vmx(vcpu)->nested.msrs.procbased_ctls_high & + CPU_BASED_MONITOR_TRAP_FLAG; +} + static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) { return vmcs12->cpu_based_vm_exec_control & bit; @@ -11620,6 +11631,62 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) !nested_cr3_valid(vcpu, vmcs12->host_cr3)) return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; + /* + * From the Intel SDM, volume 3: + * Fields relevant to VM-entry event injection must be set properly. + * These fields are the VM-entry interruption-information field, the + * VM-entry exception error code, and the VM-entry instruction length. + */ + if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) { + u32 intr_info = vmcs12->vm_entry_intr_info_field; + u8 vector = intr_info & INTR_INFO_VECTOR_MASK; + u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK; + bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK; + bool should_have_error_code; + bool urg = nested_cpu_has2(vmcs12, + SECONDARY_EXEC_UNRESTRICTED_GUEST); + bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE; + + /* VM-entry interruption-info field: interruption type */ + if (intr_type == INTR_TYPE_RESERVED || + (intr_type == INTR_TYPE_OTHER_EVENT && + !nested_cpu_supports_monitor_trap_flag(vcpu))) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + /* VM-entry interruption-info field: vector */ + if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) || + (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) || + (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + /* VM-entry interruption-info field: deliver error code */ + should_have_error_code = + intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode && + x86_exception_has_error_code(vector); + if (has_error_code != should_have_error_code) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + /* VM-entry exception error code */ + if (has_error_code && + vmcs12->vm_entry_exception_error_code & GENMASK(31, 15)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + /* VM-entry interruption-info field: reserved bits */ + if (intr_info & INTR_INFO_RESVD_BITS_MASK) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + /* VM-entry instruction length */ + switch (intr_type) { + case INTR_TYPE_SOFT_EXCEPTION: + case INTR_TYPE_SOFT_INTR: + case INTR_TYPE_PRIV_SW_EXCEPTION: + if ((vmcs12->vm_entry_instruction_len > 15) || + (vmcs12->vm_entry_instruction_len == 0 && + !nested_cpu_has_zero_length_injection(vcpu))) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + } + } + return 0; } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 331993c49dae..257f27620bc2 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -110,6 +110,15 @@ static inline bool is_la57_mode(struct kvm_vcpu *vcpu) #endif } +static inline bool x86_exception_has_error_code(unsigned int vector) +{ + static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) | + BIT(NP_VECTOR) | BIT(SS_VECTOR) | BIT(GP_VECTOR) | + BIT(PF_VECTOR) | BIT(AC_VECTOR); + + return (1U << vector) & exception_has_error_code; +} + static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) { return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 045f492d5f68..a688617c727e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1350,16 +1350,28 @@ int kern_addr_valid(unsigned long addr) /* Amount of ram needed to start using large blocks */ #define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30) +/* Adjustable memory block size */ +static unsigned long set_memory_block_size; +int __init set_memory_block_size_order(unsigned int order) +{ + unsigned long size = 1UL << order; + + if (size > MEM_SIZE_FOR_LARGE_BLOCK || size < MIN_MEMORY_BLOCK_SIZE) + return -EINVAL; + + set_memory_block_size = size; + return 0; +} + static unsigned long probe_memory_block_size(void) { unsigned long boot_mem_end = max_pfn << PAGE_SHIFT; unsigned long bz; - /* If this is UV system, always set 2G block size */ - if (is_uv_system()) { - bz = MAX_BLOCK_SIZE; + /* If memory block size has been set, then use it */ + bz = set_memory_block_size; + if (bz) goto done; - } /* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */ if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c9081c6671f0..3b5318505c69 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -65,6 +65,13 @@ __read_mostly int xen_have_vector_callback; EXPORT_SYMBOL_GPL(xen_have_vector_callback); /* + * NB: needs to live in .data because it's used by xen_prepare_pvh which runs + * before clearing the bss. + */ +uint32_t xen_start_flags __attribute__((section(".data"))) = 0; +EXPORT_SYMBOL(xen_start_flags); + +/* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. */ diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 357969a3697c..8d4e2e1ae60b 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1203,6 +1203,7 @@ asmlinkage __visible void __init xen_start_kernel(void) return; xen_domain_type = XEN_PV_DOMAIN; + xen_start_flags = xen_start_info->flags; xen_setup_features(); diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index aa1c6a6831a9..c85d1a88f476 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -97,6 +97,7 @@ void __init xen_prepare_pvh(void) } xen_pvh = 1; + xen_start_flags = pvh_start_info.flags; msr = cpuid_ebx(xen_cpuid_base() + 2); pfn = __pa(hypercall_page); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 2e20ae2fa2d6..e3b18ad49889 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -32,6 +32,7 @@ #include <xen/interface/vcpu.h> #include <xen/interface/xenpmu.h> +#include <asm/spec-ctrl.h> #include <asm/xen/interface.h> #include <asm/xen/hypercall.h> @@ -70,6 +71,8 @@ static void cpu_bringup(void) cpu_data(cpu).x86_max_cores = 1; set_cpu_sibling_map(cpu); + speculative_store_bypass_ht_init(); + xen_setup_cpu_clockevents(); notify_cpu_starting(cpu); @@ -250,6 +253,8 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus) } set_cpu_sibling_map(0); + speculative_store_bypass_ht_init(); + xen_pmu_init(0); if (xen_smp_intr_init(0) || xen_smp_intr_init_pv(0)) |