diff options
Diffstat (limited to 'arch/x86')
43 files changed, 349 insertions, 251 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 04b8810a5489..5e28e2be3a41 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -138,6 +138,7 @@ config X86 select HAVE_ACPI_APEI_NMI if ACPI select ACPI_LEGACY_TABLES_LOOKUP if ACPI select X86_FEATURE_NAMES if PROC_FS + select SRCU config INSTRUCTION_DECODER def_bool y @@ -861,7 +862,7 @@ config UP_LATE_INIT config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI + depends on X86_32 && !SMP && !X86_32_NON_STANDARD ---help--- A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -872,6 +873,10 @@ config X86_UP_APIC performance counters), and the NMI watchdog which detects hard lockups. +config X86_UP_APIC_MSI + def_bool y + select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI + config X86_UP_IOAPIC bool "IO-APIC support on uniprocessors" depends on X86_UP_APIC diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index d999398928bc..ad754b4411f7 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -90,7 +90,7 @@ suffix-$(CONFIG_KERNEL_LZO) := lzo suffix-$(CONFIG_KERNEL_LZ4) := lz4 RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \ - perl $(srctree)/arch/x86/tools/calc_run_size.pl) + $(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh) quiet_cmd_mkpiggy = MKPIGGY $@ cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false ) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index dcc1c536cc21..a950864a64da 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -373,6 +373,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, unsigned long output_len, unsigned long run_size) { + unsigned char *output_orig = output; + real_mode = rmode; sanitize_boot_params(real_mode); @@ -421,7 +423,12 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, debug_putstr("\nDecompressing Linux... "); decompress(input_data, input_len, NULL, NULL, output, NULL, error); parse_elf(output); - handle_relocations(output, output_len); + /* + * 32-bit always performs relocations. 64-bit relocations are only + * needed if kASLR has chosen a different load address. + */ + if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig) + handle_relocations(output, output_len); debug_putstr("done.\nBooting the kernel.\n"); return output; } diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index a225a5ca1037..fd9f6b035b16 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -931,4 +931,4 @@ module_exit(sha1_mb_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated"); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 0ab4f9fd2687..3a45668f6dc3 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -50,6 +50,7 @@ void acpi_pic_sci_set_trigger(unsigned int, u16); extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi, int trigger, int polarity); +extern void (*__acpi_unregister_gsi)(u32 gsi); static inline void disable_acpi(void) { diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index aede2c347bde..90a54851aedc 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -174,6 +174,7 @@ #define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ /* @@ -388,6 +389,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) #define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT) +#define cpu_has_bpext boot_cpu_has(X86_FEATURE_BPEXT) #if __GNUC__ >= 4 extern void warn_pre_alternatives(void); diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 61fd18b83b6c..12cb66f6d3a5 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -114,5 +114,10 @@ static inline void debug_stack_usage_inc(void) { } static inline void debug_stack_usage_dec(void) { } #endif /* X86_64 */ +#ifdef CONFIG_CPU_SUP_AMD +extern void set_dr_addr_mask(unsigned long mask, int dr); +#else +static inline void set_dr_addr_mask(unsigned long mask, int dr) { } +#endif #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 50d033a8947d..a94b82e8f156 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -251,7 +251,8 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; } -#define _LDT_empty(info) \ +/* This intentionally ignores lm, since 32-bit apps don't have that field. */ +#define LDT_empty(info) \ ((info)->base_addr == 0 && \ (info)->limit == 0 && \ (info)->contents == 0 && \ @@ -261,11 +262,18 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) (info)->seg_not_present == 1 && \ (info)->useable == 0) -#ifdef CONFIG_X86_64 -#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) -#else -#define LDT_empty(info) (_LDT_empty(info)) -#endif +/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */ +static inline bool LDT_zero(const struct user_desc *info) +{ + return (info->base_addr == 0 && + info->limit == 0 && + info->contents == 0 && + info->read_exec_only == 0 && + info->seg_32bit == 0 && + info->limit_in_pages == 0 && + info->seg_not_present == 0 && + info->useable == 0); +} static inline void clear_LDT(void) { diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index ef1c4d2d41ec..6c98be864a75 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -12,6 +12,7 @@ */ struct arch_hw_breakpoint { unsigned long address; + unsigned long mask; u8 len; u8 type; }; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 40269a2bf6f9..4b75d591eb5e 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -130,7 +130,25 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long start, unsigned long end) { - mpx_notify_unmap(mm, vma, start, end); + /* + * mpx_notify_unmap() goes and reads a rarely-hot + * cacheline in the mm_struct. That can be expensive + * enough to be seen in profiles. + * + * The mpx_notify_unmap() call and its contents have been + * observed to affect munmap() performance on hardware + * where MPX is not present. + * + * The unlikely() optimizes for the fast case: no MPX + * in the CPU, or no MPX use in the process. Even if + * we get this wrong (in the unlikely event that MPX + * is widely enabled on some system) the overhead of + * MPX itself (reading bounds tables) is expected to + * overwhelm the overhead of getting this unlikely() + * consistently wrong. + */ + if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) + mpx_notify_unmap(mm, vma, start, end); } #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index c8aa65d56027..d979e5abae55 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -251,6 +251,10 @@ /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 +#define MSR_F16H_DR1_ADDR_MASK 0xc0011019 +#define MSR_F16H_DR2_ADDR_MASK 0xc001101a +#define MSR_F16H_DR3_ADDR_MASK 0xc001101b +#define MSR_F16H_DR0_ADDR_MASK 0xc0011027 /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index add9b3a4ba13..a18fff361c7f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -611,20 +611,20 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) { - int irq; - - if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { - *irqp = gsi; - } else { - mutex_lock(&acpi_ioapic_lock); - irq = mp_map_gsi_to_irq(gsi, - IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); - mutex_unlock(&acpi_ioapic_lock); - if (irq < 0) - return -1; - *irqp = irq; + int rc, irq, trigger, polarity; + + rc = acpi_get_override_irq(gsi, &trigger, &polarity); + if (rc == 0) { + trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; + polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; + irq = acpi_register_gsi(NULL, gsi, trigger, polarity); + if (irq >= 0) { + *irqp = irq; + return 0; + } } - return 0; + + return -1; } EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 15c5df92f74e..a220239cea65 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -869,3 +869,22 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) return false; } + +void set_dr_addr_mask(unsigned long mask, int dr) +{ + if (!cpu_has_bpext) + return; + + switch (dr) { + case 0: + wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0); + break; + case 1: + case 2: + case 3: + wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0); + break; + default: + break; + } +} diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 15c29096136b..36a83617eb21 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -552,7 +552,7 @@ static int __init microcode_init(void) int error; if (paravirt_enabled() || dis_ucode_ldr) - return 0; + return -EINVAL; if (c->x86_vendor == X86_VENDOR_INTEL) microcode_ops = init_intel_microcode(); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index a450373e8e91..939155ffdece 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -107,6 +107,7 @@ static struct clocksource hyperv_cs = { .rating = 400, /* use this when running on Hyperv*/ .read = read_hv_clock, .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; static void __init ms_hyperv_init_platform(void) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 944bf019b74f..498b6d967138 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2431,6 +2431,7 @@ __init int intel_pmu_init(void) break; case 55: /* 22nm Atom "Silvermont" */ + case 76: /* 14nm Atom "Airmont" */ case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 3c895d480cd7..073983398364 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -568,8 +568,8 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { }; struct event_constraint intel_slm_pebs_event_constraints[] = { - /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */ - INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf), + /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ + INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1), /* Allow all events as PEBS with no flags */ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), EVENT_CONSTRAINT_END diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 673f930c700f..c4bb8b8e5017 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -103,6 +103,13 @@ static struct kobj_attribute format_attr_##_var = \ #define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */ +#define RAPL_EVENT_ATTR_STR(_name, v, str) \ +static struct perf_pmu_events_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, rapl_sysfs_show, NULL), \ + .id = 0, \ + .event_str = str, \ +}; + struct rapl_pmu { spinlock_t lock; int hw_unit; /* 1/2^hw_unit Joule */ @@ -135,7 +142,7 @@ static inline u64 rapl_scale(u64 v) * or use ldexp(count, -32). * Watts = Joules/Time delta */ - return v << (32 - __this_cpu_read(rapl_pmu->hw_unit)); + return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit); } static u64 rapl_event_update(struct perf_event *event) @@ -379,23 +386,36 @@ static struct attribute_group rapl_pmu_attr_group = { .attrs = rapl_pmu_attrs, }; -EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); -EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); -EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); -EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04"); +static ssize_t rapl_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr = \ + container_of(attr, struct perf_pmu_events_attr, attr); + + if (pmu_attr->event_str) + return sprintf(page, "%s", pmu_attr->event_str); + + return 0; +} + +RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); +RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); +RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); +RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04"); -EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); -EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); -EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); -EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules"); +RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); +RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); +RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); +RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules"); /* * we compute in 0.23 nJ increments regardless of MSR */ -EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10"); -EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10"); -EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); -EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); +RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10"); +RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10"); +RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); +RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); static struct attribute *rapl_events_srv_attr[] = { EVENT_PTR(rapl_cores), diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 10b8d3eaaf15..c635b8b49e93 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -840,7 +840,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id box->phys_id = phys_id; box->pci_dev = pdev; box->pmu = pmu; - uncore_box_init(box); pci_set_drvdata(pdev, box); raw_spin_lock(&uncore_box_lock); @@ -1004,10 +1003,8 @@ static int uncore_cpu_starting(int cpu) pmu = &type->pmus[j]; box = *per_cpu_ptr(pmu->box, cpu); /* called by uncore_cpu_init? */ - if (box && box->phys_id >= 0) { - uncore_box_init(box); + if (box && box->phys_id >= 0) continue; - } for_each_online_cpu(k) { exist = *per_cpu_ptr(pmu->box, k); @@ -1023,10 +1020,8 @@ static int uncore_cpu_starting(int cpu) } } - if (box) { + if (box) box->phys_id = phys_id; - uncore_box_init(box); - } } } return 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 863d9b02563e..6c8c1e7e69d8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -257,6 +257,14 @@ static inline int uncore_num_counters(struct intel_uncore_box *box) return box->pmu->type->num_counters; } +static inline void uncore_box_init(struct intel_uncore_box *box) +{ + if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { + if (box->pmu->type->ops->init_box) + box->pmu->type->ops->init_box(box); + } +} + static inline void uncore_disable_box(struct intel_uncore_box *box) { if (box->pmu->type->ops->disable_box) @@ -265,6 +273,8 @@ static inline void uncore_disable_box(struct intel_uncore_box *box) static inline void uncore_enable_box(struct intel_uncore_box *box) { + uncore_box_init(box); + if (box->pmu->type->ops->enable_box) box->pmu->type->ops->enable_box(box); } @@ -287,14 +297,6 @@ static inline u64 uncore_read_counter(struct intel_uncore_box *box, return box->pmu->type->ops->read_counter(box, event); } -static inline void uncore_box_init(struct intel_uncore_box *box) -{ - if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { - if (box->pmu->type->ops->init_box) - box->pmu->type->ops->init_box(box); - } -} - static inline bool uncore_box_is_fake(struct intel_uncore_box *box) { return (box->phys_id < 0); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 2142376dc8c6..8b7b0a51e742 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -674,7 +674,7 @@ static inline void *alloc_tramp(unsigned long size) } static inline void tramp_free(void *tramp) { - module_free(NULL, tramp); + module_memfree(tramp); } #else /* Trampolines can only be created if modules are supported */ diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 3d5fb509bdeb..7114ba220fd4 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -126,6 +126,8 @@ int arch_install_hw_breakpoint(struct perf_event *bp) *dr7 |= encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); + if (info->mask) + set_dr_addr_mask(info->mask, i); return 0; } @@ -161,29 +163,8 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) *dr7 &= ~__encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); -} - -static int get_hbp_len(u8 hbp_len) -{ - unsigned int len_in_bytes = 0; - - switch (hbp_len) { - case X86_BREAKPOINT_LEN_1: - len_in_bytes = 1; - break; - case X86_BREAKPOINT_LEN_2: - len_in_bytes = 2; - break; - case X86_BREAKPOINT_LEN_4: - len_in_bytes = 4; - break; -#ifdef CONFIG_X86_64 - case X86_BREAKPOINT_LEN_8: - len_in_bytes = 8; - break; -#endif - } - return len_in_bytes; + if (info->mask) + set_dr_addr_mask(0, i); } /* @@ -196,7 +177,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) struct arch_hw_breakpoint *info = counter_arch_bp(bp); va = info->address; - len = get_hbp_len(info->len); + len = bp->attr.bp_len; return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); } @@ -277,6 +258,8 @@ static int arch_build_bp_info(struct perf_event *bp) } /* Len */ + info->mask = 0; + switch (bp->attr.bp_len) { case HW_BREAKPOINT_LEN_1: info->len = X86_BREAKPOINT_LEN_1; @@ -293,11 +276,17 @@ static int arch_build_bp_info(struct perf_event *bp) break; #endif default: - return -EINVAL; + if (!is_power_of_2(bp->attr.bp_len)) + return -EINVAL; + if (!cpu_has_bpext) + return -EOPNOTSUPP; + info->mask = bp->attr.bp_len - 1; + info->len = X86_BREAKPOINT_LEN_1; } return 0; } + /* * Validate the arch-specific HW Breakpoint register settings */ @@ -312,11 +301,11 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) if (ret) return ret; - ret = -EINVAL; - switch (info->len) { case X86_BREAKPOINT_LEN_1: align = 0; + if (info->mask) + align = info->mask; break; case X86_BREAKPOINT_LEN_2: align = 1; @@ -330,7 +319,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) break; #endif default: - return ret; + WARN_ON_ONCE(1); } /* diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 6307a0f0cf17..705ef8d48e2d 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -127,7 +127,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_puts(p, " Machine check polls\n"); #endif #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) - seq_printf(p, "%*s: ", prec, "THR"); + seq_printf(p, "%*s: ", prec, "HYP"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count); seq_puts(p, " Hypervisor callback interrupts\n"); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index f7e3cd50ece0..98f654d466e5 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1020,6 +1020,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->flags &= ~X86_EFLAGS_IF; trace_hardirqs_off(); regs->ip = (unsigned long)(jp->entry); + + /* + * jprobes use jprobe_return() which skips the normal return + * path of the function, and this messes up the accounting of the + * function graph tracer to get messed up. + * + * Pause function graph tracing while performing the jprobe function. + */ + pause_graph_tracing(); return 1; } NOKPROBE_SYMBOL(setjmp_pre_handler); @@ -1048,24 +1057,25 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); u8 *addr = (u8 *) (regs->ip - 1); struct jprobe *jp = container_of(p, struct jprobe, kp); + void *saved_sp = kcb->jprobe_saved_sp; if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if (stack_addr(regs) != kcb->jprobe_saved_sp) { + if (stack_addr(regs) != saved_sp) { struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; printk(KERN_ERR "current sp %p does not match saved sp %p\n", - stack_addr(regs), kcb->jprobe_saved_sp); + stack_addr(regs), saved_sp); printk(KERN_ERR "Saved registers for jprobe %p\n", jp); show_regs(saved_regs); printk(KERN_ERR "Current registers\n"); show_regs(regs); BUG(); } + /* It's OK to start function graph tracing again */ + unpause_graph_tracing(); *regs = kcb->jprobe_saved_regs; - memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp), - kcb->jprobes_stack, - MIN_STACK_SIZE(kcb->jprobe_saved_sp)); + memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp)); preempt_enable_no_resched(); return 1; } diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 4e942f31b1a7..7fc5e843f247 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -29,7 +29,28 @@ static int get_free_idx(void) static bool tls_desc_okay(const struct user_desc *info) { - if (LDT_empty(info)) + /* + * For historical reasons (i.e. no one ever documented how any + * of the segmentation APIs work), user programs can and do + * assume that a struct user_desc that's all zeros except for + * entry_number means "no segment at all". This never actually + * worked. In fact, up to Linux 3.19, a struct user_desc like + * this would create a 16-bit read-write segment with base and + * limit both equal to zero. + * + * That was close enough to "no segment at all" until we + * hardened this function to disallow 16-bit TLS segments. Fix + * it up by interpreting these zeroed segments the way that they + * were almost certainly intended to be interpreted. + * + * The correct way to ask for "no segment at all" is to specify + * a user_desc that satisfies LDT_empty. To keep everything + * working, we accept both. + * + * Note that there's a similar kludge in modify_ldt -- look at + * the distinction between modes 1 and 0x11. + */ + if (LDT_empty(info) || LDT_zero(info)) return true; /* @@ -71,7 +92,7 @@ static void set_tls_desc(struct task_struct *p, int idx, cpu = get_cpu(); while (n-- > 0) { - if (LDT_empty(info)) + if (LDT_empty(info) || LDT_zero(info)) desc->a = desc->b = 0; else fill_ldt(desc, info); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index b7e50bba3bbb..505449700e0c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -617,7 +617,7 @@ static unsigned long quick_pit_calibrate(void) goto success; } } - pr_err("Fast TSC calibration failed\n"); + pr_info("Fast TSC calibration failed\n"); return 0; success: diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index f9d16ff56c6b..7dc7ba577ecd 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -40,6 +40,7 @@ config KVM select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_VFIO + select SRCU ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 169b09d76ddd..de12c1d379f1 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2348,7 +2348,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) * Not recognized on AMD in compat mode (but is recognized in legacy * mode). */ - if ((ctxt->mode == X86EMUL_MODE_PROT32) && (efer & EFER_LMA) + if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) && !vendor_intel(ctxt)) return emulate_ud(ctxt); @@ -2359,25 +2359,13 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) setup_syscalls_segments(ctxt, &cs, &ss); ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); - switch (ctxt->mode) { - case X86EMUL_MODE_PROT32: - if ((msr_data & 0xfffc) == 0x0) - return emulate_gp(ctxt, 0); - break; - case X86EMUL_MODE_PROT64: - if (msr_data == 0x0) - return emulate_gp(ctxt, 0); - break; - default: - break; - } + if ((msr_data & 0xfffc) == 0x0) + return emulate_gp(ctxt, 0); ctxt->eflags &= ~(EFLG_VM | EFLG_IF); - cs_sel = (u16)msr_data; - cs_sel &= ~SELECTOR_RPL_MASK; + cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK; ss_sel = cs_sel + 8; - ss_sel &= ~SELECTOR_RPL_MASK; - if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) { + if (efer & EFER_LMA) { cs.d = 0; cs.l = 1; } @@ -2386,10 +2374,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data); - ctxt->_eip = msr_data; + ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data; ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); - *reg_write(ctxt, VCPU_REGS_RSP) = msr_data; + *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data : + (u32)msr_data; return X86EMUL_CONTINUE; } @@ -3791,8 +3780,8 @@ static const struct opcode group5[] = { }; static const struct opcode group6[] = { - DI(Prot, sldt), - DI(Prot, str), + DI(Prot | DstMem, sldt), + DI(Prot | DstMem, str), II(Prot | Priv | SrcMem16, em_lldt, lldt), II(Prot | Priv | SrcMem16, em_ltr, ltr), N, N, N, N, diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4f0c0b954686..d52dcf0776ea 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -192,6 +192,9 @@ static void recalculate_apic_map(struct kvm *kvm) u16 cid, lid; u32 ldr, aid; + if (!kvm_apic_present(vcpu)) + continue; + aid = kvm_apic_id(apic); ldr = kvm_apic_get_reg(apic, APIC_LDR); cid = apic_cluster_id(new, ldr); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 38dcec403b46..e3ff27a5b634 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -898,6 +898,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| VM_FAULT_HWPOISON_LARGE)) do_sigbus(regs, error_code, address, fault); + else if (fault & VM_FAULT_SIGSEGV) + bad_area_nosemaphore(regs, error_code, address); else BUG(); } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 08a7d313538a..079c3b6a3ff1 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -43,7 +43,7 @@ uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { [_PAGE_CACHE_MODE_WT] = _PAGE_PCD, [_PAGE_CACHE_MODE_WP] = _PAGE_PCD, }; -EXPORT_SYMBOL_GPL(__cachemode2pte_tbl); +EXPORT_SYMBOL(__cachemode2pte_tbl); uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB, [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC, @@ -54,7 +54,7 @@ uint8_t __pte2cachemode_tbl[8] = { [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; -EXPORT_SYMBOL_GPL(__pte2cachemode_tbl); +EXPORT_SYMBOL(__pte2cachemode_tbl); static unsigned long __initdata pgt_buf_start; static unsigned long __initdata pgt_buf_end; diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 67ebf5751222..c439ec478216 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -349,6 +349,12 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk) return MPX_INVALID_BOUNDS_DIR; /* + * 32-bit binaries on 64-bit kernels are currently + * unsupported. + */ + if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32)) + return MPX_INVALID_BOUNDS_DIR; + /* * The bounds directory pointer is stored in a register * only accessible if we first do an xsave. */ diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index edf299c8ff6c..7ac68698406c 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -234,8 +234,13 @@ void pat_init(void) PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); /* Boot CPU check */ - if (!boot_pat_state) + if (!boot_pat_state) { rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); + if (!boot_pat_state) { + pat_disable("PAT read returns always zero, disabled."); + return; + } + } wrmsrl(MSR_IA32_CR_PAT, pat); diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 7b20bccf3648..2fb384724ebb 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -448,6 +448,22 @@ static const struct dmi_system_id pciprobe_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "ftServer"), }, }, + { + .callback = set_scan_all, + .ident = "Stratus/NEC ftServer", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Express5800/R32"), + }, + }, + { + .callback = set_scan_all, + .ident = "Stratus/NEC ftServer", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Express5800/R31"), + }, + }, {} }; diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 9b18ef315a55..349c0d32cc0b 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -216,7 +216,7 @@ static void pcibios_allocate_bridge_resources(struct pci_dev *dev) continue; if (r->parent) /* Already allocated */ continue; - if (!r->start || pci_claim_resource(dev, idx) < 0) { + if (!r->start || pci_claim_bridge_resource(dev, idx) < 0) { /* * Something is wrong with the region. * Invalidate the resource to prevent diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 44b9271580b5..852aa4c92da0 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -293,7 +293,6 @@ static void mrst_power_off_unused_dev(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0812, mrst_power_off_unused_dev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); /* diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index c489ef2c1a39..9098d880c476 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -458,6 +458,7 @@ int __init pci_xen_hvm_init(void) * just how GSIs get registered. */ __acpi_register_gsi = acpi_register_gsi_xen_hvm; + __acpi_unregister_gsi = NULL; #endif #ifdef CONFIG_PCI_MSI @@ -471,52 +472,6 @@ int __init pci_xen_hvm_init(void) } #ifdef CONFIG_XEN_DOM0 -static __init void xen_setup_acpi_sci(void) -{ - int rc; - int trigger, polarity; - int gsi = acpi_sci_override_gsi; - int irq = -1; - int gsi_override = -1; - - if (!gsi) - return; - - rc = acpi_get_override_irq(gsi, &trigger, &polarity); - if (rc) { - printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi" - " sci, rc=%d\n", rc); - return; - } - trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; - polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; - - printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d " - "polarity=%d\n", gsi, trigger, polarity); - - /* Before we bind the GSI to a Linux IRQ, check whether - * we need to override it with bus_irq (IRQ) value. Usually for - * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so: - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) - * but there are oddballs where the IRQ != GSI: - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) - * which ends up being: gsi_to_irq[9] == 20 - * (which is what acpi_gsi_to_irq ends up calling when starting the - * the ACPI interpreter and keels over since IRQ 9 has not been - * setup as we had setup IRQ 20 for it). - */ - if (acpi_gsi_to_irq(gsi, &irq) == 0) { - /* Use the provided value if it's valid. */ - if (irq >= 0) - gsi_override = irq; - } - - gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity); - printk(KERN_INFO "xen: acpi sci %d\n", gsi); - - return; -} - int __init pci_xen_initial_domain(void) { int irq; @@ -527,8 +482,8 @@ int __init pci_xen_initial_domain(void) x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs; pci_msi_ignore_mask = 1; #endif - xen_setup_acpi_sci(); __acpi_register_gsi = acpi_register_gsi_xen; + __acpi_unregister_gsi = NULL; /* Pre-allocate legacy irqs */ for (irq = 0; irq < nr_legacy_irqs(); irq++) { int trigger, polarity; diff --git a/arch/x86/tools/calc_run_size.pl b/arch/x86/tools/calc_run_size.pl deleted file mode 100644 index 23210baade2d..000000000000 --- a/arch/x86/tools/calc_run_size.pl +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/perl -# -# Calculate the amount of space needed to run the kernel, including room for -# the .bss and .brk sections. -# -# Usage: -# objdump -h a.out | perl calc_run_size.pl -use strict; - -my $mem_size = 0; -my $file_offset = 0; - -my $sections=" *[0-9]+ \.(?:bss|brk) +"; -while (<>) { - if (/^$sections([0-9a-f]+) +(?:[0-9a-f]+ +){2}([0-9a-f]+)/) { - my $size = hex($1); - my $offset = hex($2); - $mem_size += $size; - if ($file_offset == 0) { - $file_offset = $offset; - } elsif ($file_offset != $offset) { - # BFD linker shows the same file offset in ELF. - # Gold linker shows them as consecutive. - next if ($file_offset + $mem_size == $offset + $size); - - printf STDERR "file_offset: 0x%lx\n", $file_offset; - printf STDERR "mem_size: 0x%lx\n", $mem_size; - printf STDERR "offset: 0x%lx\n", $offset; - printf STDERR "size: 0x%lx\n", $size; - - die ".bss and .brk are non-contiguous\n"; - } - } -} - -if ($file_offset == 0) { - die "Never found .bss or .brk file offset\n"; -} -printf("%d\n", $mem_size + $file_offset); diff --git a/arch/x86/tools/calc_run_size.sh b/arch/x86/tools/calc_run_size.sh new file mode 100644 index 000000000000..1a4c17bb3910 --- /dev/null +++ b/arch/x86/tools/calc_run_size.sh @@ -0,0 +1,42 @@ +#!/bin/sh +# +# Calculate the amount of space needed to run the kernel, including room for +# the .bss and .brk sections. +# +# Usage: +# objdump -h a.out | sh calc_run_size.sh + +NUM='\([0-9a-fA-F]*[ \t]*\)' +OUT=$(sed -n 's/^[ \t0-9]*.b[sr][sk][ \t]*'"$NUM$NUM$NUM$NUM"'.*/\1\4/p') +if [ -z "$OUT" ] ; then + echo "Never found .bss or .brk file offset" >&2 + exit 1 +fi + +OUT=$(echo ${OUT# }) +sizeA=$(printf "%d" 0x${OUT%% *}) +OUT=${OUT#* } +offsetA=$(printf "%d" 0x${OUT%% *}) +OUT=${OUT#* } +sizeB=$(printf "%d" 0x${OUT%% *}) +OUT=${OUT#* } +offsetB=$(printf "%d" 0x${OUT%% *}) + +run_size=$(( $offsetA + $sizeA + $sizeB )) + +# BFD linker shows the same file offset in ELF. +if [ "$offsetA" -ne "$offsetB" ] ; then + # Gold linker shows them as consecutive. + endB=$(( $offsetB + $sizeB )) + if [ "$endB" != "$run_size" ] ; then + printf "sizeA: 0x%x\n" $sizeA >&2 + printf "offsetA: 0x%x\n" $offsetA >&2 + printf "sizeB: 0x%x\n" $sizeB >&2 + printf "offsetB: 0x%x\n" $offsetB >&2 + echo ".bss and .brk are non-contiguous" >&2 + exit 1 + fi +fi + +printf "%d\n" $run_size +exit 0 diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 6bf3a13e3e0f..78a881b7fc41 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -40,6 +40,7 @@ #include <xen/interface/physdev.h> #include <xen/interface/vcpu.h> #include <xen/interface/memory.h> +#include <xen/interface/nmi.h> #include <xen/interface/xen-mca.h> #include <xen/features.h> #include <xen/page.h> @@ -66,6 +67,7 @@ #include <asm/reboot.h> #include <asm/stackprotector.h> #include <asm/hypervisor.h> +#include <asm/mach_traps.h> #include <asm/mwait.h> #include <asm/pci_x86.h> #include <asm/pat.h> @@ -1351,6 +1353,21 @@ static const struct machine_ops xen_machine_ops __initconst = { .emergency_restart = xen_emergency_restart, }; +static unsigned char xen_get_nmi_reason(void) +{ + unsigned char reason = 0; + + /* Construct a value which looks like it came from port 0x61. */ + if (test_bit(_XEN_NMIREASON_io_error, + &HYPERVISOR_shared_info->arch.nmi_reason)) + reason |= NMI_REASON_IOCHK; + if (test_bit(_XEN_NMIREASON_pci_serr, + &HYPERVISOR_shared_info->arch.nmi_reason)) + reason |= NMI_REASON_SERR; + + return reason; +} + static void __init xen_boot_params_init_edd(void) { #if IS_ENABLED(CONFIG_EDD) @@ -1535,9 +1552,12 @@ asmlinkage __visible void __init xen_start_kernel(void) pv_info = xen_info; pv_init_ops = xen_init_ops; pv_apic_ops = xen_apic_ops; - if (!xen_pvh_domain()) + if (!xen_pvh_domain()) { pv_cpu_ops = xen_cpu_ops; + x86_platform.get_nmi_reason = xen_get_nmi_reason; + } + if (xen_feature(XENFEAT_auto_translated_physmap)) x86_init.resources.memory_setup = xen_auto_xlated_memory_setup; else diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index edbc7a63fd73..70fb5075c901 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -167,10 +167,13 @@ static void * __ref alloc_p2m_page(void) return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); } -/* Only to be called in case of a race for a page just allocated! */ -static void free_p2m_page(void *p) +static void __ref free_p2m_page(void *p) { - BUG_ON(!slab_is_available()); + if (unlikely(!slab_is_available())) { + free_bootmem((unsigned long)p, PAGE_SIZE); + return; + } + free_page((unsigned long)p); } @@ -375,7 +378,7 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m) p2m_missing_pte : p2m_identity_pte; for (i = 0; i < PMDS_PER_MID_PAGE; i++) { pmdp = populate_extra_pmd( - (unsigned long)(p2m + pfn + i * PTRS_PER_PTE)); + (unsigned long)(p2m + pfn) + i * PMD_SIZE); set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE)); } } @@ -436,10 +439,9 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine); * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! */ -static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) +static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) { pte_t *ptechk; - pte_t *pteret = ptep; pte_t *pte_newpg[PMDS_PER_MID_PAGE]; pmd_t *pmdp; unsigned int level; @@ -473,8 +475,6 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) if (ptechk == pte_pg) { set_pmd(pmdp, __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); - if (vaddr == (addr & ~(PMD_SIZE - 1))) - pteret = pte_offset_kernel(pmdp, addr); pte_newpg[i] = NULL; } @@ -488,7 +488,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) vaddr += PMD_SIZE; } - return pteret; + return lookup_address(addr, &level); } /* @@ -517,7 +517,7 @@ static bool alloc_p2m(unsigned long pfn) if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { /* PMD level is missing, allocate a new one */ - ptep = alloc_p2m_pmd(addr, ptep, pte_pg); + ptep = alloc_p2m_pmd(addr, pte_pg); if (!ptep) return false; } diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index dfd77dec8e2b..865e56cea7a0 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -140,7 +140,7 @@ static void __init xen_del_extra_mem(u64 start, u64 size) unsigned long __ref xen_chk_extra_mem(unsigned long pfn) { int i; - unsigned long addr = PFN_PHYS(pfn); + phys_addr_t addr = PFN_PHYS(pfn); for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { if (addr >= xen_extra_mem[i].start && @@ -160,6 +160,8 @@ void __init xen_inv_extra_mem(void) int i; for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + if (!xen_extra_mem[i].size) + continue; pfn_s = PFN_DOWN(xen_extra_mem[i].start); pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size); for (pfn = pfn_s; pfn < pfn_e; pfn++) @@ -229,15 +231,14 @@ static int __init xen_free_mfn(unsigned long mfn) * as a fallback if the remapping fails. */ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, - unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity, - unsigned long *released) + unsigned long end_pfn, unsigned long nr_pages, unsigned long *released) { - unsigned long len = 0; unsigned long pfn, end; int ret; WARN_ON(start_pfn > end_pfn); + /* Release pages first. */ end = min(end_pfn, nr_pages); for (pfn = start_pfn; pfn < end; pfn++) { unsigned long mfn = pfn_to_mfn(pfn); @@ -250,16 +251,14 @@ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); if (ret == 1) { + (*released)++; if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY)) break; - len++; } else break; } - /* Need to release pages first */ - *released += len; - *identity += set_phys_range_identity(start_pfn, end_pfn); + set_phys_range_identity(start_pfn, end_pfn); } /* @@ -287,7 +286,7 @@ static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn) } /* Update kernel mapping, but not for highmem. */ - if ((pfn << PAGE_SHIFT) >= __pa(high_memory)) + if (pfn >= PFN_UP(__pa(high_memory - 1))) return; if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT), @@ -318,7 +317,6 @@ static void __init xen_do_set_identity_and_remap_chunk( unsigned long ident_pfn_iter, remap_pfn_iter; unsigned long ident_end_pfn = start_pfn + size; unsigned long left = size; - unsigned long ident_cnt = 0; unsigned int i, chunk; WARN_ON(size == 0); @@ -347,8 +345,7 @@ static void __init xen_do_set_identity_and_remap_chunk( xen_remap_mfn = mfn; /* Set identity map */ - ident_cnt += set_phys_range_identity(ident_pfn_iter, - ident_pfn_iter + chunk); + set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk); left -= chunk; } @@ -371,7 +368,7 @@ static void __init xen_do_set_identity_and_remap_chunk( static unsigned long __init xen_set_identity_and_remap_chunk( const struct e820entry *list, size_t map_size, unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, - unsigned long *identity, unsigned long *released) + unsigned long *released, unsigned long *remapped) { unsigned long pfn; unsigned long i = 0; @@ -386,8 +383,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk( /* Do not remap pages beyond the current allocation */ if (cur_pfn >= nr_pages) { /* Identity map remaining pages */ - *identity += set_phys_range_identity(cur_pfn, - cur_pfn + size); + set_phys_range_identity(cur_pfn, cur_pfn + size); break; } if (cur_pfn + size > nr_pages) @@ -398,7 +394,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk( if (!remap_range_size) { pr_warning("Unable to find available pfn range, not remapping identity pages\n"); xen_set_identity_and_release_chunk(cur_pfn, - cur_pfn + left, nr_pages, identity, released); + cur_pfn + left, nr_pages, released); break; } /* Adjust size to fit in current e820 RAM region */ @@ -410,7 +406,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk( /* Update variables to reflect new mappings. */ i += size; remap_pfn += size; - *identity += size; + *remapped += size; } /* @@ -427,13 +423,13 @@ static unsigned long __init xen_set_identity_and_remap_chunk( static void __init xen_set_identity_and_remap( const struct e820entry *list, size_t map_size, unsigned long nr_pages, - unsigned long *released) + unsigned long *released, unsigned long *remapped) { phys_addr_t start = 0; - unsigned long identity = 0; unsigned long last_pfn = nr_pages; const struct e820entry *entry; unsigned long num_released = 0; + unsigned long num_remapped = 0; int i; /* @@ -460,14 +456,14 @@ static void __init xen_set_identity_and_remap( last_pfn = xen_set_identity_and_remap_chunk( list, map_size, start_pfn, end_pfn, nr_pages, last_pfn, - &identity, &num_released); + &num_released, &num_remapped); start = end; } } *released = num_released; + *remapped = num_remapped; - pr_info("Set %ld page(s) to 1-1 mapping\n", identity); pr_info("Released %ld page(s)\n", num_released); } @@ -586,6 +582,7 @@ char * __init xen_memory_setup(void) struct xen_memory_map memmap; unsigned long max_pages; unsigned long extra_pages = 0; + unsigned long remapped_pages; int i; int op; @@ -635,9 +632,10 @@ char * __init xen_memory_setup(void) * underlying RAM. */ xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, - &xen_released_pages); + &xen_released_pages, &remapped_pages); extra_pages += xen_released_pages; + extra_pages += remapped_pages; /* * Clamp the amount of extra memory to a EXTRA_MEM_RATIO diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index f473d268d387..69087341d9ae 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -391,7 +391,7 @@ static const struct clock_event_device *xen_clockevent = struct xen_clock_event_device { struct clock_event_device evt; - char *name; + char name[16]; }; static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 }; @@ -420,46 +420,38 @@ void xen_teardown_timer(int cpu) if (evt->irq >= 0) { unbind_from_irqhandler(evt->irq, NULL); evt->irq = -1; - kfree(per_cpu(xen_clock_events, cpu).name); - per_cpu(xen_clock_events, cpu).name = NULL; } } void xen_setup_timer(int cpu) { - char *name; - struct clock_event_device *evt; + struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu); + struct clock_event_device *evt = &xevt->evt; int irq; - evt = &per_cpu(xen_clock_events, cpu).evt; WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu); if (evt->irq >= 0) xen_teardown_timer(cpu); printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu); - name = kasprintf(GFP_KERNEL, "timer%d", cpu); - if (!name) - name = "<timer kasprintf failed>"; + snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu); irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| IRQF_FORCE_RESUME|IRQF_EARLY_RESUME, - name, NULL); + xevt->name, NULL); (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX); memcpy(evt, xen_clockevent, sizeof(*evt)); evt->cpumask = cpumask_of(cpu); evt->irq = irq; - per_cpu(xen_clock_events, cpu).name = name; } void xen_setup_cpu_clockevents(void) { - BUG_ON(preemptible()); - clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt)); } |