diff options
Diffstat (limited to 'arch/x86')
43 files changed, 476 insertions, 334 deletions
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index cc3bd583dce1..9e240fcba784 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -14,6 +14,7 @@ #include <linux/types.h> #include "ctype.h" +#include "string.h" int memcmp(const void *s1, const void *s2, size_t len) { diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h index 725e820602b1..113588ddb43f 100644 --- a/arch/x86/boot/string.h +++ b/arch/x86/boot/string.h @@ -18,4 +18,13 @@ int memcmp(const void *s1, const void *s2, size_t len); #define memset(d,c,l) __builtin_memset(d,c,l) #define memcmp __builtin_memcmp +extern int strcmp(const char *str1, const char *str2); +extern int strncmp(const char *cs, const char *ct, size_t count); +extern size_t strlen(const char *s); +extern char *strstr(const char *s1, const char *s2); +extern size_t strnlen(const char *s, size_t maxlen); +extern unsigned int atou(const char *s); +extern unsigned long long simple_strtoull(const char *cp, char **endp, + unsigned int base); + #endif /* BOOT_STRING_H */ diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 31c34ee131f3..6ef688a1ef3e 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1020,7 +1020,8 @@ struct { const char *basename; struct simd_skcipher_alg *simd; } aesni_simd_skciphers2[] = { -#if IS_ENABLED(CONFIG_CRYPTO_PCBC) +#if (defined(MODULE) && IS_ENABLED(CONFIG_CRYPTO_PCBC)) || \ + IS_BUILTIN(CONFIG_CRYPTO_PCBC) { .algname = "pcbc(aes)", .drvname = "pcbc-aes-aesni", diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 701d29f8e4d3..57f7ec35216e 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -255,23 +255,6 @@ ENTRY(__switch_to_asm) END(__switch_to_asm) /* - * The unwinder expects the last frame on the stack to always be at the same - * offset from the end of the page, which allows it to validate the stack. - * Calling schedule_tail() directly would break that convention because its an - * asmlinkage function so its argument has to be pushed on the stack. This - * wrapper creates a proper "end of stack" frame header before the call. - */ -ENTRY(schedule_tail_wrapper) - FRAME_BEGIN - - pushl %eax - call schedule_tail - popl %eax - - FRAME_END - ret -ENDPROC(schedule_tail_wrapper) -/* * A newly forked process directly context switches into this address. * * eax: prev task we switched from @@ -279,15 +262,24 @@ ENDPROC(schedule_tail_wrapper) * edi: kernel thread arg */ ENTRY(ret_from_fork) - call schedule_tail_wrapper + FRAME_BEGIN /* help unwinder find end of stack */ + + /* + * schedule_tail() is asmlinkage so we have to put its 'prev' argument + * on the stack. + */ + pushl %eax + call schedule_tail + popl %eax testl %ebx, %ebx jnz 1f /* kernel threads are uncommon */ 2: /* When we fork, we trace the syscall return in the child, too. */ - movl %esp, %eax + leal FRAME_OFFSET(%esp), %eax call syscall_return_slowpath + FRAME_END jmp restore_all /* kernel thread */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 5b219707c2f2..044d18ebc43c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -36,6 +36,7 @@ #include <asm/smap.h> #include <asm/pgtable_types.h> #include <asm/export.h> +#include <asm/frame.h> #include <linux/err.h> .code64 @@ -408,17 +409,19 @@ END(__switch_to_asm) * r12: kernel thread arg */ ENTRY(ret_from_fork) + FRAME_BEGIN /* help unwinder find end of stack */ movq %rax, %rdi - call schedule_tail /* rdi: 'prev' task parameter */ + call schedule_tail /* rdi: 'prev' task parameter */ - testq %rbx, %rbx /* from kernel_thread? */ - jnz 1f /* kernel threads are uncommon */ + testq %rbx, %rbx /* from kernel_thread? */ + jnz 1f /* kernel threads are uncommon */ 2: - movq %rsp, %rdi + leaq FRAME_OFFSET(%rsp),%rdi /* pt_regs pointer */ call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS + FRAME_END jmp restore_regs_and_iret 1: diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 05612a2529c8..496e60391fac 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -1010,7 +1010,7 @@ static __init int amd_ibs_init(void) * all online cpus. */ cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING, - "perf/x86/amd/ibs:STARTING", + "perf/x86/amd/ibs:starting", x86_pmu_amd_ibs_starting_cpu, x86_pmu_amd_ibs_dying_cpu); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 019c5887b698..1635c0c8df23 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -505,6 +505,10 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.precise_ip > precise) return -EOPNOTSUPP; + + /* There's no sense in having PEBS for non sampling events: */ + if (!is_sampling_event(event)) + return -EINVAL; } /* * check that PEBS LBR correction does not conflict with diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 86138267b68a..eb1484c86bb4 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3176,13 +3176,16 @@ static void intel_pmu_cpu_starting(int cpu) if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { for_each_cpu(i, topology_sibling_cpumask(cpu)) { + struct cpu_hw_events *sibling; struct intel_excl_cntrs *c; - c = per_cpu(cpu_hw_events, i).excl_cntrs; + sibling = &per_cpu(cpu_hw_events, i); + c = sibling->excl_cntrs; if (c && c->core_id == core_id) { cpuc->kfree_on_online[1] = cpuc->excl_cntrs; cpuc->excl_cntrs = c; - cpuc->excl_thread_id = 1; + if (!sibling->excl_thread_id) + cpuc->excl_thread_id = 1; break; } } @@ -3987,7 +3990,7 @@ __init int intel_pmu_init(void) x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; } - x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; + x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1; if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index fec8a461bdef..1076c9a77292 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -434,6 +434,7 @@ static struct pmu cstate_core_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, }; static struct pmu cstate_pkg_pmu = { @@ -447,6 +448,7 @@ static struct pmu cstate_pkg_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, }; static const struct cstate_model nhm_cstates __initconst = { diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index be202390bbd3..9dfeeeca0ea8 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) continue; /* log dropped samples number */ - if (error[bit]) + if (error[bit]) { perf_log_lost_samples(event, error[bit]); + if (perf_event_account_interrupt(event)) + x86_pmu_stop(event, 0); + } + if (counts[bit]) { __intel_pmu_pebs_event(event, iregs, base, top, bit, counts[bit]); diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index bd34124449b0..22ef4f72cf32 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -161,7 +161,13 @@ static u64 rapl_timer_ms; static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) { - return rapl_pmus->pmus[topology_logical_package_id(cpu)]; + unsigned int pkgid = topology_logical_package_id(cpu); + + /* + * The unsigned check also catches the '-1' return value for non + * existent mappings in the topology map. + */ + return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL; } static inline u64 rapl_read_counter(struct perf_event *event) @@ -402,6 +408,8 @@ static int rapl_pmu_event_init(struct perf_event *event) /* must be done before validate_group */ pmu = cpu_to_rapl_pmu(event->cpu); + if (!pmu) + return -EINVAL; event->cpu = pmu->cpu; event->pmu_private = pmu; event->hw.event_base = msr; @@ -585,6 +593,20 @@ static int rapl_cpu_online(unsigned int cpu) struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); int target; + if (!pmu) { + pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); + if (!pmu) + return -ENOMEM; + + raw_spin_lock_init(&pmu->lock); + INIT_LIST_HEAD(&pmu->active_list); + pmu->pmu = &rapl_pmus->pmu; + pmu->timer_interval = ms_to_ktime(rapl_timer_ms); + rapl_hrtimer_init(pmu); + + rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; + } + /* * Check if there is an online cpu in the package which collects rapl * events already. @@ -598,27 +620,6 @@ static int rapl_cpu_online(unsigned int cpu) return 0; } -static int rapl_cpu_prepare(unsigned int cpu) -{ - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); - - if (pmu) - return 0; - - pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); - if (!pmu) - return -ENOMEM; - - raw_spin_lock_init(&pmu->lock); - INIT_LIST_HEAD(&pmu->active_list); - pmu->pmu = &rapl_pmus->pmu; - pmu->timer_interval = ms_to_ktime(rapl_timer_ms); - pmu->cpu = -1; - rapl_hrtimer_init(pmu); - rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; - return 0; -} - static int rapl_check_hw_unit(bool apply_quirk) { u64 msr_rapl_power_unit_bits; @@ -697,6 +698,7 @@ static int __init init_rapl_pmus(void) rapl_pmus->pmu.start = rapl_pmu_event_start; rapl_pmus->pmu.stop = rapl_pmu_event_stop; rapl_pmus->pmu.read = rapl_pmu_event_read; + rapl_pmus->pmu.module = THIS_MODULE; return 0; } @@ -802,29 +804,21 @@ static int __init rapl_pmu_init(void) /* * Install callbacks. Core will call them for each online cpu. */ - - ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "perf/x86/rapl:prepare", - rapl_cpu_prepare, NULL); - if (ret) - goto out; - ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, "perf/x86/rapl:online", rapl_cpu_online, rapl_cpu_offline); if (ret) - goto out1; + goto out; ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); if (ret) - goto out2; + goto out1; rapl_advertise(); return 0; -out2: - cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out1: - cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP); + cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out: pr_warn("Initialization failed (%d), disabled\n", ret); cleanup_rapl_pmus(); @@ -835,7 +829,6 @@ module_init(rapl_pmu_init); static void __exit intel_rapl_exit(void) { cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP); perf_pmu_unregister(&rapl_pmus->pmu); cleanup_rapl_pmus(); } diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 97c246f84dea..1ab45976474d 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -100,7 +100,13 @@ ssize_t uncore_event_show(struct kobject *kobj, struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - return pmu->boxes[topology_logical_package_id(cpu)]; + unsigned int pkgid = topology_logical_package_id(cpu); + + /* + * The unsigned check also catches the '-1' return value for non + * existent mappings in the topology map. + */ + return pkgid < max_packages ? pmu->boxes[pkgid] : NULL; } u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) @@ -733,6 +739,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu) .start = uncore_pmu_event_start, .stop = uncore_pmu_event_stop, .read = uncore_pmu_event_read, + .module = THIS_MODULE, }; } else { pmu->pmu = *pmu->type->pmu; @@ -763,30 +770,6 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) pmu->registered = false; } -static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) -{ - struct intel_uncore_pmu *pmu = type->pmus; - struct intel_uncore_box *box; - int i, pkg; - - if (pmu) { - pkg = topology_physical_package_id(cpu); - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (box) - uncore_box_exit(box); - } - } -} - -static void uncore_exit_boxes(void *dummy) -{ - struct intel_uncore_type **types; - - for (types = uncore_msr_uncores; *types; types++) - __uncore_exit_boxes(*types++, smp_processor_id()); -} - static void uncore_free_boxes(struct intel_uncore_pmu *pmu) { int pkg; @@ -1057,86 +1040,6 @@ static void uncore_pci_exit(void) } } -static int uncore_cpu_dying(unsigned int cpu) -{ - struct intel_uncore_type *type, **types = uncore_msr_uncores; - struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box; - int i, pkg; - - pkg = topology_logical_package_id(cpu); - for (; *types; types++) { - type = *types; - pmu = type->pmus; - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (box && atomic_dec_return(&box->refcnt) == 0) - uncore_box_exit(box); - } - } - return 0; -} - -static int first_init; - -static int uncore_cpu_starting(unsigned int cpu) -{ - struct intel_uncore_type *type, **types = uncore_msr_uncores; - struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box; - int i, pkg, ncpus = 1; - - if (first_init) { - /* - * On init we get the number of online cpus in the package - * and set refcount for all of them. - */ - ncpus = cpumask_weight(topology_core_cpumask(cpu)); - } - - pkg = topology_logical_package_id(cpu); - for (; *types; types++) { - type = *types; - pmu = type->pmus; - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (!box) - continue; - /* The first cpu on a package activates the box */ - if (atomic_add_return(ncpus, &box->refcnt) == ncpus) - uncore_box_init(box); - } - } - - return 0; -} - -static int uncore_cpu_prepare(unsigned int cpu) -{ - struct intel_uncore_type *type, **types = uncore_msr_uncores; - struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box; - int i, pkg; - - pkg = topology_logical_package_id(cpu); - for (; *types; types++) { - type = *types; - pmu = type->pmus; - for (i = 0; i < type->num_boxes; i++, pmu++) { - if (pmu->boxes[pkg]) - continue; - /* First cpu of a package allocates the box */ - box = uncore_alloc_box(type, cpu_to_node(cpu)); - if (!box) - return -ENOMEM; - box->pmu = pmu; - box->pkgid = pkg; - pmu->boxes[pkg] = box; - } - } - return 0; -} - static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, int new_cpu) { @@ -1176,12 +1079,14 @@ static void uncore_change_context(struct intel_uncore_type **uncores, static int uncore_event_cpu_offline(unsigned int cpu) { - int target; + struct intel_uncore_type *type, **types = uncore_msr_uncores; + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + int i, pkg, target; /* Check if exiting cpu is used for collecting uncore events */ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) - return 0; - + goto unref; /* Find a new cpu to collect uncore events */ target = cpumask_any_but(topology_core_cpumask(cpu), cpu); @@ -1193,12 +1098,82 @@ static int uncore_event_cpu_offline(unsigned int cpu) uncore_change_context(uncore_msr_uncores, cpu, target); uncore_change_context(uncore_pci_uncores, cpu, target); + +unref: + /* Clear the references */ + pkg = topology_logical_package_id(cpu); + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (box && atomic_dec_return(&box->refcnt) == 0) + uncore_box_exit(box); + } + } return 0; } +static int allocate_boxes(struct intel_uncore_type **types, + unsigned int pkg, unsigned int cpu) +{ + struct intel_uncore_box *box, *tmp; + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + LIST_HEAD(allocated); + int i; + + /* Try to allocate all required boxes */ + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + if (pmu->boxes[pkg]) + continue; + box = uncore_alloc_box(type, cpu_to_node(cpu)); + if (!box) + goto cleanup; + box->pmu = pmu; + box->pkgid = pkg; + list_add(&box->active_list, &allocated); + } + } + /* Install them in the pmus */ + list_for_each_entry_safe(box, tmp, &allocated, active_list) { + list_del_init(&box->active_list); + box->pmu->boxes[pkg] = box; + } + return 0; + +cleanup: + list_for_each_entry_safe(box, tmp, &allocated, active_list) { + list_del_init(&box->active_list); + kfree(box); + } + return -ENOMEM; +} + static int uncore_event_cpu_online(unsigned int cpu) { - int target; + struct intel_uncore_type *type, **types = uncore_msr_uncores; + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + int i, ret, pkg, target; + + pkg = topology_logical_package_id(cpu); + ret = allocate_boxes(types, pkg, cpu); + if (ret) + return ret; + + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (!box && atomic_inc_return(&box->refcnt) == 1) + uncore_box_init(box); + } + } /* * Check if there is an online cpu in the package @@ -1388,38 +1363,16 @@ static int __init intel_uncore_init(void) if (cret && pret) return -ENODEV; - /* - * Install callbacks. Core will call them for each online cpu. - * - * The first online cpu of each package allocates and takes - * the refcounts for all other online cpus in that package. - * If msrs are not enabled no allocation is required and - * uncore_cpu_prepare() is not called for each online cpu. - */ - if (!cret) { - ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP, - "perf/x86/intel/uncore:prepare", - uncore_cpu_prepare, NULL); - if (ret) - goto err; - } else { - cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP, - "perf/x86/intel/uncore:prepare", - uncore_cpu_prepare, NULL); - } - first_init = 1; - cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING, - "perf/x86/uncore:starting", - uncore_cpu_starting, uncore_cpu_dying); - first_init = 0; - cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, - "perf/x86/uncore:online", - uncore_event_cpu_online, uncore_event_cpu_offline); + /* Install hotplug callbacks to setup the targets for each package */ + ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, + "perf/x86/intel/uncore:online", + uncore_event_cpu_online, + uncore_event_cpu_offline); + if (ret) + goto err; return 0; err: - /* Undo box->init_box() */ - on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1); uncore_types_exit(uncore_msr_uncores); uncore_pci_exit(); return ret; @@ -1428,9 +1381,7 @@ module_init(intel_uncore_init); static void __exit intel_uncore_exit(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING); - cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP); + cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); uncore_types_exit(uncore_msr_uncores); uncore_pci_exit(); } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index e6832be714bc..dae2fedc1601 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2686,7 +2686,7 @@ static struct intel_uncore_type *hswep_msr_uncores[] = { void hswep_uncore_cpu_init(void) { - int pkg = topology_phys_to_logical_pkg(0); + int pkg = boot_cpu_data.logical_proc_id; if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 34a46dc076d3..8167fdb67ae8 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -57,7 +57,7 @@ #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */ +#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 38711df3bcb5..2266f864b747 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -140,6 +140,7 @@ extern void __init load_ucode_bsp(void); extern void load_ucode_ap(void); void reload_early_microcode(void); extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); +extern bool initrd_gone; #else static inline int __init microcode_init(void) { return 0; }; static inline void __init load_ucode_bsp(void) { } diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 195becc6f780..e793fc9a9b20 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h @@ -52,6 +52,21 @@ struct extended_sigtable { #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) +static inline u32 intel_get_microcode_revision(void) +{ + u32 rev, dummy; + + native_wrmsrl(MSR_IA32_UCODE_REV, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + native_cpuid_eax(1); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); + + return rev; +} + #ifdef CONFIG_MICROCODE_INTEL extern void __init load_ucode_intel_bsp(void); extern void load_ucode_intel_ap(void); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index eaf100508c36..1be64da0384e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -219,6 +219,24 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, : "memory"); } +#define native_cpuid_reg(reg) \ +static inline unsigned int native_cpuid_##reg(unsigned int op) \ +{ \ + unsigned int eax = op, ebx, ecx = 0, edx; \ + \ + native_cpuid(&eax, &ebx, &ecx, &edx); \ + \ + return reg; \ +} + +/* + * Native CPUID functions returning a single datum. + */ +native_cpuid_reg(eax) +native_cpuid_reg(ebx) +native_cpuid_reg(ecx) +native_cpuid_reg(edx) + static inline void load_cr3(pgd_t *pgdir) { write_cr3(__pa(pgdir)); diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index a3269c897ec5..2e41c50ddf47 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -58,7 +58,7 @@ get_frame_pointer(struct task_struct *task, struct pt_regs *regs) if (task == current) return __builtin_frame_address(0); - return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp; + return &((struct inactive_task_frame *)task->thread.sp)->bp; } #else static inline unsigned long * diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 5cb436acd463..fcc5cd387fd1 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -36,7 +36,10 @@ static inline void prepare_switch_to(struct task_struct *prev, asmlinkage void ret_from_fork(void); -/* data that is pointed to by thread.sp */ +/* + * This is the structure pointed to by thread.sp for an inactive task. The + * order of the fields must match the code in __switch_to_asm(). + */ struct inactive_task_frame { #ifdef CONFIG_X86_64 unsigned long r15; @@ -48,6 +51,11 @@ struct inactive_task_frame { unsigned long di; #endif unsigned long bx; + + /* + * These two fields must be together. They form a stack frame header, + * needed by get_frame_pointer(). + */ unsigned long bp; unsigned long ret_addr; }; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 945e512a112a..52f352b063fd 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1875,6 +1875,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ack_level, .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -1886,6 +1887,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ir_ack_level, .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -2115,6 +2117,7 @@ static inline void __init check_timer(void) if (idx != -1 && irq_trigger(idx)) unmask_ioapic_irq(irq_get_chip_data(0)); } + irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); if (timer_irq_works()) { if (disable_timer_pin_1 > 0) @@ -2136,6 +2139,7 @@ static inline void __init check_timer(void) * legacy devices should be connected to IO APIC #0 */ replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2); + irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); legacy_pic->unmask(0); if (timer_irq_works()) { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 71cae73a5076..1d3167269a67 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -309,15 +309,8 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* get information required for multi-node processors */ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - u32 eax, ebx, ecx, edx; - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - node_id = ecx & 7; - - /* get compute unit information */ - smp_num_siblings = ((ebx >> 8) & 3) + 1; - c->x86_max_cores /= smp_num_siblings; - c->cpu_core_id = ebx & 0xff; + node_id = cpuid_ecx(0x8000001e) & 7; /* * We may have multiple LLCs if L3 caches exist, so check if we diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dc1697ca5191..9bab7a8a4293 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1221,7 +1221,7 @@ static __init int setup_disablecpuid(char *arg) { int bit; - if (get_option(&arg, &bit) && bit < NCAPINTS*32) + if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32) setup_clear_cpu_cap(bit); else return 0; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fcd484d2bb03..203f860d2ab3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -14,6 +14,7 @@ #include <asm/bugs.h> #include <asm/cpu.h> #include <asm/intel-family.h> +#include <asm/microcode_intel.h> #ifdef CONFIG_X86_64 #include <linux/topology.h> @@ -78,14 +79,8 @@ static void early_init_intel(struct cpuinfo_x86 *c) (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) { - unsigned lower_word; - - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* Required by the SDM */ - sync_core(); - rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); - } + if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) + c->microcode = intel_get_microcode_revision(); /* * Atom erratum AAE44/AAF40/AAG38/AAH41: diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 00ef43233e03..537c6647d84c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1373,20 +1373,15 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; -static void __restart_timer(struct timer_list *t, unsigned long interval) +static void __start_timer(struct timer_list *t, unsigned long interval) { unsigned long when = jiffies + interval; unsigned long flags; local_irq_save(flags); - if (timer_pending(t)) { - if (time_before(when, t->expires)) - mod_timer(t, when); - } else { - t->expires = round_jiffies(when); - add_timer_on(t, smp_processor_id()); - } + if (!timer_pending(t) || time_before(when, t->expires)) + mod_timer(t, round_jiffies(when)); local_irq_restore(flags); } @@ -1421,7 +1416,7 @@ static void mce_timer_fn(unsigned long data) done: __this_cpu_write(mce_next_interval, iv); - __restart_timer(t, iv); + __start_timer(t, iv); } /* @@ -1432,7 +1427,7 @@ void mce_timer_kick(unsigned long interval) struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned long iv = __this_cpu_read(mce_next_interval); - __restart_timer(t, interval); + __start_timer(t, interval); if (interval < iv) __this_cpu_write(mce_next_interval, interval); @@ -1779,17 +1774,15 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) } } -static void mce_start_timer(unsigned int cpu, struct timer_list *t) +static void mce_start_timer(struct timer_list *t) { unsigned long iv = check_interval * HZ; if (mca_cfg.ignore_ce || !iv) return; - per_cpu(mce_next_interval, cpu) = iv; - - t->expires = round_jiffies(jiffies + iv); - add_timer_on(t, cpu); + this_cpu_write(mce_next_interval, iv); + __start_timer(t, iv); } static void __mcheck_cpu_setup_timer(void) @@ -1806,7 +1799,7 @@ static void __mcheck_cpu_init_timer(void) unsigned int cpu = smp_processor_id(); setup_pinned_timer(t, mce_timer_fn, cpu); - mce_start_timer(cpu, t); + mce_start_timer(t); } /* Handle unconfigured int18 (should never happen) */ @@ -2566,7 +2559,7 @@ static int mce_cpu_dead(unsigned int cpu) static int mce_cpu_online(unsigned int cpu) { - struct timer_list *t = &per_cpu(mce_timer, cpu); + struct timer_list *t = this_cpu_ptr(&mce_timer); int ret; mce_device_create(cpu); @@ -2577,13 +2570,13 @@ static int mce_cpu_online(unsigned int cpu) return ret; } mce_reenable_cpu(); - mce_start_timer(cpu, t); + mce_start_timer(t); return 0; } static int mce_cpu_pre_down(unsigned int cpu) { - struct timer_list *t = &per_cpu(mce_timer, cpu); + struct timer_list *t = this_cpu_ptr(&mce_timer); mce_disable_cpu(); del_timer_sync(t); diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 6a31e2691f3a..079e81733a58 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -384,8 +384,9 @@ void load_ucode_amd_ap(unsigned int family) reget: if (!get_builtin_microcode(&cp, family)) { #ifdef CONFIG_BLK_DEV_INITRD - cp = find_cpio_data(ucode_path, (void *)initrd_start, - initrd_end - initrd_start, NULL); + if (!initrd_gone) + cp = find_cpio_data(ucode_path, (void *)initrd_start, + initrd_end - initrd_start, NULL); #endif if (!(cp.data && cp.size)) { /* diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 2af69d27da62..73102d932760 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -46,6 +46,8 @@ static struct microcode_ops *microcode_ops; static bool dis_ucode_ldr = true; +bool initrd_gone; + LIST_HEAD(microcode_cache); /* @@ -190,21 +192,24 @@ void load_ucode_ap(void) static int __init save_microcode_in_initrd(void) { struct cpuinfo_x86 *c = &boot_cpu_data; + int ret = -EINVAL; switch (c->x86_vendor) { case X86_VENDOR_INTEL: if (c->x86 >= 6) - return save_microcode_in_initrd_intel(); + ret = save_microcode_in_initrd_intel(); break; case X86_VENDOR_AMD: if (c->x86 >= 0x10) - return save_microcode_in_initrd_amd(c->x86); + ret = save_microcode_in_initrd_amd(c->x86); break; default: break; } - return -EINVAL; + initrd_gone = true; + + return ret; } struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa) @@ -247,9 +252,16 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa) * has the virtual address of the beginning of the initrd. It also * possibly relocates the ramdisk. In either case, initrd_start contains * the updated address so use that instead. + * + * initrd_gone is for the hotplug case where we've thrown out initrd + * already. */ - if (!use_pa && initrd_start) - start = initrd_start; + if (!use_pa) { + if (initrd_gone) + return (struct cpio_data){ NULL, 0, "" }; + if (initrd_start) + start = initrd_start; + } return find_cpio_data(path, (void *)start, size, NULL); #else /* !CONFIG_BLK_DEV_INITRD */ diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index b624b54912e1..8325d8a09ab0 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -41,7 +41,7 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; -/* Current microcode patch used in early patching */ +/* Current microcode patch used in early patching on the APs. */ struct microcode_intel *intel_ucode_patch; static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1, @@ -150,7 +150,7 @@ static struct ucode_patch *__alloc_microcode_buf(void *data, unsigned int size) { struct ucode_patch *p; - p = kzalloc(size, GFP_KERNEL); + p = kzalloc(sizeof(struct ucode_patch), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); @@ -368,26 +368,6 @@ next: return patch; } -static void cpuid_1(void) -{ - /* - * According to the Intel SDM, Volume 3, 9.11.7: - * - * CPUID returns a value in a model specific register in - * addition to its usual register return values. The - * semantics of CPUID cause it to deposit an update ID value - * in the 64-bit model-specific register at address 08BH - * (IA32_BIOS_SIGN_ID). If no update is present in the - * processor, the value in the MSR remains unmodified. - * - * Use native_cpuid -- this code runs very early and we don't - * want to mess with paravirt. - */ - unsigned int eax = 1, ebx, ecx = 0, edx; - - native_cpuid(&eax, &ebx, &ecx, &edx); -} - static int collect_cpu_info_early(struct ucode_cpu_info *uci) { unsigned int val[2]; @@ -410,15 +390,8 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci) native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); csig.pf = 1 << ((val[1] >> 18) & 7); } - native_wrmsrl(MSR_IA32_UCODE_REV, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - cpuid_1(); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - csig.rev = val[1]; + csig.rev = intel_get_microcode_revision(); uci->cpu_sig = csig; uci->valid = 1; @@ -602,7 +575,7 @@ static inline void print_ucode(struct ucode_cpu_info *uci) static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) { struct microcode_intel *mc; - unsigned int val[2]; + u32 rev; mc = uci->mc; if (!mc) @@ -610,21 +583,16 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) /* write microcode via MSR 0x79 */ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); - native_wrmsrl(MSR_IA32_UCODE_REV, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - cpuid_1(); - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (val[1] != mc->hdr.rev) + rev = intel_get_microcode_revision(); + if (rev != mc->hdr.rev) return -1; #ifdef CONFIG_X86_64 /* Flush global tlb. This is precaution. */ flush_tlb_early(); #endif - uci->cpu_sig.rev = val[1]; + uci->cpu_sig.rev = rev; if (early) print_ucode(uci); @@ -639,12 +607,6 @@ int __init save_microcode_in_initrd_intel(void) struct ucode_cpu_info uci; struct cpio_data cp; - /* - * AP loading didn't find any microcode patch, no need to save anything. - */ - if (!intel_ucode_patch || IS_ERR(intel_ucode_patch)) - return 0; - if (!load_builtin_intel_microcode(&cp)) cp = find_microcode_in_initrd(ucode_path, false); @@ -660,7 +622,6 @@ int __init save_microcode_in_initrd_intel(void) return 0; } - /* * @res_patch, output: a pointer to the patch we found. */ @@ -804,8 +765,8 @@ static int apply_microcode_intel(int cpu) struct microcode_intel *mc; struct ucode_cpu_info *uci; struct cpuinfo_x86 *c; - unsigned int val[2]; static int prev_rev; + u32 rev; /* We should bind the task to the CPU */ if (WARN_ON(raw_smp_processor_id() != cpu)) @@ -822,33 +783,28 @@ static int apply_microcode_intel(int cpu) /* write microcode via MSR 0x79 */ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); - wrmsrl(MSR_IA32_UCODE_REV, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - cpuid_1(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + rev = intel_get_microcode_revision(); - if (val[1] != mc->hdr.rev) { + if (rev != mc->hdr.rev) { pr_err("CPU%d update to revision 0x%x failed\n", cpu, mc->hdr.rev); return -1; } - if (val[1] != prev_rev) { + if (rev != prev_rev) { pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n", - val[1], + rev, mc->hdr.date & 0xffff, mc->hdr.date >> 24, (mc->hdr.date >> 16) & 0xff); - prev_rev = val[1]; + prev_rev = rev; } c = &cpu_data(cpu); - uci->cpu_sig.rev = val[1]; - c->microcode = val[1]; + uci->cpu_sig.rev = rev; + c->microcode = rev; return 0; } @@ -860,7 +816,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL; int new_rev = uci->cpu_sig.rev; unsigned int leftover = size; - unsigned int curr_mc_size = 0; + unsigned int curr_mc_size = 0, new_mc_size = 0; unsigned int csig, cpf; while (leftover) { @@ -901,6 +857,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, vfree(new_mc); new_rev = mc_header.rev; new_mc = mc; + new_mc_size = mc_size; mc = NULL; /* trigger new vmalloc */ } @@ -926,7 +883,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, * permanent memory. So it will be loaded early when a CPU is hot added * or resumes. */ - save_mc_for_early(new_mc, curr_mc_size); + save_mc_for_early(new_mc, new_mc_size); pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e4e97a5355ce..de7234401275 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -9,6 +9,7 @@ #include <asm/fpu/regset.h> #include <asm/fpu/signal.h> #include <asm/fpu/types.h> +#include <asm/fpu/xstate.h> #include <asm/traps.h> #include <linux/hardirq.h> @@ -183,7 +184,8 @@ void fpstate_init(union fpregs_state *state) * it will #GP. Make sure it is replaced after the memset(). */ if (static_cpu_has(X86_FEATURE_XSAVES)) - state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT; + state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | + xfeatures_mask; if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 85e87b46c318..dc6ba5bda9fc 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -352,6 +352,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) } else { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); disable_irq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index be3a49ee0356..e41af597aed8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -694,6 +694,7 @@ unsigned long native_calibrate_tsc(void) crystal_khz = 24000; /* 24.0 MHz */ break; case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ATOM_DENVERTON: crystal_khz = 25000; /* 25.0 MHz */ break; case INTEL_FAM6_ATOM_GOLDMONT: diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 4443e499f279..23d15565d02a 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -6,6 +6,21 @@ #define FRAME_HEADER_SIZE (sizeof(long) * 2) +/* + * This disables KASAN checking when reading a value from another task's stack, + * since the other task could be running on another CPU and could have poisoned + * the stack in the meantime. + */ +#define READ_ONCE_TASK_STACK(task, x) \ +({ \ + unsigned long val; \ + if (task == current) \ + val = READ_ONCE(x); \ + else \ + val = READ_ONCE_NOCHECK(x); \ + val; \ +}) + static void unwind_dump(struct unwind_state *state, unsigned long *sp) { static bool dumped_before = false; @@ -48,7 +63,8 @@ unsigned long unwind_get_return_address(struct unwind_state *state) if (state->regs && user_mode(state->regs)) return 0; - addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p, + addr = READ_ONCE_TASK_STACK(state->task, *addr_p); + addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr, addr_p); return __kernel_text_address(addr) ? addr : 0; @@ -162,7 +178,7 @@ bool unwind_next_frame(struct unwind_state *state) if (state->regs) next_bp = (unsigned long *)state->regs->bp; else - next_bp = (unsigned long *)*state->bp; + next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task,*state->bp); /* is the next frame pointer an encoded pointer to pt_regs? */ regs = decode_frame_pointer(next_bp); @@ -207,6 +223,16 @@ bool unwind_next_frame(struct unwind_state *state) return true; bad_address: + /* + * When unwinding a non-current task, the task might actually be + * running on another CPU, in which case it could be modifying its + * stack while we're reading it. This is generally not a problem and + * can be ignored as long as the caller understands that unwinding + * another task will not always succeed. + */ + if (state->task != current) + goto the_end; + if (state->regs) { printk_deferred_once(KERN_WARNING "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 56628a44668b..cedbba0f3402 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -818,6 +818,20 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); } +static int segmented_write_std(struct x86_emulate_ctxt *ctxt, + struct segmented_address addr, + void *data, + unsigned int size) +{ + int rc; + ulong linear; + + rc = linearize(ctxt, addr, size, true, &linear); + if (rc != X86EMUL_CONTINUE) + return rc; + return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); +} + /* * Prefetch the remaining bytes of the instruction without crossing page * boundary if they are not in fetch_cache yet. @@ -1571,7 +1585,6 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, &ctxt->exception); } -/* Does not support long mode */ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg, u8 cpl, enum x86_transfer_type transfer, @@ -1608,20 +1621,34 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, rpl = selector & 3; - /* NULL selector is not valid for TR, CS and SS (except for long mode) */ - if ((seg == VCPU_SREG_CS - || (seg == VCPU_SREG_SS - && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)) - || seg == VCPU_SREG_TR) - && null_selector) - goto exception; - /* TR should be in GDT only */ if (seg == VCPU_SREG_TR && (selector & (1 << 2))) goto exception; - if (null_selector) /* for NULL selector skip all following checks */ + /* NULL selector is not valid for TR, CS and (except for long mode) SS */ + if (null_selector) { + if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR) + goto exception; + + if (seg == VCPU_SREG_SS) { + if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl) + goto exception; + + /* + * ctxt->ops->set_segment expects the CPL to be in + * SS.DPL, so fake an expand-up 32-bit data segment. + */ + seg_desc.type = 3; + seg_desc.p = 1; + seg_desc.s = 1; + seg_desc.dpl = cpl; + seg_desc.d = 1; + seg_desc.g = 1; + } + + /* Skip all following checks */ goto load; + } ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr); if (ret != X86EMUL_CONTINUE) @@ -1737,6 +1764,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, u16 selector, int seg) { u8 cpl = ctxt->ops->cpl(ctxt); + + /* + * None of MOV, POP and LSS can load a NULL selector in CPL=3, but + * they can load it at CPL<3 (Intel's manual says only LSS can, + * but it's wrong). + * + * However, the Intel manual says that putting IST=1/DPL=3 in + * an interrupt gate will result in SS=3 (the AMD manual instead + * says it doesn't), so allow SS=3 in __load_segment_descriptor + * and only forbid it here. + */ + if (seg == VCPU_SREG_SS && selector == 3 && + ctxt->mode == X86EMUL_MODE_PROT64) + return emulate_exception(ctxt, GP_VECTOR, 0, true); + return __load_segment_descriptor(ctxt, selector, seg, cpl, X86_TRANSFER_NONE, NULL); } @@ -3685,8 +3727,8 @@ static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt, } /* Disable writeback. */ ctxt->dst.type = OP_NONE; - return segmented_write(ctxt, ctxt->dst.addr.mem, - &desc_ptr, 2 + ctxt->op_bytes); + return segmented_write_std(ctxt, ctxt->dst.addr.mem, + &desc_ptr, 2 + ctxt->op_bytes); } static int em_sgdt(struct x86_emulate_ctxt *ctxt) @@ -3932,7 +3974,7 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) else size = offsetof(struct fxregs_state, xmm_space[0]); - return segmented_write(ctxt, ctxt->memop.addr.mem, &fx_state, size); + return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); } static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt, @@ -3974,7 +4016,7 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - rc = segmented_read(ctxt, ctxt->memop.addr.mem, &fx_state, 512); + rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512); if (rc != X86EMUL_CONTINUE) return rc; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5fe290c1b7d8..2f6ef5121a4c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2426,3 +2426,9 @@ void kvm_lapic_init(void) jump_label_rate_limit(&apic_hw_disabled, HZ); jump_label_rate_limit(&apic_sw_disabled, HZ); } + +void kvm_lapic_exit(void) +{ + static_key_deferred_flush(&apic_hw_disabled); + static_key_deferred_flush(&apic_sw_disabled); +} diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index e0c80233b3e1..ff8039d61672 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -110,6 +110,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); void kvm_lapic_init(void); +void kvm_lapic_exit(void); #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2f22810a7e0c..e52c9088660f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3182,6 +3182,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) memcpy(dest, xsave, XSAVE_HDR_OFFSET); /* Set XSTATE_BV */ + xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE; *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv; /* @@ -3342,6 +3343,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, switch (cap->cap) { case KVM_CAP_HYPERV_SYNIC: + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; return kvm_hv_activate_synic(vcpu); default: return -EINVAL; @@ -6045,6 +6048,7 @@ out: void kvm_arch_exit(void) { + kvm_lapic_exit(); perf_unregister_guest_info_callbacks(&kvm_guest_cbs); if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) @@ -6168,7 +6172,8 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) kvm_x86_ops->patch_hypercall(vcpu, instruction); - return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); + return emulator_write_emulated(ctxt, rip, instruction, 3, + &ctxt->exception); } static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 324e5713d386..af59f808742f 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -293,7 +293,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) * We were not able to extract an address from the instruction, * probably because there was something invalid in it. */ - if (info->si_addr == (void *)-1) { + if (info->si_addr == (void __user *)-1) { err = -EINVAL; goto err_out; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e76d1af60f7a..bb660e53cbd6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1172,6 +1172,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)image; prog->jited = 1; + } else { + prog = orig_prog; } out_addrs: diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 3cd69832d7f4..3961103e9176 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -114,6 +114,16 @@ static const struct dmi_system_id pci_crs_quirks[] __initconst = { DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"), }, }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=42606 */ + { + .callback = set_nouse_crs, + .ident = "Supermicro X8DTH", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), + DMI_MATCH(DMI_PRODUCT_NAME, "X8DTH-i/6/iF/6F"), + DMI_MATCH(DMI_BIOS_VERSION, "2.0a"), + }, + }, /* https://bugzilla.kernel.org/show_bug.cgi?id=15362 */ { diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 936a488d6cf6..274dfc481849 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -210,6 +210,70 @@ int __init efi_memblock_x86_reserve_range(void) return 0; } +#define OVERFLOW_ADDR_SHIFT (64 - EFI_PAGE_SHIFT) +#define OVERFLOW_ADDR_MASK (U64_MAX << OVERFLOW_ADDR_SHIFT) +#define U64_HIGH_BIT (~(U64_MAX >> 1)) + +static bool __init efi_memmap_entry_valid(const efi_memory_desc_t *md, int i) +{ + u64 end = (md->num_pages << EFI_PAGE_SHIFT) + md->phys_addr - 1; + u64 end_hi = 0; + char buf[64]; + + if (md->num_pages == 0) { + end = 0; + } else if (md->num_pages > EFI_PAGES_MAX || + EFI_PAGES_MAX - md->num_pages < + (md->phys_addr >> EFI_PAGE_SHIFT)) { + end_hi = (md->num_pages & OVERFLOW_ADDR_MASK) + >> OVERFLOW_ADDR_SHIFT; + + if ((md->phys_addr & U64_HIGH_BIT) && !(end & U64_HIGH_BIT)) + end_hi += 1; + } else { + return true; + } + + pr_warn_once(FW_BUG "Invalid EFI memory map entries:\n"); + + if (end_hi) { + pr_warn("mem%02u: %s range=[0x%016llx-0x%llx%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end_hi, end); + } else { + pr_warn("mem%02u: %s range=[0x%016llx-0x%016llx] (invalid)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, end); + } + return false; +} + +static void __init efi_clean_memmap(void) +{ + efi_memory_desc_t *out = efi.memmap.map; + const efi_memory_desc_t *in = out; + const efi_memory_desc_t *end = efi.memmap.map_end; + int i, n_removal; + + for (i = n_removal = 0; in < end; i++) { + if (efi_memmap_entry_valid(in, i)) { + if (out != in) + memcpy(out, in, efi.memmap.desc_size); + out = (void *)out + efi.memmap.desc_size; + } else { + n_removal++; + } + in = (void *)in + efi.memmap.desc_size; + } + + if (n_removal > 0) { + u64 size = efi.memmap.nr_map - n_removal; + + pr_warn("Removing %d invalid memory map entries.\n", n_removal); + efi_memmap_install(efi.memmap.phys_map, size); + } +} + void __init efi_print_memmap(void) { efi_memory_desc_t *md; @@ -472,6 +536,8 @@ void __init efi_init(void) } } + efi_clean_memmap(); + if (efi_enabled(EFI_DBG)) efi_print_memmap(); } diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 319148bd4b05..2f25a363068c 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -269,6 +269,22 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) efi_scratch.use_pgd = true; /* + * Certain firmware versions are way too sentimential and still believe + * they are exclusive and unquestionable owners of the first physical page, + * even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY + * (but then write-access it later during SetVirtualAddressMap()). + * + * Create a 1:1 mapping for this page, to avoid triple faults during early + * boot with such firmware. We are free to hand this page to the BIOS, + * as trim_bios_range() will reserve the first page and isolate it away + * from memory allocators anyway. + */ + if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, _PAGE_RW)) { + pr_err("Failed to create 1:1 mapping for the first page!\n"); + return 1; + } + + /* * When making calls to the firmware everything needs to be 1:1 * mapped and addressable with 32-bit pointers. Map the kernel * text and allocate a new stack because we can't rely on the diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 10aca63a50d7..30031d5293c4 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -214,7 +214,7 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) new_size = efi.memmap.desc_size * num_entries; - new_phys = memblock_alloc(new_size, 0); + new_phys = efi_memmap_alloc(num_entries); if (!new_phys) { pr_err("Could not allocate boot services memmap\n"); return; @@ -355,7 +355,7 @@ void __init efi_free_boot_services(void) } new_size = efi.memmap.desc_size * num_entries; - new_phys = memblock_alloc(new_size, 0); + new_phys = efi_memmap_alloc(num_entries); if (!new_phys) { pr_err("Failed to allocate new EFI memmap\n"); return; diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 61b5ed2b7d40..90e4f2a6625b 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -15,7 +15,7 @@ obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o # SPI Devices -obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o +obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_mrfld_spidev.o # I2C Devices obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c index 30c601b399ee..27186ad654c9 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c @@ -11,6 +11,7 @@ * of the License. */ +#include <linux/err.h> #include <linux/init.h> #include <linux/sfi.h> #include <linux/spi/pxa2xx_spi.h> @@ -34,6 +35,9 @@ static void __init *spidev_platform_data(void *info) { struct spi_board_info *spi_info = info; + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return ERR_PTR(-ENODEV); + spi_info->mode = SPI_MODE_0; spi_info->controller_data = &spidev_spi_chip; |