diff options
Diffstat (limited to 'arch/x86/kernel')
83 files changed, 1235 insertions, 653 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 9414f84584e4..90d84c3eee53 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -28,7 +28,7 @@ #include <linux/acpi_pmtmr.h> #include <linux/efi.h> #include <linux/cpumask.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/dmi.h> #include <linux/irq.h> #include <linux/slab.h> @@ -161,13 +161,15 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) /** * acpi_register_lapic - register a local apic and generates a logic cpu number * @id: local apic id to register + * @acpiid: ACPI id to register * @enabled: this cpu is enabled or not * * Returns the logic cpu number which maps to the local apic */ -static int acpi_register_lapic(int id, u8 enabled) +static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) { unsigned int ver = 0; + int cpu; if (id >= MAX_LOCAL_APIC) { printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); @@ -182,7 +184,11 @@ static int acpi_register_lapic(int id, u8 enabled) if (boot_cpu_physical_apicid != -1U) ver = apic_version[boot_cpu_physical_apicid]; - return generic_processor_info(id, ver); + cpu = generic_processor_info(id, ver); + if (cpu >= 0) + early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; + + return cpu; } static int __init @@ -212,7 +218,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) if (!apic->apic_id_valid(apic_id) && enabled) printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); else - acpi_register_lapic(apic_id, enabled); + acpi_register_lapic(apic_id, processor->uid, enabled); #else printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); #endif @@ -240,6 +246,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) * when we use CPU hotplug. */ acpi_register_lapic(processor->id, /* APIC ID */ + processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); return 0; @@ -258,6 +265,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) acpi_table_print_madt_entry(header); acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ + processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); return 0; @@ -714,7 +722,7 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) { int cpu; - cpu = acpi_register_lapic(physid, ACPI_MADT_ENABLED); + cpu = acpi_register_lapic(physid, U32_MAX, ACPI_MADT_ENABLED); if (cpu < 0) { pr_info(PREFIX "Unable to map lapic to logical cpu number\n"); return cpu; diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 4b28159e0421..bdfad642123f 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -5,7 +5,7 @@ */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/init.h> #include <linux/acpi.h> #include <linux/cpu.h> diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 8e3842fc8bea..42d27a62a404 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -20,7 +20,6 @@ #include <linux/string.h> #include <linux/spinlock.h> #include <linux/pci.h> -#include <linux/module.h> #include <linux/topology.h> #include <linux/interrupt.h> #include <linux/bitmap.h> diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index e991d5c8bb3a..4fdf6230d93c 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -9,7 +9,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/errno.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/spinlock.h> #include <asm/amd_nb.h> @@ -219,24 +219,22 @@ int amd_set_subcaches(int cpu, unsigned long mask) return 0; } -static int amd_cache_gart(void) +static void amd_cache_gart(void) { u16 i; - if (!amd_nb_has_feature(AMD_NB_GART)) - return 0; - - flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL); - if (!flush_words) { - amd_northbridges.flags &= ~AMD_NB_GART; - return -ENOMEM; - } + if (!amd_nb_has_feature(AMD_NB_GART)) + return; - for (i = 0; i != amd_nb_num(); i++) - pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, - &flush_words[i]); + flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL); + if (!flush_words) { + amd_northbridges.flags &= ~AMD_NB_GART; + pr_notice("Cannot initialize GART flush words, GART support disabled\n"); + return; + } - return 0; + for (i = 0; i != amd_nb_num(); i++) + pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &flush_words[i]); } void amd_flush_garts(void) @@ -278,17 +276,10 @@ EXPORT_SYMBOL_GPL(amd_flush_garts); static __init int init_amd_nbs(void) { - int err = 0; + amd_cache_northbridges(); + amd_cache_gart(); - err = amd_cache_northbridges(); - - if (err < 0) - pr_notice("Cannot enumerate AMD northbridges\n"); - - if (amd_cache_gart() < 0) - pr_notice("Cannot initialize GART flush words, GART support disabled\n"); - - return err; + return 0; } /* This has to go after the PCI subsystem */ diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index cefacbad1531..456316f6c868 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -215,26 +215,18 @@ void apbt_setup_secondary_clock(void) * cpu timers during the offline process due to the ordering of notification. * the extra interrupt is harmless. */ -static int apbt_cpuhp_notify(struct notifier_block *n, - unsigned long action, void *hcpu) +static int apbt_cpu_dead(unsigned int cpu) { - unsigned long cpu = (unsigned long)hcpu; struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu); - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DEAD: - dw_apb_clockevent_pause(adev->timer); - if (system_state == SYSTEM_RUNNING) { - pr_debug("skipping APBT CPU %lu offline\n", cpu); - } else { - pr_debug("APBT clockevent for cpu %lu offline\n", cpu); - dw_apb_clockevent_stop(adev->timer); - } - break; - default: - pr_debug("APBT notified %lu, no action\n", action); + dw_apb_clockevent_pause(adev->timer); + if (system_state == SYSTEM_RUNNING) { + pr_debug("skipping APBT CPU %u offline\n", cpu); + } else { + pr_debug("APBT clockevent for cpu %u offline\n", cpu); + dw_apb_clockevent_stop(adev->timer); } - return NOTIFY_OK; + return 0; } static __init int apbt_late_init(void) @@ -242,9 +234,8 @@ static __init int apbt_late_init(void) if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT || !apb_timer_block_enabled) return 0; - /* This notifier should be called after workqueue is ready */ - hotcpu_notifier(apbt_cpuhp_notify, -20); - return 0; + return cpuhp_setup_state(CPUHP_X86_APB_DEAD, "X86_APB_DEAD", NULL, + apbt_cpu_dead); } fs_initcall(apbt_late_init); #else diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 60078a67d7e3..7943d38c57ca 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -23,7 +23,7 @@ #include <linux/bootmem.h> #include <linux/ftrace.h> #include <linux/ioport.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/syscore_ops.h> #include <linux/delay.h> #include <linux/timex.h> @@ -92,8 +92,10 @@ static int apic_extnmi = APIC_EXTNMI_BSP; */ DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); #ifdef CONFIG_X86_32 @@ -2045,7 +2047,7 @@ int generic_processor_info(int apicid, int version) int thiscpu = max + disabled_cpus - 1; pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i almost" + "APIC: NR_CPUS/possible_cpus limit of %i almost" " reached. Keeping one slot for boot cpu." " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); @@ -2057,7 +2059,7 @@ int generic_processor_info(int apicid, int version) int thiscpu = max + disabled_cpus; pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i reached." + "APIC: NR_CPUS/possible_cpus limit of %i reached." " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); disabled_cpus++; @@ -2085,7 +2087,7 @@ int generic_processor_info(int apicid, int version) if (topology_update_package_map(apicid, cpu) < 0) { int thiscpu = max + disabled_cpus; - pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n", + pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n", thiscpu, apicid); disabled_cpus++; return -ENOSPC; diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 76f89e2b245a..5b2ae106bd4a 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -15,7 +15,7 @@ #include <linux/kernel.h> #include <linux/ctype.h> #include <linux/hardirq.h> -#include <linux/module.h> +#include <linux/export.h> #include <asm/smp.h> #include <asm/apic.h> #include <asm/ipi.h> @@ -181,7 +181,6 @@ static struct apic apic_flat = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu << 24, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, @@ -278,7 +277,6 @@ static struct apic apic_physflat = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 13d19ed58514..c05688b2deff 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -11,7 +11,6 @@ #include <linux/threads.h> #include <linux/cpumask.h> -#include <linux/module.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/ctype.h> @@ -141,7 +140,6 @@ struct apic apic_noop = { .get_apic_id = noop_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index ab5c2c685a3c..714d4fda0d52 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -269,7 +269,6 @@ static const struct apic apic_numachip1 __refconst = { .get_apic_id = numachip1_get_apic_id, .set_apic_id = numachip1_set_apic_id, - .apic_id_mask = 0xffU << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, @@ -321,7 +320,6 @@ static const struct apic apic_numachip2 __refconst = { .get_apic_id = numachip2_get_apic_id, .set_apic_id = numachip2_set_apic_id, - .apic_id_mask = 0xffU << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index cf9bd896c12d..06dbaa458bfe 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -171,7 +171,6 @@ static struct apic apic_bigsmp = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 7788ce643bf4..f29501e1a5c1 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -16,7 +16,7 @@ #include <linux/notifier.h> #include <linux/kprobes.h> #include <linux/nmi.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/delay.h> #ifdef CONFIG_HARDLOCKUP_DETECTOR diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 446702ed99dc..7491f417a8e4 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -39,7 +39,7 @@ #include <linux/mc146818rtc.h> #include <linux/compiler.h> #include <linux/acpi.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/syscore_ops.h> #include <linux/freezer.h> #include <linux/kthread.h> @@ -981,7 +981,7 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, return __irq_domain_alloc_irqs(domain, irq, 1, ioapic_alloc_attr_node(info), - info, legacy); + info, legacy, NULL); } /* @@ -1014,7 +1014,8 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain, info->ioapic_pin)) return -ENOMEM; } else { - irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true); + irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, + NULL); if (irq >= 0) { irq_data = irq_domain_get_irq_data(domain, irq); data = irq_data->chip_data; @@ -2567,29 +2568,25 @@ static struct resource * __init ioapic_setup_resources(void) unsigned long n; struct resource *res; char *mem; - int i, num = 0; + int i; - for_each_ioapic(i) - num++; - if (num == 0) + if (nr_ioapics == 0) return NULL; n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= num; + n *= nr_ioapics; mem = alloc_bootmem(n); res = (void *)mem; - mem += sizeof(struct resource) * num; + mem += sizeof(struct resource) * nr_ioapics; - num = 0; for_each_ioapic(i) { - res[num].name = mem; - res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; - ioapics[i].iomem_res = &res[num]; - num++; + ioapics[i].iomem_res = &res[i]; } ioapic_resources = res; diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 2a0f225afebd..3a205d4a12d0 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -8,7 +8,6 @@ #include <linux/mc146818rtc.h> #include <linux/cache.h> #include <linux/cpu.h> -#include <linux/module.h> #include <asm/smp.h> #include <asm/mtrr.h> diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index f316e34abb42..7c43e716c158 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -8,7 +8,7 @@ */ #include <linux/threads.h> #include <linux/cpumask.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/ctype.h> @@ -101,7 +101,6 @@ static struct apic apic_default = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 1793dba7a741..c303054b90b5 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -11,10 +11,9 @@ #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/string.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/kernel.h> #include <linux/ctype.h> -#include <linux/init.h> #include <linux/hardirq.h> #include <linux/dmar.h> diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index aca8b75c1552..6368fa69d2af 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -152,68 +152,48 @@ static void init_x2apic_ldr(void) } } - /* - * At CPU state changes, update the x2apic cluster sibling info. - */ -static int -update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu) +/* + * At CPU state changes, update the x2apic cluster sibling info. + */ +int x2apic_prepare_cpu(unsigned int cpu) { - unsigned int this_cpu = (unsigned long)hcpu; - unsigned int cpu; - int err = 0; - - switch (action) { - case CPU_UP_PREPARE: - if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu), - GFP_KERNEL)) { - err = -ENOMEM; - } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu), - GFP_KERNEL)) { - free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); - err = -ENOMEM; - } - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - for_each_online_cpu(cpu) { - if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) - continue; - cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); - cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); - } - free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); - free_cpumask_var(per_cpu(ipi_mask, this_cpu)); - break; + if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL)) + return -ENOMEM; + + if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) { + free_cpumask_var(per_cpu(cpus_in_cluster, cpu)); + return -ENOMEM; } - return notifier_from_errno(err); + return 0; } -static struct notifier_block x2apic_cpu_notifier = { - .notifier_call = update_clusterinfo, -}; - -static int x2apic_init_cpu_notifier(void) +int x2apic_dead_cpu(unsigned int this_cpu) { - int cpu = smp_processor_id(); - - zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL); + int cpu; - BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu)); - - cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu)); - register_hotcpu_notifier(&x2apic_cpu_notifier); - return 1; + for_each_online_cpu(cpu) { + if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) + continue; + cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); + cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); + } + free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); + free_cpumask_var(per_cpu(ipi_mask, this_cpu)); + return 0; } static int x2apic_cluster_probe(void) { - if (x2apic_mode) - return x2apic_init_cpu_notifier(); - else + int cpu = smp_processor_id(); + + if (!x2apic_mode) return 0; + + cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu)); + cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE", + x2apic_prepare_cpu, x2apic_dead_cpu); + return 1; } static const struct cpumask *x2apic_cluster_target_cpus(void) @@ -270,7 +250,6 @@ static struct apic apic_x2apic_cluster = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a1242e2c12e6..4f13f54f1b1f 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -126,7 +126,6 @@ static struct apic apic_x2apic_phys = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 29003154fafd..09b59adaea3f 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -12,7 +12,7 @@ #include <linux/proc_fs.h> #include <linux/threads.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/string.h> #include <linux/ctype.h> #include <linux/sched.h> @@ -582,7 +582,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, @@ -919,7 +918,7 @@ static void uv_heartbeat(unsigned long ignored) uv_set_scir_bits(bits); /* enable next timer period */ - mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL); + mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); } static void uv_heartbeat_enable(int cpu) @@ -928,7 +927,7 @@ static void uv_heartbeat_enable(int cpu) struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer; uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); - setup_timer(timer, uv_heartbeat, cpu); + setup_pinned_timer(timer, uv_heartbeat, cpu); timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; add_timer_on(timer, cpu); uv_cpu_scir_info(cpu)->enabled = 1; diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 674134e9f5e5..2bd5c6ff7ee7 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -31,7 +31,9 @@ void common(void) { BLANK(); OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); - OFFSET(TI_addr_limit, thread_info, addr_limit); + + BLANK(); + OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0fe6953f421c..809eda03c527 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2,7 +2,7 @@ #include <linux/linkage.h> #include <linux/bitops.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/percpu.h> #include <linux/string.h> #include <linux/ctype.h> @@ -1452,7 +1452,7 @@ void cpu_init(void) struct task_struct *me; struct tss_struct *t; unsigned long v; - int cpu = stack_smp_processor_id(); + int cpu = raw_smp_processor_id(); int i; wait_for_master_cpu(cpu); diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 73d391ae452f..27e46658ebe3 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -21,7 +21,8 @@ * */ -#include <linux/module.h> +#include <linux/init.h> +#include <linux/export.h> #include <asm/processor.h> #include <asm/hypervisor.h> diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 6e2ffbebbcdb..fcd484d2bb03 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -5,7 +5,7 @@ #include <linux/smp.h> #include <linux/sched.h> #include <linux/thread_info.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/uaccess.h> #include <asm/cpufeature.h> @@ -13,6 +13,7 @@ #include <asm/msr.h> #include <asm/bugs.h> #include <asm/cpu.h> +#include <asm/intel-family.h> #ifdef CONFIG_X86_64 #include <linux/topology.h> @@ -300,15 +301,14 @@ static void intel_workarounds(struct cpuinfo_x86 *c) } /* - * P4 Xeon errata 037 workaround. + * P4 Xeon erratum 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { if (msr_set_bit(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) - > 0) { + MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { pr_info("CPU: C0 stepping P4 Xeon detected.\n"); - pr_info("CPU: Disabling hardware prefetching (Errata 037)\n"); + pr_info("CPU: Disabling hardware prefetching (Erratum 037)\n"); } } @@ -509,6 +509,10 @@ static void init_intel(struct cpuinfo_x86 *c) (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); + if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) && + ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT))) + set_cpu_bug(c, X86_BUG_MONITOR); + #ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index fbb5e90557a5..e42117d5f4d7 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -1,7 +1,7 @@ #include <asm/cpu_device_id.h> #include <asm/cpufeature.h> #include <linux/cpu.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> /** diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 34c89a3e8260..83f1a98d37db 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -46,7 +46,7 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) return; mce_setup(&m); - m.bank = 1; + m.bank = -1; /* Fake a memory read error with unknown channel */ m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 92e5e37d97bf..79d8ec849468 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -425,7 +425,7 @@ static u64 mce_rdmsrl(u32 msr) } if (rdmsrl_safe(msr, &v)) { - WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr); + WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr); /* * Return zero in case the access faulted. This should * not happen normally but can happen if the CPU does @@ -1309,7 +1309,7 @@ static void __restart_timer(struct timer_list *t, unsigned long interval) if (timer_pending(t)) { if (time_before(when, t->expires)) - mod_timer_pinned(t, when); + mod_timer(t, when); } else { t->expires = round_jiffies(when); add_timer_on(t, smp_processor_id()); @@ -1735,7 +1735,7 @@ static void __mcheck_cpu_init_timer(void) struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned int cpu = smp_processor_id(); - setup_timer(t, mce_timer_fn, cpu); + setup_pinned_timer(t, mce_timer_fn, cpu); mce_start_timer(cpu, t); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 10b0661651e0..7b7f3be783d4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -93,7 +93,7 @@ const char * const amd_df_mcablock_names[] = { EXPORT_SYMBOL_GPL(amd_df_mcablock_names); static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); -static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ +static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 8581963894c7..27a0228c9cae 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -56,24 +56,24 @@ static u8 *container; static size_t container_size; static u32 ucode_new_rev; -u8 amd_ucode_patch[PATCH_MAX_SIZE]; +static u8 amd_ucode_patch[PATCH_MAX_SIZE]; static u16 this_equiv_id; static struct cpio_data ucode_cpio; -/* - * Microcode patch container file is prepended to the initrd in cpio format. - * See Documentation/x86/early-microcode.txt - */ -static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin"; - static struct cpio_data __init find_ucode_in_initrd(void) { - long offset = 0; +#ifdef CONFIG_BLK_DEV_INITRD char *path; void *start; size_t size; + /* + * Microcode patch container file is prepended to the initrd in cpio + * format. See Documentation/x86/early-microcode.txt + */ + static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin"; + #ifdef CONFIG_X86_32 struct boot_params *p; @@ -89,9 +89,12 @@ static struct cpio_data __init find_ucode_in_initrd(void) path = ucode_path; start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET); size = boot_params.hdr.ramdisk_size; -#endif +#endif /* !CONFIG_X86_32 */ - return find_cpio_data(path, start, size, &offset); + return find_cpio_data(path, start, size, NULL); +#else + return (struct cpio_data){ NULL, 0, "" }; +#endif } static size_t compute_container_size(u8 *data, u32 total_size) @@ -289,11 +292,11 @@ void __init load_ucode_amd_bsp(unsigned int family) size = &ucode_cpio.size; #endif - cp = find_ucode_in_initrd(); - if (!cp.data) { - if (!load_builtin_amd_microcode(&cp, family)) - return; - } + if (!load_builtin_amd_microcode(&cp, family)) + cp = find_ucode_in_initrd(); + + if (!(cp.data && cp.size)) + return; *data = cp.data; *size = cp.size; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index ac360bfbbdb6..df04b2d033f6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -60,7 +60,6 @@ static bool dis_ucode_ldr; static DEFINE_MUTEX(microcode_mutex); struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -EXPORT_SYMBOL_GPL(ucode_cpu_info); /* * Operations that are run on a target cpu: @@ -175,24 +174,24 @@ void load_ucode_ap(void) } } -int __init save_microcode_in_initrd(void) +static int __init save_microcode_in_initrd(void) { struct cpuinfo_x86 *c = &boot_cpu_data; switch (c->x86_vendor) { case X86_VENDOR_INTEL: if (c->x86 >= 6) - save_microcode_in_initrd_intel(); + return save_microcode_in_initrd_intel(); break; case X86_VENDOR_AMD: if (c->x86 >= 0x10) - save_microcode_in_initrd_amd(); + return save_microcode_in_initrd_amd(); break; default: break; } - return 0; + return -EINVAL; } void reload_early_microcode(void) @@ -691,4 +690,5 @@ int __init microcode_init(void) return error; } +fs_initcall(save_microcode_in_initrd); late_initcall(microcode_init); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 65cbbcd48fe4..cdc0deab00c9 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -40,9 +40,13 @@ #include <asm/msr.h> /* - * Temporary microcode blobs pointers storage. We note here the pointers to - * microcode blobs we've got from whatever storage (detached initrd, builtin). - * Later on, we put those into final storage mc_saved_data.mc_saved. + * Temporary microcode blobs pointers storage. We note here during early load + * the pointers to microcode blobs we've got from whatever storage (detached + * initrd, builtin). Later on, we put those into final storage + * mc_saved_data.mc_saved. + * + * Important: those are offsets from the beginning of initrd or absolute + * addresses within the kernel image when built-in. */ static unsigned long mc_tmp_ptrs[MAX_UCODE_COUNT]; @@ -51,8 +55,15 @@ static struct mc_saved_data { struct microcode_intel **mc_saved; } mc_saved_data; +/* Microcode blobs within the initrd. 0 if builtin. */ +static struct ucode_blobs { + unsigned long start; + bool valid; +} blobs; + +/* Go through saved patches and find the one suitable for the current CPU. */ static enum ucode_state -load_microcode_early(struct microcode_intel **saved, +find_microcode_patch(struct microcode_intel **saved, unsigned int num_saved, struct ucode_cpu_info *uci) { struct microcode_intel *ucode_ptr, *new_mc = NULL; @@ -121,13 +132,13 @@ load_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs, if (!mcs->mc_saved) { copy_ptrs(mc_saved_tmp, mc_ptrs, offset, count); - return load_microcode_early(mc_saved_tmp, count, uci); + return find_microcode_patch(mc_saved_tmp, count, uci); } else { #ifdef CONFIG_X86_32 microcode_phys(mc_saved_tmp, mcs); - return load_microcode_early(mc_saved_tmp, count, uci); + return find_microcode_patch(mc_saved_tmp, count, uci); #else - return load_microcode_early(mcs->mc_saved, count, uci); + return find_microcode_patch(mcs->mc_saved, count, uci); #endif } } @@ -450,8 +461,6 @@ static void show_saved_mc(void) #endif } -#ifdef CONFIG_HOTPLUG_CPU -static DEFINE_MUTEX(x86_cpu_microcode_mutex); /* * Save this mc into mc_saved_data. So it will be loaded early when a CPU is * hot added or resumes. @@ -459,19 +468,18 @@ static DEFINE_MUTEX(x86_cpu_microcode_mutex); * Please make sure this mc should be a valid microcode patch before calling * this function. */ -int save_mc_for_early(u8 *mc) +static void save_mc_for_early(u8 *mc) { +#ifdef CONFIG_HOTPLUG_CPU + /* Synchronization during CPU hotplug. */ + static DEFINE_MUTEX(x86_cpu_microcode_mutex); + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; unsigned int mc_saved_count_init; unsigned int num_saved; struct microcode_intel **mc_saved; - int ret = 0; - int i; + int ret, i; - /* - * Hold hotplug lock so mc_saved_data is not accessed by a CPU in - * hotplug. - */ mutex_lock(&x86_cpu_microcode_mutex); mc_saved_count_init = mc_saved_data.num_saved; @@ -509,11 +517,8 @@ int save_mc_for_early(u8 *mc) out: mutex_unlock(&x86_cpu_microcode_mutex); - - return ret; -} -EXPORT_SYMBOL_GPL(save_mc_for_early); #endif +} static bool __init load_builtin_intel_microcode(struct cpio_data *cp) { @@ -532,37 +537,6 @@ static bool __init load_builtin_intel_microcode(struct cpio_data *cp) #endif } -static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; -static __init enum ucode_state -scan_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs, - unsigned long start, unsigned long size, - struct ucode_cpu_info *uci) -{ - struct cpio_data cd; - long offset = 0; -#ifdef CONFIG_X86_32 - char *p = (char *)__pa_nodebug(ucode_name); -#else - char *p = ucode_name; -#endif - - cd.data = NULL; - cd.size = 0; - - /* try built-in microcode if no initrd */ - if (!size) { - if (!load_builtin_intel_microcode(&cd)) - return UCODE_ERROR; - } else { - cd = find_cpio_data(p, (void *)start, size, &offset); - if (!cd.data) - return UCODE_ERROR; - } - - return get_matching_model_microcode(start, cd.data, cd.size, - mcs, mc_ptrs, uci); -} - /* * Print ucode update info. */ @@ -680,38 +654,117 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) */ int __init save_microcode_in_initrd_intel(void) { - unsigned int count = mc_saved_data.num_saved; struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; - int ret = 0; + unsigned int count = mc_saved_data.num_saved; + unsigned long offset = 0; + int ret; if (!count) - return ret; + return 0; + + /* + * We have found a valid initrd but it might've been relocated in the + * meantime so get its updated address. + */ + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && blobs.valid) + offset = initrd_start; - copy_ptrs(mc_saved, mc_tmp_ptrs, get_initrd_start(), count); + copy_ptrs(mc_saved, mc_tmp_ptrs, offset, count); ret = save_microcode(&mc_saved_data, mc_saved, count); if (ret) pr_err("Cannot save microcode patches from initrd.\n"); - - show_saved_mc(); + else + show_saved_mc(); return ret; } +static __init enum ucode_state +__scan_microcode_initrd(struct cpio_data *cd, struct ucode_blobs *blbp) +{ +#ifdef CONFIG_BLK_DEV_INITRD + static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; + char *p = IS_ENABLED(CONFIG_X86_32) ? (char *)__pa_nodebug(ucode_name) + : ucode_name; +# ifdef CONFIG_X86_32 + unsigned long start = 0, size; + struct boot_params *params; + + params = (struct boot_params *)__pa_nodebug(&boot_params); + size = params->hdr.ramdisk_size; + + /* + * Set start only if we have an initrd image. We cannot use initrd_start + * because it is not set that early yet. + */ + start = (size ? params->hdr.ramdisk_image : 0); + +# else /* CONFIG_X86_64 */ + unsigned long start = 0, size; + + size = (u64)boot_params.ext_ramdisk_size << 32; + size |= boot_params.hdr.ramdisk_size; + + if (size) { + start = (u64)boot_params.ext_ramdisk_image << 32; + start |= boot_params.hdr.ramdisk_image; + + start += PAGE_OFFSET; + } +# endif + + *cd = find_cpio_data(p, (void *)start, size, NULL); + if (cd->data) { + blbp->start = start; + blbp->valid = true; + + return UCODE_OK; + } else +#endif /* CONFIG_BLK_DEV_INITRD */ + return UCODE_ERROR; +} + +static __init enum ucode_state +scan_microcode(struct mc_saved_data *mcs, unsigned long *mc_ptrs, + struct ucode_cpu_info *uci, struct ucode_blobs *blbp) +{ + struct cpio_data cd = { NULL, 0, "" }; + enum ucode_state ret; + + /* try built-in microcode first */ + if (load_builtin_intel_microcode(&cd)) + /* + * Invalidate blobs as we might've gotten an initrd too, + * supplied by the boot loader, by mistake or simply forgotten + * there. That's fine, we ignore it since we've found builtin + * microcode already. + */ + blbp->valid = false; + else { + ret = __scan_microcode_initrd(&cd, blbp); + if (ret != UCODE_OK) + return ret; + } + + return get_matching_model_microcode(blbp->start, cd.data, cd.size, + mcs, mc_ptrs, uci); +} + static void __init _load_ucode_intel_bsp(struct mc_saved_data *mcs, unsigned long *mc_ptrs, - unsigned long start, unsigned long size) + struct ucode_blobs *blbp) { struct ucode_cpu_info uci; enum ucode_state ret; collect_cpu_info_early(&uci); - ret = scan_microcode(mcs, mc_ptrs, start, size, &uci); + ret = scan_microcode(mcs, mc_ptrs, &uci, blbp); if (ret != UCODE_OK) return; - ret = load_microcode(mcs, mc_ptrs, start, &uci); + ret = load_microcode(mcs, mc_ptrs, blbp->start, &uci); if (ret != UCODE_OK) return; @@ -720,54 +773,60 @@ _load_ucode_intel_bsp(struct mc_saved_data *mcs, unsigned long *mc_ptrs, void __init load_ucode_intel_bsp(void) { - u64 start, size; -#ifdef CONFIG_X86_32 - struct boot_params *p; - - p = (struct boot_params *)__pa_nodebug(&boot_params); - size = p->hdr.ramdisk_size; + struct ucode_blobs *blobs_p; + struct mc_saved_data *mcs; + unsigned long *ptrs; - /* - * Set start only if we have an initrd image. We cannot use initrd_start - * because it is not set that early yet. - */ - start = (size ? p->hdr.ramdisk_image : 0); - - _load_ucode_intel_bsp((struct mc_saved_data *)__pa_nodebug(&mc_saved_data), - (unsigned long *)__pa_nodebug(&mc_tmp_ptrs), - start, size); +#ifdef CONFIG_X86_32 + mcs = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); + ptrs = (unsigned long *)__pa_nodebug(&mc_tmp_ptrs); + blobs_p = (struct ucode_blobs *)__pa_nodebug(&blobs); #else - size = boot_params.hdr.ramdisk_size; - start = (size ? boot_params.hdr.ramdisk_image + PAGE_OFFSET : 0); - - _load_ucode_intel_bsp(&mc_saved_data, mc_tmp_ptrs, start, size); + mcs = &mc_saved_data; + ptrs = mc_tmp_ptrs; + blobs_p = &blobs; #endif + + _load_ucode_intel_bsp(mcs, ptrs, blobs_p); } void load_ucode_intel_ap(void) { - unsigned long *mcs_tmp_p; - struct mc_saved_data *mcs_p; + struct ucode_blobs *blobs_p; + unsigned long *ptrs, start = 0; + struct mc_saved_data *mcs; struct ucode_cpu_info uci; enum ucode_state ret; -#ifdef CONFIG_X86_32 - mcs_tmp_p = (unsigned long *)__pa_nodebug(mc_tmp_ptrs); - mcs_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); +#ifdef CONFIG_X86_32 + mcs = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); + ptrs = (unsigned long *)__pa_nodebug(mc_tmp_ptrs); + blobs_p = (struct ucode_blobs *)__pa_nodebug(&blobs); #else - mcs_tmp_p = mc_tmp_ptrs; - mcs_p = &mc_saved_data; + mcs = &mc_saved_data; + ptrs = mc_tmp_ptrs; + blobs_p = &blobs; #endif /* * If there is no valid ucode previously saved in memory, no need to * update ucode on this AP. */ - if (!mcs_p->num_saved) + if (!mcs->num_saved) return; + if (blobs_p->valid) { + start = blobs_p->start; + + /* + * Pay attention to CONFIG_RANDOMIZE_MEMORY=y as it shuffles + * physmem mapping too and there we have the initrd. + */ + start += PAGE_OFFSET - __PAGE_OFFSET_BASE; + } + collect_cpu_info_early(&uci); - ret = load_microcode(mcs_p, mcs_tmp_p, get_initrd_start_addr(), &uci); + ret = load_microcode(mcs, ptrs, start, &uci); if (ret != UCODE_OK) return; @@ -784,7 +843,7 @@ void reload_ucode_intel(void) collect_cpu_info_early(&uci); - ret = load_microcode_early(mc_saved_data.mc_saved, + ret = find_microcode_patch(mc_saved_data.mc_saved, mc_saved_data.num_saved, &uci); if (ret != UCODE_OK) return; @@ -794,6 +853,7 @@ void reload_ucode_intel(void) static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) { + static struct cpu_signature prev; struct cpuinfo_x86 *c = &cpu_data(cpu_num); unsigned int val[2]; @@ -808,8 +868,13 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) } csig->rev = c->microcode; - pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", - cpu_num, csig->sig, csig->pf, csig->rev); + + /* No extra locking on prev, races are harmless. */ + if (csig->sig != prev.sig || csig->pf != prev.pf || csig->rev != prev.rev) { + pr_info("sig=0x%x, pf=0x%x, revision=0x%x\n", + csig->sig, csig->pf, csig->rev); + prev = *csig; + } return 0; } @@ -838,6 +903,7 @@ static int apply_microcode_intel(int cpu) struct ucode_cpu_info *uci; struct cpuinfo_x86 *c; unsigned int val[2]; + static int prev_rev; /* We should bind the task to the CPU */ if (WARN_ON(raw_smp_processor_id() != cpu)) @@ -872,11 +938,14 @@ static int apply_microcode_intel(int cpu) return -1; } - pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n", - cpu, val[1], - mc->hdr.date & 0xffff, - mc->hdr.date >> 24, - (mc->hdr.date >> 16) & 0xff); + if (val[1] != prev_rev) { + pr_info("updated to revision 0x%x, date = %04x-%02x-%02x\n", + val[1], + mc->hdr.date & 0xffff, + mc->hdr.date >> 24, + (mc->hdr.date >> 16) & 0xff); + prev_rev = val[1]; + } c = &cpu_data(cpu); diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c index 2ce1a7dc45b7..406cb6c0d9dd 100644 --- a/arch/x86/kernel/cpu/microcode/intel_lib.c +++ b/arch/x86/kernel/cpu/microcode/intel_lib.c @@ -141,7 +141,6 @@ int microcode_sanity_check(void *mc, int print_err) } return 0; } -EXPORT_SYMBOL_GPL(microcode_sanity_check); /* * Returns 1 if update has been found, 0 otherwise. @@ -183,4 +182,3 @@ int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev) return find_matching_signature(mc, csig, cpf); } -EXPORT_SYMBOL_GPL(has_newer_microcode); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 10c11b4da31d..8f44c5a50ab8 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -13,7 +13,8 @@ #include <linux/types.h> #include <linux/time.h> #include <linux/clocksource.h> -#include <linux/module.h> +#include <linux/init.h> +#include <linux/export.h> #include <linux/hardirq.h> #include <linux/efi.h> #include <linux/interrupt.h> diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 31e951ce6dff..3b442b64c72d 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -17,7 +17,6 @@ * License along with this library; if not, write to the Free * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <linux/module.h> #include <linux/init.h> #include <linux/pci.h> #include <linux/smp.h> diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 16e37a2581ac..fdc55215d44d 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -4,7 +4,7 @@ */ #define DEBUG -#include <linux/module.h> +#include <linux/export.h> #include <linux/init.h> #include <linux/io.h> #include <linux/mm.h> diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index d76f13d6d8d6..6d9b45549109 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -2,7 +2,6 @@ #include <linux/seq_file.h> #include <linux/uaccess.h> #include <linux/proc_fs.h> -#include <linux/module.h> #include <linux/ctype.h> #include <linux/string.h> #include <linux/slab.h> diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 7d393ecdeee6..28f1b54b7fad 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -38,7 +38,7 @@ #include <linux/stop_machine.h> #include <linux/kvm_para.h> #include <linux/uaccess.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/mutex.h> #include <linux/init.h> #include <linux/sort.h> diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 2e8caf03f593..181eabecae25 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -12,7 +12,7 @@ */ #include <linux/percpu.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/smp.h> diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index f6f50c4ceaec..cfa97ff67bda 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -39,9 +39,9 @@ __setup("nordrand", x86_rdrand_setup); */ #define SANITY_CHECK_LOOPS 8 +#ifdef CONFIG_ARCH_RANDOM void x86_init_rdrand(struct cpuinfo_x86 *c) { -#ifdef CONFIG_ARCH_RANDOM unsigned long tmp; int i; @@ -55,5 +55,5 @@ void x86_init_rdrand(struct cpuinfo_x86 *c) return; } } -#endif } +#endif diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 8cac429b6a1d..1ff0598d309c 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -22,7 +22,8 @@ */ #include <linux/dmi.h> -#include <linux/module.h> +#include <linux/init.h> +#include <linux/export.h> #include <asm/div64.h> #include <asm/x86_init.h> #include <asm/hypervisor.h> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 9ef978d69c22..9616cf76940c 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -20,7 +20,7 @@ #include <linux/delay.h> #include <linux/elf.h> #include <linux/elfcore.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/vmalloc.h> diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ef8017ca5ba9..92e8f0a7159c 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -87,7 +87,7 @@ static inline int valid_stack_ptr(struct task_struct *task, else return 0; } - return p > t && p < t + THREAD_SIZE - size; + return p >= t && p < t + THREAD_SIZE - size; } unsigned long @@ -98,6 +98,14 @@ print_context_stack(struct task_struct *task, { struct stack_frame *frame = (struct stack_frame *)bp; + /* + * If we overflowed the stack into a guard page, jump back to the + * bottom of the usable stack. + */ + if ((unsigned long)task_stack_page(task) - (unsigned long)stack < + PAGE_SIZE) + stack = (unsigned long *)task_stack_page(task); + while (valid_stack_ptr(task, stack, sizeof(*stack), end)) { unsigned long addr; @@ -197,6 +205,11 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_stack_log_lvl(task, NULL, sp, bp, ""); } +void show_stack_regs(struct pt_regs *regs) +{ + show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, ""); +} + static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; @@ -226,6 +239,8 @@ unsigned long oops_begin(void) EXPORT_SYMBOL_GPL(oops_begin); NOKPROBE_SYMBOL(oops_begin); +void __noreturn rewind_stack_do_exit(int signr); + void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { if (regs && kexec_should_crash(current)) @@ -247,7 +262,13 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - do_exit(signr); + + /* + * We're not going to return, but we might be on an IST stack or + * have very little stack space left. Rewind the stack and kill + * the task. + */ + rewind_stack_do_exit(signr); } NOKPROBE_SYMBOL(oops_end); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index fef917e79b9d..09675712eba8 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -7,7 +7,7 @@ #include <linux/uaccess.h> #include <linux/hardirq.h> #include <linux/kdebug.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/ptrace.h> #include <linux/kexec.h> #include <linux/sysfs.h> @@ -96,7 +96,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, int i; if (sp == NULL) { - if (task) + if (regs) + sp = (unsigned long *)regs->sp; + else if (task) sp = (unsigned long *)task->thread.sp; else sp = (unsigned long *)&sp; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index d558a8a49016..9ee4520ce83c 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -7,7 +7,7 @@ #include <linux/uaccess.h> #include <linux/hardirq.h> #include <linux/kdebug.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/ptrace.h> #include <linux/kexec.h> #include <linux/sysfs.h> @@ -264,7 +264,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, * back trace for this cpu: */ if (sp == NULL) { - if (task) + if (regs) + sp = (unsigned long *)regs->sp; + else if (task) sp = (unsigned long *)task->thread.sp; else sp = (unsigned long *)&sp; @@ -272,6 +274,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, stack = sp; for (i = 0; i < kstack_depth_to_print; i++) { + unsigned long word; + if (stack >= irq_stack && stack <= irq_stack_end) { if (stack == irq_stack_end) { stack = (unsigned long *) (irq_stack_end[-1]); @@ -281,12 +285,18 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, if (kstack_end(stack)) break; } + + if (probe_kernel_address(stack, word)) + break; + if ((i % STACKSLOTS_PER_LINE) == 0) { if (i != 0) pr_cont("\n"); - printk("%s %016lx", log_lvl, *stack++); + printk("%s %016lx", log_lvl, word); } else - pr_cont(" %016lx", *stack++); + pr_cont(" %016lx", word); + + stack++; touch_nmi_watchdog(); } preempt_enable(); diff --git a/arch/x86/kernel/ebda.c b/arch/x86/kernel/ebda.c index afe65dffee80..4312f8ae71b7 100644 --- a/arch/x86/kernel/ebda.c +++ b/arch/x86/kernel/ebda.c @@ -6,66 +6,92 @@ #include <asm/bios_ebda.h> /* + * This function reserves all conventional PC system BIOS related + * firmware memory areas (some of which are data, some of which + * are code), that must not be used by the kernel as available + * RAM. + * * The BIOS places the EBDA/XBDA at the top of conventional * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. This also contains a - * workaround for Dell systems that neglect to reserve EBDA. - * The same workaround also avoids a problem with the AMD768MPX - * chipset: reserve a page before VGA to prevent PCI prefetch - * into it (errata #56). Usually the page is reserved anyways, - * unless you have no PS/2 mouse plugged in. + * conventional memory (int 0x12) too. + * + * This means that as a first approximation on most systems we can + * guess the reserved BIOS area by looking at the low BIOS RAM size + * value and assume that everything above that value (up to 1MB) is + * reserved. + * + * But life in firmware country is not that simple: + * + * - This code also contains a quirk for Dell systems that neglect + * to reserve the EBDA area in the 'RAM size' value ... + * + * - The same quirk also avoids a problem with the AMD768MPX + * chipset: reserve a page before VGA to prevent PCI prefetch + * into it (errata #56). (Usually the page is reserved anyways, + * unless you have no PS/2 mouse plugged in.) + * + * - Plus paravirt systems don't have a reliable value in the + * 'BIOS RAM size' pointer we can rely on, so we must quirk + * them too. + * + * Due to those various problems this function is deliberately + * very conservative and tries to err on the side of reserving + * too much, to not risk reserving too little. + * + * Losing a small amount of memory in the bottom megabyte is + * rarely a problem, as long as we have enough memory to install + * the SMP bootup trampoline which *must* be in this area. * - * This functions is deliberately very conservative. Losing - * memory in the bottom megabyte is rarely a problem, as long - * as we have enough memory to install the trampoline. Using - * memory that is in use by the BIOS or by some DMA device - * the BIOS didn't shut down *is* a big problem. + * Using memory that is in use by the BIOS or by some DMA device + * the BIOS didn't shut down *is* a big problem to the kernel, + * obviously. */ -#define BIOS_LOWMEM_KILOBYTES 0x413 -#define LOWMEM_CAP 0x9f000U /* Absolute maximum */ -#define INSANE_CUTOFF 0x20000U /* Less than this = insane */ +#define BIOS_RAM_SIZE_KB_PTR 0x413 -void __init reserve_ebda_region(void) +#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ +#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ + +void __init reserve_bios_regions(void) { - unsigned int lowmem, ebda_addr; + unsigned int bios_start, ebda_start; /* - * To determine the position of the EBDA and the - * end of conventional memory, we need to look at - * the BIOS data area. In a paravirtual environment - * that area is absent. We'll just have to assume - * that the paravirt case can handle memory setup - * correctly, without our help. + * NOTE: In a paravirtual environment the BIOS reserved + * area is absent. We'll just have to assume that the + * paravirt case can handle memory setup correctly, + * without our help. */ - if (!x86_platform.legacy.ebda_search) + if (!x86_platform.legacy.reserve_bios_regions) return; - /* end of low (conventional) memory */ - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); - lowmem <<= 10; - - /* start of EBDA area */ - ebda_addr = get_bios_ebda(); - /* - * Note: some old Dells seem to need 4k EBDA without - * reporting so, so just consider the memory above 0x9f000 - * to be off limits (bugzilla 2990). + * BIOS RAM size is encoded in kilobytes, convert it + * to bytes to get a first guess at where the BIOS + * firmware area starts: */ + bios_start = *(unsigned short *)__va(BIOS_RAM_SIZE_KB_PTR); + bios_start <<= 10; - /* If the EBDA address is below 128K, assume it is bogus */ - if (ebda_addr < INSANE_CUTOFF) - ebda_addr = LOWMEM_CAP; + /* + * If bios_start is less than 128K, assume it is bogus + * and bump it up to 640K. Similarly, if bios_start is above 640K, + * don't trust it. + */ + if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) + bios_start = BIOS_START_MAX; - /* If lowmem is less than 128K, assume it is bogus */ - if (lowmem < INSANE_CUTOFF) - lowmem = LOWMEM_CAP; + /* Get the start address of the EBDA page: */ + ebda_start = get_bios_ebda(); - /* Use the lower of the lowmem and EBDA markers as the cutoff */ - lowmem = min(lowmem, ebda_addr); - lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */ + /* + * If the EBDA start address is sane and is below the BIOS region, + * then also reserve everything from the EBDA start address up to + * the BIOS region. + */ + if (ebda_start >= BIOS_START_MIN && ebda_start < bios_start) + bios_start = ebda_start; - /* reserve all memory between lowmem and the 1MB mark */ - memblock_reserve(lowmem, 0x100000 - lowmem); + /* Reserve all memory between bios_start and the 1MB mark: */ + memblock_reserve(bios_start, 0x100000 - bios_start); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 97027545a72d..3fc03a09a93b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -8,10 +8,14 @@ #include <asm/fpu/internal.h> #include <asm/fpu/regset.h> #include <asm/fpu/signal.h> +#include <asm/fpu/types.h> #include <asm/traps.h> #include <linux/hardirq.h> +#define CREATE_TRACE_POINTS +#include <asm/trace/fpu.h> + /* * Represents the initial FPU state. It's mostly (but not completely) zeroes, * depending on the FPU hardware format: @@ -192,6 +196,7 @@ void fpu__save(struct fpu *fpu) WARN_ON_FPU(fpu != ¤t->thread.fpu); preempt_disable(); + trace_x86_fpu_before_save(fpu); if (fpu->fpregs_active) { if (!copy_fpregs_to_fpstate(fpu)) { if (use_eager_fpu()) @@ -200,6 +205,7 @@ void fpu__save(struct fpu *fpu) fpregs_deactivate(fpu); } } + trace_x86_fpu_after_save(fpu); preempt_enable(); } EXPORT_SYMBOL_GPL(fpu__save); @@ -222,7 +228,14 @@ void fpstate_init(union fpregs_state *state) return; } - memset(state, 0, xstate_size); + memset(state, 0, fpu_kernel_xstate_size); + + /* + * XRSTORS requires that this bit is set in xcomp_bv, or + * it will #GP. Make sure it is replaced after the memset(). + */ + if (static_cpu_has(X86_FEATURE_XSAVES)) + state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT; if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); @@ -247,7 +260,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * leak into the child task: */ if (use_eager_fpu()) - memset(&dst_fpu->state.xsave, 0, xstate_size); + memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); /* * Save current FPU registers directly into the child @@ -266,7 +279,8 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) */ preempt_disable(); if (!copy_fpregs_to_fpstate(dst_fpu)) { - memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); + memcpy(&src_fpu->state, &dst_fpu->state, + fpu_kernel_xstate_size); if (use_eager_fpu()) copy_kernel_to_fpregs(&src_fpu->state); @@ -275,6 +289,9 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) } preempt_enable(); + trace_x86_fpu_copy_src(src_fpu); + trace_x86_fpu_copy_dst(dst_fpu); + return 0; } @@ -288,7 +305,9 @@ void fpu__activate_curr(struct fpu *fpu) if (!fpu->fpstate_active) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for the current task: */ fpu->fpstate_active = 1; } @@ -314,7 +333,9 @@ void fpu__activate_fpstate_read(struct fpu *fpu) } else { if (!fpu->fpstate_active) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for current and for stopped child tasks: */ fpu->fpstate_active = 1; } @@ -347,7 +368,9 @@ void fpu__activate_fpstate_write(struct fpu *fpu) fpu->last_cpu = -1; } else { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for stopped child tasks: */ fpu->fpstate_active = 1; } @@ -432,9 +455,11 @@ void fpu__restore(struct fpu *fpu) /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); + trace_x86_fpu_before_restore(fpu); fpregs_activate(fpu); copy_kernel_to_fpregs(&fpu->state); fpu->counter++; + trace_x86_fpu_after_restore(fpu); kernel_fpu_enable(); } EXPORT_SYMBOL_GPL(fpu__restore); @@ -463,6 +488,8 @@ void fpu__drop(struct fpu *fpu) fpu->fpstate_active = 0; + trace_x86_fpu_dropped(fpu); + preempt_enable(); } diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index aacfd7a82cec..93982aebb398 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -145,8 +145,8 @@ static void __init fpu__init_system_generic(void) * This is inherent to the XSAVE architecture which puts all state * components into a single, continuous memory block: */ -unsigned int xstate_size; -EXPORT_SYMBOL_GPL(xstate_size); +unsigned int fpu_kernel_xstate_size; +EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size); /* Get alignment of the TYPE. */ #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) @@ -178,7 +178,7 @@ static void __init fpu__init_task_struct_size(void) * Add back the dynamically-calculated register state * size. */ - task_size += xstate_size; + task_size += fpu_kernel_xstate_size; /* * We dynamically size 'struct fpu', so we require that @@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void) } /* - * Set up the xstate_size based on the legacy FPU context size. + * Set up the user and kernel xstate sizes based on the legacy FPU context size. * * We set this up first, and later it will be overwritten by * fpu__init_system_xstate() if the CPU knows about xstates. @@ -208,7 +208,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) on_boot_cpu = 0; /* - * Note that xstate_size might be overwriten later during + * Note that xstate sizes might be overwritten later during * fpu__init_system_xstate(). */ @@ -219,27 +219,17 @@ static void __init fpu__init_system_xstate_size_legacy(void) */ setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - xstate_size = sizeof(struct swregs_state); + fpu_kernel_xstate_size = sizeof(struct swregs_state); } else { if (boot_cpu_has(X86_FEATURE_FXSR)) - xstate_size = sizeof(struct fxregs_state); + fpu_kernel_xstate_size = + sizeof(struct fxregs_state); else - xstate_size = sizeof(struct fregs_state); + fpu_kernel_xstate_size = + sizeof(struct fregs_state); } - /* - * Quirk: we don't yet handle the XSAVES* instructions - * correctly, as we don't correctly convert between - * standard and compacted format when interfacing - * with user-space - so disable it for now. - * - * The difference is small: with recent CPUs the - * compacted format is only marginally smaller than - * the standard FPU state format. - * - * ( This is easy to backport while we are fixing - * XSAVES* support. ) - */ - setup_clear_cpu_cap(X86_FEATURE_XSAVES); + + fpu_user_xstate_size = fpu_kernel_xstate_size; } /* diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 81422dfb152b..c114b132d121 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -4,6 +4,7 @@ #include <asm/fpu/internal.h> #include <asm/fpu/signal.h> #include <asm/fpu/regset.h> +#include <asm/fpu/xstate.h> /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, @@ -85,21 +86,26 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; - fpu__activate_fpstate_read(fpu); - xsave = &fpu->state.xsave; - /* - * Copy the 48bytes defined by the software first into the xstate - * memory layout in the thread struct, so that we can copy the entire - * xstateregs to the user using one user_regset_copyout(). - */ - memcpy(&xsave->i387.sw_reserved, - xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); - /* - * Copy the xstate memory layout. - */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + fpu__activate_fpstate_read(fpu); + + if (using_compacted_format()) { + ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave); + } else { + fpstate_sanitize_xstate(fpu); + /* + * Copy the 48 bytes defined by the software into the xsave + * area in the thread struct, so that we can copy the whole + * area to user using one user_regset_copyout(). + */ + memcpy(&xsave->i387.sw_reserved, xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); + + /* + * Copy the xstate memory layout. + */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + } return ret; } @@ -114,11 +120,27 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; - fpu__activate_fpstate_write(fpu); + /* + * A whole standard-format XSAVE buffer is needed: + */ + if ((pos != 0) || (count < fpu_user_xstate_size)) + return -EFAULT; xsave = &fpu->state.xsave; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + fpu__activate_fpstate_write(fpu); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + ret = copyin_to_xsaves(kbuf, ubuf, xsave); + else + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + + /* + * In case of failure, mark all states as init: + */ + if (ret) + fpstate_init(&fpu->state); + /* * mxcsr reserved bits must be masked to zero for security reasons. */ diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 31c6a60505e6..9e231d88bb33 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -8,8 +8,10 @@ #include <asm/fpu/internal.h> #include <asm/fpu/signal.h> #include <asm/fpu/regset.h> +#include <asm/fpu/xstate.h> #include <asm/sigframe.h> +#include <asm/trace/fpu.h> static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; @@ -31,7 +33,7 @@ static inline int check_for_xstate(struct fxregs_state __user *buf, /* Check for the first magic field and other error scenarios. */ if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || fx_sw->xstate_size < min_xstate_size || - fx_sw->xstate_size > xstate_size || + fx_sw->xstate_size > fpu_user_xstate_size || fx_sw->xstate_size > fx_sw->extended_size) return -1; @@ -88,7 +90,8 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) if (!use_xsave()) return err; - err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 *)(buf + fpu_user_xstate_size)); /* * Read the xfeatures which we copied (directly from the cpu or @@ -125,7 +128,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) else err = copy_fregs_to_user((struct fregs_state __user *) buf); - if (unlikely(err) && __clear_user(buf, xstate_size)) + if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size)) err = -EFAULT; return err; } @@ -167,7 +170,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - if (fpregs_active()) { + if (fpregs_active() || using_compacted_format()) { /* Save the live register state to the user directly. */ if (copy_fpregs_to_sigframe(buf_fx)) return -1; @@ -175,8 +178,19 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) if (ia32_fxstate) copy_fxregs_to_kernel(&tsk->thread.fpu); } else { + /* + * It is a *bug* if kernel uses compacted-format for xsave + * area and we copy it out directly to a signal frame. It + * should have been handled above by saving the registers + * directly. + */ + if (boot_cpu_has(X86_FEATURE_XSAVES)) { + WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n"); + return -1; + } + fpstate_sanitize_xstate(&tsk->thread.fpu); - if (__copy_to_user(buf_fx, xsave, xstate_size)) + if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) return -1; } @@ -250,7 +264,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; - int state_size = xstate_size; + int state_size = fpu_kernel_xstate_size; u64 xfeatures = 0; int fx_only = 0; @@ -282,6 +296,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ state_size = sizeof(struct fxregs_state); fx_only = 1; + trace_x86_fpu_xstate_check_failed(fpu); } else { state_size = fx_sw_user.xstate_size; xfeatures = fx_sw_user.xfeatures; @@ -308,9 +323,17 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ fpu__drop(fpu); - if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { + if (using_compacted_format()) { + err = copyin_to_xsaves(NULL, buf_fx, + &fpu->state.xsave); + } else { + err = __copy_from_user(&fpu->state.xsave, + buf_fx, state_size); + } + + if (err || __copy_from_user(&env, buf, sizeof(env))) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); err = -1; } else { sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); @@ -341,7 +364,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) static inline int xstate_sigframe_size(void) { - return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; + return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE : + fpu_user_xstate_size; } /* @@ -385,12 +409,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, */ void fpu__init_prepare_fx_sw_frame(void) { - int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; + int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE; fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = size; fx_sw_reserved.xfeatures = xfeatures_mask; - fx_sw_reserved.xstate_size = xstate_size; + fx_sw_reserved.xstate_size = fpu_user_xstate_size; if (config_enabled(CONFIG_IA32_EMULATION) || config_enabled(CONFIG_X86_32)) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 4ea2a59483c7..680049aa4593 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -11,6 +11,7 @@ #include <asm/fpu/internal.h> #include <asm/fpu/signal.h> #include <asm/fpu/regset.h> +#include <asm/fpu/xstate.h> #include <asm/tlbflush.h> @@ -44,6 +45,13 @@ static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; /* + * The XSAVE area of kernel can be in standard or compacted format; + * it is always in standard format for user mode. This is the user + * mode standard format size used for signal and ptrace frames. + */ +unsigned int fpu_user_xstate_size; + +/* * Clear all of the X86_FEATURE_* bits that are unavailable * when the CPU has no XSAVE support. */ @@ -105,6 +113,27 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) } EXPORT_SYMBOL_GPL(cpu_has_xfeatures); +static int xfeature_is_supervisor(int xfeature_nr) +{ + /* + * We currently do not support supervisor states, but if + * we did, we could find out like this. + * + * SDM says: If state component 'i' is a user state component, + * ECX[0] return 0; if state component i is a supervisor + * state component, ECX[0] returns 1. + */ + u32 eax, ebx, ecx, edx; + + cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); + return !!(ecx & 1); +} + +static int xfeature_is_user(int xfeature_nr) +{ + return !xfeature_is_supervisor(xfeature_nr); +} + /* * When executing XSAVEOPT (or other optimized XSAVE instructions), if * a processor implementation detects that an FPU state component is still @@ -171,7 +200,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) */ while (xfeatures) { if (xfeatures & 0x1) { - int offset = xstate_offsets[feature_bit]; + int offset = xstate_comp_offsets[feature_bit]; int size = xstate_sizes[feature_bit]; memcpy((void *)fx + offset, @@ -192,6 +221,15 @@ void fpu__init_cpu_xstate(void) { if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) return; + /* + * Make it clear that XSAVES supervisor states are not yet + * implemented should anyone expect it to work by changing + * bits in XFEATURE_MASK_* macros and XCR0. + */ + WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR), + "x86/fpu: XSAVES supervisor states are not yet implemented.\n"); + + xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR; cr4_set_bits(X86_CR4_OSXSAVE); xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); @@ -217,13 +255,29 @@ static void __init setup_xstate_features(void) /* start at the beginnning of the "extended state" */ unsigned int last_good_offset = offsetof(struct xregs_state, extended_state_area); + /* + * The FP xstates and SSE xstates are legacy states. They are always + * in the fixed offsets in the xsave area in either compacted form + * or standard form. + */ + xstate_offsets[0] = 0; + xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space); + xstate_offsets[1] = xstate_sizes[0]; + xstate_sizes[1] = FIELD_SIZEOF(struct fxregs_state, xmm_space); for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { if (!xfeature_enabled(i)) continue; cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); - xstate_offsets[i] = ebx; + + /* + * If an xfeature is supervisor state, the offset + * in EBX is invalid. We leave it to -1. + */ + if (xfeature_is_user(i)) + xstate_offsets[i] = ebx; + xstate_sizes[i] = eax; /* * In our xstate size checks, we assume that the @@ -233,8 +287,6 @@ static void __init setup_xstate_features(void) WARN_ONCE(last_good_offset > xstate_offsets[i], "x86/fpu: misordered xstate at %d\n", last_good_offset); last_good_offset = xstate_offsets[i]; - - printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, ebx, i, eax); } } @@ -263,6 +315,33 @@ static void __init print_xstate_features(void) } /* + * This check is important because it is easy to get XSTATE_* + * confused with XSTATE_BIT_*. + */ +#define CHECK_XFEATURE(nr) do { \ + WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ + WARN_ON(nr >= XFEATURE_MAX); \ +} while (0) + +/* + * We could cache this like xstate_size[], but we only use + * it here, so it would be a waste of space. + */ +static int xfeature_is_aligned(int xfeature_nr) +{ + u32 eax, ebx, ecx, edx; + + CHECK_XFEATURE(xfeature_nr); + cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); + /* + * The value returned by ECX[1] indicates the alignment + * of state component 'i' when the compacted format + * of the extended region of an XSAVE area is used: + */ + return !!(ecx & 2); +} + +/* * This function sets up offsets and sizes of all extended states in * xsave area. This supports both standard format and compacted format * of the xsave aread. @@ -299,10 +378,29 @@ static void __init setup_xstate_comp(void) else xstate_comp_sizes[i] = 0; - if (i > FIRST_EXTENDED_XFEATURE) + if (i > FIRST_EXTENDED_XFEATURE) { xstate_comp_offsets[i] = xstate_comp_offsets[i-1] + xstate_comp_sizes[i-1]; + if (xfeature_is_aligned(i)) + xstate_comp_offsets[i] = + ALIGN(xstate_comp_offsets[i], 64); + } + } +} + +/* + * Print out xstate component offsets and sizes + */ +static void __init print_xstate_offset_size(void) +{ + int i; + + for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { + if (!xfeature_enabled(i)) + continue; + pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", + i, xstate_comp_offsets[i], i, xstate_sizes[i]); } } @@ -322,13 +420,11 @@ static void __init setup_init_fpu_buf(void) setup_xstate_features(); print_xstate_features(); - if (boot_cpu_has(X86_FEATURE_XSAVES)) { + if (boot_cpu_has(X86_FEATURE_XSAVES)) init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; - init_fpstate.xsave.header.xfeatures = xfeatures_mask; - } /* - * Init all the features state with header_bv being 0x0 + * Init all the features state with header.xfeatures being 0x0 */ copy_kernel_to_xregs_booting(&init_fpstate.xsave); @@ -339,58 +435,19 @@ static void __init setup_init_fpu_buf(void) copy_xregs_to_kernel_booting(&init_fpstate.xsave); } -static int xfeature_is_supervisor(int xfeature_nr) -{ - /* - * We currently do not support supervisor states, but if - * we did, we could find out like this. - * - * SDM says: If state component i is a user state component, - * ECX[0] return 0; if state component i is a supervisor - * state component, ECX[0] returns 1. - u32 eax, ebx, ecx, edx; - cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx; - return !!(ecx & 1); - */ - return 0; -} -/* -static int xfeature_is_user(int xfeature_nr) -{ - return !xfeature_is_supervisor(xfeature_nr); -} -*/ - -/* - * This check is important because it is easy to get XSTATE_* - * confused with XSTATE_BIT_*. - */ -#define CHECK_XFEATURE(nr) do { \ - WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ - WARN_ON(nr >= XFEATURE_MAX); \ -} while (0) - -/* - * We could cache this like xstate_size[], but we only use - * it here, so it would be a waste of space. - */ -static int xfeature_is_aligned(int xfeature_nr) +static int xfeature_uncompacted_offset(int xfeature_nr) { u32 eax, ebx, ecx, edx; - CHECK_XFEATURE(xfeature_nr); - cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); /* - * The value returned by ECX[1] indicates the alignment - * of state component i when the compacted format - * of the extended region of an XSAVE area is used + * Only XSAVES supports supervisor states and it uses compacted + * format. Checking a supervisor state's uncompacted offset is + * an error. */ - return !!(ecx & 2); -} - -static int xfeature_uncompacted_offset(int xfeature_nr) -{ - u32 eax, ebx, ecx, edx; + if (XFEATURE_MASK_SUPERVISOR & (1 << xfeature_nr)) { + WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr); + return -1; + } CHECK_XFEATURE(xfeature_nr); cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); @@ -415,7 +472,7 @@ static int xfeature_size(int xfeature_nr) * that it is obvious which aspect of 'XSAVES' is being handled * by the calling code. */ -static int using_compacted_format(void) +int using_compacted_format(void) { return boot_cpu_has(X86_FEATURE_XSAVES); } @@ -530,11 +587,12 @@ static void do_extra_xstate_size_checks(void) */ paranoid_xstate_size += xfeature_size(i); } - XSTATE_WARN_ON(paranoid_xstate_size != xstate_size); + XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size); } + /* - * Calculate total size of enabled xstates in XCR0/xfeatures_mask. + * Get total size of enabled xstates in XCR0/xfeatures_mask. * * Note the SDM's wording here. "sub-function 0" only enumerates * the size of the *user* states. If we use it to size a buffer @@ -544,34 +602,33 @@ static void do_extra_xstate_size_checks(void) * Note that we do not currently set any bits on IA32_XSS so * 'XCR0 | IA32_XSS == XCR0' for now. */ -static unsigned int __init calculate_xstate_size(void) +static unsigned int __init get_xsaves_size(void) { unsigned int eax, ebx, ecx, edx; - unsigned int calculated_xstate_size; + /* + * - CPUID function 0DH, sub-function 1: + * EBX enumerates the size (in bytes) required by + * the XSAVES instruction for an XSAVE area + * containing all the state components + * corresponding to bits currently set in + * XCR0 | IA32_XSS. + */ + cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); + return ebx; +} - if (!boot_cpu_has(X86_FEATURE_XSAVES)) { - /* - * - CPUID function 0DH, sub-function 0: - * EBX enumerates the size (in bytes) required by - * the XSAVE instruction for an XSAVE area - * containing all the *user* state components - * corresponding to bits currently set in XCR0. - */ - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - calculated_xstate_size = ebx; - } else { - /* - * - CPUID function 0DH, sub-function 1: - * EBX enumerates the size (in bytes) required by - * the XSAVES instruction for an XSAVE area - * containing all the state components - * corresponding to bits currently set in - * XCR0 | IA32_XSS. - */ - cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); - calculated_xstate_size = ebx; - } - return calculated_xstate_size; +static unsigned int __init get_xsave_size(void) +{ + unsigned int eax, ebx, ecx, edx; + /* + * - CPUID function 0DH, sub-function 0: + * EBX enumerates the size (in bytes) required by + * the XSAVE instruction for an XSAVE area + * containing all the *user* state components + * corresponding to bits currently set in XCR0. + */ + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + return ebx; } /* @@ -591,7 +648,15 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size) static int init_xstate_size(void) { /* Recompute the context size for enabled features: */ - unsigned int possible_xstate_size = calculate_xstate_size(); + unsigned int possible_xstate_size; + unsigned int xsave_size; + + xsave_size = get_xsave_size(); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + possible_xstate_size = get_xsaves_size(); + else + possible_xstate_size = xsave_size; /* Ensure we have the space to store all enabled: */ if (!is_supported_xstate_size(possible_xstate_size)) @@ -601,8 +666,13 @@ static int init_xstate_size(void) * The size is OK, we are definitely going to use xsave, * make it known to the world that we need more space. */ - xstate_size = possible_xstate_size; + fpu_kernel_xstate_size = possible_xstate_size; do_extra_xstate_size_checks(); + + /* + * User space is always in standard format. + */ + fpu_user_xstate_size = xsave_size; return 0; } @@ -644,8 +714,13 @@ void __init fpu__init_system_xstate(void) xfeatures_mask = eax + ((u64)edx << 32); if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { + /* + * This indicates that something really unexpected happened + * with the enumeration. Disable XSAVE and try to continue + * booting without it. This is too early to BUG(). + */ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); - BUG(); + goto out_disable; } xfeatures_mask &= fpu__get_supported_xfeatures_mask(); @@ -653,21 +728,29 @@ void __init fpu__init_system_xstate(void) /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); err = init_xstate_size(); - if (err) { - /* something went wrong, boot without any XSAVE support */ - fpu__init_disable_system_xstate(); - return; - } + if (err) + goto out_disable; + + /* + * Update info used for ptrace frames; use standard-format size and no + * supervisor xstates: + */ + update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR); - update_regset_xstate_info(xstate_size, xfeatures_mask); fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp(); + print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", xfeatures_mask, - xstate_size, + fpu_kernel_xstate_size, boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); + return; + +out_disable: + /* something went wrong, try to boot without any XSAVE support */ + fpu__init_disable_system_xstate(); } /* @@ -693,6 +776,11 @@ void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) { int feature_nr = fls64(xstate_feature_mask) - 1; + if (!xfeature_enabled(feature_nr)) { + WARN_ON_FPU(1); + return NULL; + } + return (void *)xsave + xstate_comp_offsets[feature_nr]; } /* @@ -887,16 +975,16 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, if (!boot_cpu_has(X86_FEATURE_OSPKE)) return -EINVAL; - /* Set the bits we need in PKRU */ + /* Set the bits we need in PKRU: */ if (init_val & PKEY_DISABLE_ACCESS) new_pkru_bits |= PKRU_AD_BIT; if (init_val & PKEY_DISABLE_WRITE) new_pkru_bits |= PKRU_WD_BIT; - /* Shift the bits in to the correct place in PKRU for pkey. */ + /* Shift the bits in to the correct place in PKRU for pkey: */ new_pkru_bits <<= pkey_shift; - /* Locate old copy of the state in the xsave buffer */ + /* Locate old copy of the state in the xsave buffer: */ old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU); /* @@ -909,9 +997,10 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, else new_pkru_state.pkru = old_pkru_state->pkru; - /* mask off any old bits in place */ + /* Mask off any old bits in place: */ new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); - /* Set the newly-requested bits */ + + /* Set the newly-requested bits: */ new_pkru_state.pkru |= new_pkru_bits; /* @@ -925,8 +1014,168 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, */ new_pkru_state.pad = 0; - fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, - sizeof(new_pkru_state)); + fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state)); + + return 0; +} + +/* + * This is similar to user_regset_copyout(), but will not add offset to + * the source data pointer or increment pos, count, kbuf, and ubuf. + */ +static inline int xstate_copyout(unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf, + const void *data, const int start_pos, + const int end_pos) +{ + if ((count == 0) || (pos < start_pos)) + return 0; + + if (end_pos < 0 || pos < end_pos) { + unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos)); + + if (kbuf) { + memcpy(kbuf + pos, data, copy); + } else { + if (__copy_to_user(ubuf + pos, data, copy)) + return -EFAULT; + } + } + return 0; +} + +/* + * Convert from kernel XSAVES compacted format to standard format and copy + * to a ptrace buffer. It supports partial copy but pos always starts from + * zero. This is called from xstateregs_get() and there we check the CPU + * has XSAVES. + */ +int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, + void __user *ubuf, struct xregs_state *xsave) +{ + unsigned int offset, size; + int ret, i; + struct xstate_header header; + + /* + * Currently copy_regset_to_user() starts from pos 0: + */ + if (unlikely(pos != 0)) + return -EFAULT; + + /* + * The destination is a ptrace buffer; we put in only user xstates: + */ + memset(&header, 0, sizeof(header)); + header.xfeatures = xsave->header.xfeatures; + header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; + + /* + * Copy xregs_state->header: + */ + offset = offsetof(struct xregs_state, header); + size = sizeof(header); + + ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count); + + if (ret) + return ret; + + for (i = 0; i < XFEATURE_MAX; i++) { + /* + * Copy only in-use xstates: + */ + if ((header.xfeatures >> i) & 1) { + void *src = __raw_xsave_addr(xsave, 1 << i); + + offset = xstate_offsets[i]; + size = xstate_sizes[i]; + + ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count); + + if (ret) + return ret; + + if (offset + size >= count) + break; + } + + } + + /* + * Fill xsave->i387.sw_reserved value for ptrace frame: + */ + offset = offsetof(struct fxregs_state, sw_reserved); + size = sizeof(xstate_fx_sw_bytes); + + ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count); + + if (ret) + return ret; + + return 0; +} + +/* + * Convert from a ptrace standard-format buffer to kernel XSAVES format + * and copy to the target thread. This is called from xstateregs_set() and + * there we check the CPU has XSAVES and a whole standard-sized buffer + * exists. + */ +int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, + struct xregs_state *xsave) +{ + unsigned int offset, size; + int i; + u64 xfeatures; + u64 allowed_features; + + offset = offsetof(struct xregs_state, header); + size = sizeof(xfeatures); + + if (kbuf) { + memcpy(&xfeatures, kbuf + offset, size); + } else { + if (__copy_from_user(&xfeatures, ubuf + offset, size)) + return -EFAULT; + } + + /* + * Reject if the user sets any disabled or supervisor features: + */ + allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; + + if (xfeatures & ~allowed_features) + return -EINVAL; + + for (i = 0; i < XFEATURE_MAX; i++) { + u64 mask = ((u64)1 << i); + + if (xfeatures & mask) { + void *dst = __raw_xsave_addr(xsave, 1 << i); + + offset = xstate_offsets[i]; + size = xstate_sizes[i]; + + if (kbuf) { + memcpy(dst, kbuf + offset, size); + } else { + if (__copy_from_user(dst, ubuf + offset, size)) + return -EFAULT; + } + } + } + + /* + * The state that came in from userspace was user-state only. + * Mask all the user states out of 'xfeatures': + */ + xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR; + + /* + * Add back in the features that came in from userspace: + */ + xsave->header.xfeatures |= xfeatures; return 0; } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index d784bb547a9d..2dda0bc4576e 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -26,7 +26,7 @@ static void __init i386_default_early_setup(void) x86_init.resources.reserve_resources = i386_reserve_resources; x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; - reserve_ebda_region(); + reserve_bios_regions(); } asmlinkage __visible void __init i386_start_kernel(void) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b72fb0b71dd1..99d48e7d2974 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -183,7 +183,7 @@ void __init x86_64_start_reservations(char *real_mode_data) copy_bootdata(__va(real_mode_data)); x86_early_init_platform_quirks(); - reserve_ebda_region(); + reserve_bios_regions(); switch (boot_params.hdr.hardware_subarch) { case X86_SUBARCH_INTEL_MID: diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 5df831ef1442..9f8efc9f0075 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -38,7 +38,7 @@ #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) +L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) L4_START_KERNEL = pgd_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) @@ -299,6 +299,7 @@ ENTRY(secondary_startup_64) pushq $__KERNEL_CS # set correct cs pushq %rax # target address in negative space lretq +ENDPROC(secondary_startup_64) #include "verify_cpu.S" diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index f112af7aa62e..3d747070fe67 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -710,31 +710,29 @@ static void hpet_work(struct work_struct *w) complete(&hpet_work->complete); } -static int hpet_cpuhp_notify(struct notifier_block *n, - unsigned long action, void *hcpu) +static int hpet_cpuhp_online(unsigned int cpu) { - unsigned long cpu = (unsigned long)hcpu; struct hpet_work_struct work; + + INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work); + init_completion(&work.complete); + /* FIXME: add schedule_work_on() */ + schedule_delayed_work_on(cpu, &work.work, 0); + wait_for_completion(&work.complete); + destroy_delayed_work_on_stack(&work.work); + return 0; +} + +static int hpet_cpuhp_dead(unsigned int cpu) +{ struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu); - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work); - init_completion(&work.complete); - /* FIXME: add schedule_work_on() */ - schedule_delayed_work_on(cpu, &work.work, 0); - wait_for_completion(&work.complete); - destroy_delayed_work_on_stack(&work.work); - break; - case CPU_DEAD: - if (hdev) { - free_irq(hdev->irq, hdev); - hdev->flags &= ~HPET_DEV_USED; - per_cpu(cpu_hpet_dev, cpu) = NULL; - } - break; - } - return NOTIFY_OK; + if (!hdev) + return 0; + free_irq(hdev->irq, hdev); + hdev->flags &= ~HPET_DEV_USED; + per_cpu(cpu_hpet_dev, cpu) = NULL; + return 0; } #else @@ -750,11 +748,8 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd) } #endif -static int hpet_cpuhp_notify(struct notifier_block *n, - unsigned long action, void *hcpu) -{ - return NOTIFY_OK; -} +#define hpet_cpuhp_online NULL +#define hpet_cpuhp_dead NULL #endif @@ -931,7 +926,7 @@ out_nohpet: */ static __init int hpet_late_init(void) { - int cpu; + int ret; if (boot_hpet_disable) return -ENODEV; @@ -961,16 +956,20 @@ static __init int hpet_late_init(void) if (boot_cpu_has(X86_FEATURE_ARAT)) return 0; - cpu_notifier_register_begin(); - for_each_online_cpu(cpu) { - hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); - } - /* This notifier should be called after workqueue is ready */ - __hotcpu_notifier(hpet_cpuhp_notify, -20); - cpu_notifier_register_done(); - + ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "AP_X86_HPET_ONLINE", + hpet_cpuhp_online, NULL); + if (ret) + return ret; + ret = cpuhp_setup_state(CPUHP_X86_HPET_DEAD, "X86_HPET_DEAD", NULL, + hpet_cpuhp_dead); + if (ret) + goto err_cpuhp; return 0; + +err_cpuhp: + cpuhp_remove_state(CPUHP_AP_X86_HPET_ONLINE); + return ret; } fs_initcall(hpet_late_init); diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 2bcfb5f2bc44..8771766d46b6 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -36,13 +36,14 @@ #include <linux/percpu.h> #include <linux/kdebug.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/smp.h> #include <asm/hw_breakpoint.h> #include <asm/processor.h> #include <asm/debugreg.h> +#include <asm/user.h> /* Per cpu debug control register value */ DEFINE_PER_CPU(unsigned long, cpu_dr7); diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 64341aa485ae..1f9b878ef5ef 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -1,4 +1,5 @@ -#include <linux/module.h> +#include <linux/export.h> +#include <linux/spinlock_types.h> #include <asm/checksum.h> #include <asm/pgtable.h> @@ -42,3 +43,5 @@ EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(___preempt_schedule); EXPORT_SYMBOL(___preempt_schedule_notrace); #endif + +EXPORT_SYMBOL(__sw_hweight32); diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index efb82f07b29c..6ebe00cb4a3b 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -3,7 +3,7 @@ * */ #include <linux/clockchips.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/timex.h> #include <linux/i8253.h> diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index a979b5bd2fc0..50c89e8a95f2 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c @@ -6,7 +6,7 @@ * outb_p/inb_p API uses. */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/dmi.h> diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index c627bf8d98ad..1f38d9a4d9de 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -8,7 +8,6 @@ * io_apic.c.) */ -#include <linux/module.h> #include <linux/seq_file.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 206d0b90a3ab..4a7903714065 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -11,7 +11,6 @@ #include <linux/kernel_stat.h> #include <linux/interrupt.h> #include <linux/seq_file.h> -#include <linux/module.h> #include <linux/delay.h> #include <linux/ftrace.h> #include <linux/uaccess.h> diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index dc1404bf8e4b..bdb83e431d89 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -8,7 +8,7 @@ */ #include <linux/debugfs.h> #include <linux/uaccess.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/stat.h> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index eea2a6f72b31..1726c4c12336 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -21,7 +21,7 @@ */ #include <linux/context_tracking.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/kernel.h> #include <linux/kvm_para.h> #include <linux/cpu.h> @@ -301,8 +301,6 @@ static void kvm_register_steal_time(void) if (!has_steal_clock) return; - memset(st, 0, sizeof(*st)); - wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); pr_info("kvm-stealtime: cpu %d, msr %llx\n", cpu, (unsigned long long) slow_virt_to_phys(st)); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 97340f2c437c..068c4a929de6 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -16,7 +16,6 @@ #include <linux/mc146818rtc.h> #include <linux/bitops.h> #include <linux/acpi.h> -#include <linux/module.h> #include <linux/smp.h> #include <linux/pci.h> diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 33ee3e0efd65..1939a0269377 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -3,7 +3,7 @@ * compiled in a FTRACE-compatible way. */ #include <linux/spinlock.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/jump_label.h> #include <asm/paravirt.h> diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 7b3b3f24c3ea..ad5bc9578a73 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -19,7 +19,8 @@ */ #include <linux/errno.h> -#include <linux/module.h> +#include <linux/init.h> +#include <linux/export.h> #include <linux/efi.h> #include <linux/bcd.h> #include <linux/highmem.h> diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 7c577a178859..5069ef560d83 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -2,7 +2,7 @@ #include <linux/pci.h> #include <linux/cache.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/swiotlb.h> #include <linux/bootmem.h> #include <linux/dma-mapping.h> diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index b2f8a33b36ff..24a50301f150 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -7,12 +7,12 @@ void __init x86_early_init_platform_quirks(void) { x86_platform.legacy.rtc = 1; - x86_platform.legacy.ebda_search = 0; + x86_platform.legacy.reserve_bios_regions = 0; x86_platform.legacy.devices.pnpbios = 1; switch (boot_params.hdr.hardware_subarch) { case X86_SUBARCH_PC: - x86_platform.legacy.ebda_search = 1; + x86_platform.legacy.reserve_bios_regions = 1; break; case X86_SUBARCH_XEN: case X86_SUBARCH_LGUEST: diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 92f70147a9a6..0c5315d322c8 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -3,7 +3,7 @@ * Copyright (c) 2015, Intel Corporation. */ #include <linux/platform_device.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/ioport.h> static int found(u64 start, u64 end, void *data) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 96becbbb52e0..62c0b0ea2ce4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -7,7 +7,8 @@ #include <linux/prctl.h> #include <linux/slab.h> #include <linux/sched.h> -#include <linux/module.h> +#include <linux/init.h> +#include <linux/export.h> #include <linux/pm.h> #include <linux/tick.h> #include <linux/random.h> @@ -404,7 +405,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) if (c->x86_vendor != X86_VENDOR_INTEL) return 0; - if (!cpu_has(c, X86_FEATURE_MWAIT)) + if (!cpu_has(c, X86_FEATURE_MWAIT) || static_cpu_has_bug(X86_BUG_MONITOR)) return 0; return 1; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 9f950917528b..d86be29c38c7 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -25,7 +25,7 @@ #include <linux/delay.h> #include <linux/reboot.h> #include <linux/mc146818rtc.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/kallsyms.h> #include <linux/ptrace.h> #include <linux/personality.h> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6e789ca1f841..63236d8f84bf 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -26,7 +26,7 @@ #include <linux/user.h> #include <linux/interrupt.h> #include <linux/delay.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/ptrace.h> #include <linux/notifier.h> #include <linux/kprobes.h> diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index a9b31eb815f2..63bf27d972b7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -1,6 +1,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include <linux/module.h> +#include <linux/export.h> #include <linux/reboot.h> #include <linux/init.h> #include <linux/pm.h> @@ -55,6 +55,19 @@ bool port_cf9_safe = false; */ /* + * Some machines require the "reboot=a" commandline options + */ +static int __init set_acpi_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_ACPI) { + reboot_type = BOOT_ACPI; + pr_info("%s series board detected. Selecting %s-method for reboots.\n", + d->ident, "ACPI"); + } + return 0; +} + +/* * Some machines require the "reboot=b" or "reboot=k" commandline options, * this quirk makes that automatic. */ @@ -395,6 +408,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), }, }, + { /* Handle problems with rebooting on Dell Optiplex 7450 AIO */ + .callback = set_acpi_reboot, + .ident = "Dell OptiPlex 7450 AIO", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 7450 AIO"), + }, + }, /* Hewlett-Packard */ { /* Handle problems with rebooting on HP laptops */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c4e7b3991b60..991b77986d57 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -36,7 +36,7 @@ #include <linux/console.h> #include <linux/root_dev.h> #include <linux/highmem.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/efi.h> #include <linux/init.h> #include <linux/edd.h> @@ -113,6 +113,7 @@ #include <asm/prom.h> #include <asm/microcode.h> #include <asm/mmu_context.h> +#include <asm/kaslr.h> /* * max_low_pfn_mapped: highest direct mapped pfn under 4GB @@ -399,10 +400,6 @@ static void __init reserve_initrd(void) memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); } -static void __init early_initrd_acpi_init(void) -{ - early_acpi_table_init((void *)initrd_start, initrd_end - initrd_start); -} #else static void __init early_reserve_initrd(void) { @@ -410,9 +407,6 @@ static void __init early_reserve_initrd(void) static void __init reserve_initrd(void) { } -static void __init early_initrd_acpi_init(void) -{ -} #endif /* CONFIG_BLK_DEV_INITRD */ static void __init parse_setup_data(void) @@ -942,6 +936,8 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); + kernel_randomize_memory(); + iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; setup_memory_map(); parse_setup_data(); @@ -1146,7 +1142,7 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); - early_initrd_acpi_init(); + acpi_table_upgrade(); vsmp_init(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index e4fcb87ba7a6..7a40e068302d 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -236,6 +236,8 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = early_per_cpu_map(x86_bios_cpu_apicid, cpu); + per_cpu(x86_cpu_to_acpiid, cpu) = + early_per_cpu_map(x86_cpu_to_acpiid, cpu); #endif #ifdef CONFIG_X86_32 per_cpu(x86_cpu_to_logical_apicid, cpu) = @@ -271,6 +273,7 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_X86_LOCAL_APIC early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; + early_per_cpu_ptr(x86_cpu_to_acpiid) = NULL; #endif #ifdef CONFIG_X86_32 early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL; diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index dc3c0b1c816f..b44564bf86a8 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -1,11 +1,104 @@ #include <linux/compat.h> #include <linux/uaccess.h> +/* + * The compat_siginfo_t structure and handing code is very easy + * to break in several ways. It must always be updated when new + * updates are made to the main siginfo_t, and + * copy_siginfo_to_user32() must be updated when the + * (arch-independent) copy_siginfo_to_user() is updated. + * + * It is also easy to put a new member in the compat_siginfo_t + * which has implicit alignment which can move internal structure + * alignment around breaking the ABI. This can happen if you, + * for instance, put a plain 64-bit value in there. + */ +static inline void signal_compat_build_tests(void) +{ + int _sifields_offset = offsetof(compat_siginfo_t, _sifields); + + /* + * If adding a new si_code, there is probably new data in + * the siginfo. Make sure folks bumping the si_code + * limits also have to look at this code. Make sure any + * new fields are handled in copy_siginfo_to_user32()! + */ + BUILD_BUG_ON(NSIGILL != 8); + BUILD_BUG_ON(NSIGFPE != 8); + BUILD_BUG_ON(NSIGSEGV != 4); + BUILD_BUG_ON(NSIGBUS != 5); + BUILD_BUG_ON(NSIGTRAP != 4); + BUILD_BUG_ON(NSIGCHLD != 6); + BUILD_BUG_ON(NSIGSYS != 1); + + /* This is part of the ABI and can never change in size: */ + BUILD_BUG_ON(sizeof(compat_siginfo_t) != 128); + /* + * The offsets of all the (unioned) si_fields are fixed + * in the ABI, of course. Make sure none of them ever + * move and are always at the beginning: + */ + BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int)); +#define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name)) + + /* + * Ensure that the size of each si_field never changes. + * If it does, it is a sign that the + * copy_siginfo_to_user32() code below needs to updated + * along with the size in the CHECK_SI_SIZE(). + * + * We repeat this check for both the generic and compat + * siginfos. + * + * Note: it is OK for these to grow as long as the whole + * structure stays within the padding size (checked + * above). + */ +#define CHECK_CSI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((compat_siginfo_t *)0)->_sifields.name)) +#define CHECK_SI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((siginfo_t *)0)->_sifields.name)) + + CHECK_CSI_OFFSET(_kill); + CHECK_CSI_SIZE (_kill, 2*sizeof(int)); + CHECK_SI_SIZE (_kill, 2*sizeof(int)); + + CHECK_CSI_OFFSET(_timer); + CHECK_CSI_SIZE (_timer, 5*sizeof(int)); + CHECK_SI_SIZE (_timer, 6*sizeof(int)); + + CHECK_CSI_OFFSET(_rt); + CHECK_CSI_SIZE (_rt, 3*sizeof(int)); + CHECK_SI_SIZE (_rt, 4*sizeof(int)); + + CHECK_CSI_OFFSET(_sigchld); + CHECK_CSI_SIZE (_sigchld, 5*sizeof(int)); + CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); + + CHECK_CSI_OFFSET(_sigchld_x32); + CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int)); + /* no _sigchld_x32 in the generic siginfo_t */ + + CHECK_CSI_OFFSET(_sigfault); + CHECK_CSI_SIZE (_sigfault, 4*sizeof(int)); + CHECK_SI_SIZE (_sigfault, 8*sizeof(int)); + + CHECK_CSI_OFFSET(_sigpoll); + CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); + CHECK_SI_SIZE (_sigpoll, 4*sizeof(int)); + + CHECK_CSI_OFFSET(_sigsys); + CHECK_CSI_SIZE (_sigsys, 3*sizeof(int)); + CHECK_SI_SIZE (_sigsys, 4*sizeof(int)); + + /* any new si_fields should be added here */ +} + int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err = 0; bool ia32 = test_thread_flag(TIF_IA32); + signal_compat_build_tests(); + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; @@ -32,6 +125,21 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) &to->_sifields._pad[0]); switch (from->si_code >> 16) { case __SI_FAULT >> 16: + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || + from->si_code == BUS_MCEERR_AO)) + put_user_ex(from->si_addr_lsb, &to->si_addr_lsb); + + if (from->si_signo == SIGSEGV) { + if (from->si_code == SEGV_BNDERR) { + compat_uptr_t lower = (unsigned long)&to->si_lower; + compat_uptr_t upper = (unsigned long)&to->si_upper; + put_user_ex(lower, &to->si_lower); + put_user_ex(upper, &to->si_upper); + } + if (from->si_code == SEGV_PKUERR) + put_user_ex(from->si_pkey, &to->si_pkey); + } break; case __SI_SYS >> 16: put_user_ex(from->si_syscall, &to->si_syscall); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b923cac..2a6e84a30a54 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -43,7 +43,7 @@ #include <linux/init.h> #include <linux/smp.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/percpu.h> #include <linux/bootmem.h> @@ -105,6 +105,9 @@ static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); +/* Maximum number of SMT threads on any online core */ +int __max_smt_threads __read_mostly; + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; @@ -493,7 +496,7 @@ void set_cpu_sibling_map(int cpu) bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; - int i; + int i, threads; cpumask_set_cpu(cpu, cpu_sibling_setup_mask); @@ -550,6 +553,10 @@ void set_cpu_sibling_map(int cpu) if (match_die(c, o) && !topology_same_node(c, o)) primarily_use_numa_for_topology(); } + + threads = cpumask_weight(topology_sibling_cpumask(cpu)); + if (threads > __max_smt_threads) + __max_smt_threads = threads; } /* maps the cpu to the sched domain representing multi-core */ @@ -1285,7 +1292,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) cpumask_copy(cpu_callin_mask, cpumask_of(0)); mb(); - current_thread_info()->cpu = 0; /* needed? */ for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); @@ -1441,6 +1447,21 @@ __init void prefill_possible_map(void) #ifdef CONFIG_HOTPLUG_CPU +/* Recompute SMT state for all CPUs on offline */ +static void recompute_smt_state(void) +{ + int max_threads, cpu; + + max_threads = 0; + for_each_online_cpu (cpu) { + int threads = cpumask_weight(topology_sibling_cpumask(cpu)); + + if (threads > max_threads) + max_threads = threads; + } + __max_smt_threads = max_threads; +} + static void remove_siblinginfo(int cpu) { int sibling; @@ -1465,6 +1486,7 @@ static void remove_siblinginfo(int cpu) c->phys_proc_id = 0; c->cpu_core_id = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); + recompute_smt_state(); } static void remove_cpu_from_maps(int cpu) @@ -1622,7 +1644,7 @@ static inline void mwait_play_dead(void) } } -static inline void hlt_play_dead(void) +void hlt_play_dead(void) { if (__this_cpu_read(cpu_info.x86) >= 4) wbinvd(); diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 9ee98eefc44d..4738f5e0f2ab 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -5,7 +5,7 @@ */ #include <linux/sched.h> #include <linux/stacktrace.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/uaccess.h> #include <asm/stacktrace.h> diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 9b0185fbe3eb..654f6c66fe45 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -323,25 +323,16 @@ static int tboot_wait_for_aps(int num_aps) return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps); } -static int tboot_cpu_callback(struct notifier_block *nfb, unsigned long action, - void *hcpu) +static int tboot_dying_cpu(unsigned int cpu) { - switch (action) { - case CPU_DYING: - atomic_inc(&ap_wfs_count); - if (num_online_cpus() == 1) - if (tboot_wait_for_aps(atomic_read(&ap_wfs_count))) - return NOTIFY_BAD; - break; + atomic_inc(&ap_wfs_count); + if (num_online_cpus() == 1) { + if (tboot_wait_for_aps(atomic_read(&ap_wfs_count))) + return -EBUSY; } - return NOTIFY_OK; + return 0; } -static struct notifier_block tboot_cpu_notifier = -{ - .notifier_call = tboot_cpu_callback, -}; - #ifdef CONFIG_DEBUG_FS #define TBOOT_LOG_UUID { 0x26, 0x25, 0x19, 0xc0, 0x30, 0x6b, 0xb4, 0x4d, \ @@ -417,8 +408,8 @@ static __init int tboot_late_init(void) tboot_create_trampoline(); atomic_set(&ap_wfs_count, 0); - register_hotcpu_notifier(&tboot_cpu_notifier); - + cpuhp_setup_state(CPUHP_AP_X86_TBOOT_DYING, "AP_X86_TBOOT_DYING", NULL, + tboot_dying_cpu); #ifdef CONFIG_DEBUG_FS debugfs_create_file("tboot_log", S_IRUSR, arch_debugfs_dir, NULL, &tboot_log_fops); diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c index cb4a01b41e27..222e84e2432e 100644 --- a/arch/x86/kernel/test_rodata.c +++ b/arch/x86/kernel/test_rodata.c @@ -9,7 +9,6 @@ * as published by the Free Software Foundation; version 2 * of the License. */ -#include <linux/module.h> #include <asm/cacheflush.h> #include <asm/sections.h> #include <asm/asm.h> @@ -74,7 +73,3 @@ int rodata_test(void) return 0; } - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Testcase for marking rodata as read-only"); -MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 00f03d82e69a..b70ca12dd389 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -21,7 +21,7 @@ #include <linux/kdebug.h> #include <linux/kgdb.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/ptrace.h> #include <linux/uprobes.h> #include <linux/string.h> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 38ba6de56ede..1ef87e887051 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -3,7 +3,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/init.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/timer.h> #include <linux/acpi_pmtmr.h> #include <linux/cpufreq.h> @@ -239,7 +239,7 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) return ns; } -static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +static void set_cyc2ns_scale(unsigned long khz, int cpu) { unsigned long long tsc_now, ns_now; struct cyc2ns_data *data; @@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) local_irq_save(flags); sched_clock_idle_sleep_event(); - if (!cpu_khz) + if (!khz) goto done; data = cyc2ns_write_begin(cpu); @@ -261,7 +261,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) * time function is continuous; see the comment near struct * cyc2ns_data. */ - clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz, + clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz, NSEC_PER_MSEC, 0); /* @@ -335,12 +335,6 @@ int check_tsc_unstable(void) } EXPORT_SYMBOL_GPL(check_tsc_unstable); -int check_tsc_disabled(void) -{ - return tsc_disabled; -} -EXPORT_SYMBOL_GPL(check_tsc_disabled); - #ifdef CONFIG_X86_TSC int __init notsc_setup(char *str) { @@ -665,19 +659,77 @@ success: } /** - * native_calibrate_tsc - calibrate the tsc on boot + * native_calibrate_tsc + * Determine TSC frequency via CPUID, else return 0. */ unsigned long native_calibrate_tsc(void) { + unsigned int eax_denominator, ebx_numerator, ecx_hz, edx; + unsigned int crystal_khz; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + + if (boot_cpu_data.cpuid_level < 0x15) + return 0; + + eax_denominator = ebx_numerator = ecx_hz = edx = 0; + + /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */ + cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx); + + if (ebx_numerator == 0 || eax_denominator == 0) + return 0; + + crystal_khz = ecx_hz / 1000; + + if (crystal_khz == 0) { + switch (boot_cpu_data.x86_model) { + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + crystal_khz = 24000; /* 24.0 MHz */ + break; + case 0x5C: /* BXT */ + crystal_khz = 19200; /* 19.2 MHz */ + break; + } + } + + return crystal_khz * ebx_numerator / eax_denominator; +} + +static unsigned long cpu_khz_from_cpuid(void) +{ + unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + + if (boot_cpu_data.cpuid_level < 0x16) + return 0; + + eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0; + + cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx); + + return eax_base_mhz * 1000; +} + +/** + * native_calibrate_cpu - calibrate the cpu on boot + */ +unsigned long native_calibrate_cpu(void) +{ u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; unsigned long flags, latch, ms, fast_calibrate; int hpet = is_hpet_enabled(), i, loopmin; - /* Calibrate TSC using MSR for Intel Atom SoCs */ - local_irq_save(flags); - fast_calibrate = try_msr_calibrate_tsc(); - local_irq_restore(flags); + fast_calibrate = cpu_khz_from_cpuid(); + if (fast_calibrate) + return fast_calibrate; + + fast_calibrate = cpu_khz_from_msr(); if (fast_calibrate) return fast_calibrate; @@ -837,8 +889,12 @@ int recalibrate_cpu_khz(void) if (!boot_cpu_has(X86_FEATURE_TSC)) return -ENODEV; + cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); - cpu_khz = tsc_khz; + if (tsc_khz == 0) + tsc_khz = cpu_khz; + else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) + cpu_khz = tsc_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); @@ -1244,8 +1300,18 @@ void __init tsc_init(void) return; } + cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); - cpu_khz = tsc_khz; + + /* + * Trust non-zero tsc_khz as authorative, + * and use it to sanity check cpu_khz, + * which will be off if system timer is off. + */ + if (tsc_khz == 0) + tsc_khz = cpu_khz; + else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) + cpu_khz = tsc_khz; if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); @@ -1265,7 +1331,7 @@ void __init tsc_init(void) */ for_each_possible_cpu(cpu) { cyc2ns_init(cpu); - set_cyc2ns_scale(cpu_khz, cpu); + set_cyc2ns_scale(tsc_khz, cpu); } if (tsc_disabled > 0) diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 9911a0620f9a..0fe720d64fef 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -1,14 +1,5 @@ /* - * tsc_msr.c - MSR based TSC calibration on Intel Atom SoC platforms. - * - * TSC in Intel Atom SoC runs at a constant rate which can be figured - * by this formula: - * <maximum core-clock to bus-clock ratio> * <maximum resolved frequency> - * See Intel 64 and IA-32 System Programming Guid section 16.12 and 30.11.5 - * for details. - * Especially some Intel Atom SoCs don't have PIT(i8254) or HPET, so MSR - * based calibration is the only option. - * + * tsc_msr.c - TSC frequency enumeration via MSR * * Copyright (C) 2013 Intel Corporation * Author: Bin Gao <bin.gao@intel.com> @@ -22,18 +13,10 @@ #include <asm/apic.h> #include <asm/param.h> -/* CPU reference clock frequency: in KHz */ -#define FREQ_80 80000 -#define FREQ_83 83200 -#define FREQ_100 99840 -#define FREQ_133 133200 -#define FREQ_166 166400 - -#define MAX_NUM_FREQS 8 +#define MAX_NUM_FREQS 9 /* - * According to Intel 64 and IA-32 System Programming Guide, - * if MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be + * If MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be * read in MSR_PLATFORM_ID[12:8], otherwise in MSR_PERF_STAT[44:40]. * Unfortunately some Intel Atom SoCs aren't quite compliant to this, * so we need manually differentiate SoC families. This is what the @@ -48,17 +31,18 @@ struct freq_desc { static struct freq_desc freq_desc_tables[] = { /* PNW */ - { 6, 0x27, 0, { 0, 0, 0, 0, 0, FREQ_100, 0, FREQ_83 } }, + { 6, 0x27, 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 } }, /* CLV+ */ - { 6, 0x35, 0, { 0, FREQ_133, 0, 0, 0, FREQ_100, 0, FREQ_83 } }, - /* TNG */ - { 6, 0x4a, 1, { 0, FREQ_100, FREQ_133, 0, 0, 0, 0, 0 } }, - /* VLV2 */ - { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, - /* ANN */ - { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, - /* AIRMONT */ - { 6, 0x4c, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, FREQ_80, 0, 0, 0 } }, + { 6, 0x35, 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } }, + /* TNG - Intel Atom processor Z3400 series */ + { 6, 0x4a, 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 } }, + /* VLV2 - Intel Atom processor E3000, Z3600, Z3700 series */ + { 6, 0x37, 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 } }, + /* ANN - Intel Atom processor Z3500 series */ + { 6, 0x5a, 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } }, + /* AMT - Intel Atom processor X7-Z8000 and X5-Z8000 series */ + { 6, 0x4c, 1, { 83300, 100000, 133300, 116700, + 80000, 93300, 90000, 88900, 87500 } }, }; static int match_cpu(u8 family, u8 model) @@ -79,16 +63,20 @@ static int match_cpu(u8 family, u8 model) (freq_desc_tables[cpu_index].freqs[freq_id]) /* - * Do MSR calibration only for known/supported CPUs. + * MSR-based CPU/TSC frequency discovery for certain CPUs. * - * Returns the calibration value or 0 if MSR calibration failed. + * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy + * Return processor base frequency in KHz, or 0 on failure. */ -unsigned long try_msr_calibrate_tsc(void) +unsigned long cpu_khz_from_msr(void) { u32 lo, hi, ratio, freq_id, freq; unsigned long res; int cpu_index; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model); if (cpu_index < 0) return 0; @@ -100,31 +88,17 @@ unsigned long try_msr_calibrate_tsc(void) rdmsr(MSR_IA32_PERF_STATUS, lo, hi); ratio = (hi >> 8) & 0x1f; } - pr_info("Maximum core-clock to bus-clock ratio: 0x%x\n", ratio); - - if (!ratio) - goto fail; /* Get FSB FREQ ID */ rdmsr(MSR_FSB_FREQ, lo, hi); freq_id = lo & 0x7; freq = id_to_freq(cpu_index, freq_id); - pr_info("Resolved frequency ID: %u, frequency: %u KHz\n", - freq_id, freq); - if (!freq) - goto fail; /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */ res = freq * ratio; - pr_info("TSC runs at %lu KHz\n", res); #ifdef CONFIG_X86_LOCAL_APIC lapic_timer_frequency = (freq * 1000) / HZ; - pr_info("lapic_timer_frequency = %d\n", lapic_timer_frequency); #endif return res; - -fail: - pr_warn("Fast TSC calibration using MSR failed\n"); - return 0; } diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 3dce1ca0a653..01f30e56f99e 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -440,10 +440,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs) static inline int is_revectored(int nr, struct revectored_struct *bitmap) { - __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" - :"=r" (nr) - :"m" (*bitmap), "r" (nr)); - return nr; + return test_bit(nr, bitmap->__map); } #define val_byte(val, n) (((__u8 *)&val)[n]) diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index cd05942bc918..95e49f6e4fc3 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -1,7 +1,8 @@ /* Exports for assembly files. All C exports should go in the respective C files. */ -#include <linux/module.h> +#include <linux/export.h> +#include <linux/spinlock_types.h> #include <linux/smp.h> #include <net/checksum.h> @@ -44,6 +45,9 @@ EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(__sw_hweight32); +EXPORT_SYMBOL(__sw_hweight64); + /* * Export string functions. We normally rely on gcc builtin for most of these, * but gcc sometimes decides not to inline them. diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index dad5fe9633a3..76c5e52436c4 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -5,7 +5,7 @@ */ #include <linux/init.h> #include <linux/ioport.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/pci.h> #include <asm/bios_ebda.h> @@ -92,6 +92,7 @@ static void default_nmi_init(void) { }; static int default_i8042_detect(void) { return 1; }; struct x86_platform_ops x86_platform = { + .calibrate_cpu = native_calibrate_cpu, .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, |