diff options
Diffstat (limited to 'arch/x86/kernel')
71 files changed, 522 insertions, 267 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 0061ea263061..cd40aba6aa95 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -31,6 +31,7 @@ #include <linux/module.h> #include <linux/dmi.h> #include <linux/irq.h> +#include <linux/slab.h> #include <linux/bootmem.h> #include <linux/ioport.h> #include <linux/pci.h> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 3a4bf35c179b..1a160d5d44d0 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -8,6 +8,7 @@ #include <linux/vmalloc.h> #include <linux/memory.h> #include <linux/stop_machine.h> +#include <linux/slab.h> #include <asm/alternative.h> #include <asm/sections.h> #include <asm/pgtable.h> diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index adb0ba025702..f854d89b7edf 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -18,8 +18,8 @@ */ #include <linux/pci.h> -#include <linux/gfp.h> #include <linux/bitmap.h> +#include <linux/slab.h> #include <linux/debugfs.h> #include <linux/scatterlist.h> #include <linux/dma-mapping.h> @@ -118,7 +118,7 @@ static bool check_device(struct device *dev) return false; /* No device or no PCI device */ - if (!dev || dev->bus != &pci_bus_type) + if (dev->bus != &pci_bus_type) return false; devid = get_device_id(dev); @@ -392,6 +392,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) u32 tail, head; u8 *target; + WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); target = iommu->cmd_buf + tail; memcpy_toio(target, cmd, sizeof(*cmd)); @@ -2186,7 +2187,7 @@ static void prealloc_protection_domains(void) struct dma_ops_domain *dma_dom; u16 devid; - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) { /* Do we handle this device? */ if (!check_device(&dev->dev)) @@ -2298,7 +2299,7 @@ static void cleanup_domain(struct protection_domain *domain) list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { struct device *dev = dev_data->dev; - do_detach(dev); + __detach_device(dev); atomic_set(&dev_data->bind, 0); } @@ -2327,6 +2328,7 @@ static struct protection_domain *protection_domain_alloc(void) return NULL; spin_lock_init(&domain->lock); + mutex_init(&domain->api_lock); domain->id = domain_id_alloc(); if (!domain->id) goto out_err; @@ -2379,9 +2381,7 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) free_pagetable(domain); - domain_id_free(domain->id); - - kfree(domain); + protection_domain_free(domain); dom->priv = NULL; } @@ -2456,6 +2456,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom, iova &= PAGE_MASK; paddr &= PAGE_MASK; + mutex_lock(&domain->api_lock); + for (i = 0; i < npages; ++i) { ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); if (ret) @@ -2465,6 +2467,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom, paddr += PAGE_SIZE; } + mutex_unlock(&domain->api_lock); + return 0; } @@ -2477,12 +2481,16 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, iova &= PAGE_MASK; + mutex_lock(&domain->api_lock); + for (i = 0; i < npages; ++i) { iommu_unmap_page(domain, iova, PM_MAP_4k); iova += PAGE_SIZE; } iommu_flush_tlb_pde(domain); + + mutex_unlock(&domain->api_lock); } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 9dc91b431470..6360abf993d4 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -19,8 +19,8 @@ #include <linux/pci.h> #include <linux/acpi.h> -#include <linux/gfp.h> #include <linux/list.h> +#include <linux/slab.h> #include <linux/sysdev.h> #include <linux/interrupt.h> #include <linux/msi.h> @@ -138,9 +138,9 @@ int amd_iommus_present; bool amd_iommu_np_cache __read_mostly; /* - * Set to true if ACPI table parsing and hardware intialization went properly + * The ACPI table parsing functions set this variable on an error */ -static bool amd_iommu_initialized; +static int __initdata amd_iommu_init_err; /* * List of protection domains - used during resume @@ -391,9 +391,11 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) */ for (i = 0; i < table->length; ++i) checksum += p[i]; - if (checksum != 0) + if (checksum != 0) { /* ACPI table corrupt */ - return -ENODEV; + amd_iommu_init_err = -ENODEV; + return 0; + } p += IVRS_HEADER_LENGTH; @@ -436,7 +438,7 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) if (cmd_buf == NULL) return NULL; - iommu->cmd_buf_size = CMD_BUFFER_SIZE; + iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; return cmd_buf; } @@ -472,12 +474,13 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) &entry, sizeof(entry)); amd_iommu_reset_cmd_buffer(iommu); + iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); } static void __init free_command_buffer(struct amd_iommu *iommu) { free_pages((unsigned long)iommu->cmd_buf, - get_order(iommu->cmd_buf_size)); + get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); } /* allocates the memory where the IOMMU will log its events to */ @@ -920,11 +923,16 @@ static int __init init_iommu_all(struct acpi_table_header *table) h->mmio_phys); iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); - if (iommu == NULL) - return -ENOMEM; + if (iommu == NULL) { + amd_iommu_init_err = -ENOMEM; + return 0; + } + ret = init_iommu_one(iommu, h); - if (ret) - return ret; + if (ret) { + amd_iommu_init_err = ret; + return 0; + } break; default: break; @@ -934,8 +942,6 @@ static int __init init_iommu_all(struct acpi_table_header *table) } WARN_ON(p != end); - amd_iommu_initialized = true; - return 0; } @@ -1211,6 +1217,10 @@ static int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) return -ENODEV; + ret = amd_iommu_init_err; + if (ret) + goto out; + dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); @@ -1270,12 +1280,19 @@ static int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", init_iommu_all) != 0) goto free; - if (!amd_iommu_initialized) + if (amd_iommu_init_err) { + ret = amd_iommu_init_err; goto free; + } if (acpi_table_parse("IVRS", init_memory_definitions) != 0) goto free; + if (amd_iommu_init_err) { + ret = amd_iommu_init_err; + goto free; + } + ret = sysdev_class_register(&amd_iommu_sysdev_class); if (ret) goto free; @@ -1288,6 +1305,8 @@ static int __init amd_iommu_init(void) if (ret) goto free; + enable_iommus(); + if (iommu_pass_through) ret = amd_iommu_init_passthrough(); else @@ -1300,8 +1319,6 @@ static int __init amd_iommu_init(void) amd_iommu_init_notifier(); - enable_iommus(); - if (iommu_pass_through) goto out; @@ -1315,6 +1332,7 @@ out: return ret; free: + disable_iommus(); amd_iommu_uninit_devices(); diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 4b7099526d2c..a35347501d36 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -33,6 +33,7 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/sysdev.h> +#include <linux/slab.h> #include <linux/pm.h> #include <linux/pci.h> #include <linux/sfi.h> @@ -428,7 +429,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n, static __init int apbt_late_init(void) { - if (disable_apbt_percpu) + if (disable_apbt_percpu || !apb_timer_block_enabled) return 0; /* This notifier should be called after workqueue is ready */ hotcpu_notifier(apbt_cpuhp_notify, -20); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 3704997e8b25..b5d8b0bcf235 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -393,6 +393,7 @@ void __init gart_iommu_hole_init(void) for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { int bus; int dev_base, dev_limit; + u32 ctl; bus = bus_dev_ranges[i].bus; dev_base = bus_dev_ranges[i].dev_base; @@ -406,7 +407,19 @@ void __init gart_iommu_hole_init(void) gart_iommu_aperture = 1; x86_init.iommu.iommu_init = gart_iommu_init; - aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; + ctl = read_pci_config(bus, slot, 3, + AMD64_GARTAPERTURECTL); + + /* + * Before we do anything else disable the GART. It may + * still be enabled if we boot into a crash-kernel here. + * Reconfiguring the GART while it is enabled could have + * unknown side-effects. + */ + ctl &= ~GARTEN; + write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); + + aper_order = (ctl >> 1) & 7; aper_size = (32 * 1024 * 1024) << aper_order; aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; aper_base <<= 25; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 00187f1fcfb7..e5a4a1e01618 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1640,8 +1640,10 @@ int __init APIC_init_uniprocessor(void) } #endif +#ifndef CONFIG_SMP enable_IR_x2apic(); default_setup_apic_routing(); +#endif verify_local_APIC(); connect_bsp_APIC(); diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index dd2b5f264643..03ba1b895f5e 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -42,6 +42,7 @@ #include <linux/errno.h> #include <linux/acpi.h> #include <linux/init.h> +#include <linux/gfp.h> #include <linux/nmi.h> #include <linux/smp.h> #include <linux/io.h> diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e4e0ddcb1546..eb2789c3f721 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -36,6 +36,7 @@ #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/jiffies.h> /* time_after() */ +#include <linux/slab.h> #ifdef CONFIG_ACPI #include <acpi/acpi_bus.h> #endif @@ -1268,6 +1269,14 @@ void __setup_vector_irq(int cpu) /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { cfg = desc->chip_data; + + /* + * If it is a legacy IRQ handled by the legacy PIC, this cpu + * will be part of the irq_cfg's domain. + */ + if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq)) + cpumask_set_cpu(cpu, cfg->domain); + if (!cpumask_test_cpu(cpu, cfg->domain)) continue; vector = cfg->vector; @@ -2536,6 +2545,9 @@ void irq_force_complete_move(int irq) struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg = desc->chip_data; + if (!cfg) + return; + __irq_complete_move(&desc, cfg->vector); } #else diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 8aa65adbd25d..1edaf15c0b8e 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -18,6 +18,7 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/sysdev.h> #include <linux/sysctl.h> #include <linux/percpu.h> diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 49dbeaef2a27..c085d52dbaf2 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -17,6 +17,7 @@ #include <linux/ctype.h> #include <linux/sched.h> #include <linux/timer.h> +#include <linux/slab.h> #include <linux/cpu.h> #include <linux/init.h> #include <linux/io.h> diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c index 30f25a75fe28..5de7f4c56971 100644 --- a/arch/x86/kernel/bootflag.c +++ b/arch/x86/kernel/bootflag.c @@ -5,7 +5,6 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/string.h> -#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/acpi.h> #include <asm/io.h> diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 1b1920fa7c80..459168083b77 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -33,6 +33,7 @@ #include <linux/cpufreq.h> #include <linux/compiler.h> #include <linux/dmi.h> +#include <linux/slab.h> #include <trace/events/power.h> #include <linux/acpi.h> diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 006b278b0d5d..c587db472a75 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -20,7 +20,6 @@ #include <linux/module.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/delay.h> #include <linux/cpufreq.h> diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index ac27ec2264d5..16e3483be9e3 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -80,6 +80,7 @@ #include <linux/cpufreq.h> #include <linux/pci.h> #include <linux/errno.h> +#include <linux/slab.h> #include <asm/processor-cyrix.h> diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index da5f70fcb766..e7b559d74c52 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -9,7 +9,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/cpufreq.h> #include <linux/timex.h> diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 869615193720..7b8a8ba67b07 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -25,7 +25,6 @@ #include <linux/init.h> #include <linux/smp.h> #include <linux/cpufreq.h> -#include <linux/slab.h> #include <linux/cpumask.h> #include <linux/timex.h> diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index ff36d2979a90..ce7cde713e71 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c @@ -30,6 +30,7 @@ #include <linux/sched.h> #include <linux/cpufreq.h> #include <linux/compiler.h> +#include <linux/slab.h> #include <linux/acpi.h> #include <linux/io.h> diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index cb01dac267d3..b3379d6a5c57 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -13,7 +13,6 @@ #include <linux/init.h> #include <linux/cpufreq.h> #include <linux/ioport.h> -#include <linux/slab.h> #include <linux/timex.h> #include <linux/io.h> diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index d360b56e9825..b6215b9798e2 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -929,7 +929,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, powernow_table[i].index = index; /* Frequency may be rounded for these */ - if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) { + if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10) + || boot_cpu_data.x86 == 0x11) { powernow_table[i].frequency = freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); } else diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 8d672ef162ce..9b1ff37de46a 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -20,6 +20,7 @@ #include <linux/sched.h> /* current */ #include <linux/delay.h> #include <linux/compiler.h> +#include <linux/gfp.h> #include <asm/msr.h> #include <asm/processor.h> diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 2ce8e0b5cc54..561758e95180 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -23,7 +23,6 @@ #include <linux/init.h> #include <linux/cpufreq.h> #include <linux/pci.h> -#include <linux/slab.h> #include <linux/sched.h> #include "speedstep-lib.h" diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index ad0083abfa23..a94ec6be69fa 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -13,7 +13,6 @@ #include <linux/moduleparam.h> #include <linux/init.h> #include <linux/cpufreq.h> -#include <linux/slab.h> #include <asm/msr.h> #include <asm/tsc.h> diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index 04d73c114e49..8abd869baabf 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -17,7 +17,6 @@ #include <linux/moduleparam.h> #include <linux/init.h> #include <linux/cpufreq.h> -#include <linux/slab.h> #include <linux/delay.h> #include <linux/io.h> #include <asm/ist.h> diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 7e1cca13af35..1366c7cfd483 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -47,6 +47,27 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + /* + * Atom erratum AAE44/AAF40/AAG38/AAH41: + * + * A race condition between speculative fetches and invalidating + * a large page. This is worked around in microcode, but we + * need the microcode to have already been loaded... so if it is + * not, recommend a BIOS update and disable large pages. + */ + if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2) { + u32 ucode, junk; + + wrmsr(MSR_IA32_UCODE_REV, 0, 0); + sync_core(); + rdmsr(MSR_IA32_UCODE_REV, junk, ucode); + + if (ucode < 0x20e) { + printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n"); + clear_cpu_cap(c, X86_FEATURE_PSE); + } + } + #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #else diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 73734baa50f2..e7dbde7bfedb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -22,6 +22,7 @@ #include <linux/kdebug.h> #include <linux/cpu.h> #include <linux/sched.h> +#include <linux/gfp.h> #include <asm/mce.h> #include <asm/apic.h> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3ab9c886b613..8a6f0afa767e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -26,6 +26,7 @@ #include <linux/sched.h> #include <linux/sysfs.h> #include <linux/types.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/kmod.h> #include <linux/poll.h> diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index cda932ca3ade..224392d8fe8c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -21,6 +21,7 @@ #include <linux/errno.h> #include <linux/sched.h> #include <linux/sysfs.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/cpu.h> #include <linux/smp.h> diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index d15df6e49bf0..62b48e40920a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -5,6 +5,7 @@ * Author: Andi Kleen */ +#include <linux/gfp.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/percpu.h> diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 9aa5dc76ff4a..fd31a441c61c 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -6,7 +6,6 @@ #include <linux/module.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/io.h> #include <linux/mm.h> diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index e006e56f699c..79289632cb27 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/ctype.h> #include <linux/string.h> +#include <linux/slab.h> #include <linux/init.h> #define LINE_SIZE 80 diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 42aafd11e170..db5bdc8addf8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -21,6 +21,7 @@ #include <linux/kdebug.h> #include <linux/sched.h> #include <linux/uaccess.h> +#include <linux/slab.h> #include <linux/highmem.h> #include <linux/cpu.h> #include <linux/bitops.h> @@ -28,6 +29,7 @@ #include <asm/apic.h> #include <asm/stacktrace.h> #include <asm/nmi.h> +#include <asm/compat.h> static u64 perf_event_mask __read_mostly; @@ -133,8 +135,8 @@ struct x86_pmu { int (*handle_irq)(struct pt_regs *); void (*disable_all)(void); void (*enable_all)(void); - void (*enable)(struct hw_perf_event *, int); - void (*disable)(struct hw_perf_event *, int); + void (*enable)(struct perf_event *); + void (*disable)(struct perf_event *); unsigned eventsel; unsigned perfctr; u64 (*event_map)(int); @@ -157,6 +159,11 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); struct event_constraint *event_constraints; + + int (*cpu_prepare)(int cpu); + void (*cpu_starting)(int cpu); + void (*cpu_dying)(int cpu); + void (*cpu_dead)(int cpu); }; static struct x86_pmu x86_pmu __read_mostly; @@ -165,8 +172,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; -static int x86_perf_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, int idx); +static int x86_perf_event_set_period(struct perf_event *event); /* * Generalized hw caching related hw_event table, filled @@ -189,11 +195,12 @@ static u64 __read_mostly hw_cache_event_ids * Returns the delta events processed. */ static u64 -x86_perf_event_update(struct perf_event *event, - struct hw_perf_event *hwc, int idx) +x86_perf_event_update(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; int shift = 64 - x86_pmu.event_bits; u64 prev_raw_count, new_raw_count; + int idx = hwc->idx; s64 delta; if (idx == X86_PMC_IDX_FIXED_BTS) @@ -293,7 +300,7 @@ static inline bool bts_available(void) return x86_pmu.enable_bts != NULL; } -static inline void init_debug_store_on_cpu(int cpu) +static void init_debug_store_on_cpu(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; @@ -305,7 +312,7 @@ static inline void init_debug_store_on_cpu(int cpu) (u32)((u64)(unsigned long)ds >> 32)); } -static inline void fini_debug_store_on_cpu(int cpu) +static void fini_debug_store_on_cpu(int cpu) { if (!per_cpu(cpu_hw_events, cpu).ds) return; @@ -638,7 +645,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (test_bit(hwc->idx, used_mask)) break; - set_bit(hwc->idx, used_mask); + __set_bit(hwc->idx, used_mask); if (assign) assign[i] = hwc->idx; } @@ -687,7 +694,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (j == X86_PMC_IDX_MAX) break; - set_bit(j, used_mask); + __set_bit(j, used_mask); if (assign) assign[i] = j; @@ -780,6 +787,7 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, hwc->last_tag == cpuc->tags[i]; } +static int x86_pmu_start(struct perf_event *event); static void x86_pmu_stop(struct perf_event *event); void hw_perf_enable(void) @@ -796,6 +804,7 @@ void hw_perf_enable(void) return; if (cpuc->n_added) { + int n_running = cpuc->n_events - cpuc->n_added; /* * apply assignment obtained either from * hw_perf_group_sched_in() or x86_pmu_enable() @@ -803,8 +812,7 @@ void hw_perf_enable(void) * step1: save events moving to new counters * step2: reprogram moved events into new counters */ - for (i = 0; i < cpuc->n_events; i++) { - + for (i = 0; i < n_running; i++) { event = cpuc->event_list[i]; hwc = &event->hw; @@ -819,29 +827,18 @@ void hw_perf_enable(void) continue; x86_pmu_stop(event); - - hwc->idx = -1; } for (i = 0; i < cpuc->n_events; i++) { - event = cpuc->event_list[i]; hwc = &event->hw; - if (hwc->idx == -1) { + if (!match_prev_assignment(hwc, cpuc, i)) x86_assign_hw_event(event, cpuc, i); - x86_perf_event_set_period(event, hwc, hwc->idx); - } - /* - * need to mark as active because x86_pmu_disable() - * clear active_mask and events[] yet it preserves - * idx - */ - set_bit(hwc->idx, cpuc->active_mask); - cpuc->events[hwc->idx] = event; + else if (i < n_running) + continue; - x86_pmu.enable(hwc, hwc->idx); - perf_event_update_userpage(event); + x86_pmu_start(event); } cpuc->n_added = 0; perf_events_lapic_init(); @@ -853,15 +850,16 @@ void hw_perf_enable(void) x86_pmu.enable_all(); } -static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) +static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) { - (void)checking_wrmsrl(hwc->config_base + idx, + (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); } -static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) +static inline void x86_pmu_disable_event(struct perf_event *event) { - (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); + struct hw_perf_event *hwc = &event->hw; + (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); } static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); @@ -871,12 +869,12 @@ static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); * To be called with the event disabled in hw: */ static int -x86_perf_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, int idx) +x86_perf_event_set_period(struct perf_event *event) { + struct hw_perf_event *hwc = &event->hw; s64 left = atomic64_read(&hwc->period_left); s64 period = hwc->sample_period; - int err, ret = 0; + int err, ret = 0, idx = hwc->idx; if (idx == X86_PMC_IDX_FIXED_BTS) return 0; @@ -922,11 +920,11 @@ x86_perf_event_set_period(struct perf_event *event, return ret; } -static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void x86_pmu_enable_event(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); if (cpuc->enabled) - __x86_pmu_enable_event(hwc, idx); + __x86_pmu_enable_event(&event->hw); } /* @@ -962,34 +960,32 @@ static int x86_pmu_enable(struct perf_event *event) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->n_events = n; - cpuc->n_added = n - n0; + cpuc->n_added += n - n0; return 0; } static int x86_pmu_start(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = event->hw.idx; - if (hwc->idx == -1) + if (idx == -1) return -EAGAIN; - x86_perf_event_set_period(event, hwc, hwc->idx); - x86_pmu.enable(hwc, hwc->idx); + x86_perf_event_set_period(event); + cpuc->events[idx] = event; + __set_bit(idx, cpuc->active_mask); + x86_pmu.enable(event); + perf_event_update_userpage(event); return 0; } static void x86_pmu_unthrottle(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - - if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || - cpuc->events[hwc->idx] != event)) - return; - - x86_pmu.enable(hwc, hwc->idx); + int ret = x86_pmu_start(event); + WARN_ON_ONCE(ret); } void perf_event_print_debug(void) @@ -1049,18 +1045,16 @@ static void x86_pmu_stop(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - /* - * Must be done before we disable, otherwise the nmi handler - * could reenable again: - */ - clear_bit(idx, cpuc->active_mask); - x86_pmu.disable(hwc, idx); + if (!__test_and_clear_bit(idx, cpuc->active_mask)) + return; + + x86_pmu.disable(event); /* * Drain the remaining delta count out of a event * that we are disabling: */ - x86_perf_event_update(event, hwc, idx); + x86_perf_event_update(event); cpuc->events[idx] = NULL; } @@ -1108,7 +1102,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) event = cpuc->events[idx]; hwc = &event->hw; - val = x86_perf_event_update(event, hwc, idx); + val = x86_perf_event_update(event); if (val & (1ULL << (x86_pmu.event_bits - 1))) continue; @@ -1118,11 +1112,11 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) handled = 1; data.period = event->hw.last_period; - if (!x86_perf_event_set_period(event, hwc, idx)) + if (!x86_perf_event_set_period(event)) continue; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu.disable(hwc, idx); + x86_pmu_stop(event); } if (handled) @@ -1309,7 +1303,7 @@ int hw_perf_group_sched_in(struct perf_event *leader, memcpy(cpuc->assign, assign, n0*sizeof(int)); cpuc->n_events = n0; - cpuc->n_added = n1; + cpuc->n_added += n1; ctx->nr_active += n1; /* @@ -1337,6 +1331,41 @@ undo: #include "perf_event_p6.c" #include "perf_event_intel.c" +static int __cpuinit +x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + int ret = NOTIFY_OK; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + if (x86_pmu.cpu_prepare) + ret = x86_pmu.cpu_prepare(cpu); + break; + + case CPU_STARTING: + if (x86_pmu.cpu_starting) + x86_pmu.cpu_starting(cpu); + break; + + case CPU_DYING: + if (x86_pmu.cpu_dying) + x86_pmu.cpu_dying(cpu); + break; + + case CPU_UP_CANCELED: + case CPU_DEAD: + if (x86_pmu.cpu_dead) + x86_pmu.cpu_dead(cpu); + break; + + default: + break; + } + + return ret; +} + static void __init pmu_check_apic(void) { if (cpu_has_apic) @@ -1415,11 +1444,13 @@ void __init init_hw_perf_events(void) pr_info("... max period: %016Lx\n", x86_pmu.max_period); pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); pr_info("... event mask: %016Lx\n", perf_event_mask); + + perf_cpu_notifier(x86_pmu_notifier); } static inline void x86_pmu_read(struct perf_event *event) { - x86_perf_event_update(event, &event->hw, event->hw.idx); + x86_perf_event_update(event); } static const struct pmu pmu = { @@ -1601,14 +1632,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) return len; } -static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +#ifdef CONFIG_COMPAT +static inline int +perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) { - unsigned long bytes; + /* 32-bit process in 64-bit kernel. */ + struct stack_frame_ia32 frame; + const void __user *fp; - bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); + if (!test_thread_flag(TIF_IA32)) + return 0; + + fp = compat_ptr(regs->bp); + while (entry->nr < PERF_MAX_STACK_DEPTH) { + unsigned long bytes; + frame.next_frame = 0; + frame.return_address = 0; + + bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); + if (bytes != sizeof(frame)) + break; - return bytes == sizeof(*frame); + if (fp < compat_ptr(regs->sp)) + break; + + callchain_store(entry, frame.return_address); + fp = compat_ptr(frame.next_frame); + } + return 1; } +#else +static inline int +perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + return 0; +} +#endif static void perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) @@ -1624,11 +1683,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) callchain_store(entry, PERF_CONTEXT_USER); callchain_store(entry, regs->ip); + if (perf_callchain_user32(regs, entry)) + return; + while (entry->nr < PERF_MAX_STACK_DEPTH) { + unsigned long bytes; frame.next_frame = NULL; frame.return_address = 0; - if (!copy_stack_frame(fp, &frame)) + bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); + if (bytes != sizeof(frame)) break; if ((unsigned long)fp < regs->sp) @@ -1675,28 +1739,14 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } -void hw_perf_event_setup_online(int cpu) -{ - init_debug_store_on_cpu(cpu); - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - amd_pmu_cpu_online(cpu); - break; - default: - return; - } -} - -void hw_perf_event_setup_offline(int cpu) +void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) { - init_debug_store_on_cpu(cpu); - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - amd_pmu_cpu_offline(cpu); - break; - default: - return; - } + regs->ip = ip; + /* + * perf_arch_fetch_caller_regs adds another call, we need to increment + * the skip level + */ + regs->bp = rewind_frame_pointer(skip + 1); + regs->cs = __KERNEL_CS; + local_save_flags(regs->flags); } diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 8f3dbfda3c4f..db6f7d4056e1 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -137,6 +137,13 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc) return (hwc->config & 0xe0) == 0xe0; } +static inline int amd_has_nb(struct cpu_hw_events *cpuc) +{ + struct amd_nb *nb = cpuc->amd_nb; + + return nb && nb->nb_id != -1; +} + static void amd_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { @@ -147,7 +154,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, /* * only care about NB events */ - if (!(nb && amd_is_nb_event(hwc))) + if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) return; /* @@ -214,7 +221,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) /* * if not NB event or no NB, then no constraints */ - if (!(nb && amd_is_nb_event(hwc))) + if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) return &unconstrained; /* @@ -271,28 +278,6 @@ done: return &emptyconstraint; } -static __initconst struct x86_pmu amd_pmu = { - .name = "AMD", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = x86_pmu_enable_all, - .enable = x86_pmu_enable_event, - .disable = x86_pmu_disable_event, - .eventsel = MSR_K7_EVNTSEL0, - .perfctr = MSR_K7_PERFCTR0, - .event_map = amd_pmu_event_map, - .raw_event = amd_pmu_raw_event, - .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_events = 4, - .event_bits = 48, - .event_mask = (1ULL << 48) - 1, - .apic = 1, - /* use highest bit to detect overflow */ - .max_period = (1ULL << 47) - 1, - .get_event_constraints = amd_get_event_constraints, - .put_event_constraints = amd_put_event_constraints -}; - static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) { struct amd_nb *nb; @@ -309,57 +294,61 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) * initialize all possible NB constraints */ for (i = 0; i < x86_pmu.num_events; i++) { - set_bit(i, nb->event_constraints[i].idxmsk); + __set_bit(i, nb->event_constraints[i].idxmsk); nb->event_constraints[i].weight = 1; } return nb; } -static void amd_pmu_cpu_online(int cpu) +static int amd_pmu_cpu_prepare(int cpu) { - struct cpu_hw_events *cpu1, *cpu2; - struct amd_nb *nb = NULL; + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + WARN_ON_ONCE(cpuc->amd_nb); + + if (boot_cpu_data.x86_max_cores < 2) + return NOTIFY_OK; + + cpuc->amd_nb = amd_alloc_nb(cpu, -1); + if (!cpuc->amd_nb) + return NOTIFY_BAD; + + return NOTIFY_OK; +} + +static void amd_pmu_cpu_starting(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct amd_nb *nb; int i, nb_id; if (boot_cpu_data.x86_max_cores < 2) return; - /* - * function may be called too early in the - * boot process, in which case nb_id is bogus - */ nb_id = amd_get_nb_id(cpu); - if (nb_id == BAD_APICID) - return; - - cpu1 = &per_cpu(cpu_hw_events, cpu); - cpu1->amd_nb = NULL; + WARN_ON_ONCE(nb_id == BAD_APICID); raw_spin_lock(&amd_nb_lock); for_each_online_cpu(i) { - cpu2 = &per_cpu(cpu_hw_events, i); - nb = cpu2->amd_nb; - if (!nb) + nb = per_cpu(cpu_hw_events, i).amd_nb; + if (WARN_ON_ONCE(!nb)) continue; - if (nb->nb_id == nb_id) - goto found; - } - nb = amd_alloc_nb(cpu, nb_id); - if (!nb) { - pr_err("perf_events: failed NB allocation for CPU%d\n", cpu); - raw_spin_unlock(&amd_nb_lock); - return; + if (nb->nb_id == nb_id) { + kfree(cpuc->amd_nb); + cpuc->amd_nb = nb; + break; + } } -found: - nb->refcnt++; - cpu1->amd_nb = nb; + + cpuc->amd_nb->nb_id = nb_id; + cpuc->amd_nb->refcnt++; raw_spin_unlock(&amd_nb_lock); } -static void amd_pmu_cpu_offline(int cpu) +static void amd_pmu_cpu_dead(int cpu) { struct cpu_hw_events *cpuhw; @@ -370,14 +359,44 @@ static void amd_pmu_cpu_offline(int cpu) raw_spin_lock(&amd_nb_lock); - if (--cpuhw->amd_nb->refcnt == 0) - kfree(cpuhw->amd_nb); + if (cpuhw->amd_nb) { + struct amd_nb *nb = cpuhw->amd_nb; + + if (nb->nb_id == -1 || --nb->refcnt == 0) + kfree(nb); - cpuhw->amd_nb = NULL; + cpuhw->amd_nb = NULL; + } raw_spin_unlock(&amd_nb_lock); } +static __initconst struct x86_pmu amd_pmu = { + .name = "AMD", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, + .disable = x86_pmu_disable_event, + .eventsel = MSR_K7_EVNTSEL0, + .perfctr = MSR_K7_PERFCTR0, + .event_map = amd_pmu_event_map, + .raw_event = amd_pmu_raw_event, + .max_events = ARRAY_SIZE(amd_perfmon_event_map), + .num_events = 4, + .event_bits = 48, + .event_mask = (1ULL << 48) - 1, + .apic = 1, + /* use highest bit to detect overflow */ + .max_period = (1ULL << 47) - 1, + .get_event_constraints = amd_get_event_constraints, + .put_event_constraints = amd_put_event_constraints, + + .cpu_prepare = amd_pmu_cpu_prepare, + .cpu_starting = amd_pmu_cpu_starting, + .cpu_dead = amd_pmu_cpu_dead, +}; + static __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ @@ -390,11 +409,6 @@ static __init int amd_pmu_init(void) memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - /* - * explicitly initialize the boot cpu, other cpus will get - * the cpu hotplug callbacks from smp_init() - */ - amd_pmu_cpu_online(smp_processor_id()); return 0; } @@ -405,12 +419,4 @@ static int amd_pmu_init(void) return 0; } -static void amd_pmu_cpu_online(int cpu) -{ -} - -static void amd_pmu_cpu_offline(int cpu) -{ -} - #endif diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 44b60c852107..9c794ac87837 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -548,9 +548,9 @@ static inline void intel_pmu_ack_status(u64 ack) } static inline void -intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) +intel_pmu_disable_fixed(struct hw_perf_event *hwc) { - int idx = __idx - X86_PMC_IDX_FIXED; + int idx = hwc->idx - X86_PMC_IDX_FIXED; u64 ctrl_val, mask; mask = 0xfULL << (idx * 4); @@ -621,26 +621,28 @@ static void intel_pmu_drain_bts_buffer(void) } static inline void -intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) +intel_pmu_disable_event(struct perf_event *event) { - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + struct hw_perf_event *hwc = &event->hw; + + if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { intel_pmu_disable_bts(); intel_pmu_drain_bts_buffer(); return; } if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_disable_fixed(hwc, idx); + intel_pmu_disable_fixed(hwc); return; } - x86_pmu_disable_event(hwc, idx); + x86_pmu_disable_event(event); } static inline void -intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) +intel_pmu_enable_fixed(struct hw_perf_event *hwc) { - int idx = __idx - X86_PMC_IDX_FIXED; + int idx = hwc->idx - X86_PMC_IDX_FIXED; u64 ctrl_val, bits, mask; int err; @@ -670,9 +672,11 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) err = checking_wrmsrl(hwc->config_base, ctrl_val); } -static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void intel_pmu_enable_event(struct perf_event *event) { - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + struct hw_perf_event *hwc = &event->hw; + + if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { if (!__get_cpu_var(cpu_hw_events).enabled) return; @@ -681,11 +685,11 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) } if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_enable_fixed(hwc, idx); + intel_pmu_enable_fixed(hwc); return; } - __x86_pmu_enable_event(hwc, idx); + __x86_pmu_enable_event(hwc); } /* @@ -694,14 +698,8 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) */ static int intel_pmu_save_and_restart(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - int ret; - - x86_perf_event_update(event, hwc, idx); - ret = x86_perf_event_set_period(event, hwc, idx); - - return ret; + x86_perf_event_update(event); + return x86_perf_event_set_period(event); } static void intel_pmu_reset(void) @@ -745,11 +743,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_events); - perf_disable(); + intel_pmu_disable_all(); intel_pmu_drain_bts_buffer(); status = intel_pmu_get_status(); if (!status) { - perf_enable(); + intel_pmu_enable_all(); return 0; } @@ -759,8 +757,7 @@ again: WARN_ONCE(1, "perfevents: irq loop stuck!\n"); perf_event_print_debug(); intel_pmu_reset(); - perf_enable(); - return 1; + goto done; } inc_irq_stat(apic_perf_irqs); @@ -768,7 +765,6 @@ again: for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; - clear_bit(bit, (unsigned long *) &status); if (!test_bit(bit, cpuc->active_mask)) continue; @@ -778,7 +774,7 @@ again: data.period = event->hw.last_period; if (perf_event_overflow(event, 1, &data, regs)) - intel_pmu_disable_event(&event->hw, bit); + x86_pmu_stop(event); } intel_pmu_ack_status(ack); @@ -790,8 +786,8 @@ again: if (status) goto again; - perf_enable(); - +done: + intel_pmu_enable_all(); return 1; } @@ -870,7 +866,10 @@ static __initconst struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .enable_bts = intel_pmu_enable_bts, .disable_bts = intel_pmu_disable_bts, - .get_event_constraints = intel_get_event_constraints + .get_event_constraints = intel_get_event_constraints, + + .cpu_starting = init_debug_store_on_cpu, + .cpu_dying = fini_debug_store_on_cpu, }; static __init int intel_pmu_init(void) @@ -937,6 +936,7 @@ static __init int intel_pmu_init(void) case 26: /* 45 nm nehalem, "Bloomfield" */ case 30: /* 45 nm nehalem, "Lynnfield" */ + case 46: /* 45 nm nehalem-ex, "Beckton" */ memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index a4e67b99d91c..a330485d14da 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -77,27 +77,29 @@ static void p6_pmu_enable_all(void) } static inline void -p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) +p6_pmu_disable_event(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; u64 val = P6_NOP_EVENT; if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)checking_wrmsrl(hwc->config_base + idx, val); + (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); } -static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void p6_pmu_enable_event(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; u64 val; val = hwc->config; if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)checking_wrmsrl(hwc->config_base + idx, val); + (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); } static __initconst struct x86_pmu p6_pmu = { diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 1cbed97b59cf..dfdb4dba2320 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -22,6 +22,7 @@ */ #include <linux/dmi.h> +#include <linux/module.h> #include <asm/div64.h> #include <asm/vmware.h> #include <asm/x86_init.h> @@ -101,6 +102,7 @@ int vmware_platform(void) return 0; } +EXPORT_SYMBOL(vmware_platform); /* * VMware hypervisor takes care of exporting a reliable TSC to the guest. diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 83e5e628de73..8b862d5900fe 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -40,6 +40,7 @@ #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/uaccess.h> +#include <linux/gfp.h> #include <asm/processor.h> #include <asm/msr.h> diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index a4849c10a77e..ebd4c51d096a 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -27,7 +27,6 @@ #include <asm/cpu.h> #include <asm/reboot.h> #include <asm/virtext.h> -#include <asm/x86_init.h> #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) @@ -103,10 +102,5 @@ void native_machine_crash_shutdown(struct pt_regs *regs) #ifdef CONFIG_HPET_TIMER hpet_disable(); #endif - -#ifdef CONFIG_X86_64 - x86_platform.iommu_shutdown(); -#endif - crash_save_cpu(regs, safe_smp_processor_id()); } diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index cd97ce18c29d..67414550c3cc 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -5,6 +5,7 @@ * Copyright (C) IBM Corporation, 2004. All rights reserved */ +#include <linux/slab.h> #include <linux/errno.h> #include <linux/highmem.h> #include <linux/crash_dump.h> diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index 4fd1420faffa..e1a93be4fd44 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h @@ -14,6 +14,8 @@ #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) #endif +#include <linux/uaccess.h> + extern void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl); @@ -29,4 +31,26 @@ struct stack_frame { struct stack_frame *next_frame; unsigned long return_address; }; + +struct stack_frame_ia32 { + u32 next_frame; + u32 return_address; +}; + +static inline unsigned long rewind_frame_pointer(int n) +{ + struct stack_frame *frame; + + get_bp(frame); + +#ifdef CONFIG_FRAME_POINTER + while (n--) { + if (probe_kernel_address(&frame->next_frame, frame)) + break; + } #endif + + return (unsigned long)frame; +} + +#endif /* DUMPSTACK_H */ diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index d5e2a2ebb627..272c9f1f05f3 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -208,7 +208,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (in_irq_stack(stack, irq_stack, irq_stack_end)) { if (ops->stack(data, "IRQ") < 0) break; - bp = print_context_stack(tinfo, stack, bp, + bp = ops->walk_stack(tinfo, stack, bp, ops, data, irq_stack_end, &graph); /* * We link to the next stack (which would be @@ -229,7 +229,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, /* * This handles the process stack: */ - bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); + bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); put_cpu(); } EXPORT_SYMBOL(dump_trace); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 740b440fbd73..7bca3c6a02fb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -519,29 +519,45 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", (unsigned long long) start, (unsigned long long) end); - e820_print_type(old_type); + if (checktype) + e820_print_type(old_type); printk(KERN_CONT "\n"); for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; u64 final_start, final_end; + u64 ei_end; if (checktype && ei->type != old_type) continue; + + ei_end = ei->addr + ei->size; /* totally covered? */ - if (ei->addr >= start && - (ei->addr + ei->size) <= (start + size)) { + if (ei->addr >= start && ei_end <= end) { real_removed_size += ei->size; memset(ei, 0, sizeof(struct e820entry)); continue; } + + /* new range is totally covered? */ + if (ei->addr < start && ei_end > end) { + e820_add_region(end, ei_end - end, ei->type); + ei->size = start - ei->addr; + real_removed_size += size; + continue; + } + /* partially covered */ final_start = max(start, ei->addr); - final_end = min(start + size, ei->addr + ei->size); + final_end = min(end, ei_end); if (final_start >= final_end) continue; real_removed_size += final_end - final_start; + /* + * left range could be head or tail, so need to update + * size at first. + */ ei->size -= final_end - final_start; if (ei->addr < final_start) continue; diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index adedeef1dedc..b2e246037392 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -7,6 +7,7 @@ #include <linux/init.h> #include <linux/start_kernel.h> +#include <linux/mm.h> #include <asm/setup.h> #include <asm/sections.h> @@ -44,9 +45,10 @@ void __init i386_start_kernel(void) #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + /* Assume only end is not page aligned */ u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; - u64 ramdisk_end = ramdisk_image + ramdisk_size; + u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b5a9896ca1e7..7147143fd614 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -103,9 +103,10 @@ void __init x86_64_start_reservations(char *real_mode_data) #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + /* Assume only end is not page aligned */ unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; - unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } #endif diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ee4fa1bfcb33..23b4ecdffa9b 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -4,6 +4,7 @@ #include <linux/sysdev.h> #include <linux/delay.h> #include <linux/errno.h> +#include <linux/slab.h> #include <linux/hpet.h> #include <linux/init.h> #include <linux/cpu.h> @@ -399,9 +400,15 @@ static int hpet_next_event(unsigned long delta, * then we might have a real hardware problem. We can not do * much about it here, but at least alert the user/admin with * a prominent warning. + * An erratum on some chipsets (ICH9,..), results in comparator read + * immediately following a write returning old value. Workaround + * for this is to read this value second time, when first + * read returns old value. */ - WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, + if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { + WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, KERN_WARNING "hpet: compare register read back failed.\n"); + } return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } @@ -1143,6 +1150,7 @@ int hpet_set_periodic_freq(unsigned long freq) do_div(clc, freq); clc >>= hpet_clockevent.shift; hpet_pie_delta = clc; + hpet_pie_limit = 0; } return 1; } diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index c01a2b846d47..54c31c285488 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/regset.h> #include <linux/sched.h> +#include <linux/slab.h> #include <asm/sigcontext.h> #include <asm/processor.h> diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index fb725ee15f55..7c9f02c130f3 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -5,7 +5,6 @@ #include <linux/ioport.h> #include <linux/interrupt.h> #include <linux/timex.h> -#include <linux/slab.h> #include <linux/random.h> #include <linux/init.h> #include <linux/kernel_stat.h> diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index ef257fc2921b..0ed2d300cd46 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -5,7 +5,6 @@ #include <linux/ioport.h> #include <linux/interrupt.h> #include <linux/timex.h> -#include <linux/slab.h> #include <linux/random.h> #include <linux/kprobes.h> #include <linux/init.h> @@ -141,6 +140,28 @@ void __init init_IRQ(void) x86_init.irqs.intr_init(); } +/* + * Setup the vector to irq mappings. + */ +void setup_vector_irq(int cpu) +{ +#ifndef CONFIG_X86_IO_APIC + int irq; + + /* + * On most of the platforms, legacy PIC delivers the interrupts on the + * boot cpu. But there are certain platforms where PIC interrupts are + * delivered to multiple cpu's. If the legacy IRQ is handled by the + * legacy PIC, for the new cpu that is coming online, setup the static + * legacy vector to irq mapping: + */ + for (irq = 0; irq < legacy_pic->nr_legacy_irqs; irq++) + per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; +#endif + + __setup_vector_irq(cpu); +} + static void __init smp_intr_init(void) { #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 9b895464dd03..0f7bc20cfcde 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c @@ -2,8 +2,8 @@ * Shared support code for AMD K8 northbridges and derivates. * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. */ -#include <linux/gfp.h> #include <linux/types.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/module.h> diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index e444357375ce..8afd9f321f10 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -9,6 +9,7 @@ #include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/stat.h> #include <linux/io.h> diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index bfba6019d762..b2258ca91003 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -618,8 +618,8 @@ int kgdb_arch_init(void) * portion of kgdb because this operation requires mutexs to * complete. */ + hw_breakpoint_init(&attr); attr.bp_addr = (unsigned long)kgdb_arch_init; - attr.type = PERF_TYPE_BREAKPOINT; attr.bp_len = HW_BREAKPOINT_LEN_1; attr.bp_type = HW_BREAKPOINT_W; attr.disabled = 1; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ec6ef60cbd17..ea697263b373 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -7,6 +7,7 @@ */ #include <linux/errno.h> +#include <linux/gfp.h> #include <linux/sched.h> #include <linux/string.h> #include <linux/mm.h> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 4a8bb82248ae..035c8c529181 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/kexec.h> #include <linux/string.h> +#include <linux/gfp.h> #include <linux/reboot.h> #include <linux/numa.h> #include <linux/ftrace.h> diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index 845d80ce1ef1..63eaf6596233 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c @@ -42,6 +42,7 @@ #include <linux/kernel.h> #include <linux/mca.h> #include <linux/kprobes.h> +#include <linux/slab.h> #include <asm/system.h> #include <asm/io.h> #include <linux/proc_fs.h> diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 89f386f044e4..e0bc186d7501 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -23,6 +23,7 @@ #include <linux/kernel.h> #include <linux/bug.h> #include <linux/mm.h> +#include <linux/gfp.h> #include <asm/system.h> #include <asm/page.h> diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index a2c1edd2d3ac..e81030f71a8f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -664,7 +664,7 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf) { unsigned long size = get_mpc_size(mpf->physptr); - reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc"); + reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc"); } static int __init smp_scan_config(unsigned long base, unsigned long length) @@ -693,7 +693,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) mpf, (u64)virt_to_phys(mpf)); mem = virt_to_phys(mpf); - reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf"); + reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf"); if (mpf->physptr) smp_reserve_memory(mpf); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 206735ac8cbd..4d4468e9f47c 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -37,6 +37,7 @@ #include <linux/cpu.h> #include <linux/notifier.h> #include <linux/uaccess.h> +#include <linux/gfp.h> #include <asm/processor.h> #include <asm/msr.h> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index a4ac764a6880..4b7e3d8b01dd 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -2,6 +2,7 @@ #include <linux/dma-debug.h> #include <linux/dmar.h> #include <linux/bootmem.h> +#include <linux/gfp.h> #include <linux/pci.h> #include <linux/kmemleak.h> diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index f3af115a573a..0f7f130caa67 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -29,6 +29,7 @@ #include <linux/iommu-helper.h> #include <linux/sysdev.h> #include <linux/io.h> +#include <linux/gfp.h> #include <asm/atomic.h> #include <asm/mtrr.h> #include <asm/pgtable.h> @@ -564,6 +565,9 @@ static void enable_gart_translations(void) enable_gart_translation(dev, __pa(agp_gatt_table)); } + + /* Flush the GART-TLB to remove stale entries */ + k8_flush_garts(); } /* diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 22be12b60a8f..3af4af810c07 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -4,6 +4,7 @@ #include <linux/scatterlist.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/gfp.h> #include <linux/pci.h> #include <linux/mm.h> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ad9540676fcc..28ad9f4d8b94 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -526,21 +526,37 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) } /* - * Check for AMD CPUs, which have potentially C1E support + * Check for AMD CPUs, where APIC timer interrupt does not wake up CPU from C1e. + * For more information see + * - Erratum #400 for NPT family 0xf and family 0x10 CPUs + * - Erratum #365 for family 0x11 (not affected because C1e not in use) */ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) { + u64 val; if (c->x86_vendor != X86_VENDOR_AMD) - return 0; - - if (c->x86 < 0x0F) - return 0; + goto no_c1e_idle; /* Family 0x0f models < rev F do not have C1E */ - if (c->x86 == 0x0f && c->x86_model < 0x40) - return 0; + if (c->x86 == 0x0F && c->x86_model >= 0x40) + return 1; - return 1; + if (c->x86 == 0x10) { + /* + * check OSVW bit for CPUs that are not affected + * by erratum #400 + */ + rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); + if (val >= 2) { + rdmsrl(MSR_AMD64_OSVW_STATUS, val); + if (!(val & BIT(1))) + goto no_c1e_idle; + } + return 1; + } + +no_c1e_idle: + return 0; } static cpumask_var_t c1e_mask; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index dc9690b4c4cc..17cb3295cbf7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -276,12 +276,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, set_tsk_thread_flag(p, TIF_FORK); - p->thread.fs = me->thread.fs; - p->thread.gs = me->thread.gs; p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); + p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs; savesegment(fs, p->thread.fsindex); + p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs; savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a503b1fd04e5..2e9b55027b7e 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -12,6 +12,7 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/errno.h> +#include <linux/slab.h> #include <linux/ptrace.h> #include <linux/regset.h> #include <linux/tracehook.h> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5d7ba1a449bd..c4851eff57b3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -55,7 +55,6 @@ #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/ptrace.h> -#include <linux/slab.h> #include <linux/user.h> #include <linux/delay.h> @@ -314,16 +313,17 @@ static void __init reserve_brk(void) #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) static void __init relocate_initrd(void) { - + /* Assume only end is not page aligned */ u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 area_size = PAGE_ALIGN(ramdisk_size); u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; u64 ramdisk_here; unsigned long slop, clen, mapaddr; char *p, *q; /* We need to move the initrd down into lowmem */ - ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size, + ramdisk_here = find_e820_area(0, end_of_lowmem, area_size, PAGE_SIZE); if (ramdisk_here == -1ULL) @@ -332,7 +332,7 @@ static void __init relocate_initrd(void) /* Note: this includes all the lowmem currently occupied by the initrd, we rely on that fact to keep the data intact. */ - reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, + reserve_early(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); initrd_start = ramdisk_here + PAGE_OFFSET; initrd_end = initrd_start + ramdisk_size; @@ -376,9 +376,10 @@ static void __init relocate_initrd(void) static void __init reserve_initrd(void) { + /* Assume only end is not page aligned */ u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; - u64 ramdisk_end = ramdisk_image + ramdisk_size; + u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; if (!boot_params.hdr.type_of_loader || @@ -606,6 +607,16 @@ static int __init setup_elfcorehdr(char *arg) early_param("elfcorehdr", setup_elfcorehdr); #endif +static __init void reserve_ibft_region(void) +{ + unsigned long addr, size = 0; + + addr = find_ibft_region(&size); + + if (size) + reserve_early_overlap_ok(addr, addr + size, "ibft"); +} + #ifdef CONFIG_X86_RESERVE_LOW_64K static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) { @@ -908,6 +919,8 @@ void __init setup_arch(char **cmdline_p) */ find_smp_config(); + reserve_ibft_region(); + reserve_trampoline_memory(); #ifdef CONFIG_ACPI_SLEEP @@ -975,8 +988,6 @@ void __init setup_arch(char **cmdline_p) dma32_reserve_bootmem(); - reserve_ibft_region(); - #ifdef CONFIG_KVM_CLOCK kvmclock_init(); #endif diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index ec1de97600e7..d801210945d6 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -21,6 +21,7 @@ #include <linux/cache.h> #include <linux/interrupt.h> #include <linux/cpu.h> +#include <linux/gfp.h> #include <asm/mtrr.h> #include <asm/tlbflush.h> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a02e80c3c54b..763d815e27a0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -49,6 +49,7 @@ #include <linux/nmi.h> #include <linux/tboot.h> #include <linux/stackprotector.h> +#include <linux/gfp.h> #include <asm/acpi.h> #include <asm/desc.h> @@ -242,12 +243,10 @@ static void __cpuinit smp_callin(void) end_local_APIC_setup(); map_cpu_to_logical_apicid(); - notify_cpu_starting(cpuid); - /* * Need to setup vector mappings before we enable interrupts. */ - __setup_vector_irq(smp_processor_id()); + setup_vector_irq(smp_processor_id()); /* * Get our bogomips. * @@ -264,6 +263,8 @@ static void __cpuinit smp_callin(void) */ smp_store_cpu_info(cpuid); + notify_cpu_starting(cpuid); + /* * Allow the master to continue. */ diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 364d015efebc..17b03dd3a6b5 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -9,6 +9,7 @@ #include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/kernel.h> +#include <linux/slab.h> #include <asm/mmu_context.h> #include <asm/uv/uv.h> diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index ece73d8e3240..1d40336b030a 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/rbtree.h> +#include <linux/slab.h> #include <linux/irq.h> #include <asm/apic.h> diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 2b75ef638dbc..56e421bc379b 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c @@ -19,6 +19,7 @@ * Copyright (c) Dimitri Sivanich */ #include <linux/clockchips.h> +#include <linux/slab.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 7dd599deca4a..ce9fbacb7526 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -28,6 +28,7 @@ #include <linux/mm.h> #include <linux/highmem.h> #include <linux/sched.h> +#include <linux/gfp.h> #include <asm/vmi.h> #include <asm/io.h> #include <asm/fixmap.h> diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 44879df55696..2cc249718c46 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -291,8 +291,8 @@ SECTIONS .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { __smp_locks = .; *(.smp_locks) - __smp_locks_end = .; . = ALIGN(PAGE_SIZE); + __smp_locks_end = .; } #ifdef CONFIG_X86_64 |