diff options
Diffstat (limited to 'arch/x86')
77 files changed, 734 insertions, 430 deletions
diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore new file mode 100644 index 000000000000..028079065af6 --- /dev/null +++ b/arch/x86/.gitignore @@ -0,0 +1,3 @@ +boot/compressed/vmlinux +tools/test_get_len + diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c index bcbd36c41432..5c228129d175 100644 --- a/arch/x86/boot/compressed/mkpiggy.c +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -77,7 +77,7 @@ int main(int argc, char *argv[]) offs += 32*1024 + 18; /* Add 32K + 18 bytes slack */ offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ - printf(".section \".rodata.compressed\",\"a\",@progbits\n"); + printf(".section \".rodata..compressed\",\"a\",@progbits\n"); printf(".globl z_input_len\n"); printf("z_input_len = %lu\n", ilen); printf(".globl z_output_len\n"); diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index a6f1a59a5b0c..5ddabceee124 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -26,8 +26,8 @@ SECTIONS HEAD_TEXT _ehead = . ; } - .rodata.compressed : { - *(.rodata.compressed) + .rodata..compressed : { + *(.rodata..compressed) } .text : { _text = .; /* Text */ diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index ed7aeff786b2..45bc9402aa49 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -41,13 +41,12 @@ static __videocard video_vga; static u8 vga_set_basic_mode(void) { struct biosregs ireg, oreg; - u16 ax; u8 mode; initregs(&ireg); /* Query current mode */ - ax = 0x0f00; + ireg.ax = 0x0f00; intcall(0x10, &ireg, &oreg); mode = oreg.al; diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index 2f9047cfaaca..48f99f15452e 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h @@ -7,7 +7,7 @@ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define __read_mostly __attribute__((__section__(".data.read_mostly"))) +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) #define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT #define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT) diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 942255310e6a..528a11e8d3e3 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -20,10 +20,10 @@ struct arch_hw_breakpoint { #include <linux/list.h> /* Available HW breakpoint length encodings */ +#define X86_BREAKPOINT_LEN_X 0x00 #define X86_BREAKPOINT_LEN_1 0x40 #define X86_BREAKPOINT_LEN_2 0x44 #define X86_BREAKPOINT_LEN_4 0x4c -#define X86_BREAKPOINT_LEN_EXECUTE 0x40 #ifdef CONFIG_X86_64 #define X86_BREAKPOINT_LEN_8 0x48 diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 63cb4096c3dc..9cb2edb87c2f 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -183,7 +183,7 @@ struct mp_ioapic_gsi{ u32 gsi_end; }; extern struct mp_ioapic_gsi mp_gsi_routing[]; -extern u32 gsi_end; +extern u32 gsi_top; int mp_find_ioapic(u32 gsi); int mp_find_ioapic_pin(int ioapic, u32 gsi); void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); @@ -197,7 +197,7 @@ static const int timer_through_8259 = 0; static inline void ioapic_init_mappings(void) { } static inline void ioapic_insert_resources(void) { } static inline void probe_nr_irqs_gsi(void) { } -#define gsi_end (NR_IRQS_LEGACY - 1) +#define gsi_top (NR_IRQS_LEGACY) static inline int mp_find_ioapic(u32 gsi) { return 0; } struct io_apic_irq_attr; diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h new file mode 100644 index 000000000000..36c93b5cc239 --- /dev/null +++ b/arch/x86/include/asm/local64.h @@ -0,0 +1 @@ +#include <asm-generic/local64.h> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b49d8ca228f6..8c7ae4318629 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -110,6 +110,7 @@ #define MSR_AMD64_PATCH_LOADER 0xc0010020 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD64_DC_CFG 0xc0011022 #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 8d8797eae5d7..cd2a31dc5fb8 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -53,6 +53,8 @@ extern int pcibios_last_bus; extern struct pci_bus *pci_root_bus; extern struct pci_ops pci_root_ops; +void pcibios_scan_specific_bus(int busn); + /* pci-irq.c */ struct irq_info { diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0797e748d280..cd28f9ad910d 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -77,6 +77,7 @@ do { \ if (0) { \ pto_T__ pto_tmp__; \ pto_tmp__ = (val); \ + (void)pto_tmp__; \ } \ switch (sizeof(var)) { \ case 1: \ @@ -115,6 +116,7 @@ do { \ if (0) { \ pao_T__ pao_tmp__; \ pao_tmp__ = (val); \ + (void)pao_tmp__; \ } \ switch (sizeof(var)) { \ case 1: \ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 254883d0c7e0..6e742cc4251b 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -68,8 +68,9 @@ union cpuid10_eax { union cpuid10_edx { struct { - unsigned int num_counters_fixed:4; - unsigned int reserved:28; + unsigned int num_counters_fixed:5; + unsigned int bit_width_fixed:8; + unsigned int reserved:19; } split; unsigned int full; }; @@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +#include <asm/stacktrace.h> + +/* + * We abuse bit 3 from flags to pass exact information, see perf_misc_flags + * and the comment with PERF_EFLAGS_EXACT. + */ +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->ip = (__ip); \ + (regs)->bp = caller_frame_pointer(); \ + (regs)->cs = __KERNEL_CS; \ + regs->flags = 0; \ +} + #else static inline void init_hw_perf_events(void) { } static inline void perf_events_lapic_init(void) { } diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 64a8ebff06fc..def500776b16 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -19,7 +19,6 @@ #define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) #define ARCH_P4_MAX_CCCR (18) -#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) #define P4_ESCR_EVENT_MASK 0x7e000000U #define P4_ESCR_EVENT_SHIFT 25 @@ -71,10 +70,6 @@ #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) -/* Custom bits in reerved CCCR area */ -#define P4_CCCR_CACHE_OPS_MASK 0x0000003fU - - /* Non HT mask */ #define P4_CCCR_MASK \ (P4_CCCR_OVF | \ @@ -106,8 +101,7 @@ * ESCR and CCCR but rather an only packed value should * be unpacked and written to a proper addresses * - * the base idea is to pack as much info as - * possible + * the base idea is to pack as much info as possible */ #define p4_config_pack_escr(v) (((u64)(v)) << 32) #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) @@ -130,8 +124,6 @@ t; \ }) -#define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK) - #define P4_CONFIG_HT_SHIFT 63 #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) @@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) return escr; } +/* + * This are the events which should be used in "Event Select" + * field of ESCR register, they are like unique keys which allow + * the kernel to determinate which CCCR and COUNTER should be + * used to track an event + */ enum P4_EVENTS { P4_EVENT_TC_DELIVER_MODE, P4_EVENT_BPU_FETCH_REQUEST, @@ -561,7 +559,7 @@ enum P4_EVENT_OPCODES { * a caller should use P4_ESCR_EMASK_NAME helper to * pick the EventMask needed, for example * - * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) + * P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) */ enum P4_ESCR_EMASKS { P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), @@ -753,43 +751,50 @@ enum P4_ESCR_EMASKS { P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), }; -/* P4 PEBS: stale for a while */ -#define P4_PEBS_METRIC_MASK 0x00001fffU -#define P4_PEBS_UOB_TAG 0x01000000U -#define P4_PEBS_ENABLE 0x02000000U - -/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ -#define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 -#define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 -#define P4_PEBS__dtlb_load_miss_retired 0x3000004 -#define P4_PEBS__dtlb_store_miss_retired 0x3000004 -#define P4_PEBS__dtlb_all_miss_retired 0x3000004 -#define P4_PEBS__tagged_mispred_branch 0x3018000 -#define P4_PEBS__mob_load_replay_retired 0x3000200 -#define P4_PEBS__split_load_retired 0x3000400 -#define P4_PEBS__split_store_retired 0x3000400 - -#define P4_VERT__1stl_cache_load_miss_retired 0x0000001 -#define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 -#define P4_VERT__dtlb_load_miss_retired 0x0000001 -#define P4_VERT__dtlb_store_miss_retired 0x0000002 -#define P4_VERT__dtlb_all_miss_retired 0x0000003 -#define P4_VERT__tagged_mispred_branch 0x0000010 -#define P4_VERT__mob_load_replay_retired 0x0000001 -#define P4_VERT__split_load_retired 0x0000001 -#define P4_VERT__split_store_retired 0x0000002 - -enum P4_CACHE_EVENTS { - P4_CACHE__NONE, - - P4_CACHE__1stl_cache_load_miss_retired, - P4_CACHE__2ndl_cache_load_miss_retired, - P4_CACHE__dtlb_load_miss_retired, - P4_CACHE__dtlb_store_miss_retired, - P4_CACHE__itlb_reference_hit, - P4_CACHE__itlb_reference_miss, - - P4_CACHE__MAX +/* + * P4 PEBS specifics (Replay Event only) + * + * Format (bits): + * 0-6: metric from P4_PEBS_METRIC enum + * 7 : reserved + * 8 : reserved + * 9-11 : reserved + * + * Note we have UOP and PEBS bits reserved for now + * just in case if we will need them once + */ +#define P4_PEBS_CONFIG_ENABLE (1 << 7) +#define P4_PEBS_CONFIG_UOP_TAG (1 << 8) +#define P4_PEBS_CONFIG_METRIC_MASK 0x3f +#define P4_PEBS_CONFIG_MASK 0xff + +/* + * mem: Only counters MSR_IQ_COUNTER4 (16) and + * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling + */ +#define P4_PEBS_ENABLE 0x02000000U +#define P4_PEBS_ENABLE_UOP_TAG 0x01000000U + +#define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK) +#define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK) + +#define p4_config_pebs_has(v, mask) (p4_config_unpack_pebs(v) & (mask)) + +enum P4_PEBS_METRIC { + P4_PEBS_METRIC__none, + + P4_PEBS_METRIC__1stl_cache_load_miss_retired, + P4_PEBS_METRIC__2ndl_cache_load_miss_retired, + P4_PEBS_METRIC__dtlb_load_miss_retired, + P4_PEBS_METRIC__dtlb_store_miss_retired, + P4_PEBS_METRIC__dtlb_all_miss_retired, + P4_PEBS_METRIC__tagged_mispred_branch, + P4_PEBS_METRIC__mob_load_replay_retired, + P4_PEBS_METRIC__split_load_retired, + P4_PEBS_METRIC__split_store_retired, + + P4_PEBS_METRIC__max }; #endif /* PERF_EVENT_P4_H */ + diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 5e67c1532314..ed5903be26fe 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -26,7 +26,7 @@ */ #define VMALLOC_OFFSET (8 * 1024 * 1024) -#ifndef __ASSEMBLER__ +#ifndef __ASSEMBLY__ extern bool __vmalloc_start_set; /* set once high_memory is set */ #endif diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 4dab78edbad9..2b16a2ad23dc 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -1,6 +1,13 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + */ + #ifndef _ASM_X86_STACKTRACE_H #define _ASM_X86_STACKTRACE_H +#include <linux/uaccess.h> + extern int kstack_depth_to_print; struct thread_info; @@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data); +#ifdef CONFIG_X86_32 +#define STACKSLOTS_PER_LINE 8 +#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) +#else +#define STACKSLOTS_PER_LINE 4 +#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) +#endif + +extern void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, char *log_lvl); + +extern void +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, unsigned long bp, char *log_lvl); + +extern unsigned int code_bytes; + +/* The form of the top of the frame on the stack */ +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; + +struct stack_frame_ia32 { + u32 next_frame; + u32 return_address; +}; + +static inline unsigned long caller_frame_pointer(void) +{ + struct stack_frame *frame; + + get_bp(frame); + +#ifdef CONFIG_FRAME_POINTER + frame = frame->next_frame; +#endif + + return (unsigned long)frame; +} + #endif /* _ASM_X86_STACKTRACE_H */ diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 48dcfa62ea07..fd921c3a6841 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -15,6 +15,8 @@ static inline int arch_prepare_suspend(void) { return 0; } struct saved_context { u16 es, fs, gs, ss; unsigned long cr0, cr2, cr3, cr4; + u64 misc_enable; + bool misc_enable_saved; struct desc_ptr gdt; struct desc_ptr idt; u16 ldt; diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 06284f42b759..8d942afae681 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -27,6 +27,8 @@ struct saved_context { u16 ds, es, fs, gs, ss; unsigned long gs_base, gs_kernel_base, fs_base; unsigned long cr0, cr2, cr3, cr4, cr8; + u64 misc_enable; + bool misc_enable_saved; unsigned long efer; u16 gdt_pad; u16 gdt_limit; diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index b8fe48ee2ed9..e7f4d33c55ed 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -451,7 +451,7 @@ void stop_this_cpu(void *dummy); * * (Could use an alternative three way for this if there was one.) */ -static inline void rdtsc_barrier(void) +static __always_inline void rdtsc_barrier(void) { alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 519b54327d75..baa579c8e038 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -142,6 +142,7 @@ struct x86_cpuinit_ops { * @set_wallclock: set time back to HW clock * @is_untracked_pat_range exclude from PAT logic * @nmi_init enable NMI on cpus + * @i8042_detect pre-detect if i8042 controller exists */ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); @@ -150,6 +151,7 @@ struct x86_platform_ops { void (*iommu_shutdown)(void); bool (*is_untracked_pat_range)(u64 start, u64 end); void (*nmi_init)(void); + int (*i8042_detect)(void); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 60cc4058ed5f..c05872aa3ce0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -118,7 +118,7 @@ static unsigned int gsi_to_irq(unsigned int gsi) if (gsi >= NR_IRQS_LEGACY) irq = gsi; else - irq = gsi_end + 1 + gsi; + irq = gsi_top + gsi; return irq; } @@ -129,10 +129,10 @@ static u32 irq_to_gsi(int irq) if (irq < NR_IRQS_LEGACY) gsi = isa_irq_to_gsi[irq]; - else if (irq <= gsi_end) + else if (irq < gsi_top) gsi = irq; - else if (irq <= (gsi_end + NR_IRQS_LEGACY)) - gsi = irq - gsi_end; + else if (irq < (gsi_top + NR_IRQS_LEGACY)) + gsi = irq - gsi_top; else gsi = 0xffffffff; diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 2e837f5080fe..fb7a5f052e2b 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -145,6 +145,15 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, percpu_entry->states[cx->index].eax = cx->address; percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; } + + /* + * For _CST FFH on Intel, if GAS.access_size bit 1 is cleared, + * then we should skip checking BM_STS for this C-state. + * ref: "Intel Processor Vendor-Specific ACPI Interface Specification" + */ + if ((c->x86_vendor == X86_VENDOR_INTEL) && !(reg->access_size & 0x2)) + cx->bm_sts_skip = 1; + return retval; } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e508677b91..fcc3c61fdecc 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -157,9 +157,14 @@ static int __init acpi_sleep_setup(char *str) #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_nohwsig", 10) == 0) acpi_no_s4_hw_signature(); - if (strncmp(str, "s4_nonvs", 8) == 0) - acpi_s4_no_nvs(); + if (strncmp(str, "s4_nonvs", 8) == 0) { + pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, " + "please use acpi_sleep=nonvs instead"); + acpi_nvs_nosave(); + } #endif + if (strncmp(str, "nonvs", 5) == 0) + acpi_nvs_nosave(); if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); str = strchr(str, ','); diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index 8ded418b0593..13ab720573e3 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -1,4 +1,4 @@ - .section .text.page_aligned + .section .text..page_aligned #include <linux/linkage.h> #include <asm/segment.h> #include <asm/page_types.h> diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index fa5a1474cd18..0d20286d78c6 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1487,6 +1487,7 @@ static int __attach_device(struct device *dev, struct protection_domain *domain) { struct iommu_dev_data *dev_data, *alias_data; + int ret; dev_data = get_dev_data(dev); alias_data = get_dev_data(dev_data->alias); @@ -1498,13 +1499,14 @@ static int __attach_device(struct device *dev, spin_lock(&domain->lock); /* Some sanity checks */ + ret = -EBUSY; if (alias_data->domain != NULL && alias_data->domain != domain) - return -EBUSY; + goto out_unlock; if (dev_data->domain != NULL && dev_data->domain != domain) - return -EBUSY; + goto out_unlock; /* Do real assignment */ if (dev_data->alias != dev) { @@ -1520,10 +1522,14 @@ static int __attach_device(struct device *dev, atomic_inc(&dev_data->bind); + ret = 0; + +out_unlock: + /* ready */ spin_unlock(&domain->lock); - return 0; + return ret; } /* @@ -2324,10 +2330,6 @@ int __init amd_iommu_init_dma_ops(void) iommu_detected = 1; swiotlb = 0; -#ifdef CONFIG_GART_IOMMU - gart_iommu_aperture_disabled = 1; - gart_iommu_aperture = 0; -#endif /* Make the driver finally visible to the drivers */ dma_ops = &amd_iommu_dma_ops; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3bacb4d0844c..3cc63e2b8dd4 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -287,8 +287,12 @@ static u8 * __init iommu_map_mmio_space(u64 address) { u8 *ret; - if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) + if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) { + pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n", + address); + pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); return NULL; + } ret = ioremap_nocache(address, MMIO_REGION_LENGTH); if (ret != NULL) @@ -1314,7 +1318,7 @@ static int __init amd_iommu_init(void) ret = amd_iommu_init_dma_ops(); if (ret) - goto free; + goto free_disable; amd_iommu_init_api(); @@ -1332,9 +1336,10 @@ static int __init amd_iommu_init(void) out: return ret; -free: +free_disable: disable_iommus(); +free: amd_iommu_uninit_devices(); free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, @@ -1353,6 +1358,15 @@ free: free_unity_maps(); +#ifdef CONFIG_GART_IOMMU + /* + * We failed to initialize the AMD IOMMU - try fallback to GART + * if possible. + */ + gart_iommu_init(); + +#endif + goto out; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c02cc692985c..a96489ee6cab 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -921,7 +921,7 @@ void disable_local_APIC(void) unsigned int value; /* APIC hasn't been mapped yet */ - if (!apic_phys) + if (!x2apic_mode && !apic_phys) return; clear_local_APIC(); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 33f3563a2a52..e41ed24ab26d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -89,8 +89,8 @@ int nr_ioapics; /* IO APIC gsi routing info */ struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; -/* The last gsi number used */ -u32 gsi_end; +/* The one past the highest gsi number used */ +u32 gsi_top; /* MP IRQ source entries */ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; @@ -1035,7 +1035,7 @@ static int pin_2_irq(int idx, int apic, int pin) if (gsi >= NR_IRQS_LEGACY) irq = gsi; else - irq = gsi_end + 1 + gsi; + irq = gsi_top + gsi; } #ifdef CONFIG_X86_32 @@ -3853,7 +3853,7 @@ void __init probe_nr_irqs_gsi(void) { int nr; - nr = gsi_end + 1 + NR_IRQS_LEGACY; + nr = gsi_top + NR_IRQS_LEGACY; if (nr > nr_irqs_gsi) nr_irqs_gsi = nr; @@ -4294,8 +4294,8 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) */ nr_ioapic_registers[idx] = entries; - if (mp_gsi_routing[idx].gsi_end > gsi_end) - gsi_end = mp_gsi_routing[idx].gsi_end; + if (mp_gsi_routing[idx].gsi_end >= gsi_top) + gsi_top = mp_gsi_routing[idx].gsi_end + 1; printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 1d3cddaa40ee..cee5263927c1 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -34,7 +34,6 @@ #include <linux/compiler.h> #include <linux/dmi.h> #include <linux/slab.h> -#include <trace/events/power.h> #include <linux/acpi.h> #include <linux/io.h> @@ -324,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } } - trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency); - switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index ce7cde713e71..a36de5bbb622 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c @@ -368,22 +368,16 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle) return -ENODEV; out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) { - ret = -ENODEV; - goto out_free; - } + if (out_obj->type != ACPI_TYPE_BUFFER) + return -ENODEV; errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); - if (errors) { - ret = -ENODEV; - goto out_free; - } + if (errors) + return -ENODEV; supported = *((u32 *)(out_obj->buffer.pointer + 4)); - if (!(supported & 0x1)) { - ret = -ENODEV; - goto out_free; - } + if (!(supported & 0x1)) + return -ENODEV; out_free: kfree(output.pointer); @@ -397,13 +391,17 @@ static int __init pcc_cpufreq_probe(void) struct pcc_memory_resource *mem_resource; struct pcc_register_resource *reg_resource; union acpi_object *out_obj, *member; - acpi_handle handle, osc_handle; + acpi_handle handle, osc_handle, pcch_handle; int ret = 0; status = acpi_get_handle(NULL, "\\_SB", &handle); if (ACPI_FAILURE(status)) return -ENODEV; + status = acpi_get_handle(handle, "PCCH", &pcch_handle); + if (ACPI_FAILURE(status)) + return -ENODEV; + status = acpi_get_handle(handle, "_OSC", &osc_handle); if (ACPI_SUCCESS(status)) { ret = pcc_cpufreq_do_osc(&osc_handle); @@ -543,13 +541,13 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) if (!pcch_virt_addr) { result = -1; - goto pcch_null; + goto out; } result = pcc_get_offset(cpu); if (result) { dprintk("init: PCCP evaluation failed\n"); - goto free; + goto out; } policy->max = policy->cpuinfo.max_freq = @@ -558,14 +556,15 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) ioread32(&pcch_hdr->minimum_frequency) * 1000; policy->cur = pcc_get_freq(cpu); + if (!policy->cur) { + dprintk("init: Unable to get current CPU frequency\n"); + result = -EINVAL; + goto out; + } + dprintk("init: policy->max is %d, policy->min is %d\n", policy->max, policy->min); - - return 0; -free: - pcc_clear_mapping(); - free_percpu(pcc_cpu_info); -pcch_null: +out: return result; } diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 7ec2123838e6..3e90cce3dc8b 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1023,13 +1023,12 @@ static int get_transition_latency(struct powernow_k8_data *data) } if (max_latency == 0) { /* - * Fam 11h always returns 0 as transition latency. - * This is intended and means "very fast". While cpufreq core - * and governors currently can handle that gracefully, better - * set it to 1 to avoid problems in the future. - * For all others it's a BIOS bug. + * Fam 11h and later may return 0 as transition latency. This + * is intended and means "very fast". While cpufreq core and + * governors currently can handle that gracefully, better set it + * to 1 to avoid problems in the future. */ - if (boot_cpu_data.x86 != 0x11) + if (boot_cpu_data.x86 < 0x11) printk(KERN_ERR FW_WARN PFX "Invalid zero transition " "latency\n"); max_latency = 1; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 707165dbc203..18cc42562250 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -36,6 +36,7 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/debugfs.h> +#include <linux/edac_mce.h> #include <asm/processor.h> #include <asm/hw_irq.h> @@ -169,6 +170,15 @@ void mce_log(struct mce *mce) entry = rcu_dereference_check_mce(mcelog.next); for (;;) { /* + * If edac_mce is enabled, it will check the error type + * and will process it, if it is a known error. + * Otherwise, the error will be sent through mcelog + * interface + */ + if (edac_mce_parse(mce)) + return; + + /* * When the buffer fills up discard new entries. * Assume that the earlier errors are the more * interesting ones: diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5db5b7d65a18..f2da20fda02d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -220,6 +220,7 @@ struct x86_pmu { struct perf_event *event); struct event_constraint *event_constraints; void (*quirks)(void); + int perfctr_second_write; int (*cpu_prepare)(int cpu); void (*cpu_starting)(int cpu); @@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event) * count to the generic event atomically: */ again: - prev_raw_count = atomic64_read(&hwc->prev_count); + prev_raw_count = local64_read(&hwc->prev_count); rdmsrl(hwc->event_base + idx, new_raw_count); - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; @@ -313,8 +314,8 @@ again: delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - atomic64_add(delta, &event->count); - atomic64_sub(delta, &hwc->period_left); + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); return new_raw_count; } @@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event) if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); + local64_set(&hwc->period_left, hwc->sample_period); } else { /* * If we have a PMU initialized but no APIC @@ -885,7 +886,7 @@ static int x86_perf_event_set_period(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - s64 left = atomic64_read(&hwc->period_left); + s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0, idx = hwc->idx; @@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event) */ if (unlikely(left <= -period)) { left = period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } if (unlikely(left <= 0)) { left += period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } @@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event) * The hw event starts counting from this event offset, * mark it to be able to extra future deltas: */ - atomic64_set(&hwc->prev_count, (u64)-left); + local64_set(&hwc->prev_count, (u64)-left); - wrmsrl(hwc->event_base + idx, + wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); + + /* + * Due to erratum on certan cpu we need + * a second write to be sure the register + * is updated properly + */ + if (x86_pmu.perfctr_second_write) { + wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); + } perf_event_update_userpage(event); @@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event) * skip the schedulability test here, it will be peformed * at commit time(->commit_txn) as a whole */ - if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) + if (cpuc->group_flag & PERF_EVENT_TXN) goto out; ret = x86_pmu.schedule_events(cpuc, n, assign); @@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event) * The events never got scheduled and ->cancel_txn will truncate * the event_list. */ - if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) + if (cpuc->group_flag & PERF_EVENT_TXN) return; x86_pmu_stop(event); @@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - cpuc->group_flag |= PERF_EVENT_TXN_STARTED; + cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; + cpuc->group_flag &= ~PERF_EVENT_TXN; /* * Truncate the collected events. */ @@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) */ memcpy(cpuc->assign, assign, n*sizeof(int)); - /* - * Clear out the txn count so that ->cancel_txn() which gets - * run after ->commit_txn() doesn't undo things. - */ - cpuc->n_txn = 0; + cpuc->group_flag &= ~PERF_EVENT_TXN; return 0; } @@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack_bp, }; -#include "../dumpstack.h" - static void perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { @@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } -void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) -{ - regs->ip = ip; - /* - * perf_arch_fetch_caller_regs adds another call, we need to increment - * the skip level - */ - regs->bp = rewind_frame_pointer(skip + 1); - regs->cs = __KERNEL_CS; - /* - * We abuse bit 3 to pass exact information, see perf_misc_flags - * and the comment with PERF_EFLAGS_EXACT. - */ - regs->flags = 0; -} - unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long ip; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 611df11ba15e..c2897b7b4a3b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -102,8 +102,8 @@ static const u64 amd_perfmon_event_map[] = [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, }; static u64 amd_pmu_event_map(int hw_event) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index fdbc652d3feb..214ac860ebe0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -72,6 +72,7 @@ static struct event_constraint intel_westmere_event_constraints[] = INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ + INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */ EVENT_CONSTRAINT_END }; diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ae85d69644d1..107711bf0ee8 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -21,22 +21,36 @@ struct p4_event_bind { char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ }; -struct p4_cache_event_bind { +struct p4_pebs_bind { unsigned int metric_pebs; unsigned int metric_vert; }; -#define P4_GEN_CACHE_EVENT_BIND(name) \ - [P4_CACHE__##name] = { \ - .metric_pebs = P4_PEBS__##name, \ - .metric_vert = P4_VERT__##name, \ +/* it sets P4_PEBS_ENABLE_UOP_TAG as well */ +#define P4_GEN_PEBS_BIND(name, pebs, vert) \ + [P4_PEBS_METRIC__##name] = { \ + .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \ + .metric_vert = vert, \ } -static struct p4_cache_event_bind p4_cache_event_bind_map[] = { - P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), - P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), - P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), - P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), +/* + * note we have P4_PEBS_ENABLE_UOP_TAG always set here + * + * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of + * event configuration to find out which values are to be + * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT + * resgisters + */ +static struct p4_pebs_bind p4_pebs_bind_map[] = { + P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001), + P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001), + P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001), + P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002), + P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003), + P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010), + P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001), + P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001), + P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002), }; /* @@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = { }, }; -#define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ +#define P4_GEN_CACHE_EVENT(event, bit, metric) \ p4_config_pack_escr(P4_ESCR_EVENT(event) | \ P4_ESCR_EMASK_BIT(event, bit)) | \ - p4_config_pack_cccr(cache_event | \ + p4_config_pack_cccr(metric | \ P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) static __initconst const u64 p4_hw_cache_event_ids @@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0, [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, - P4_CACHE__1stl_cache_load_miss_retired), + P4_PEBS_METRIC__1stl_cache_load_miss_retired), }, }, [ C(LL ) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0, [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, - P4_CACHE__2ndl_cache_load_miss_retired), + P4_PEBS_METRIC__2ndl_cache_load_miss_retired), }, }, [ C(DTLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0, [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, - P4_CACHE__dtlb_load_miss_retired), + P4_PEBS_METRIC__dtlb_load_miss_retired), }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0x0, [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, - P4_CACHE__dtlb_store_miss_retired), + P4_PEBS_METRIC__dtlb_store_miss_retired), }, }, [ C(ITLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, - P4_CACHE__itlb_reference_hit), + P4_PEBS_METRIC__none), [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, - P4_CACHE__itlb_reference_miss), + P4_PEBS_METRIC__none), }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, @@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event) return config; } +static int p4_validate_raw_event(struct perf_event *event) +{ + unsigned int v; + + /* user data may have out-of-bound event index */ + v = p4_config_unpack_event(event->attr.config); + if (v >= ARRAY_SIZE(p4_event_bind_map)) { + pr_warning("P4 PMU: Unknown event code: %d\n", v); + return -EINVAL; + } + + /* + * it may have some screwed PEBS bits + */ + if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { + pr_warning("P4 PMU: PEBS are not supported yet\n"); + return -EINVAL; + } + v = p4_config_unpack_metric(event->attr.config); + if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { + pr_warning("P4 PMU: Unknown metric code: %d\n", v); + return -EINVAL; + } + + return 0; +} + static int p4_hw_config(struct perf_event *event) { int cpu = get_cpu(); int rc = 0; - unsigned int evnt; u32 escr, cccr; /* @@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event) if (event->attr.type == PERF_TYPE_RAW) { - /* user data may have out-of-bound event index */ - evnt = p4_config_unpack_event(event->attr.config); - if (evnt >= ARRAY_SIZE(p4_event_bind_map)) { - rc = -EINVAL; + rc = p4_validate_raw_event(event); + if (rc) goto out; - } /* * We don't control raw events so it's up to the caller @@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event) * on HT machine but allow HT-compatible specifics to be * passed on) * + * Note that for RAW events we allow user to use P4_CCCR_RESERVED + * bits since we keep additional info here (for cache events and etc) + * * XXX: HT wide things should check perf_paranoid_cpu() && * CAP_SYS_ADMIN */ event->hw.config |= event->attr.config & (p4_config_pack_escr(P4_ESCR_MASK_HT) | - p4_config_pack_cccr(P4_CCCR_MASK_HT)); + p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); } rc = x86_setup_perfctr(event); @@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) return overflow; } +static void p4_pmu_disable_pebs(void) +{ + /* + * FIXME + * + * It's still allowed that two threads setup same cache + * events so we can't simply clear metrics until we knew + * noone is depending on us, so we need kind of counter + * for "ReplayEvent" users. + * + * What is more complex -- RAW events, if user (for some + * reason) will pass some cache event metric with improper + * event opcode -- it's fine from hardware point of view + * but completely nonsence from "meaning" of such action. + * + * So at moment let leave metrics turned on forever -- it's + * ok for now but need to be revisited! + * + * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); + * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); + */ +} + static inline void p4_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void) continue; p4_pmu_disable_event(event); } + + p4_pmu_disable_pebs(); +} + +/* configuration must be valid */ +static void p4_pmu_enable_pebs(u64 config) +{ + struct p4_pebs_bind *bind; + unsigned int idx; + + BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK); + + idx = p4_config_unpack_metric(config); + if (idx == P4_PEBS_METRIC__none) + return; + + bind = &p4_pebs_bind_map[idx]; + + (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); + (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); } static void p4_pmu_enable_event(struct perf_event *event) @@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event) int thread = p4_ht_config_thread(hwc->config); u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); unsigned int idx = p4_config_unpack_event(hwc->config); - unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config); struct p4_event_bind *bind; - struct p4_cache_event_bind *bind_cache; u64 escr_addr, cccr; bind = &p4_event_bind_map[idx]; @@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event) cccr = p4_config_unpack_cccr(hwc->config); /* - * it could be Cache event so that we need to - * set metrics into additional MSRs + * it could be Cache event so we need to write metrics + * into additional MSRs */ - BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); - if (idx_cache > P4_CACHE__NONE && - idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) { - bind_cache = &p4_cache_event_bind_map[idx_cache]; - (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs); - (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert); - } + p4_pmu_enable_pebs(hwc->config); (void)checking_wrmsrl(escr_addr, escr_conf); (void)checking_wrmsrl(hwc->config_base + hwc->idx, @@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = { .max_period = (1ULL << 39) - 1, .hw_config = p4_hw_config, .schedule_events = p4_pmu_schedule_events, + /* + * This handles erratum N15 in intel doc 249199-029, + * the counter may not be updated correctly on write + * so we need a second write operation to do the trick + * (the official workaround didn't work) + * + * the former idea is taken from OProfile code + */ + .perfctr_second_write = 1, }; static __init int p4_pmu_init(void) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c89a386930b7..6e8752c1bd52 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -18,7 +18,6 @@ #include <asm/stacktrace.h> -#include "dumpstack.h" int panic_on_unrecovered_nmi; int panic_on_io_nmi; diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h deleted file mode 100644 index e1a93be4fd44..000000000000 --- a/arch/x86/kernel/dumpstack.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs - */ - -#ifndef DUMPSTACK_H -#define DUMPSTACK_H - -#ifdef CONFIG_X86_32 -#define STACKSLOTS_PER_LINE 8 -#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) -#else -#define STACKSLOTS_PER_LINE 4 -#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) -#endif - -#include <linux/uaccess.h> - -extern void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl); - -extern void -show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl); - -extern unsigned int code_bytes; - -/* The form of the top of the frame on the stack */ -struct stack_frame { - struct stack_frame *next_frame; - unsigned long return_address; -}; - -struct stack_frame_ia32 { - u32 next_frame; - u32 return_address; -}; - -static inline unsigned long rewind_frame_pointer(int n) -{ - struct stack_frame *frame; - - get_bp(frame); - -#ifdef CONFIG_FRAME_POINTER - while (n--) { - if (probe_kernel_address(&frame->next_frame, frame)) - break; - } -#endif - - return (unsigned long)frame; -} - -#endif /* DUMPSTACK_H */ diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 11540a189d93..0f6376ffa2d9 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -16,8 +16,6 @@ #include <asm/stacktrace.h> -#include "dumpstack.h" - void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 272c9f1f05f3..57a21f11c791 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -16,7 +16,6 @@ #include <asm/stacktrace.h> -#include "dumpstack.h" #define N_EXCEPTION_STACKS_END \ (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7bca3c6a02fb..0d6fc71bedb1 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -729,7 +729,7 @@ static int __init e820_mark_nvs_memory(void) struct e820entry *ei = &e820.map[i]; if (ei->type == E820_NVS) - hibernate_nvs_register(ei->addr, ei->size); + suspend_nvs_register(ei->addr, ei->size); } return 0; diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index ebdb85cf2686..e5cc7e82e60d 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -18,6 +18,7 @@ #include <asm/apic.h> #include <asm/iommu.h> #include <asm/gart.h> +#include <asm/hpet.h> static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -191,6 +192,21 @@ static void __init ati_bugs_contd(int num, int slot, int func) } #endif +/* + * Force the read back of the CMP register in hpet_next_event() + * to work around the problem that the CMP register write seems to be + * delayed. See hpet_next_event() for details. + * + * We do this on all SMBUS incarnations for now until we have more + * information about the affected chipsets. + */ +static void __init ati_hpet_bugs(int num, int slot, int func) +{ +#ifdef CONFIG_HPET_TIMER + hpet_readback_cmp = 1; +#endif +} + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -220,6 +236,8 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, + { PCI_VENDOR_ID_ATI, PCI_ANY_ID, + PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs }, {} }; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 0697ff139837..4db7c4d12ffa 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -571,8 +571,8 @@ auditsys: * masked off. */ sysret_audit: - movq %rax,%rsi /* second arg, syscall return value */ - cmpq $0,%rax /* is it < 0? */ + movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ + cmpq $0,%rsi /* is it < 0? */ setl %al /* 1 if so, 0 if not */ movzbl %al,%edi /* zero-extend that into %edi */ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index a198b7c87a12..ba390d731175 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -964,7 +964,7 @@ fs_initcall(hpet_late_init); void hpet_disable(void) { - if (is_hpet_capable()) { + if (is_hpet_capable() && hpet_virt_address) { unsigned int cfg = hpet_readl(HPET_CFG); if (hpet_legacy_int_enabled) { diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a8f1b803d2fd..a474ec37c32f 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type, { /* Len */ switch (x86_len) { + case X86_BREAKPOINT_LEN_X: + *gen_len = sizeof(long); + break; case X86_BREAKPOINT_LEN_1: *gen_len = HW_BREAKPOINT_LEN_1; break; @@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp) info->address = bp->attr.bp_addr; + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_W: + info->type = X86_BREAKPOINT_WRITE; + break; + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + info->type = X86_BREAKPOINT_RW; + break; + case HW_BREAKPOINT_X: + info->type = X86_BREAKPOINT_EXECUTE; + /* + * x86 inst breakpoints need to have a specific undefined len. + * But we still need to check userspace is not trying to setup + * an unsupported length, to get a range breakpoint for example. + */ + if (bp->attr.bp_len == sizeof(long)) { + info->len = X86_BREAKPOINT_LEN_X; + return 0; + } + default: + return -EINVAL; + } + /* Len */ switch (bp->attr.bp_len) { case HW_BREAKPOINT_LEN_1: @@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp) return -EINVAL; } - /* Type */ - switch (bp->attr.bp_type) { - case HW_BREAKPOINT_W: - info->type = X86_BREAKPOINT_WRITE; - break; - case HW_BREAKPOINT_W | HW_BREAKPOINT_R: - info->type = X86_BREAKPOINT_RW; - break; - case HW_BREAKPOINT_X: - info->type = X86_BREAKPOINT_EXECUTE; - break; - default: - return -EINVAL; - } - return 0; } /* @@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) ret = -EINVAL; switch (info->len) { + case X86_BREAKPOINT_LEN_X: + align = sizeof(long) -1; + break; case X86_BREAKPOINT_LEN_1: align = 0; break; @@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) perf_bp_event(bp, args->regs); + /* + * Set up resume flag to avoid breakpoint recursion when + * returning back to origin. + */ + if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) + args->regs->flags |= X86_EFLAGS_RF; + rcu_read_unlock(); } /* diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 7c9f02c130f3..cafa7c80ac95 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -276,16 +276,6 @@ static struct sys_device device_i8259A = { .cls = &i8259_sysdev_class, }; -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - static void mask_8259A(void) { unsigned long flags; @@ -407,3 +397,18 @@ struct legacy_pic default_legacy_pic = { }; struct legacy_pic *legacy_pic = &default_legacy_pic; + +static int __init i8259A_init_sysfs(void) +{ + int error; + + if (legacy_pic != &default_legacy_pic) + return 0; + + error = sysdev_class_register(&i8259_sysdev_class); + if (!error) + error = sysdev_register(&device_i8259A); + return error; +} + +device_initcall(i8259A_init_sysfs); diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 3a54dcb9cd0e..43e9ccf44947 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -34,7 +34,7 @@ EXPORT_SYMBOL(init_task); /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. The TSS size is kept cacheline-aligned - * so they are allowed to end up in the .data.cacheline_aligned + * so they are allowed to end up in the .data..cacheline_aligned * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 4f4af75b9482..01ab17ae2ae7 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -572,7 +572,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) return NOTIFY_STOP; } -#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP int kgdb_ll_trap(int cmd, const char *str, struct pt_regs *regs, long err, int trap, int sig) { @@ -590,7 +589,6 @@ int kgdb_ll_trap(int cmd, const char *str, return __kgdb_notify(&args, cmd); } -#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ static int kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) @@ -625,6 +623,12 @@ int kgdb_arch_init(void) return register_die_notifier(&kgdb_notifier); } +static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi, + struct perf_sample_data *data, struct pt_regs *regs) +{ + kgdb_ll_trap(DIE_DEBUG, "debug", regs, 0, 0, SIGTRAP); +} + void kgdb_arch_late(void) { int i, cpu; @@ -655,6 +659,7 @@ void kgdb_arch_late(void) for_each_online_cpu(cpu) { pevent = per_cpu_ptr(breakinfo[i].pev, cpu); pevent[0]->hw.sample_period = 1; + pevent[0]->overflow_handler = kgdb_hw_overflow_handler; if (pevent[0]->destroy != NULL) { pevent[0]->destroy = NULL; release_bp_slot(*pevent); diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 345a4b1fe144..1bfb6cf4dd55 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to) } /* - * Check for the REX prefix which can only exist on X86_64 - * X86_32 always returns 0 + * Skip the prefixes of the instruction. */ -static int __kprobes is_REX_prefix(kprobe_opcode_t *insn) +static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) { + insn_attr_t attr; + + attr = inat_get_opcode_attribute((insn_byte_t)*insn); + while (inat_is_legacy_prefix(attr)) { + insn++; + attr = inat_get_opcode_attribute((insn_byte_t)*insn); + } #ifdef CONFIG_X86_64 - if ((*insn & 0xf0) == 0x40) - return 1; + if (inat_is_rex_prefix(attr)) + insn++; #endif - return 0; + return insn; } /* @@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr) */ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) { + /* Skip prefixes */ + insn = skip_prefixes(insn); + switch (*insn) { case 0xfa: /* cli */ case 0xfb: /* sti */ @@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) return 1; } - /* - * on X86_64, 0x40-0x4f are REX prefixes so we need to look - * at the next byte instead.. but of course not recurse infinitely - */ - if (is_REX_prefix(insn)) - return is_IF_modifier(++insn); - return 0; } @@ -640,8 +642,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) /* Skip cs, ip, orig_ax and gs. */ \ " subl $16, %esp\n" \ " pushl %fs\n" \ - " pushl %ds\n" \ " pushl %es\n" \ + " pushl %ds\n" \ " pushl %eax\n" \ " pushl %ebp\n" \ " pushl %edi\n" \ @@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p, unsigned long orig_ip = (unsigned long)p->addr; kprobe_opcode_t *insn = p->ainsn.insn; - /*skip the REX prefix*/ - if (is_REX_prefix(insn)) - insn++; + /* Skip prefixes */ + insn = skip_prefixes(insn); regs->flags &= ~X86_EFLAGS_TF; switch (*insn) { diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5ae5d2426edf..d86dbf7e54be 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -123,7 +123,7 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", m->apicid, m->apicver, m->apicaddr); - mp_register_ioapic(m->apicid, m->apicaddr, gsi_end + 1); + mp_register_ioapic(m->apicid, m->apicaddr, gsi_top); } static void print_MP_intsrc_info(struct mpc_intsrc *m) diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index e796448f0eb5..5915e0b33303 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c @@ -216,6 +216,12 @@ static void __init mrst_setup_boot_clock(void) setup_boot_APIC_clock(); }; +/* MID systems don't have i8042 controller */ +static int mrst_i8042_detect(void) +{ + return 0; +} + /* * Moorestown specific x86_init function overrides and early setup * calls. @@ -233,6 +239,7 @@ void __init x86_mrst_early_setup(void) x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock; x86_platform.calibrate_tsc = mrst_calibrate_tsc; + x86_platform.i8042_detect = mrst_i8042_detect; x86_init.pci.init = pci_mrst_init; x86_init.pci.fixup_irqs = x86_init_noop; diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index fb99f7edb341..078d4ec1a9d9 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -103,11 +103,16 @@ int use_calgary __read_mostly = 0; #define PMR_SOFTSTOPFAULT 0x40000000 #define PMR_HARDSTOP 0x20000000 -#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */ -#define MAX_NUM_CHASSIS 8 /* max number of chassis */ -/* MAX_PHB_BUS_NUM is the maximal possible dev->bus->number */ -#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2) -#define PHBS_PER_CALGARY 4 +/* + * The maximum PHB bus number. + * x3950M2 (rare): 8 chassis, 48 PHBs per chassis = 384 + * x3950M2: 4 chassis, 48 PHBs per chassis = 192 + * x3950 (PCIE): 8 chassis, 32 PHBs per chassis = 256 + * x3950 (PCIX): 8 chassis, 16 PHBs per chassis = 128 + */ +#define MAX_PHB_BUS_NUM 256 + +#define PHBS_PER_CALGARY 4 /* register offsets in Calgary's internal register space */ static const unsigned long tar_offsets[] = { @@ -1051,8 +1056,6 @@ static int __init calgary_init_one(struct pci_dev *dev) struct iommu_table *tbl; int ret; - BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); - bbar = busno_to_bbar(dev->bus->number); ret = calgary_setup_tar(dev, bbar); if (ret) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e7e35219b32f..787572d43d9c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -371,7 +371,7 @@ static inline int hlt_use_halt(void) void default_idle(void) { if (hlt_use_halt()) { - trace_power_start(POWER_CSTATE, 1); + trace_power_start(POWER_CSTATE, 1, smp_processor_id()); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { - trace_power_start(POWER_CSTATE, (ax>>4)+1); + trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); if (!need_resched()) { if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -457,7 +457,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) static void mwait_idle(void) { if (!need_resched()) { - trace_power_start(POWER_CSTATE, 1); + trace_power_start(POWER_CSTATE, 1, smp_processor_id()); if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -478,7 +478,7 @@ static void mwait_idle(void) */ static void poll_idle(void) { - trace_power_start(POWER_CSTATE, 0); + trace_power_start(POWER_CSTATE, 0, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8d128783af47..96586c3cbbbf 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -57,6 +57,8 @@ #include <asm/syscalls.h> #include <asm/debugreg.h> +#include <trace/events/power.h> + asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); /* @@ -111,6 +113,8 @@ void cpu_idle(void) stop_critical_timings(); pm_idle(); start_critical_timings(); + + trace_power_end(smp_processor_id()); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3c2422a99f1f..3d9ea531ddd1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -51,6 +51,8 @@ #include <asm/syscalls.h> #include <asm/debugreg.h> +#include <trace/events/power.h> + asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(unsigned long, old_rsp); @@ -138,6 +140,9 @@ void cpu_idle(void) stop_critical_timings(); pm_idle(); start_critical_timings(); + + trace_power_end(smp_processor_id()); + /* In many cases the interrupt that ended idle has already called exit_idle. But some idle loops can be woken up without interrupt. */ diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index e72d3fc6547d..939b9e98245f 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -498,15 +498,10 @@ void force_hpet_resume(void) * See erratum #27 (Misinterpreted MSI Requests May Result in * Corrupted LPC DMA Data) in AMD Publication #46837, * "SB700 Family Product Errata", Rev. 1.0, March 2010. - * - * Also force the read back of the CMP register in hpet_next_event() - * to work around the problem that the CMP register write seems to be - * delayed. See hpet_next_event() for details. */ static void force_disable_hpet_msi(struct pci_dev *unused) { hpet_msi_disable = 1; - hpet_readback_cmp = 1; } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 8e1aac86b50c..e3af342fe83a 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -228,6 +228,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"), }, }, + { /* Handle problems with rebooting on Dell T7400's */ + .callback = set_bios_reboot, + .ident = "Dell Precision T7400", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T7400"), + }, + }, { /* Handle problems with rebooting on HP laptops */ .callback = set_bios_reboot, .ident = "HP Compaq Laptop", diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index a867940a6dfc..a60df9ae6454 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -21,12 +21,6 @@ #include <asm/cpu.h> #include <asm/stackprotector.h> -#ifdef CONFIG_DEBUG_PER_CPU_MAPS -# define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__) -#else -# define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0) -#endif - DEFINE_PER_CPU(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); @@ -244,10 +238,19 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = early_per_cpu_map(x86_cpu_to_node_map, cpu); + /* + * Ensure that the boot cpu numa_node is correct when the boot + * cpu is on a node that doesn't have memory installed. + * Also cpu_up() will call cpu_to_node() for APs when + * MEMORY_HOTPLUG is defined, before per_cpu(numa_node) is set + * up later with c_init aka intel_init/amd_init. + * So set them all (boot cpu and all APs). + */ + set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); #endif #endif /* - * Up to this point, the boot CPU has been using .data.init + * Up to this point, the boot CPU has been using .init.data * area. Reload any changed state for the boot CPU. */ if (cpu == boot_cpu_id) @@ -263,14 +266,6 @@ void __init setup_per_cpu_areas(void) early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif -#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) - /* - * make sure boot cpu numa_node is right, when boot cpu is on the - * node that doesn't have mem installed - */ - set_cpu_numa_node(boot_cpu_id, early_cpu_to_node(boot_cpu_id)); -#endif - /* Setup node to cpumask map */ setup_node_to_cpumask_map(); diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c index 7ded57896c0a..cb22acf3ed09 100644 --- a/arch/x86/kernel/sfi.c +++ b/arch/x86/kernel/sfi.c @@ -93,7 +93,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table) pentry = (struct sfi_apic_table_entry *)sb->pentry; for (i = 0; i < num; i++) { - mp_register_ioapic(i, pentry->phys_addr, gsi_end + 1); + mp_register_ioapic(i, pentry->phys_addr, gsi_top); pentry++; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 37462f1ddba5..c4f33b2e77d6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -686,7 +686,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work) static void __cpuinit announce_cpu(int cpu, int apicid) { static int current_node = -1; - int node = cpu_to_node(cpu); + int node = early_cpu_to_node(cpu); if (system_state == SYSTEM_BOOTING) { if (node != current_node) { diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 922eefbb3f6c..b53c525368a7 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name) return 0; } -static void save_stack_address(void *data, unsigned long addr, int reliable) +static void +__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched) { struct stack_trace *trace = data; +#ifdef CONFIG_FRAME_POINTER if (!reliable) return; +#endif + if (nosched && in_sched_functions(addr)) + return; if (trace->skip > 0) { trace->skip--; return; @@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable) trace->entries[trace->nr_entries++] = addr; } +static void save_stack_address(void *data, unsigned long addr, int reliable) +{ + return __save_stack_address(data, addr, reliable, false); +} + static void save_stack_address_nosched(void *data, unsigned long addr, int reliable) { - struct stack_trace *trace = (struct stack_trace *)data; - if (!reliable) - return; - if (in_sched_functions(addr)) - return; - if (trace->skip > 0) { - trace->skip--; - return; - } - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = addr; + return __save_stack_address(data, addr, reliable, true); } static const struct stacktrace_ops save_stack_ops = { @@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk); /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ -struct stack_frame { +struct stack_frame_user { const void __user *next_fp; unsigned long ret_addr; }; -static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +static int +copy_stack_frame(const void __user *fp, struct stack_frame_user *frame) { int ret; @@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace) trace->entries[trace->nr_entries++] = regs->ip; while (trace->nr_entries < trace->max_entries) { - struct stack_frame frame; + struct stack_frame_user frame; frame.next_fp = NULL; frame.ret_addr = 0; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 142d70c74b02..725ef4d17cd5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -526,6 +526,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; + int user_icebp = 0; unsigned long dr6; int si_code; @@ -534,6 +535,14 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Filter out all the reserved bits which are preset to 1 */ dr6 &= ~DR6_RESERVED; + /* + * If dr6 has no reason to give us about the origin of this trap, + * then it's very likely the result of an icebp/int01 trap. + * User wants a sigtrap for that. + */ + if (!dr6 && user_mode(regs)) + user_icebp = 1; + /* Catch kmemcheck conditions first of all! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) return; @@ -575,7 +584,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) regs->flags &= ~X86_EFLAGS_TF; } si_code = get_si_code(tsk->thread.debugreg6); - if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) + if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 2cc249718c46..d0bb52296fa3 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -97,7 +97,7 @@ SECTIONS HEAD_TEXT #ifdef CONFIG_X86_32 . = ALIGN(PAGE_SIZE); - *(.text.page_aligned) + *(.text..page_aligned) #endif . = ALIGN(8); _stext = .; @@ -305,7 +305,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .bss : AT(ADDR(.bss) - LOAD_OFFSET) { __bss_start = .; - *(.bss.page_aligned) + *(.bss..page_aligned) *(.bss) . = ALIGN(4); __bss_stop = .; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 61a1e8c7e19f..cd6da6bf3eca 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -5,6 +5,7 @@ */ #include <linux/init.h> #include <linux/ioport.h> +#include <linux/module.h> #include <asm/bios_ebda.h> #include <asm/paravirt.h> @@ -85,6 +86,7 @@ struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { }; static void default_nmi_init(void) { }; +static int default_i8042_detect(void) { return 1; }; struct x86_platform_ops x86_platform = { .calibrate_tsc = native_calibrate_tsc, @@ -92,5 +94,8 @@ struct x86_platform_ops x86_platform = { .set_wallclock = mach_set_rtc_mmss, .iommu_shutdown = iommu_shutdown_noop, .is_untracked_pat_range = is_ISA_range, - .nmi_init = default_nmi_init + .nmi_init = default_nmi_init, + .i8042_detect = default_i8042_detect }; + +EXPORT_SYMBOL_GPL(x86_platform); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 81563e76e28f..b1ed0a1a5913 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1815,6 +1815,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= PT_WRITABLE_MASK; + if (!tdp_enabled && !(pte_access & ACC_WRITE_MASK)) + spte &= ~PT_USER_MASK; + /* * Optimization: for pte sync, if spte was writable the hash * lookup is unnecessary (and expensive). Write protection @@ -1870,10 +1873,14 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, child = page_header(pte & PT64_BASE_ADDR_MASK); mmu_page_remove_parent_pte(child, sptep); + __set_spte(sptep, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(vcpu->kvm); } else if (pfn != spte_to_pfn(*sptep)) { pgprintk("hfn old %lx new %lx\n", spte_to_pfn(*sptep), pfn); rmap_remove(vcpu->kvm, sptep); + __set_spte(sptep, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(vcpu->kvm); } else was_rmapped = 1; } @@ -2919,7 +2926,7 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) return kvm_mmu_zap_page(kvm, page) + 1; } -static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) +static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { struct kvm *kvm; struct kvm *kvm_freed = NULL; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 89d66ca4d87c..2331bdc2b549 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -342,6 +342,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, /* advance table_gfn when emulating 1gb pages with 4k */ if (delta == 0) table_gfn += PT_INDEX(addr, level); + access &= gw->pte_access; } else { direct = 0; table_gfn = gw->table_gfn[level - 2]; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 96dc232bfc56..ce438e0fdd26 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -28,6 +28,7 @@ #include <linux/ftrace_event.h> #include <linux/slab.h> +#include <asm/tlbflush.h> #include <asm/desc.h> #include <asm/virtext.h> @@ -56,6 +57,8 @@ MODULE_LICENSE("GPL"); #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) +static bool erratum_383_found __read_mostly; + static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, @@ -374,6 +377,31 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, svm->vmcb->control.event_inj_err = error_code; } +static void svm_init_erratum_383(void) +{ + u32 low, high; + int err; + u64 val; + + /* Only Fam10h is affected */ + if (boot_cpu_data.x86 != 0x10) + return; + + /* Use _safe variants to not break nested virtualization */ + val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err); + if (err) + return; + + val |= (1ULL << 47); + + low = lower_32_bits(val); + high = upper_32_bits(val); + + native_write_msr_safe(MSR_AMD64_DC_CFG, low, high); + + erratum_383_found = true; +} + static int has_svm(void) { const char *msg; @@ -429,6 +457,8 @@ static int svm_hardware_enable(void *garbage) wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); + svm_init_erratum_383(); + return 0; } @@ -1410,8 +1440,59 @@ static int nm_interception(struct vcpu_svm *svm) return 1; } -static int mc_interception(struct vcpu_svm *svm) +static bool is_erratum_383(void) { + int err, i; + u64 value; + + if (!erratum_383_found) + return false; + + value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err); + if (err) + return false; + + /* Bit 62 may or may not be set for this mce */ + value &= ~(1ULL << 62); + + if (value != 0xb600000000010015ULL) + return false; + + /* Clear MCi_STATUS registers */ + for (i = 0; i < 6; ++i) + native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0); + + value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err); + if (!err) { + u32 low, high; + + value &= ~(1ULL << 2); + low = lower_32_bits(value); + high = upper_32_bits(value); + + native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high); + } + + /* Flush tlb to evict multi-match entries */ + __flush_tlb_all(); + + return true; +} + +static void svm_handle_mce(struct vcpu_svm *svm) +{ + if (is_erratum_383()) { + /* + * Erratum 383 triggered. Guest state is corrupt so kill the + * guest. + */ + pr_err("KVM: Guest triggered AMD Erratum 383\n"); + + set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests); + + return; + } + /* * On an #MC intercept the MCE handler is not called automatically in * the host. So do it by hand here. @@ -1420,6 +1501,11 @@ static int mc_interception(struct vcpu_svm *svm) "int $0x12\n"); /* not sure if we ever come back to this point */ + return; +} + +static int mc_interception(struct vcpu_svm *svm) +{ return 1; } @@ -3088,6 +3174,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR); } + + /* + * We need to handle MC intercepts here before the vcpu has a chance to + * change the physical cpu + */ + if (unlikely(svm->vmcb->control.exit_code == + SVM_EXIT_EXCP_BASE + MC_VECTOR)) + svm_handle_mce(svm); } #undef R diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 859a01a07dbf..ee03679efe78 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1744,18 +1744,15 @@ static void enter_lmode(struct kvm_vcpu *vcpu) (guest_tr_ar & ~AR_TYPE_MASK) | AR_TYPE_BUSY_64_TSS); } - vcpu->arch.efer |= EFER_LMA; - vmx_set_efer(vcpu, vcpu->arch.efer); + vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); } static void exit_lmode(struct kvm_vcpu *vcpu) { - vcpu->arch.efer &= ~EFER_LMA; - vmcs_write32(VM_ENTRY_CONTROLS, vmcs_read32(VM_ENTRY_CONTROLS) & ~VM_ENTRY_IA32E_MODE); - vmx_set_efer(vcpu, vcpu->arch.efer); + vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); } #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 05d571f6f196..7fa89c39c64f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1562,7 +1562,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, r = -ENOMEM; size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; - entries = vmalloc(size); + entries = kmalloc(size, GFP_KERNEL); if (!entries) goto out; @@ -1581,7 +1581,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, r = n; out_free: - vfree(entries); + kfree(entries); out: return r; } diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 550df481accd..787c52ca49c3 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -3,12 +3,6 @@ #include <linux/module.h> #include <linux/bootmem.h> -#ifdef CONFIG_DEBUG_PER_CPU_MAPS -# define DBG(x...) printk(KERN_DEBUG x) -#else -# define DBG(x...) -#endif - /* * Which logical CPUs are on which nodes */ diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index acc15b23b743..64121a18b8cb 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -302,7 +302,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, return -EINVAL; } - new = kmalloc(sizeof(struct memtype), GFP_KERNEL); + new = kzalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) return -ENOMEM; diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index f537087bb740..8acaddd0fb21 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -34,8 +34,7 @@ * memtype_lock protects the rbtree. */ -static void memtype_rb_augment_cb(struct rb_node *node); -static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb); +static struct rb_root memtype_rbroot = RB_ROOT; static int is_node_overlap(struct memtype *node, u64 start, u64 end) { @@ -56,7 +55,7 @@ static u64 get_subtree_max_end(struct rb_node *node) } /* Update 'subtree_max_end' for a node, based on node and its children */ -static void update_node_max_end(struct rb_node *node) +static void memtype_rb_augment_cb(struct rb_node *node, void *__unused) { struct memtype *data; u64 max_end, child_max_end; @@ -78,25 +77,6 @@ static void update_node_max_end(struct rb_node *node) data->subtree_max_end = max_end; } -/* Update 'subtree_max_end' for a node and all its ancestors */ -static void update_path_max_end(struct rb_node *node) -{ - u64 old_max_end, new_max_end; - - while (node) { - struct memtype *data = container_of(node, struct memtype, rb); - - old_max_end = data->subtree_max_end; - update_node_max_end(node); - new_max_end = data->subtree_max_end; - - if (new_max_end == old_max_end) - break; - - node = rb_parent(node); - } -} - /* Find the first (lowest start addr) overlapping range from rb tree */ static struct memtype *memtype_rb_lowest_match(struct rb_root *root, u64 start, u64 end) @@ -190,12 +170,6 @@ failure: return -EBUSY; } -static void memtype_rb_augment_cb(struct rb_node *node) -{ - if (node) - update_path_max_end(node); -} - static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) { struct rb_node **node = &(root->rb_node); @@ -213,6 +187,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) rb_link_node(&newdata->rb, parent, node); rb_insert_color(&newdata->rb, root); + rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL); } int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) @@ -226,6 +201,7 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) if (ret_type) new->type = *ret_type; + new->subtree_max_end = new->end; memtype_rb_insert(&memtype_rbroot, new); } return err; @@ -233,13 +209,16 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) struct memtype *rbt_memtype_erase(u64 start, u64 end) { + struct rb_node *deepest; struct memtype *data; data = memtype_rb_exact_match(&memtype_rbroot, start, end); if (!data) goto out; + deepest = rb_augment_erase_begin(&data->rb); rb_erase(&data->rb, &memtype_rbroot); + rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL); out: return data; } diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index 308e32570d84..38e6d174c497 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c @@ -40,16 +40,16 @@ static unsigned char prefix_codes[] = { static unsigned int reg_rop[] = { 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F }; -static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB }; static unsigned int imm_wop[] = { 0xC6, 0xC7 }; /* IA32 Manual 3, 3-432*/ -static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 }; +static unsigned int rw8[] = { 0x88, 0x8A, 0xC6, 0xAA }; static unsigned int rw32[] = { - 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F + 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB }; -static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F }; +static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F, 0xAA }; static unsigned int mw16[] = { 0xB70F, 0xBF0F }; -static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 }; +static unsigned int mw32[] = { 0x89, 0x8B, 0xC7, 0xAB }; static unsigned int mw64[] = {}; #else /* not __i386__ */ static unsigned char prefix_codes[] = { @@ -63,20 +63,20 @@ static unsigned char prefix_codes[] = { static unsigned int reg_rop[] = { 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F }; -static unsigned int reg_wop[] = { 0x88, 0x89 }; +static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB }; static unsigned int imm_wop[] = { 0xC6, 0xC7 }; -static unsigned int rw8[] = { 0xC6, 0x88, 0x8A }; +static unsigned int rw8[] = { 0xC6, 0x88, 0x8A, 0xAA }; static unsigned int rw32[] = { - 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F + 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB }; /* 8 bit only */ -static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F }; +static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F, 0xAA }; /* 16 bit only */ static unsigned int mw16[] = { 0xB70F, 0xBF0F }; /* 16 or 32 bit */ static unsigned int mw32[] = { 0xC7 }; /* 16, 32 or 64 bit */ -static unsigned int mw64[] = { 0x89, 0x8B }; +static unsigned int mw64[] = { 0x89, 0x8B, 0xAB }; #endif /* not __i386__ */ struct prefix_bits { @@ -410,7 +410,6 @@ static unsigned long *get_reg_w32(int no, struct pt_regs *regs) unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) { unsigned int opcode; - unsigned char mod_rm; int reg; unsigned char *p; struct prefix_bits prf; @@ -437,8 +436,13 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) goto err; do_work: - mod_rm = *p; - reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3); + /* for STOS, source register is fixed */ + if (opcode == 0xAA || opcode == 0xAB) { + reg = arg_AX; + } else { + unsigned char mod_rm = *p; + reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3); + } switch (get_ins_reg_width(ins_addr)) { case 1: return *get_reg_w8(reg, prf.rex, regs); diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 97da2ba9344b..55253095be84 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -96,6 +96,7 @@ EXPORT_SYMBOL(pcibios_align_resource); * the fact the PCI specs explicitly allow address decoders to be * shared between expansion ROMs and other resource regions, it's * at least dangerous) + * - bad resource sizes or overlaps with other regions * * Our solution: * (1) Allocate resources for all buses behind PCI-to-PCI bridges. @@ -136,6 +137,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) * child resource allocations in this * range. */ + r->start = r->end = 0; r->flags = 0; } } @@ -182,6 +184,7 @@ static void __init pcibios_allocate_resources(int pass) idx, r, disabled, pass); if (pci_claim_resource(dev, idx) < 0) { /* We'll assign a new address later */ + dev->fw_addr[idx] = r->start; r->end -= r->start; r->start = 0; } diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 0db5eaf54560..8d460eaf524f 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -11,28 +11,14 @@ */ static void __devinit pcibios_fixup_peer_bridges(void) { - int n, devfn; - long node; + int n; if (pcibios_last_bus <= 0 || pcibios_last_bus > 0xff) return; DBG("PCI: Peer bridge fixup\n"); - for (n=0; n <= pcibios_last_bus; n++) { - u32 l; - if (pci_find_bus(0, n)) - continue; - node = get_mp_bus_to_node(n); - for (devfn = 0; devfn < 256; devfn += 8) { - if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && - l != 0x0000 && l != 0xffff) { - DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); - printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); - pci_scan_bus_on_node(n, &pci_root_ops, node); - break; - } - } - } + for (n=0; n <= pcibios_last_bus; n++) + pcibios_scan_specific_bus(n); } int __init pci_legacy_init(void) @@ -50,6 +36,28 @@ int __init pci_legacy_init(void) return 0; } +void pcibios_scan_specific_bus(int busn) +{ + int devfn; + long node; + u32 l; + + if (pci_find_bus(0, busn)) + return; + + node = get_mp_bus_to_node(busn); + for (devfn = 0; devfn < 256; devfn += 8) { + if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) && + l != 0x0000 && l != 0xffff) { + DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l); + printk(KERN_INFO "PCI: Discovered peer bus %02x\n", busn); + pci_scan_bus_on_node(busn, &pci_root_ops, node); + return; + } + } +} +EXPORT_SYMBOL_GPL(pcibios_scan_specific_bus); + int __init pci_subsys_init(void) { /* diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index 7ef3a2735df3..cb29191cee58 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c @@ -66,8 +66,9 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) devfn, pos, 4, &pcie_cap)) return 0; - if (pcie_cap == 0xffffffff) - return 0; + if (PCI_EXT_CAP_ID(pcie_cap) == 0x0000 || + PCI_EXT_CAP_ID(pcie_cap) == 0xffff) + break; if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) { raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, @@ -76,7 +77,7 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) return pos; } - pos = pcie_cap >> 20; + pos = PCI_EXT_CAP_NEXT(pcie_cap); } return 0; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 0a979f3e5b8a..1290ba54b350 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -105,6 +105,8 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr4 = read_cr4(); ctxt->cr8 = read_cr8(); #endif + ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE, + &ctxt->misc_enable); } /* Needed by apm.c */ @@ -152,6 +154,8 @@ static void fix_processor_context(void) */ static void __restore_processor_state(struct saved_context *ctxt) { + if (ctxt->misc_enable_saved) + wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable); /* * control registers */ diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 987267f79bf5..a9c661108034 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -60,6 +60,6 @@ static void xen_vcpu_notify_restore(void *data) void xen_arch_resume(void) { - smp_call_function(xen_vcpu_notify_restore, - (void *)CLOCK_EVT_NOTIFY_RESUME, 1); + on_each_cpu(xen_vcpu_notify_restore, + (void *)CLOCK_EVT_NOTIFY_RESUME, 1); } |