Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/.gitignore | 3
-rw-r--r-- arch/x86/boot/compressed/mkpiggy.c | 2
-rw-r--r-- arch/x86/boot/compressed/vmlinux.lds.S | 4
-rw-r--r-- arch/x86/boot/video-vga.c | 3
-rw-r--r-- arch/x86/include/asm/cache.h | 2
-rw-r--r-- arch/x86/include/asm/hw_breakpoint.h | 2
-rw-r--r-- arch/x86/include/asm/io_apic.h | 4
-rw-r--r-- arch/x86/include/asm/local64.h | 1
-rw-r--r-- arch/x86/include/asm/msr-index.h | 1
-rw-r--r-- arch/x86/include/asm/pci_x86.h | 2
-rw-r--r-- arch/x86/include/asm/percpu.h | 2
-rw-r--r-- arch/x86/include/asm/perf_event.h | 18
-rw-r--r-- arch/x86/include/asm/perf_event_p4.h | 99
-rw-r--r-- arch/x86/include/asm/pgtable_32_types.h | 2
-rw-r--r-- arch/x86/include/asm/stacktrace.h | 49
-rw-r--r-- arch/x86/include/asm/suspend_32.h | 2
-rw-r--r-- arch/x86/include/asm/suspend_64.h | 2
-rw-r--r-- arch/x86/include/asm/system.h | 2
-rw-r--r-- arch/x86/include/asm/x86_init.h | 2
-rw-r--r-- arch/x86/kernel/acpi/boot.c | 8
-rw-r--r-- arch/x86/kernel/acpi/cstate.c | 9
-rw-r--r-- arch/x86/kernel/acpi/sleep.c | 9
-rw-r--r-- arch/x86/kernel/acpi/wakeup_32.S | 2
-rw-r--r-- arch/x86/kernel/amd_iommu.c | 16
-rw-r--r-- arch/x86/kernel/amd_iommu_init.c | 20
-rw-r--r-- arch/x86/kernel/apic/apic.c | 2
-rw-r--r-- arch/x86/kernel/apic/io_apic.c | 12
-rw-r--r-- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 3
-rw-r--r-- arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | 41
-rw-r--r-- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 11
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c | 10
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c | 62
-rw-r--r-- arch/x86/kernel/cpu/perf_event_amd.c | 4
-rw-r--r-- arch/x86/kernel/cpu/perf_event_intel.c | 1
-rw-r--r-- arch/x86/kernel/cpu/perf_event_p4.c | 156
-rw-r--r-- arch/x86/kernel/dumpstack.c | 1
-rw-r--r-- arch/x86/kernel/dumpstack.h | 56
-rw-r--r-- arch/x86/kernel/dumpstack_32.c | 2
-rw-r--r-- arch/x86/kernel/dumpstack_64.c | 1
-rw-r--r-- arch/x86/kernel/e820.c | 2
-rw-r--r-- arch/x86/kernel/early-quirks.c | 18
-rw-r--r-- arch/x86/kernel/entry_64.S | 4
-rw-r--r-- arch/x86/kernel/hpet.c | 2
-rw-r--r-- arch/x86/kernel/hw_breakpoint.c | 51
-rw-r--r-- arch/x86/kernel/i8259.c | 25
-rw-r--r-- arch/x86/kernel/init_task.c | 2
-rw-r--r-- arch/x86/kernel/kgdb.c | 9
-rw-r--r-- arch/x86/kernel/kprobes.c | 35
-rw-r--r-- arch/x86/kernel/mpparse.c | 2
-rw-r--r-- arch/x86/kernel/mrst.c | 7
-rw-r--r-- arch/x86/kernel/pci-calgary_64.c | 17
-rw-r--r-- arch/x86/kernel/process.c | 8
-rw-r--r-- arch/x86/kernel/process_32.c | 4
-rw-r--r-- arch/x86/kernel/process_64.c | 5
-rw-r--r-- arch/x86/kernel/quirks.c | 5
-rw-r--r-- arch/x86/kernel/reboot.c | 8
-rw-r--r-- arch/x86/kernel/setup_percpu.c | 25
-rw-r--r-- arch/x86/kernel/sfi.c | 2
-rw-r--r-- arch/x86/kernel/smpboot.c | 2
-rw-r--r-- arch/x86/kernel/stacktrace.c | 31
-rw-r--r-- arch/x86/kernel/traps.c | 11
-rw-r--r-- arch/x86/kernel/vmlinux.lds.S | 4
-rw-r--r-- arch/x86/kernel/x86_init.c | 7
-rw-r--r-- arch/x86/kvm/mmu.c | 9
-rw-r--r-- arch/x86/kvm/paging_tmpl.h | 1
-rw-r--r-- arch/x86/kvm/svm.c | 96
-rw-r--r-- arch/x86/kvm/vmx.c | 7
-rw-r--r-- arch/x86/kvm/x86.c | 4
-rw-r--r-- arch/x86/mm/numa.c | 6
-rw-r--r-- arch/x86/mm/pat.c | 2
-rw-r--r-- arch/x86/mm/pat_rbtree.c | 35
-rw-r--r-- arch/x86/mm/pf_in.c | 30
-rw-r--r-- arch/x86/pci/i386.c | 3
-rw-r--r-- arch/x86/pci/legacy.c | 42
-rw-r--r-- arch/x86/pci/mrst.c | 7
-rw-r--r-- arch/x86/power/cpu.c | 4
-rw-r--r-- arch/x86/xen/suspend.c | 4
77 files changed, 734 insertions, 430 deletions
diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore
new file mode 100644
index 000000000000..028079065af6
--- /dev/null
+++ b/arch/x86/.gitignore
@@ -0,0 +1,3 @@
+boot/compressed/vmlinux
+tools/test_get_len
+
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index bcbd36c41432..5c228129d175 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -77,7 +77,7 @@ int main(int argc, char *argv[])
offs += 32*1024 + 18; /* Add 32K + 18 bytes slack */
offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
- printf(".section \".rodata.compressed\",\"a\",@progbits\n");
+ printf(".section \".rodata..compressed\",\"a\",@progbits\n");
printf(".globl z_input_len\n");
printf("z_input_len = %lu\n", ilen);
printf(".globl z_output_len\n");
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index a6f1a59a5b0c..5ddabceee124 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -26,8 +26,8 @@ SECTIONS
HEAD_TEXT
_ehead = . ;
}
- .rodata.compressed : {
- *(.rodata.compressed)
+ .rodata..compressed : {
+ *(.rodata..compressed)
}
.text : {
_text = .; /* Text */
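
The single-dot to double-dot rename here (and in mkpiggy.c above) keeps kernel-private section names out of the namespace gcc uses for per-symbol sections. A hedged standalone sketch of the clash being avoided (file and symbol names hypothetical):

        /* collide.c -- compile with: gcc -c -fdata-sections collide.c */
        const char compressed[] = "payload";    /* gcc emits this into ".rodata.compressed" */

With -fdata-sections the compiler generates ".rodata.<symbol>" names, so a symbol called "compressed" would have landed in the old ".rodata.compressed" section; a double-dot name like ".rodata..compressed" can never be compiler-generated.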
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index ed7aeff786b2..45bc9402aa49 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -41,13 +41,12 @@ static __videocard video_vga;
static u8 vga_set_basic_mode(void)
{
struct biosregs ireg, oreg;
- u16 ax;
u8 mode;
initregs(&ireg);
/* Query current mode */
- ax = 0x0f00;
+ ireg.ax = 0x0f00;
intcall(0x10, &ireg, &oreg);
mode = oreg.al;
diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h
index 2f9047cfaaca..48f99f15452e 100644
--- a/arch/x86/include/asm/cache.h
+++ b/arch/x86/include/asm/cache.h
@@ -7,7 +7,7 @@
#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
-#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
#define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT
#define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT)
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 942255310e6a..528a11e8d3e3 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -20,10 +20,10 @@ struct arch_hw_breakpoint {
#include <linux/list.h>
/* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_X 0x00
#define X86_BREAKPOINT_LEN_1 0x40
#define X86_BREAKPOINT_LEN_2 0x44
#define X86_BREAKPOINT_LEN_4 0x4c
-#define X86_BREAKPOINT_LEN_EXECUTE 0x40
#ifdef CONFIG_X86_64
#define X86_BREAKPOINT_LEN_8 0x48
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 63cb4096c3dc..9cb2edb87c2f 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -183,7 +183,7 @@ struct mp_ioapic_gsi{
u32 gsi_end;
};
extern struct mp_ioapic_gsi mp_gsi_routing[];
-extern u32 gsi_end;
+extern u32 gsi_top;
int mp_find_ioapic(u32 gsi);
int mp_find_ioapic_pin(int ioapic, u32 gsi);
void __init mp_register_ioapic(int id, u32 address, u32 gsi_base);
@@ -197,7 +197,7 @@ static const int timer_through_8259 = 0;
static inline void ioapic_init_mappings(void) { }
static inline void ioapic_insert_resources(void) { }
static inline void probe_nr_irqs_gsi(void) { }
-#define gsi_end (NR_IRQS_LEGACY - 1)
+#define gsi_top (NR_IRQS_LEGACY)
static inline int mp_find_ioapic(u32 gsi) { return 0; }
struct io_apic_irq_attr;
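
Renaming the inclusive gsi_end to the exclusive gsi_top (one past the highest GSI) removes the scattered "+ 1" corrections from the mapping code further down. A minimal sketch of the resulting legacy-IRQ remap, assuming the declarations above:

        /* hypothetical helper mirroring gsi_to_irq() in acpi/boot.c */
        static unsigned int gsi_to_irq_sketch(unsigned int gsi)
        {
                /* legacy GSIs (< NR_IRQS_LEGACY) are remapped above the I/O APIC range */
                return gsi >= NR_IRQS_LEGACY ? gsi : gsi_top + gsi;
        }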
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/x86/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b49d8ca228f6..8c7ae4318629 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -110,6 +110,7 @@
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
+#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 8d8797eae5d7..cd2a31dc5fb8 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -53,6 +53,8 @@ extern int pcibios_last_bus;
extern struct pci_bus *pci_root_bus;
extern struct pci_ops pci_root_ops;
+void pcibios_scan_specific_bus(int busn);
+
/* pci-irq.c */
struct irq_info {
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0797e748d280..cd28f9ad910d 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -77,6 +77,7 @@ do { \
if (0) { \
pto_T__ pto_tmp__; \
pto_tmp__ = (val); \
+ (void)pto_tmp__; \
} \
switch (sizeof(var)) { \
case 1: \
@@ -115,6 +116,7 @@ do { \
if (0) { \
pao_T__ pao_tmp__; \
pao_tmp__ = (val); \
+ (void)pao_tmp__; \
} \
switch (sizeof(var)) { \
case 1: \
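
The added (void) casts suppress gcc's "variable set but not used" warning while preserving the if (0) type check. A minimal standalone sketch of the idiom (macro name hypothetical):

        #define CHECK_ASSIGN_TYPE(var, val)                             \
        do {                                                            \
                if (0) {                                                \
                        typeof(var) tmp__;                              \
                        tmp__ = (val);  /* fails to compile on a type mismatch */ \
                        (void)tmp__;    /* dead store; warning silenced */ \
                }                                                       \
        } while (0)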
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 254883d0c7e0..6e742cc4251b 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -68,8 +68,9 @@ union cpuid10_eax {
union cpuid10_edx {
struct {
- unsigned int num_counters_fixed:4;
- unsigned int reserved:28;
+ unsigned int num_counters_fixed:5;
+ unsigned int bit_width_fixed:8;
+ unsigned int reserved:19;
} split;
unsigned int full;
};
@@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
+#include <asm/stacktrace.h>
+
+/*
+ * We abuse bit 3 of flags to pass exact information; see perf_misc_flags
+ * and the comment with PERF_EFLAGS_EXACT.
+ */
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->ip = (__ip); \
+ (regs)->bp = caller_frame_pointer(); \
+ (regs)->cs = __KERNEL_CS; \
+ regs->flags = 0; \
+}
+
#else
static inline void init_hw_perf_events(void) { }
static inline void perf_events_lapic_init(void) { }
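
The widened cpuid10_edx bitfields follow CPUID leaf 0xA, which reports the number (bits 4:0) and width (bits 12:5) of fixed-function counters in EDX. A hedged userspace sketch of reading them with gcc's <cpuid.h> (not part of this patch):

        #include <cpuid.h>
        #include <stdio.h>

        int main(void)
        {
                unsigned int a, b, c, d;

                if (__get_cpuid(0xa, &a, &b, &c, &d))
                        printf("fixed counters: %u, width: %u bits\n",
                               d & 0x1f, (d >> 5) & 0xff);
                return 0;
        }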
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 64a8ebff06fc..def500776b16 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -19,7 +19,6 @@
#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */
#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
#define ARCH_P4_MAX_CCCR (18)
-#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2)
#define P4_ESCR_EVENT_MASK 0x7e000000U
#define P4_ESCR_EVENT_SHIFT 25
@@ -71,10 +70,6 @@
#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
-/* Custom bits in reerved CCCR area */
-#define P4_CCCR_CACHE_OPS_MASK 0x0000003fU
-
-
/* Non HT mask */
#define P4_CCCR_MASK \
(P4_CCCR_OVF | \
@@ -106,8 +101,7 @@
* ESCR and CCCR but rather only a packed value should
* be unpacked and written to the proper addresses
*
- * the base idea is to pack as much info as
- * possible
+ * the base idea is to pack as much info as possible
*/
#define p4_config_pack_escr(v) (((u64)(v)) << 32)
#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL)
@@ -130,8 +124,6 @@
t; \
})
-#define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK)
-
#define P4_CONFIG_HT_SHIFT 63
#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
@@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr)
return escr;
}
+/*
+ * These are the events which should be used in the "Event Select"
+ * field of the ESCR register; they are like unique keys which allow
+ * the kernel to determine which CCCR and COUNTER should be
+ * used to track an event
+ */
enum P4_EVENTS {
P4_EVENT_TC_DELIVER_MODE,
P4_EVENT_BPU_FETCH_REQUEST,
@@ -561,7 +559,7 @@ enum P4_EVENT_OPCODES {
* a caller should use P4_ESCR_EMASK_NAME helper to
* pick the EventMask needed, for example
*
- * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD)
+ * P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)
*/
enum P4_ESCR_EMASKS {
P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0),
@@ -753,43 +751,50 @@ enum P4_ESCR_EMASKS {
P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
};
-/* P4 PEBS: stale for a while */
-#define P4_PEBS_METRIC_MASK 0x00001fffU
-#define P4_PEBS_UOB_TAG 0x01000000U
-#define P4_PEBS_ENABLE 0x02000000U
-
-/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */
-#define P4_PEBS__1stl_cache_load_miss_retired 0x3000001
-#define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002
-#define P4_PEBS__dtlb_load_miss_retired 0x3000004
-#define P4_PEBS__dtlb_store_miss_retired 0x3000004
-#define P4_PEBS__dtlb_all_miss_retired 0x3000004
-#define P4_PEBS__tagged_mispred_branch 0x3018000
-#define P4_PEBS__mob_load_replay_retired 0x3000200
-#define P4_PEBS__split_load_retired 0x3000400
-#define P4_PEBS__split_store_retired 0x3000400
-
-#define P4_VERT__1stl_cache_load_miss_retired 0x0000001
-#define P4_VERT__2ndl_cache_load_miss_retired 0x0000001
-#define P4_VERT__dtlb_load_miss_retired 0x0000001
-#define P4_VERT__dtlb_store_miss_retired 0x0000002
-#define P4_VERT__dtlb_all_miss_retired 0x0000003
-#define P4_VERT__tagged_mispred_branch 0x0000010
-#define P4_VERT__mob_load_replay_retired 0x0000001
-#define P4_VERT__split_load_retired 0x0000001
-#define P4_VERT__split_store_retired 0x0000002
-
-enum P4_CACHE_EVENTS {
- P4_CACHE__NONE,
-
- P4_CACHE__1stl_cache_load_miss_retired,
- P4_CACHE__2ndl_cache_load_miss_retired,
- P4_CACHE__dtlb_load_miss_retired,
- P4_CACHE__dtlb_store_miss_retired,
- P4_CACHE__itlb_reference_hit,
- P4_CACHE__itlb_reference_miss,
-
- P4_CACHE__MAX
+/*
+ * P4 PEBS specifics (Replay Event only)
+ *
+ * Format (bits):
+ * 0-6: metric from P4_PEBS_METRIC enum
+ * 7 : reserved
+ * 8 : reserved
+ * 9-11 : reserved
+ *
+ * Note we have UOP and PEBS bits reserved for now
+ * just in case we need them later
+ */
+#define P4_PEBS_CONFIG_ENABLE (1 << 7)
+#define P4_PEBS_CONFIG_UOP_TAG (1 << 8)
+#define P4_PEBS_CONFIG_METRIC_MASK 0x3f
+#define P4_PEBS_CONFIG_MASK 0xff
+
+/*
+ * mem: Only counters MSR_IQ_COUNTER4 (16) and
+ * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
+ */
+#define P4_PEBS_ENABLE 0x02000000U
+#define P4_PEBS_ENABLE_UOP_TAG 0x01000000U
+
+#define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
+#define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK)
+
+#define p4_config_pebs_has(v, mask) (p4_config_unpack_pebs(v) & (mask))
+
+enum P4_PEBS_METRIC {
+ P4_PEBS_METRIC__none,
+
+ P4_PEBS_METRIC__1stl_cache_load_miss_retired,
+ P4_PEBS_METRIC__2ndl_cache_load_miss_retired,
+ P4_PEBS_METRIC__dtlb_load_miss_retired,
+ P4_PEBS_METRIC__dtlb_store_miss_retired,
+ P4_PEBS_METRIC__dtlb_all_miss_retired,
+ P4_PEBS_METRIC__tagged_mispred_branch,
+ P4_PEBS_METRIC__mob_load_replay_retired,
+ P4_PEBS_METRIC__split_load_retired,
+ P4_PEBS_METRIC__split_store_retired,
+
+ P4_PEBS_METRIC__max
};
#endif /* PERF_EVENT_P4_H */
+
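
Under the reworked scheme the PEBS metric of a raw event travels in the low bits of the packed 64-bit config. A minimal sketch using the macros above (values illustrative, inside some function):

        u64 cfg = p4_config_pack_cccr(P4_PEBS_METRIC__dtlb_load_miss_retired);
        unsigned int metric = p4_config_unpack_metric(cfg);

        /* metric == P4_PEBS_METRIC__dtlb_load_miss_retired; the enable bit
         * stays clear, so p4_validate_raw_event() would accept this config */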
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 5e67c1532314..ed5903be26fe 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -26,7 +26,7 @@
*/
#define VMALLOC_OFFSET (8 * 1024 * 1024)
-#ifndef __ASSEMBLER__
+#ifndef __ASSEMBLY__
extern bool __vmalloc_start_set; /* set once high_memory is set */
#endif
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 4dab78edbad9..2b16a2ad23dc 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -1,6 +1,13 @@
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+
#ifndef _ASM_X86_STACKTRACE_H
#define _ASM_X86_STACKTRACE_H
+#include <linux/uaccess.h>
+
extern int kstack_depth_to_print;
struct thread_info;
@@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data);
+#ifdef CONFIG_X86_32
+#define STACKSLOTS_PER_LINE 8
+#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
+#else
+#define STACKSLOTS_PER_LINE 4
+#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
+#endif
+
+extern void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp, char *log_lvl);
+
+extern void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp, char *log_lvl);
+
+extern unsigned int code_bytes;
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+ struct stack_frame *next_frame;
+ unsigned long return_address;
+};
+
+struct stack_frame_ia32 {
+ u32 next_frame;
+ u32 return_address;
+};
+
+static inline unsigned long caller_frame_pointer(void)
+{
+ struct stack_frame *frame;
+
+ get_bp(frame);
+
+#ifdef CONFIG_FRAME_POINTER
+ frame = frame->next_frame;
+#endif
+
+ return (unsigned long)frame;
+}
+
#endif /* _ASM_X86_STACKTRACE_H */
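
caller_frame_pointer() leans on the saved-frame-pointer chain, where each frame begins with a pointer to the previous frame followed by the return address. A hedged sketch of walking that chain (real callers must bounds-check against the stack, as dump_trace() does):

        struct stack_frame *frame;

        get_bp(frame);                          /* current frame */
        while (frame && frame->return_address)  /* simplified termination test */
                frame = frame->next_frame;      /* hop to the caller's frame */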
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 48dcfa62ea07..fd921c3a6841 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -15,6 +15,8 @@ static inline int arch_prepare_suspend(void) { return 0; }
struct saved_context {
u16 es, fs, gs, ss;
unsigned long cr0, cr2, cr3, cr4;
+ u64 misc_enable;
+ bool misc_enable_saved;
struct desc_ptr gdt;
struct desc_ptr idt;
u16 ldt;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 06284f42b759..8d942afae681 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -27,6 +27,8 @@ struct saved_context {
u16 ds, es, fs, gs, ss;
unsigned long gs_base, gs_kernel_base, fs_base;
unsigned long cr0, cr2, cr3, cr4, cr8;
+ u64 misc_enable;
+ bool misc_enable_saved;
unsigned long efer;
u16 gdt_pad;
u16 gdt_limit;
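
The new fields let the suspend path preserve MSR_IA32_MISC_ENABLE across a sleep cycle; the matching arch/x86/power/cpu.c change appears in the diffstat but not above. A hedged sketch of how such fields are typically filled and replayed:

        /* at suspend: remember the MSR only if it can be read */
        ctxt->misc_enable_saved =
                !rdmsrl_safe(MSR_IA32_MISC_ENABLE, &ctxt->misc_enable);

        /* at resume: replay it only if the save succeeded */
        if (ctxt->misc_enable_saved)
                wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable);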
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index b8fe48ee2ed9..e7f4d33c55ed 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -451,7 +451,7 @@ void stop_this_cpu(void *dummy);
*
* (Could use an alternative three way for this if there was one.)
*/
-static inline void rdtsc_barrier(void)
+static __always_inline void rdtsc_barrier(void)
{
alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 519b54327d75..baa579c8e038 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -142,6 +142,7 @@ struct x86_cpuinit_ops {
* @set_wallclock: set time back to HW clock
* @is_untracked_pat_range exclude from PAT logic
* @nmi_init enable NMI on cpus
+ * @i8042_detect pre-detect if i8042 controller exists
*/
struct x86_platform_ops {
unsigned long (*calibrate_tsc)(void);
@@ -150,6 +151,7 @@ struct x86_platform_ops {
void (*iommu_shutdown)(void);
bool (*is_untracked_pat_range)(u64 start, u64 end);
void (*nmi_init)(void);
+ int (*i8042_detect)(void);
};
extern struct x86_init_ops x86_init;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 60cc4058ed5f..c05872aa3ce0 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -118,7 +118,7 @@ static unsigned int gsi_to_irq(unsigned int gsi)
if (gsi >= NR_IRQS_LEGACY)
irq = gsi;
else
- irq = gsi_end + 1 + gsi;
+ irq = gsi_top + gsi;
return irq;
}
@@ -129,10 +129,10 @@ static u32 irq_to_gsi(int irq)
if (irq < NR_IRQS_LEGACY)
gsi = isa_irq_to_gsi[irq];
- else if (irq <= gsi_end)
+ else if (irq < gsi_top)
gsi = irq;
- else if (irq <= (gsi_end + NR_IRQS_LEGACY))
- gsi = irq - gsi_end;
+ else if (irq < (gsi_top + NR_IRQS_LEGACY))
+ gsi = irq - gsi_top;
else
gsi = 0xffffffff;
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 2e837f5080fe..fb7a5f052e2b 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -145,6 +145,15 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
percpu_entry->states[cx->index].eax = cx->address;
percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK;
}
+
+ /*
+ * For _CST FFH on Intel, if GAS.access_size bit 1 is cleared,
+ * then we should skip checking BM_STS for this C-state.
+ * ref: "Intel Processor Vendor-Specific ACPI Interface Specification"
+ */
+ if ((c->x86_vendor == X86_VENDOR_INTEL) && !(reg->access_size & 0x2))
+ cx->bm_sts_skip = 1;
+
return retval;
}
EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 82e508677b91..fcc3c61fdecc 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -157,9 +157,14 @@ static int __init acpi_sleep_setup(char *str)
#ifdef CONFIG_HIBERNATION
if (strncmp(str, "s4_nohwsig", 10) == 0)
acpi_no_s4_hw_signature();
- if (strncmp(str, "s4_nonvs", 8) == 0)
- acpi_s4_no_nvs();
+ if (strncmp(str, "s4_nonvs", 8) == 0) {
+ pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, "
+ "please use acpi_sleep=nonvs instead");
+ acpi_nvs_nosave();
+ }
#endif
+ if (strncmp(str, "nonvs", 5) == 0)
+ acpi_nvs_nosave();
if (strncmp(str, "old_ordering", 12) == 0)
acpi_old_suspend_ordering();
str = strchr(str, ',');
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 8ded418b0593..13ab720573e3 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -1,4 +1,4 @@
- .section .text.page_aligned
+ .section .text..page_aligned
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page_types.h>
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index fa5a1474cd18..0d20286d78c6 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1487,6 +1487,7 @@ static int __attach_device(struct device *dev,
struct protection_domain *domain)
{
struct iommu_dev_data *dev_data, *alias_data;
+ int ret;
dev_data = get_dev_data(dev);
alias_data = get_dev_data(dev_data->alias);
@@ -1498,13 +1499,14 @@ static int __attach_device(struct device *dev,
spin_lock(&domain->lock);
/* Some sanity checks */
+ ret = -EBUSY;
if (alias_data->domain != NULL &&
alias_data->domain != domain)
- return -EBUSY;
+ goto out_unlock;
if (dev_data->domain != NULL &&
dev_data->domain != domain)
- return -EBUSY;
+ goto out_unlock;
/* Do real assignment */
if (dev_data->alias != dev) {
@@ -1520,10 +1522,14 @@ static int __attach_device(struct device *dev,
atomic_inc(&dev_data->bind);
+ ret = 0;
+
+out_unlock:
+
/* ready */
spin_unlock(&domain->lock);
- return 0;
+ return ret;
}
/*
@@ -2324,10 +2330,6 @@ int __init amd_iommu_init_dma_ops(void)
iommu_detected = 1;
swiotlb = 0;
-#ifdef CONFIG_GART_IOMMU
- gart_iommu_aperture_disabled = 1;
- gart_iommu_aperture = 0;
-#endif
/* Make the driver finally visible to the drivers */
dma_ops = &amd_iommu_dma_ops;
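
The reworked error paths in __attach_device() exist because the old code returned -EBUSY while still holding domain->lock. A minimal sketch of the single-exit pattern the fix adopts (names hypothetical):

        static DEFINE_SPINLOCK(lock);

        static int attach_sketch(bool busy)
        {
                int ret;

                spin_lock(&lock);
                ret = -EBUSY;
                if (busy)
                        goto out_unlock;        /* lock is released on every path */
                ret = 0;                        /* real work would go here */
        out_unlock:
                spin_unlock(&lock);
                return ret;
        }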
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 3bacb4d0844c..3cc63e2b8dd4 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -287,8 +287,12 @@ static u8 * __init iommu_map_mmio_space(u64 address)
{
u8 *ret;
- if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu"))
+ if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) {
+ pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n",
+ address);
+ pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n");
return NULL;
+ }
ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
if (ret != NULL)
@@ -1314,7 +1318,7 @@ static int __init amd_iommu_init(void)
ret = amd_iommu_init_dma_ops();
if (ret)
- goto free;
+ goto free_disable;
amd_iommu_init_api();
@@ -1332,9 +1336,10 @@ static int __init amd_iommu_init(void)
out:
return ret;
-free:
+free_disable:
disable_iommus();
+free:
amd_iommu_uninit_devices();
free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
@@ -1353,6 +1358,15 @@ free:
free_unity_maps();
+#ifdef CONFIG_GART_IOMMU
+ /*
+ * We failed to initialize the AMD IOMMU - try falling back to GART
+ * if possible.
+ */
+ gart_iommu_init();
+
+#endif
+
goto out;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c02cc692985c..a96489ee6cab 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -921,7 +921,7 @@ void disable_local_APIC(void)
unsigned int value;
/* APIC hasn't been mapped yet */
- if (!apic_phys)
+ if (!x2apic_mode && !apic_phys)
return;
clear_local_APIC();
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 33f3563a2a52..e41ed24ab26d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -89,8 +89,8 @@ int nr_ioapics;
/* IO APIC gsi routing info */
struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS];
-/* The last gsi number used */
-u32 gsi_end;
+/* One past the highest gsi number used */
+u32 gsi_top;
/* MP IRQ source entries */
struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
@@ -1035,7 +1035,7 @@ static int pin_2_irq(int idx, int apic, int pin)
if (gsi >= NR_IRQS_LEGACY)
irq = gsi;
else
- irq = gsi_end + 1 + gsi;
+ irq = gsi_top + gsi;
}
#ifdef CONFIG_X86_32
@@ -3853,7 +3853,7 @@ void __init probe_nr_irqs_gsi(void)
{
int nr;
- nr = gsi_end + 1 + NR_IRQS_LEGACY;
+ nr = gsi_top + NR_IRQS_LEGACY;
if (nr > nr_irqs_gsi)
nr_irqs_gsi = nr;
@@ -4294,8 +4294,8 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
*/
nr_ioapic_registers[idx] = entries;
- if (mp_gsi_routing[idx].gsi_end > gsi_end)
- gsi_end = mp_gsi_routing[idx].gsi_end;
+ if (mp_gsi_routing[idx].gsi_end >= gsi_top)
+ gsi_top = mp_gsi_routing[idx].gsi_end + 1;
printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
"GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 1d3cddaa40ee..cee5263927c1 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -34,7 +34,6 @@
#include <linux/compiler.h>
#include <linux/dmi.h>
#include <linux/slab.h>
-#include <trace/events/power.h>
#include <linux/acpi.h>
#include <linux/io.h>
@@ -324,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
}
}
- trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency);
-
switch (data->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index ce7cde713e71..a36de5bbb622 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -368,22 +368,16 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
return -ENODEV;
out_obj = output.pointer;
- if (out_obj->type != ACPI_TYPE_BUFFER) {
- ret = -ENODEV;
- goto out_free;
- }
+ if (out_obj->type != ACPI_TYPE_BUFFER)
+ return -ENODEV;
errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
- if (errors) {
- ret = -ENODEV;
- goto out_free;
- }
+ if (errors)
+ return -ENODEV;
supported = *((u32 *)(out_obj->buffer.pointer + 4));
- if (!(supported & 0x1)) {
- ret = -ENODEV;
- goto out_free;
- }
+ if (!(supported & 0x1))
+ return -ENODEV;
out_free:
kfree(output.pointer);
@@ -397,13 +391,17 @@ static int __init pcc_cpufreq_probe(void)
struct pcc_memory_resource *mem_resource;
struct pcc_register_resource *reg_resource;
union acpi_object *out_obj, *member;
- acpi_handle handle, osc_handle;
+ acpi_handle handle, osc_handle, pcch_handle;
int ret = 0;
status = acpi_get_handle(NULL, "\\_SB", &handle);
if (ACPI_FAILURE(status))
return -ENODEV;
+ status = acpi_get_handle(handle, "PCCH", &pcch_handle);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
status = acpi_get_handle(handle, "_OSC", &osc_handle);
if (ACPI_SUCCESS(status)) {
ret = pcc_cpufreq_do_osc(&osc_handle);
@@ -543,13 +541,13 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (!pcch_virt_addr) {
result = -1;
- goto pcch_null;
+ goto out;
}
result = pcc_get_offset(cpu);
if (result) {
dprintk("init: PCCP evaluation failed\n");
- goto free;
+ goto out;
}
policy->max = policy->cpuinfo.max_freq =
@@ -558,14 +556,15 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
ioread32(&pcch_hdr->minimum_frequency) * 1000;
policy->cur = pcc_get_freq(cpu);
+ if (!policy->cur) {
+ dprintk("init: Unable to get current CPU frequency\n");
+ result = -EINVAL;
+ goto out;
+ }
+
dprintk("init: policy->max is %d, policy->min is %d\n",
policy->max, policy->min);
-
- return 0;
-free:
- pcc_clear_mapping();
- free_percpu(pcc_cpu_info);
-pcch_null:
+out:
return result;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 7ec2123838e6..3e90cce3dc8b 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1023,13 +1023,12 @@ static int get_transition_latency(struct powernow_k8_data *data)
}
if (max_latency == 0) {
/*
- * Fam 11h always returns 0 as transition latency.
- * This is intended and means "very fast". While cpufreq core
- * and governors currently can handle that gracefully, better
- * set it to 1 to avoid problems in the future.
- * For all others it's a BIOS bug.
+ * Fam 11h and later may return 0 as transition latency. This
+ * is intended and means "very fast". While cpufreq core and
+ * governors currently can handle that gracefully, better set it
+ * to 1 to avoid problems in the future.
*/
- if (boot_cpu_data.x86 != 0x11)
+ if (boot_cpu_data.x86 < 0x11)
printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
"latency\n");
max_latency = 1;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 707165dbc203..18cc42562250 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -36,6 +36,7 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/debugfs.h>
+#include <linux/edac_mce.h>
#include <asm/processor.h>
#include <asm/hw_irq.h>
@@ -169,6 +170,15 @@ void mce_log(struct mce *mce)
entry = rcu_dereference_check_mce(mcelog.next);
for (;;) {
/*
+ * If edac_mce is enabled, it will check the error type
+ * and will process it if it is a known error.
+ * Otherwise, the error will be sent through the mcelog
+ * interface
+ */
+ if (edac_mce_parse(mce))
+ return;
+
+ /*
* When the buffer fills up discard new entries.
* Assume that the earlier errors are the more
* interesting ones:
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5db5b7d65a18..f2da20fda02d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -220,6 +220,7 @@ struct x86_pmu {
struct perf_event *event);
struct event_constraint *event_constraints;
void (*quirks)(void);
+ int perfctr_second_write;
int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
* count to the generic event atomically:
*/
again:
- prev_raw_count = atomic64_read(&hwc->prev_count);
+ prev_raw_count = local64_read(&hwc->prev_count);
rdmsrl(hwc->event_base + idx, new_raw_count);
- if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
@@ -313,8 +314,8 @@ again:
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
- atomic64_add(delta, &event->count);
- atomic64_sub(delta, &hwc->period_left);
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
return new_raw_count;
}
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
if (!hwc->sample_period) {
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
- atomic64_set(&hwc->period_left, hwc->sample_period);
+ local64_set(&hwc->period_left, hwc->sample_period);
} else {
/*
* If we have a PMU initialized but no APIC
@@ -885,7 +886,7 @@ static int
x86_perf_event_set_period(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- s64 left = atomic64_read(&hwc->period_left);
+ s64 left = local64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int ret = 0, idx = hwc->idx;
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
*/
if (unlikely(left <= -period)) {
left = period;
- atomic64_set(&hwc->period_left, left);
+ local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
- atomic64_set(&hwc->period_left, left);
+ local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
* The hw event starts counting from this event offset,
* mark it to be able to extra future deltas:
*/
- atomic64_set(&hwc->prev_count, (u64)-left);
+ local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base + idx,
+ wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+
+ /*
+ * Due to an erratum on certain CPUs we need
+ * a second write to be sure the register
+ * is updated properly
+ */
+ if (x86_pmu.perfctr_second_write) {
+ wrmsrl(hwc->event_base + idx,
(u64)(-left) & x86_pmu.cntval_mask);
+ }
perf_event_update_userpage(event);
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
* skip the schedulability test here, it will be performed
* at commit time(->commit_txn) as a whole
*/
- if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+ if (cpuc->group_flag & PERF_EVENT_TXN)
goto out;
ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
*/
- if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+ if (cpuc->group_flag & PERF_EVENT_TXN)
return;
x86_pmu_stop(event);
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+ cpuc->group_flag |= PERF_EVENT_TXN;
cpuc->n_txn = 0;
}
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+ cpuc->group_flag &= ~PERF_EVENT_TXN;
/*
* Truncate the collected events.
*/
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
*/
memcpy(cpuc->assign, assign, n*sizeof(int));
- /*
- * Clear out the txn count so that ->cancel_txn() which gets
- * run after ->commit_txn() doesn't undo things.
- */
- cpuc->n_txn = 0;
+ cpuc->group_flag &= ~PERF_EVENT_TXN;
return 0;
}
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {
.walk_stack = print_context_stack_bp,
};
-#include "../dumpstack.h"
-
static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return entry;
}
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
- regs->ip = ip;
- /*
- * perf_arch_fetch_caller_regs adds another call, we need to increment
- * the skip level
- */
- regs->bp = rewind_frame_pointer(skip + 1);
- regs->cs = __KERNEL_CS;
- /*
- * We abuse bit 3 to pass exact information, see perf_misc_flags
- * and the comment with PERF_EFLAGS_EXACT.
- */
- regs->flags = 0;
-}
-
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
unsigned long ip;
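
The atomic64_* to local64_* conversion keeps the same lock-free read/compare-exchange loop but uses cheaper CPU-local operations, which is safe because prev_count is only touched from the owning CPU (including its NMIs). A standalone userspace analogue of the loop's logic (GCC builtins; names hypothetical):

        /* build: gcc -c local64_sketch.c */
        static unsigned long long prev;

        unsigned long long delta_since_last(unsigned long long now)
        {
                unsigned long long old;

                do {
                        old = prev;     /* retry if an interrupting update won the race */
                } while (!__sync_bool_compare_and_swap(&prev, old, now));

                return now - old;
        }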
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 611df11ba15e..c2897b7b4a3b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -102,8 +102,8 @@ static const u64 amd_perfmon_event_map[] =
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
[PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
};
static u64 amd_pmu_event_map(int hw_event)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index fdbc652d3feb..214ac860ebe0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -72,6 +72,7 @@ static struct event_constraint intel_westmere_event_constraints[] =
INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+ INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
EVENT_CONSTRAINT_END
};
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ae85d69644d1..107711bf0ee8 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -21,22 +21,36 @@ struct p4_event_bind {
char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */
};
-struct p4_cache_event_bind {
+struct p4_pebs_bind {
unsigned int metric_pebs;
unsigned int metric_vert;
};
-#define P4_GEN_CACHE_EVENT_BIND(name) \
- [P4_CACHE__##name] = { \
- .metric_pebs = P4_PEBS__##name, \
- .metric_vert = P4_VERT__##name, \
+/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
+#define P4_GEN_PEBS_BIND(name, pebs, vert) \
+ [P4_PEBS_METRIC__##name] = { \
+ .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \
+ .metric_vert = vert, \
}
-static struct p4_cache_event_bind p4_cache_event_bind_map[] = {
- P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired),
- P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired),
- P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired),
- P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired),
+/*
+ * note we have P4_PEBS_ENABLE_UOP_TAG always set here
+ *
+ * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
+ * event configuration to find out which values are to be
+ * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
+ * registers
+ */
+static struct p4_pebs_bind p4_pebs_bind_map[] = {
+ P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001),
+ P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001),
+ P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001),
+ P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002),
+ P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003),
+ P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010),
+ P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001),
+ P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001),
+ P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002),
};
/*
@@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = {
},
};
-#define P4_GEN_CACHE_EVENT(event, bit, cache_event) \
+#define P4_GEN_CACHE_EVENT(event, bit, metric) \
p4_config_pack_escr(P4_ESCR_EVENT(event) | \
P4_ESCR_EMASK_BIT(event, bit)) | \
- p4_config_pack_cccr(cache_event | \
+ p4_config_pack_cccr(metric | \
P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
static __initconst const u64 p4_hw_cache_event_ids
@@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
- P4_CACHE__1stl_cache_load_miss_retired),
+ P4_PEBS_METRIC__1stl_cache_load_miss_retired),
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
- P4_CACHE__2ndl_cache_load_miss_retired),
+ P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
- P4_CACHE__dtlb_load_miss_retired),
+ P4_PEBS_METRIC__dtlb_load_miss_retired),
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
- P4_CACHE__dtlb_store_miss_retired),
+ P4_PEBS_METRIC__dtlb_store_miss_retired),
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
- P4_CACHE__itlb_reference_hit),
+ P4_PEBS_METRIC__none),
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
- P4_CACHE__itlb_reference_miss),
+ P4_PEBS_METRIC__none),
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
@@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event)
return config;
}
+static int p4_validate_raw_event(struct perf_event *event)
+{
+ unsigned int v;
+
+ /* user data may have an out-of-bounds event index */
+ v = p4_config_unpack_event(event->attr.config);
+ if (v >= ARRAY_SIZE(p4_event_bind_map)) {
+ pr_warning("P4 PMU: Unknown event code: %d\n", v);
+ return -EINVAL;
+ }
+
+ /*
+ * it may have some screwed PEBS bits
+ */
+ if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
+ pr_warning("P4 PMU: PEBS are not supported yet\n");
+ return -EINVAL;
+ }
+ v = p4_config_unpack_metric(event->attr.config);
+ if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
+ pr_warning("P4 PMU: Unknown metric code: %d\n", v);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int p4_hw_config(struct perf_event *event)
{
int cpu = get_cpu();
int rc = 0;
- unsigned int evnt;
u32 escr, cccr;
/*
@@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event)
if (event->attr.type == PERF_TYPE_RAW) {
- /* user data may have out-of-bound event index */
- evnt = p4_config_unpack_event(event->attr.config);
- if (evnt >= ARRAY_SIZE(p4_event_bind_map)) {
- rc = -EINVAL;
+ rc = p4_validate_raw_event(event);
+ if (rc)
goto out;
- }
/*
* We don't control raw events so it's up to the caller
@@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event)
* on HT machine but allow HT-compatible specifics to be
* passed on)
*
+ * Note that for RAW events we allow the user to use P4_CCCR_RESERVED
+ * bits since we keep additional info here (for cache events etc.)
+ *
* XXX: HT wide things should check perf_paranoid_cpu() &&
* CAP_SYS_ADMIN
*/
event->hw.config |= event->attr.config &
(p4_config_pack_escr(P4_ESCR_MASK_HT) |
- p4_config_pack_cccr(P4_CCCR_MASK_HT));
+ p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
}
rc = x86_setup_perfctr(event);
@@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
return overflow;
}
+static void p4_pmu_disable_pebs(void)
+{
+ /*
+ * FIXME
+ *
+ * It's still allowed that two threads set up the same cache
+ * events, so we can't simply clear the metrics until we know
+ * no one is depending on us; we need a kind of counter
+ * for "ReplayEvent" users.
+ *
+ * What is more complex -- RAW events: if a user (for some
+ * reason) passes a cache event metric with an improper
+ * event opcode, it's fine from the hardware point of view
+ * but complete nonsense as far as the "meaning" of such an action goes.
+ *
+ * So for the moment leave the metrics turned on forever -- it's
+ * OK for now but needs to be revisited!
+ *
+ * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
+ * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
+ */
+}
+
static inline void p4_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void)
continue;
p4_pmu_disable_event(event);
}
+
+ p4_pmu_disable_pebs();
+}
+
+/* configuration must be valid */
+static void p4_pmu_enable_pebs(u64 config)
+{
+ struct p4_pebs_bind *bind;
+ unsigned int idx;
+
+ BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
+
+ idx = p4_config_unpack_metric(config);
+ if (idx == P4_PEBS_METRIC__none)
+ return;
+
+ bind = &p4_pebs_bind_map[idx];
+
+ (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
+ (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
}
static void p4_pmu_enable_event(struct perf_event *event)
@@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
int thread = p4_ht_config_thread(hwc->config);
u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
unsigned int idx = p4_config_unpack_event(hwc->config);
- unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
struct p4_event_bind *bind;
- struct p4_cache_event_bind *bind_cache;
u64 escr_addr, cccr;
bind = &p4_event_bind_map[idx];
@@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event)
cccr = p4_config_unpack_cccr(hwc->config);
/*
- * it could be Cache event so that we need to
- * set metrics into additional MSRs
+ * it could be a cache event, so we need to write the metrics
+ * into additional MSRs
*/
- BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK);
- if (idx_cache > P4_CACHE__NONE &&
- idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
- bind_cache = &p4_cache_event_bind_map[idx_cache];
- (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
- (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
- }
+ p4_pmu_enable_pebs(hwc->config);
(void)checking_wrmsrl(escr_addr, escr_conf);
(void)checking_wrmsrl(hwc->config_base + hwc->idx,
@@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = {
.max_period = (1ULL << 39) - 1,
.hw_config = p4_hw_config,
.schedule_events = p4_pmu_schedule_events,
+ /*
+ * This handles erratum N15 in Intel doc 249199-029:
+ * the counter may not be updated correctly on a write,
+ * so we need a second write operation to do the trick
+ * (the official workaround didn't work).
+ *
+ * The idea is taken from the OProfile code.
+ */
+ .perfctr_second_write = 1,
};
static __init int p4_pmu_init(void)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c89a386930b7..6e8752c1bd52 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,7 +18,6 @@
#include <asm/stacktrace.h>
-#include "dumpstack.h"
int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
deleted file mode 100644
index e1a93be4fd44..000000000000
--- a/arch/x86/kernel/dumpstack.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
- */
-
-#ifndef DUMPSTACK_H
-#define DUMPSTACK_H
-
-#ifdef CONFIG_X86_32
-#define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
-#else
-#define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
-#endif
-
-#include <linux/uaccess.h>
-
-extern void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl);
-
-extern void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl);
-
-extern unsigned int code_bytes;
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
-};
-
-struct stack_frame_ia32 {
- u32 next_frame;
- u32 return_address;
-};
-
-static inline unsigned long rewind_frame_pointer(int n)
-{
- struct stack_frame *frame;
-
- get_bp(frame);
-
-#ifdef CONFIG_FRAME_POINTER
- while (n--) {
- if (probe_kernel_address(&frame->next_frame, frame))
- break;
- }
-#endif
-
- return (unsigned long)frame;
-}
-
-#endif /* DUMPSTACK_H */
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 11540a189d93..0f6376ffa2d9 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,8 +16,6 @@
#include <asm/stacktrace.h>
-#include "dumpstack.h"
-
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 272c9f1f05f3..57a21f11c791 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,7 +16,6 @@
#include <asm/stacktrace.h>
-#include "dumpstack.h"
#define N_EXCEPTION_STACKS_END \
(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7bca3c6a02fb..0d6fc71bedb1 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -729,7 +729,7 @@ static int __init e820_mark_nvs_memory(void)
struct e820entry *ei = &e820.map[i];
if (ei->type == E820_NVS)
- hibernate_nvs_register(ei->addr, ei->size);
+ suspend_nvs_register(ei->addr, ei->size);
}
return 0;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index ebdb85cf2686..e5cc7e82e60d 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -18,6 +18,7 @@
#include <asm/apic.h>
#include <asm/iommu.h>
#include <asm/gart.h>
+#include <asm/hpet.h>
static void __init fix_hypertransport_config(int num, int slot, int func)
{
@@ -191,6 +192,21 @@ static void __init ati_bugs_contd(int num, int slot, int func)
}
#endif
+/*
+ * Force the read back of the CMP register in hpet_next_event()
+ * to work around the problem that the CMP register write seems to be
+ * delayed. See hpet_next_event() for details.
+ *
+ * We do this on all SMBUS incarnations for now until we have more
+ * information about the affected chipsets.
+ */
+static void __init ati_hpet_bugs(int num, int slot, int func)
+{
+#ifdef CONFIG_HPET_TIMER
+ hpet_readback_cmp = 1;
+#endif
+}
+
#define QFLAG_APPLY_ONCE 0x1
#define QFLAG_APPLIED 0x2
#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -220,6 +236,8 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
+ { PCI_VENDOR_ID_ATI, PCI_ANY_ID,
+ PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs },
{}
};
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 0697ff139837..4db7c4d12ffa 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -571,8 +571,8 @@ auditsys:
* masked off.
*/
sysret_audit:
- movq %rax,%rsi /* second arg, syscall return value */
- cmpq $0,%rax /* is it < 0? */
+ movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */
+ cmpq $0,%rsi /* is it < 0? */
setl %al /* 1 if so, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a198b7c87a12..ba390d731175 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -964,7 +964,7 @@ fs_initcall(hpet_late_init);
void hpet_disable(void)
{
- if (is_hpet_capable()) {
+ if (is_hpet_capable() && hpet_virt_address) {
unsigned int cfg = hpet_readl(HPET_CFG);
if (hpet_legacy_int_enabled) {
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a8f1b803d2fd..a474ec37c32f 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
{
/* Len */
switch (x86_len) {
+ case X86_BREAKPOINT_LEN_X:
+ *gen_len = sizeof(long);
+ break;
case X86_BREAKPOINT_LEN_1:
*gen_len = HW_BREAKPOINT_LEN_1;
break;
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp)
info->address = bp->attr.bp_addr;
+ /* Type */
+ switch (bp->attr.bp_type) {
+ case HW_BREAKPOINT_W:
+ info->type = X86_BREAKPOINT_WRITE;
+ break;
+ case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+ info->type = X86_BREAKPOINT_RW;
+ break;
+ case HW_BREAKPOINT_X:
+ info->type = X86_BREAKPOINT_EXECUTE;
+ /*
+ * x86 inst breakpoints need to have a specific undefined len.
+ * But we still need to check that userspace is not trying to set up
+ * an unsupported length, to get a range breakpoint for example.
+ */
+ if (bp->attr.bp_len == sizeof(long)) {
+ info->len = X86_BREAKPOINT_LEN_X;
+ return 0;
+ }
+ default:
+ return -EINVAL;
+ }
+
/* Len */
switch (bp->attr.bp_len) {
case HW_BREAKPOINT_LEN_1:
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp)
return -EINVAL;
}
- /* Type */
- switch (bp->attr.bp_type) {
- case HW_BREAKPOINT_W:
- info->type = X86_BREAKPOINT_WRITE;
- break;
- case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
- info->type = X86_BREAKPOINT_RW;
- break;
- case HW_BREAKPOINT_X:
- info->type = X86_BREAKPOINT_EXECUTE;
- break;
- default:
- return -EINVAL;
- }
-
return 0;
}
/*
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
ret = -EINVAL;
switch (info->len) {
+ case X86_BREAKPOINT_LEN_X:
+ align = sizeof(long) -1;
+ break;
case X86_BREAKPOINT_LEN_1:
align = 0;
break;
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
perf_bp_event(bp, args->regs);
+ /*
+ * Set up resume flag to avoid breakpoint recursion when
+ * returning to the origin.
+ */
+ if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
+ args->regs->flags |= X86_EFLAGS_RF;
+
rcu_read_unlock();
}
/*
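
With X86_BREAKPOINT_LEN_X, an execute breakpoint must be requested with a length of sizeof(long); any other value is rejected by arch_build_bp_info(). A hedged userspace sketch of the matching perf_event_attr (the target address is hypothetical):

        struct perf_event_attr attr = {
                .type    = PERF_TYPE_BREAKPOINT,
                .bp_type = HW_BREAKPOINT_X,
                .bp_addr = (__u64)target_addr,  /* address of the code to trap */
                .bp_len  = sizeof(long),        /* required for execute breakpoints */
        };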
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 7c9f02c130f3..cafa7c80ac95 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -276,16 +276,6 @@ static struct sys_device device_i8259A = {
.cls = &i8259_sysdev_class,
};
-static int __init i8259A_init_sysfs(void)
-{
- int error = sysdev_class_register(&i8259_sysdev_class);
- if (!error)
- error = sysdev_register(&device_i8259A);
- return error;
-}
-
-device_initcall(i8259A_init_sysfs);
-
static void mask_8259A(void)
{
unsigned long flags;
@@ -407,3 +397,18 @@ struct legacy_pic default_legacy_pic = {
};
struct legacy_pic *legacy_pic = &default_legacy_pic;
+
+static int __init i8259A_init_sysfs(void)
+{
+ int error;
+
+ if (legacy_pic != &default_legacy_pic)
+ return 0;
+
+ error = sysdev_class_register(&i8259_sysdev_class);
+ if (!error)
+ error = sysdev_register(&device_i8259A);
+ return error;
+}
+
+device_initcall(i8259A_init_sysfs);
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 3a54dcb9cd0e..43e9ccf44947 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -34,7 +34,7 @@ EXPORT_SYMBOL(init_task);
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
* no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data.cacheline_aligned
+ * so they are allowed to end up in the .data..cacheline_aligned
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 4f4af75b9482..01ab17ae2ae7 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -572,7 +572,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
return NOTIFY_STOP;
}
-#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
int kgdb_ll_trap(int cmd, const char *str,
struct pt_regs *regs, long err, int trap, int sig)
{
@@ -590,7 +589,6 @@ int kgdb_ll_trap(int cmd, const char *str,
return __kgdb_notify(&args, cmd);
}
-#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
static int
kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
@@ -625,6 +623,12 @@ int kgdb_arch_init(void)
return register_die_notifier(&kgdb_notifier);
}
+static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi,
+ struct perf_sample_data *data, struct pt_regs *regs)
+{
+ kgdb_ll_trap(DIE_DEBUG, "debug", regs, 0, 0, SIGTRAP);
+}
+
void kgdb_arch_late(void)
{
int i, cpu;
@@ -655,6 +659,7 @@ void kgdb_arch_late(void)
for_each_online_cpu(cpu) {
pevent = per_cpu_ptr(breakinfo[i].pev, cpu);
pevent[0]->hw.sample_period = 1;
+ pevent[0]->overflow_handler = kgdb_hw_overflow_handler;
if (pevent[0]->destroy != NULL) {
pevent[0]->destroy = NULL;
release_bp_slot(*pevent);
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 345a4b1fe144..1bfb6cf4dd55 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to)
}
/*
- * Check for the REX prefix which can only exist on X86_64
- * X86_32 always returns 0
+ * Skip the prefixes of the instruction.
*/
-static int __kprobes is_REX_prefix(kprobe_opcode_t *insn)
+static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
{
+ insn_attr_t attr;
+
+ attr = inat_get_opcode_attribute((insn_byte_t)*insn);
+ while (inat_is_legacy_prefix(attr)) {
+ insn++;
+ attr = inat_get_opcode_attribute((insn_byte_t)*insn);
+ }
#ifdef CONFIG_X86_64
- if ((*insn & 0xf0) == 0x40)
- return 1;
+ if (inat_is_rex_prefix(attr))
+ insn++;
#endif
- return 0;
+ return insn;
}
/*
@@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr)
*/
static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
{
+ /* Skip prefixes */
+ insn = skip_prefixes(insn);
+
switch (*insn) {
case 0xfa: /* cli */
case 0xfb: /* sti */
@@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
return 1;
}
- /*
- * on X86_64, 0x40-0x4f are REX prefixes so we need to look
- * at the next byte instead.. but of course not recurse infinitely
- */
- if (is_REX_prefix(insn))
- return is_IF_modifier(++insn);
-
return 0;
}
@@ -640,8 +642,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
/* Skip cs, ip, orig_ax and gs. */ \
" subl $16, %esp\n" \
" pushl %fs\n" \
- " pushl %ds\n" \
" pushl %es\n" \
+ " pushl %ds\n" \
" pushl %eax\n" \
" pushl %ebp\n" \
" pushl %edi\n" \
@@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p,
unsigned long orig_ip = (unsigned long)p->addr;
kprobe_opcode_t *insn = p->ainsn.insn;
- /*skip the REX prefix*/
- if (is_REX_prefix(insn))
- insn++;
+ /* Skip prefixes */
+ insn = skip_prefixes(insn);
regs->flags &= ~X86_EFLAGS_TF;
switch (*insn) {
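
For illustration, the logic of the new skip_prefixes() in standalone form, with a hand-rolled legacy-prefix test in place of the kernel's inat tables (sketch only; the kernel version also gates the REX step on CONFIG_X86_64):

	#include <stdbool.h>
	#include <stdio.h>

	static bool is_legacy_prefix(unsigned char b)
	{
		switch (b) {
		case 0x26: case 0x2e: case 0x36: case 0x3e:	/* seg overrides */
		case 0x64: case 0x65:				/* fs/gs */
		case 0x66: case 0x67:				/* opsize/addrsize */
		case 0xf0: case 0xf2: case 0xf3:		/* lock/rep */
			return true;
		default:
			return false;
		}
	}

	static const unsigned char *skip_prefixes(const unsigned char *insn)
	{
		while (is_legacy_prefix(*insn))
			insn++;
		if ((*insn & 0xf0) == 0x40)	/* at most one REX, 64-bit only */
			insn++;
		return insn;
	}

	int main(void)
	{
		/* f0 48 0f b1 0e = lock cmpxchg %rcx,(%rsi) */
		unsigned char code[] = { 0xf0, 0x48, 0x0f, 0xb1, 0x0e };
		printf("opcode byte: %02x\n", *skip_prefixes(code)); /* 0f */
		return 0;
	}
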
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 5ae5d2426edf..d86dbf7e54be 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -123,7 +123,7 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m)
printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
m->apicid, m->apicver, m->apicaddr);
- mp_register_ioapic(m->apicid, m->apicaddr, gsi_end + 1);
+ mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
}
static void print_MP_intsrc_info(struct mpc_intsrc *m)
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
index e796448f0eb5..5915e0b33303 100644
--- a/arch/x86/kernel/mrst.c
+++ b/arch/x86/kernel/mrst.c
@@ -216,6 +216,12 @@ static void __init mrst_setup_boot_clock(void)
setup_boot_APIC_clock();
};
+/* MID systems don't have an i8042 controller */
+static int mrst_i8042_detect(void)
+{
+ return 0;
+}
+
/*
* Moorestown specific x86_init function overrides and early setup
* calls.
@@ -233,6 +239,7 @@ void __init x86_mrst_early_setup(void)
x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock;
x86_platform.calibrate_tsc = mrst_calibrate_tsc;
+ x86_platform.i8042_detect = mrst_i8042_detect;
x86_init.pci.init = pci_mrst_init;
x86_init.pci.fixup_irqs = x86_init_noop;
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index fb99f7edb341..078d4ec1a9d9 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -103,11 +103,16 @@ int use_calgary __read_mostly = 0;
#define PMR_SOFTSTOPFAULT 0x40000000
#define PMR_HARDSTOP 0x20000000
-#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */
-#define MAX_NUM_CHASSIS 8 /* max number of chassis */
-/* MAX_PHB_BUS_NUM is the maximal possible dev->bus->number */
-#define MAX_PHB_BUS_NUM (MAX_NUM_OF_PHBS * MAX_NUM_CHASSIS * 2)
-#define PHBS_PER_CALGARY 4
+/*
+ * The maximum PHB bus number.
+ * x3950M2 (rare): 8 chassis, 48 PHBs per chassis = 384
+ * x3950M2: 4 chassis, 48 PHBs per chassis = 192
+ * x3950 (PCIE): 8 chassis, 32 PHBs per chassis = 256
+ * x3950 (PCIX): 8 chassis, 16 PHBs per chassis = 128
+ */
+#define MAX_PHB_BUS_NUM 256
+
+#define PHBS_PER_CALGARY 4
/* register offsets in Calgary's internal register space */
static const unsigned long tar_offsets[] = {
@@ -1051,8 +1056,6 @@ static int __init calgary_init_one(struct pci_dev *dev)
struct iommu_table *tbl;
int ret;
- BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM);
-
bbar = busno_to_bbar(dev->bus->number);
ret = calgary_setup_tar(dev, bbar);
if (ret)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e7e35219b32f..787572d43d9c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -371,7 +371,7 @@ static inline int hlt_use_halt(void)
void default_idle(void)
{
if (hlt_use_halt()) {
- trace_power_start(POWER_CSTATE, 1);
+ trace_power_start(POWER_CSTATE, 1, smp_processor_id());
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
@@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
*/
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
- trace_power_start(POWER_CSTATE, (ax>>4)+1);
+ trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
if (!need_resched()) {
if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -457,7 +457,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
static void mwait_idle(void)
{
if (!need_resched()) {
- trace_power_start(POWER_CSTATE, 1);
+ trace_power_start(POWER_CSTATE, 1, smp_processor_id());
if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -478,7 +478,7 @@ static void mwait_idle(void)
*/
static void poll_idle(void)
{
- trace_power_start(POWER_CSTATE, 0);
+ trace_power_start(POWER_CSTATE, 0, smp_processor_id());
local_irq_enable();
while (!need_resched())
cpu_relax();
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d128783af47..96586c3cbbbf 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,8 @@
#include <asm/syscalls.h>
#include <asm/debugreg.h>
+#include <trace/events/power.h>
+
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
/*
@@ -111,6 +113,8 @@ void cpu_idle(void)
stop_critical_timings();
pm_idle();
start_critical_timings();
+
+ trace_power_end(smp_processor_id());
}
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c2422a99f1f..3d9ea531ddd1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,8 @@
#include <asm/syscalls.h>
#include <asm/debugreg.h>
+#include <trace/events/power.h>
+
asmlinkage extern void ret_from_fork(void);
DEFINE_PER_CPU(unsigned long, old_rsp);
@@ -138,6 +140,9 @@ void cpu_idle(void)
stop_critical_timings();
pm_idle();
start_critical_timings();
+
+ trace_power_end(smp_processor_id());
+
/* In many cases the interrupt that ended idle
has already called exit_idle. But some idle
loops can be woken up without interrupt. */
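
The three hunks above give trace_power_start() a CPU-id argument and make both idle loops emit trace_power_end() when an idle method returns. A sketch of the intended pairing for a custom idle routine, assuming the signatures used in these hunks:

	#include <trace/events/power.h>
	#include <linux/smp.h>
	#include <asm/irqflags.h>

	static void my_idle(void)
	{
		trace_power_start(POWER_CSTATE, 1, smp_processor_id());
		safe_halt();			/* sti; hlt */
		trace_power_end(smp_processor_id());
	}
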
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index e72d3fc6547d..939b9e98245f 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -498,15 +498,10 @@ void force_hpet_resume(void)
* See erratum #27 (Misinterpreted MSI Requests May Result in
* Corrupted LPC DMA Data) in AMD Publication #46837,
* "SB700 Family Product Errata", Rev. 1.0, March 2010.
- *
- * Also force the read back of the CMP register in hpet_next_event()
- * to work around the problem that the CMP register write seems to be
- * delayed. See hpet_next_event() for details.
*/
static void force_disable_hpet_msi(struct pci_dev *unused)
{
hpet_msi_disable = 1;
- hpet_readback_cmp = 1;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 8e1aac86b50c..e3af342fe83a 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -228,6 +228,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"),
},
},
+ { /* Handle problems with rebooting on Dell T7400's */
+ .callback = set_bios_reboot,
+ .ident = "Dell Precision T7400",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T7400"),
+ },
+ },
{ /* Handle problems with rebooting on HP laptops */
.callback = set_bios_reboot,
.ident = "HP Compaq Laptop",
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index a867940a6dfc..a60df9ae6454 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -21,12 +21,6 @@
#include <asm/cpu.h>
#include <asm/stackprotector.h>
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-# define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__)
-#else
-# define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0)
-#endif
-
DEFINE_PER_CPU(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
@@ -244,10 +238,19 @@ void __init setup_per_cpu_areas(void)
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
+ /*
+ * Ensure that the boot cpu's numa_node is correct even when the
+ * boot cpu sits on a node with no memory installed.  With
+ * MEMORY_HOTPLUG, cpu_up() also calls cpu_to_node() for the APs
+ * before per_cpu(numa_node) is set up by c_init (intel_init/
+ * amd_init), so set numa_node here for the boot cpu and all APs.
+ */
+ set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
#endif
/*
- * Up to this point, the boot CPU has been using .data.init
+ * Up to this point, the boot CPU has been using .init.data
* area. Reload any changed state for the boot CPU.
*/
if (cpu == boot_cpu_id)
@@ -263,14 +266,6 @@ void __init setup_per_cpu_areas(void)
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
- /*
- * make sure boot cpu numa_node is right, when boot cpu is on the
- * node that doesn't have mem installed
- */
- set_cpu_numa_node(boot_cpu_id, early_cpu_to_node(boot_cpu_id));
-#endif
-
/* Setup node to cpumask map */
setup_node_to_cpumask_map();
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c
index 7ded57896c0a..cb22acf3ed09 100644
--- a/arch/x86/kernel/sfi.c
+++ b/arch/x86/kernel/sfi.c
@@ -93,7 +93,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table)
pentry = (struct sfi_apic_table_entry *)sb->pentry;
for (i = 0; i < num; i++) {
- mp_register_ioapic(i, pentry->phys_addr, gsi_end + 1);
+ mp_register_ioapic(i, pentry->phys_addr, gsi_top);
pentry++;
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 37462f1ddba5..c4f33b2e77d6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -686,7 +686,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
static void __cpuinit announce_cpu(int cpu, int apicid)
{
static int current_node = -1;
- int node = cpu_to_node(cpu);
+ int node = early_cpu_to_node(cpu);
if (system_state == SYSTEM_BOOTING) {
if (node != current_node) {
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 922eefbb3f6c..b53c525368a7 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name)
return 0;
}
-static void save_stack_address(void *data, unsigned long addr, int reliable)
+static void
+__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
{
struct stack_trace *trace = data;
+#ifdef CONFIG_FRAME_POINTER
if (!reliable)
return;
+#endif
+ if (nosched && in_sched_functions(addr))
+ return;
if (trace->skip > 0) {
trace->skip--;
return;
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
trace->entries[trace->nr_entries++] = addr;
}
+static void save_stack_address(void *data, unsigned long addr, int reliable)
+{
+ return __save_stack_address(data, addr, reliable, false);
+}
+
static void
save_stack_address_nosched(void *data, unsigned long addr, int reliable)
{
- struct stack_trace *trace = (struct stack_trace *)data;
- if (!reliable)
- return;
- if (in_sched_functions(addr))
- return;
- if (trace->skip > 0) {
- trace->skip--;
- return;
- }
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = addr;
+ return __save_stack_address(data, addr, reliable, true);
}
static const struct stacktrace_ops save_stack_ops = {
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
-struct stack_frame {
+struct stack_frame_user {
const void __user *next_fp;
unsigned long ret_addr;
};
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+static int
+copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
{
int ret;
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
trace->entries[trace->nr_entries++] = regs->ip;
while (trace->nr_entries < trace->max_entries) {
- struct stack_frame frame;
+ struct stack_frame_user frame;
frame.next_fp = NULL;
frame.ret_addr = 0;
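
The stacktrace change above collapses two nearly identical callbacks into one worker parameterized by a nosched flag, keeping thin wrappers with the old signatures. The shape of that refactor in standalone form (the types and the sched-code filter are stand-ins):

	#include <stdbool.h>

	struct trace_buf {
		unsigned long *entries;
		unsigned int nr, max, skip;
	};

	static bool addr_is_sched_code(unsigned long addr)
	{
		return false;		/* stand-in for in_sched_functions() */
	}

	static void __save_addr(struct trace_buf *t, unsigned long addr,
				bool nosched)
	{
		if (nosched && addr_is_sched_code(addr))
			return;
		if (t->skip) {
			t->skip--;
			return;
		}
		if (t->nr < t->max)
			t->entries[t->nr++] = addr;
	}

	/* The two old entry points become one-line wrappers. */
	static void save_addr(struct trace_buf *t, unsigned long addr)
	{
		__save_addr(t, addr, false);
	}

	static void save_addr_nosched(struct trace_buf *t, unsigned long addr)
	{
		__save_addr(t, addr, true);
	}

	int main(void)
	{
		unsigned long buf[4];
		struct trace_buf t = { buf, 0, 4, 0 };

		save_addr(&t, 0x1000);
		save_addr_nosched(&t, 0x2000);
		return t.nr == 2 ? 0 : 1;
	}
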
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 142d70c74b02..725ef4d17cd5 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -526,6 +526,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
+ int user_icebp = 0;
unsigned long dr6;
int si_code;
@@ -534,6 +535,14 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
/* Filter out all the reserved bits which are preset to 1 */
dr6 &= ~DR6_RESERVED;
+ /*
+ * If dr6 gives us no clue about the origin of this trap, it is
+ * very likely the result of an icebp/int01 trap.  The user
+ * expects a SIGTRAP for that.
+ */
+ if (!dr6 && user_mode(regs))
+ user_icebp = 1;
+
/* Catch kmemcheck conditions first of all! */
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
return;
@@ -575,7 +584,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
regs->flags &= ~X86_EFLAGS_TF;
}
si_code = get_si_code(tsk->thread.debugreg6);
- if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+ if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
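
With this change, an icebp instruction (opcode 0xf1, a.k.a. int01) executed from user mode leaves all DR6 status bits clear, and the kernel now turns that into the SIGTRAP the user expects. A small userspace test (assumes x86 and a kernel carrying this patch):

	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>

	static void on_trap(int sig)
	{
		printf("got SIGTRAP\n");
		exit(0);
	}

	int main(void)
	{
		signal(SIGTRAP, on_trap);
		asm volatile(".byte 0xf1");	/* icebp / int01 */
		printf("no trap delivered\n");
		return 1;
	}
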
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 2cc249718c46..d0bb52296fa3 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -97,7 +97,7 @@ SECTIONS
HEAD_TEXT
#ifdef CONFIG_X86_32
. = ALIGN(PAGE_SIZE);
- *(.text.page_aligned)
+ *(.text..page_aligned)
#endif
. = ALIGN(8);
_stext = .;
@@ -305,7 +305,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
__bss_start = .;
- *(.bss.page_aligned)
+ *(.bss..page_aligned)
*(.bss)
. = ALIGN(4);
__bss_stop = .;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 61a1e8c7e19f..cd6da6bf3eca 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -5,6 +5,7 @@
*/
#include <linux/init.h>
#include <linux/ioport.h>
+#include <linux/module.h>
#include <asm/bios_ebda.h>
#include <asm/paravirt.h>
@@ -85,6 +86,7 @@ struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
};
static void default_nmi_init(void) { };
+static int default_i8042_detect(void) { return 1; };
struct x86_platform_ops x86_platform = {
.calibrate_tsc = native_calibrate_tsc,
@@ -92,5 +94,8 @@ struct x86_platform_ops x86_platform = {
.set_wallclock = mach_set_rtc_mmss,
.iommu_shutdown = iommu_shutdown_noop,
.is_untracked_pat_range = is_ISA_range,
- .nmi_init = default_nmi_init
+ .nmi_init = default_nmi_init,
+ .i8042_detect = default_i8042_detect
};
+
+EXPORT_SYMBOL_GPL(x86_platform);
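
x86_platform gains an i8042_detect() hook (defaulting to "present") and the struct is exported so modular code can consult it. A hedged sketch of the consumer side; the hook is what this patch adds, while the probe helper is hypothetical:

	#include <asm/x86_init.h>
	#include <linux/errno.h>

	static int do_legacy_probe(void)	/* stand-in for a real probe */
	{
		return 0;
	}

	static int __init my_i8042_probe(void)
	{
		/* Platforms such as Moorestown return 0 here: no i8042,
		 * so skip poking legacy keyboard-controller ports. */
		if (!x86_platform.i8042_detect())
			return -ENODEV;
		return do_legacy_probe();
	}
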
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 81563e76e28f..b1ed0a1a5913 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1815,6 +1815,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= PT_WRITABLE_MASK;
+ if (!tdp_enabled && !(pte_access & ACC_WRITE_MASK))
+ spte &= ~PT_USER_MASK;
+
/*
* Optimization: for pte sync, if spte was writable the hash
* lookup is unnecessary (and expensive). Write protection
@@ -1870,10 +1873,14 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
child = page_header(pte & PT64_BASE_ADDR_MASK);
mmu_page_remove_parent_pte(child, sptep);
+ __set_spte(sptep, shadow_trap_nonpresent_pte);
+ kvm_flush_remote_tlbs(vcpu->kvm);
} else if (pfn != spte_to_pfn(*sptep)) {
pgprintk("hfn old %lx new %lx\n",
spte_to_pfn(*sptep), pfn);
rmap_remove(vcpu->kvm, sptep);
+ __set_spte(sptep, shadow_trap_nonpresent_pte);
+ kvm_flush_remote_tlbs(vcpu->kvm);
} else
was_rmapped = 1;
}
@@ -2919,7 +2926,7 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm)
return kvm_mmu_zap_page(kvm, page) + 1;
}
-static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
+static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
struct kvm *kvm;
struct kvm *kvm_freed = NULL;
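
mmu_shrink() above is adjusted for the shrinker callback's new first argument, struct shrinker *. For reference, a minimal shrinker against that 2.6.35-era prototype (the cache, count and evict helpers are hypothetical):

	#include <linux/mm.h>

	static int my_cache_count(void)   { return 0; }	/* stand-ins */
	static void my_cache_evict(int n) { }

	static int my_shrink(struct shrinker *shrink, int nr_to_scan,
			     gfp_t gfp_mask)
	{
		if (nr_to_scan)
			my_cache_evict(nr_to_scan);
		return my_cache_count();	/* objects remaining */
	}

	static struct shrinker my_shrinker = {
		.shrink = my_shrink,
		.seeks  = DEFAULT_SEEKS,
	};

	/* register_shrinker(&my_shrinker) at init;
	 * unregister_shrinker(&my_shrinker) at exit. */
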
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 89d66ca4d87c..2331bdc2b549 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -342,6 +342,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
/* advance table_gfn when emulating 1gb pages with 4k */
if (delta == 0)
table_gfn += PT_INDEX(addr, level);
+ access &= gw->pte_access;
} else {
direct = 0;
table_gfn = gw->table_gfn[level - 2];
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 96dc232bfc56..ce438e0fdd26 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,7 @@
#include <linux/ftrace_event.h>
#include <linux/slab.h>
+#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/virtext.h>
@@ -56,6 +57,8 @@ MODULE_LICENSE("GPL");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+static bool erratum_383_found __read_mostly;
+
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@ -374,6 +377,31 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
svm->vmcb->control.event_inj_err = error_code;
}
+static void svm_init_erratum_383(void)
+{
+ u32 low, high;
+ int err;
+ u64 val;
+
+ /* Only Fam10h is affected */
+ if (boot_cpu_data.x86 != 0x10)
+ return;
+
+ /* Use _safe variants to not break nested virtualization */
+ val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
+ if (err)
+ return;
+
+ val |= (1ULL << 47);
+
+ low = lower_32_bits(val);
+ high = upper_32_bits(val);
+
+ native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
+
+ erratum_383_found = true;
+}
+
static int has_svm(void)
{
const char *msg;
@@ -429,6 +457,8 @@ static int svm_hardware_enable(void *garbage)
wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
+ svm_init_erratum_383();
+
return 0;
}
@@ -1410,8 +1440,59 @@ static int nm_interception(struct vcpu_svm *svm)
return 1;
}
-static int mc_interception(struct vcpu_svm *svm)
+static bool is_erratum_383(void)
{
+ int err, i;
+ u64 value;
+
+ if (!erratum_383_found)
+ return false;
+
+ value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
+ if (err)
+ return false;
+
+ /* Bit 62 may or may not be set for this mce */
+ value &= ~(1ULL << 62);
+
+ if (value != 0xb600000000010015ULL)
+ return false;
+
+ /* Clear MCi_STATUS registers */
+ for (i = 0; i < 6; ++i)
+ native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
+
+ value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
+ if (!err) {
+ u32 low, high;
+
+ value &= ~(1ULL << 2);
+ low = lower_32_bits(value);
+ high = upper_32_bits(value);
+
+ native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
+ }
+
+ /* Flush tlb to evict multi-match entries */
+ __flush_tlb_all();
+
+ return true;
+}
+
+static void svm_handle_mce(struct vcpu_svm *svm)
+{
+ if (is_erratum_383()) {
+ /*
+ * Erratum 383 triggered. Guest state is corrupt so kill the
+ * guest.
+ */
+ pr_err("KVM: Guest triggered AMD Erratum 383\n");
+
+ set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests);
+
+ return;
+ }
+
/*
* On an #MC intercept the MCE handler is not called automatically in
* the host. So do it by hand here.
@@ -1420,6 +1501,11 @@ static int mc_interception(struct vcpu_svm *svm)
"int $0x12\n");
/* not sure if we ever come back to this point */
+ return;
+}
+
+static int mc_interception(struct vcpu_svm *svm)
+{
return 1;
}
@@ -3088,6 +3174,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
}
+
+ /*
+ * We need to handle MC intercepts here before the vcpu has a chance to
+ * change the physical cpu
+ */
+ if (unlikely(svm->vmcb->control.exit_code ==
+ SVM_EXIT_EXCP_BASE + MC_VECTOR))
+ svm_handle_mce(svm);
}
#undef R
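
svm_init_erratum_383() above sets bit 47 of MSR_AMD64_DC_CFG through the _safe MSR accessors, so a nested hypervisor that doesn't implement the MSR simply makes the workaround a no-op. The general read-modify-write pattern, as a sketch:

	#include <linux/kernel.h>
	#include <asm/msr.h>

	/* Set one workaround bit in an MSR, tolerating a #GP (e.g. when
	 * running virtualized and the MSR is not emulated). */
	static void set_msr_workaround_bit(u32 msr, int bit)
	{
		u64 val;
		int err;

		val = native_read_msr_safe(msr, &err);
		if (err)
			return;		/* MSR not there: nothing to do */

		val |= 1ULL << bit;
		native_write_msr_safe(msr, lower_32_bits(val),
				      upper_32_bits(val));
	}
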
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 859a01a07dbf..ee03679efe78 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1744,18 +1744,15 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
(guest_tr_ar & ~AR_TYPE_MASK)
| AR_TYPE_BUSY_64_TSS);
}
- vcpu->arch.efer |= EFER_LMA;
- vmx_set_efer(vcpu, vcpu->arch.efer);
+ vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
}
static void exit_lmode(struct kvm_vcpu *vcpu)
{
- vcpu->arch.efer &= ~EFER_LMA;
-
vmcs_write32(VM_ENTRY_CONTROLS,
vmcs_read32(VM_ENTRY_CONTROLS)
& ~VM_ENTRY_IA32E_MODE);
- vmx_set_efer(vcpu, vcpu->arch.efer);
+ vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
}
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 05d571f6f196..7fa89c39c64f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1562,7 +1562,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
r = -ENOMEM;
size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
- entries = vmalloc(size);
+ entries = kmalloc(size, GFP_KERNEL);
if (!entries)
goto out;
@@ -1581,7 +1581,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
r = n;
out_free:
- vfree(entries);
+ kfree(entries);
out:
return r;
}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 550df481accd..787c52ca49c3 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -3,12 +3,6 @@
#include <linux/module.h>
#include <linux/bootmem.h>
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-# define DBG(x...) printk(KERN_DEBUG x)
-#else
-# define DBG(x...)
-#endif
-
/*
* Which logical CPUs are on which nodes
*/
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index acc15b23b743..64121a18b8cb 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -302,7 +302,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
return -EINVAL;
}
- new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
+ new = kzalloc(sizeof(struct memtype), GFP_KERNEL);
if (!new)
return -ENOMEM;
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index f537087bb740..8acaddd0fb21 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -34,8 +34,7 @@
* memtype_lock protects the rbtree.
*/
-static void memtype_rb_augment_cb(struct rb_node *node);
-static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb);
+static struct rb_root memtype_rbroot = RB_ROOT;
static int is_node_overlap(struct memtype *node, u64 start, u64 end)
{
@@ -56,7 +55,7 @@ static u64 get_subtree_max_end(struct rb_node *node)
}
/* Update 'subtree_max_end' for a node, based on node and its children */
-static void update_node_max_end(struct rb_node *node)
+static void memtype_rb_augment_cb(struct rb_node *node, void *__unused)
{
struct memtype *data;
u64 max_end, child_max_end;
@@ -78,25 +77,6 @@ static void update_node_max_end(struct rb_node *node)
data->subtree_max_end = max_end;
}
-/* Update 'subtree_max_end' for a node and all its ancestors */
-static void update_path_max_end(struct rb_node *node)
-{
- u64 old_max_end, new_max_end;
-
- while (node) {
- struct memtype *data = container_of(node, struct memtype, rb);
-
- old_max_end = data->subtree_max_end;
- update_node_max_end(node);
- new_max_end = data->subtree_max_end;
-
- if (new_max_end == old_max_end)
- break;
-
- node = rb_parent(node);
- }
-}
-
/* Find the first (lowest start addr) overlapping range from rb tree */
static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
u64 start, u64 end)
@@ -190,12 +170,6 @@ failure:
return -EBUSY;
}
-static void memtype_rb_augment_cb(struct rb_node *node)
-{
- if (node)
- update_path_max_end(node);
-}
-
static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
{
struct rb_node **node = &(root->rb_node);
@@ -213,6 +187,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
rb_link_node(&newdata->rb, parent, node);
rb_insert_color(&newdata->rb, root);
+ rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL);
}
int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
@@ -226,6 +201,7 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
if (ret_type)
new->type = *ret_type;
+ new->subtree_max_end = new->end;
memtype_rb_insert(&memtype_rbroot, new);
}
return err;
@@ -233,13 +209,16 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
struct memtype *rbt_memtype_erase(u64 start, u64 end)
{
+ struct rb_node *deepest;
struct memtype *data;
data = memtype_rb_exact_match(&memtype_rbroot, start, end);
if (!data)
goto out;
+ deepest = rb_augment_erase_begin(&data->rb);
rb_erase(&data->rb, &memtype_rbroot);
+ rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL);
out:
return data;
}
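
pat_rbtree now maintains subtree_max_end through the generic rb_augment_insert() / rb_augment_erase_begin() / rb_augment_erase_end() helpers rather than a private hook; the callback only recomputes one node's value from itself and its children, and the rbtree core walks it up the path. The call pattern, sketched for a generic range tree (assumes the 2.6.35-era lib/rbtree augmentation API):

	#include <linux/rbtree.h>

	struct range {
		struct rb_node rb;
		u64 start, end;
		u64 subtree_max_end;
	};

	static void range_augment_cb(struct rb_node *node, void *unused)
	{
		struct range *r = container_of(node, struct range, rb);
		u64 max = r->end, m;

		if (node->rb_left) {
			m = container_of(node->rb_left, struct range,
					 rb)->subtree_max_end;
			if (m > max)
				max = m;
		}
		if (node->rb_right) {
			m = container_of(node->rb_right, struct range,
					 rb)->subtree_max_end;
			if (m > max)
				max = m;
		}
		r->subtree_max_end = max;
	}

	static void range_insert(struct rb_root *root, struct range *r,
				 struct rb_node *parent, struct rb_node **link)
	{
		r->subtree_max_end = r->end;
		rb_link_node(&r->rb, parent, link);
		rb_insert_color(&r->rb, root);
		rb_augment_insert(&r->rb, range_augment_cb, NULL);
	}

	static void range_erase(struct rb_root *root, struct range *r)
	{
		struct rb_node *deepest = rb_augment_erase_begin(&r->rb);

		rb_erase(&r->rb, root);
		rb_augment_erase_end(deepest, range_augment_cb, NULL);
	}
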
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index 308e32570d84..38e6d174c497 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -40,16 +40,16 @@ static unsigned char prefix_codes[] = {
static unsigned int reg_rop[] = {
0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
};
-static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };
static unsigned int imm_wop[] = { 0xC6, 0xC7 };
/* IA32 Manual 3, 3-432*/
-static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 };
+static unsigned int rw8[] = { 0x88, 0x8A, 0xC6, 0xAA };
static unsigned int rw32[] = {
- 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+ 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB
};
-static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F };
+static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F, 0xAA };
static unsigned int mw16[] = { 0xB70F, 0xBF0F };
-static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 };
+static unsigned int mw32[] = { 0x89, 0x8B, 0xC7, 0xAB };
static unsigned int mw64[] = {};
#else /* not __i386__ */
static unsigned char prefix_codes[] = {
@@ -63,20 +63,20 @@ static unsigned char prefix_codes[] = {
static unsigned int reg_rop[] = {
0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
};
-static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };
static unsigned int imm_wop[] = { 0xC6, 0xC7 };
-static unsigned int rw8[] = { 0xC6, 0x88, 0x8A };
+static unsigned int rw8[] = { 0xC6, 0x88, 0x8A, 0xAA };
static unsigned int rw32[] = {
- 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+ 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB
};
/* 8 bit only */
-static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F };
+static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F, 0xAA };
/* 16 bit only */
static unsigned int mw16[] = { 0xB70F, 0xBF0F };
/* 16 or 32 bit */
static unsigned int mw32[] = { 0xC7 };
/* 16, 32 or 64 bit */
-static unsigned int mw64[] = { 0x89, 0x8B };
+static unsigned int mw64[] = { 0x89, 0x8B, 0xAB };
#endif /* not __i386__ */
struct prefix_bits {
@@ -410,7 +410,6 @@ static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
{
unsigned int opcode;
- unsigned char mod_rm;
int reg;
unsigned char *p;
struct prefix_bits prf;
@@ -437,8 +436,13 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
goto err;
do_work:
- mod_rm = *p;
- reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
+ /* for STOS, the source register is fixed (AL/AX/EAX/RAX) */
+ if (opcode == 0xAA || opcode == 0xAB) {
+ reg = arg_AX;
+ } else {
+ unsigned char mod_rm = *p;
+ reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
+ }
switch (get_ins_reg_width(ins_addr)) {
case 1:
return *get_reg_w8(reg, prf.rex, regs);
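
STOS (0xAA/0xAB) carries no mod/rm byte; the value it stores always comes from AL/AX/EAX/RAX, which is why the decoder above special-cases it instead of extracting a register field. A standalone sketch of that dispatch:

	#include <stdio.h>

	/* Return the number of the register whose value a store writes,
	 * special-casing STOS, whose source is architecturally (R)AX. */
	static int store_src_reg(const unsigned char *insn, int rexr)
	{
		unsigned char op = insn[0];

		if (op == 0xAA || op == 0xAB)	/* stosb / stos{w,l,q} */
			return 0;		/* the AX family */

		/* e.g. mov r->m (0x88/0x89): reg field of mod/rm */
		return ((insn[1] >> 3) & 0x7) | (rexr << 3);
	}

	int main(void)
	{
		unsigned char stosb[] = { 0xAA };
		unsigned char movl[]  = { 0x89, 0x38 }; /* mov %edi,(%rax) */

		/* prints "0 7" */
		printf("%d %d\n", store_src_reg(stosb, 0),
				  store_src_reg(movl, 0));
		return 0;
	}
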
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 97da2ba9344b..55253095be84 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -96,6 +96,7 @@ EXPORT_SYMBOL(pcibios_align_resource);
* the fact the PCI specs explicitly allow address decoders to be
* shared between expansion ROMs and other resource regions, it's
* at least dangerous)
+ * - bad resource sizes or overlaps with other regions
*
* Our solution:
* (1) Allocate resources for all buses behind PCI-to-PCI bridges.
@@ -136,6 +137,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
* child resource allocations in this
* range.
*/
+ r->start = r->end = 0;
r->flags = 0;
}
}
@@ -182,6 +184,7 @@ static void __init pcibios_allocate_resources(int pass)
idx, r, disabled, pass);
if (pci_claim_resource(dev, idx) < 0) {
/* We'll assign a new address later */
+ dev->fw_addr[idx] = r->start;
r->end -= r->start;
r->start = 0;
}
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 0db5eaf54560..8d460eaf524f 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -11,28 +11,14 @@
*/
static void __devinit pcibios_fixup_peer_bridges(void)
{
- int n, devfn;
- long node;
+ int n;
if (pcibios_last_bus <= 0 || pcibios_last_bus > 0xff)
return;
DBG("PCI: Peer bridge fixup\n");
- for (n=0; n <= pcibios_last_bus; n++) {
- u32 l;
- if (pci_find_bus(0, n))
- continue;
- node = get_mp_bus_to_node(n);
- for (devfn = 0; devfn < 256; devfn += 8) {
- if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) &&
- l != 0x0000 && l != 0xffff) {
- DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l);
- printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
- pci_scan_bus_on_node(n, &pci_root_ops, node);
- break;
- }
- }
- }
+ for (n=0; n <= pcibios_last_bus; n++)
+ pcibios_scan_specific_bus(n);
}
int __init pci_legacy_init(void)
@@ -50,6 +36,28 @@ int __init pci_legacy_init(void)
return 0;
}
+void pcibios_scan_specific_bus(int busn)
+{
+ int devfn;
+ long node;
+ u32 l;
+
+ if (pci_find_bus(0, busn))
+ return;
+
+ node = get_mp_bus_to_node(busn);
+ for (devfn = 0; devfn < 256; devfn += 8) {
+ if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) &&
+ l != 0x0000 && l != 0xffff) {
+ DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l);
+ printk(KERN_INFO "PCI: Discovered peer bus %02x\n", busn);
+ pci_scan_bus_on_node(busn, &pci_root_ops, node);
+ return;
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(pcibios_scan_specific_bus);
+
int __init pci_subsys_init(void)
{
/*
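
The peer-bridge loop above is factored into pcibios_scan_specific_bus() and exported, so other code can probe a single legacy bus on demand instead of re-walking all of them. A hedged usage sketch (the handler is hypothetical and assumes the prototype introduced by this patch is visible):

	#include <linux/pci.h>

	static void handle_bus_hotplug_hint(int busn)
	{
		if (busn < 0 || busn > 0xff)
			return;
		/* No-op if the bus is already known; scans it otherwise. */
		pcibios_scan_specific_bus(busn);
	}
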
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
index 7ef3a2735df3..cb29191cee58 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/mrst.c
@@ -66,8 +66,9 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
devfn, pos, 4, &pcie_cap))
return 0;
- if (pcie_cap == 0xffffffff)
- return 0;
+ if (PCI_EXT_CAP_ID(pcie_cap) == 0x0000 ||
+ PCI_EXT_CAP_ID(pcie_cap) == 0xffff)
+ break;
if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) {
raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
@@ -76,7 +77,7 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
return pos;
}
- pos = pcie_cap >> 20;
+ pos = PCI_EXT_CAP_NEXT(pcie_cap);
}
return 0;
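
The fix above walks the PCIe extended-capability list with PCI_EXT_CAP_ID()/PCI_EXT_CAP_NEXT() and stops on an all-zeros or all-ones capability ID instead of only checking for 0xffffffff. The kernel's pci_find_ext_capability() does this generically for a struct pci_dev; as a sketch, the shape of the corrected loop:

	#include <linux/pci.h>

	static int find_ext_cap(struct pci_dev *dev, u16 want)
	{
		int pos = 0x100;	/* extended caps start at 0x100 */
		int ttl = 480;		/* guard against looping lists */
		u32 hdr;

		while (pos && ttl--) {
			if (pci_read_config_dword(dev, pos, &hdr))
				break;
			if (PCI_EXT_CAP_ID(hdr) == 0x0000 ||
			    PCI_EXT_CAP_ID(hdr) == 0xffff)
				break;
			if (PCI_EXT_CAP_ID(hdr) == want)
				return pos;
			pos = PCI_EXT_CAP_NEXT(hdr);
		}
		return 0;
	}
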
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 0a979f3e5b8a..1290ba54b350 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -105,6 +105,8 @@ static void __save_processor_state(struct saved_context *ctxt)
ctxt->cr4 = read_cr4();
ctxt->cr8 = read_cr8();
#endif
+ ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE,
+ &ctxt->misc_enable);
}
/* Needed by apm.c */
@@ -152,6 +154,8 @@ static void fix_processor_context(void)
*/
static void __restore_processor_state(struct saved_context *ctxt)
{
+ if (ctxt->misc_enable_saved)
+ wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable);
/*
* control registers
*/
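
Suspend now snapshots MSR_IA32_MISC_ENABLE with rdmsrl_safe(), so CPUs lacking the MSR are skipped cleanly, and resume restores it before anything else. The save/restore pairing in isolation, as a sketch:

	#include <linux/types.h>
	#include <asm/msr.h>

	static u64 saved_misc_enable;
	static bool misc_enable_saved;

	static void my_save(void)
	{
		misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE,
						 &saved_misc_enable);
	}

	static void my_restore(void)
	{
		if (misc_enable_saved)
			wrmsrl(MSR_IA32_MISC_ENABLE, saved_misc_enable);
	}
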
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 987267f79bf5..a9c661108034 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -60,6 +60,6 @@ static void xen_vcpu_notify_restore(void *data)
void xen_arch_resume(void)
{
- smp_call_function(xen_vcpu_notify_restore,
- (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
+ on_each_cpu(xen_vcpu_notify_restore,
+ (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
}
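
The xen fix matters because smp_call_function() runs the callback on every online CPU except the caller, so the CPU driving the resume never saw CLOCK_EVT_NOTIFY_RESUME; on_each_cpu() includes the local CPU, invoking the function there with interrupts disabled. The difference, as a sketch:

	#include <linux/smp.h>
	#include <linux/kernel.h>

	static void note_resume(void *info)
	{
		pr_info("cpu %d notified\n", smp_processor_id());
	}

	static void notify_cpus(void)
	{
		/* Every online CPU except this one: */
		smp_call_function(note_resume, NULL, 1);

		/* Every online CPU, this one included: */
		on_each_cpu(note_resume, NULL, 1);
	}
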