summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig9
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/early_serial_console.c14
-rw-r--r--arch/x86/ia32/ia32_aout.c22
-rw-r--r--arch/x86/ia32/ia32entry.S22
-rw-r--r--arch/x86/include/asm/amd_iommu_proto.h6
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h12
-rw-r--r--arch/x86/include/asm/bitops.h2
-rw-r--r--arch/x86/include/asm/compat.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h5
-rw-r--r--arch/x86/include/asm/hpet.h1
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h2
-rw-r--r--arch/x86/include/asm/iomap.h4
-rw-r--r--arch/x86/include/asm/kvm_emulate.h7
-rw-r--r--arch/x86/include/asm/kvm_host.h24
-rw-r--r--arch/x86/include/asm/pci.h6
-rw-r--r--arch/x86/include/asm/pgtable_32.h1
-rw-r--r--arch/x86/include/asm/syscalls.h5
-rw-r--r--arch/x86/include/asm/trampoline.h5
-rw-r--r--arch/x86/include/asm/tsc.h2
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/amd_iommu.c4
-rw-r--r--arch/x86/kernel/amd_iommu_init.c67
-rw-r--r--arch/x86/kernel/apic/io_apic.c13
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c6
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/cpu.h1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c18
-rw-r--r--arch/x86/kernel/cpu/intel.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c13
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c12
-rw-r--r--arch/x86/kernel/cpu/perf_event.c71
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c96
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c10
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/early-quirks.c18
-rw-r--r--arch/x86/kernel/head_32.S8
-rw-r--r--arch/x86/kernel/hpet.c33
-rw-r--r--arch/x86/kernel/hw_breakpoint.c40
-rw-r--r--arch/x86/kernel/i387.c1
-rw-r--r--arch/x86/kernel/kgdb.c2
-rw-r--r--arch/x86/kernel/kprobes.c25
-rw-r--r--arch/x86/kernel/module.c3
-rw-r--r--arch/x86/kernel/process.c5
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/smpboot.c51
-rw-r--r--arch/x86/kernel/sys_i386_32.c4
-rw-r--r--arch/x86/kernel/trampoline.c17
-rw-r--r--arch/x86/kernel/tsc.c38
-rw-r--r--arch/x86/kvm/emulate.c9
-rw-r--r--arch/x86/kvm/i8254.c3
-rw-r--r--arch/x86/kvm/i8259.c3
-rw-r--r--arch/x86/kvm/irq.h2
-rw-r--r--arch/x86/kvm/svm.c17
-rw-r--r--arch/x86/kvm/vmx.c24
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/lguest/boot.c13
-rw-r--r--arch/x86/mm/iomap_32.c6
-rw-r--r--arch/x86/mm/srat_64.c8
-rw-r--r--arch/x86/oprofile/nmi_int.c27
-rw-r--r--arch/x86/power/cpu.c2
-rw-r--r--arch/x86/xen/platform-pci-unplug.c18
-rw-r--r--arch/x86/xen/time.c5
65 files changed, 566 insertions, 296 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a84fc34c8f77..cea0cd9a316f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -245,6 +245,11 @@ config ARCH_HWEIGHT_CFLAGS
config KTIME_SCALAR
def_bool X86_32
+
+config ARCH_CPU_PROBE_RELEASE
+ def_bool y
+ depends on HOTPLUG_CPU
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -749,11 +754,11 @@ config IOMMU_API
def_bool (AMD_IOMMU || DMAR)
config MAXSMP
- bool "Configure Maximum number of SMP Processors and NUMA Nodes"
+ bool "Enable Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
select CPUMASK_OFFSTACK
---help---
- Configure maximum number of CPUS and NUMA Nodes for this architecture.
+ Enable maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
config NR_CPUS
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8aa1b59b9074..e8c8881351b3 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -74,7 +74,7 @@ endif
ifdef CONFIG_CC_STACKPROTECTOR
cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
- ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y)
+ ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y)
stackp-y := -fstack-protector
KBUILD_CFLAGS += $(stackp-y)
else
diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c
index 030f4b93e255..5df2869c874b 100644
--- a/arch/x86/boot/early_serial_console.c
+++ b/arch/x86/boot/early_serial_console.c
@@ -58,7 +58,19 @@ static void parse_earlyprintk(void)
if (arg[pos] == ',')
pos++;
- if (!strncmp(arg, "ttyS", 4)) {
+ /*
+ * make sure we have
+ * "serial,0x3f8,115200"
+ * "serial,ttyS0,115200"
+ * "ttyS0,115200"
+ */
+ if (pos == 7 && !strncmp(arg + pos, "0x", 2)) {
+ port = simple_strtoull(arg + pos, &e, 16);
+ if (port == 0 || arg + pos == e)
+ port = DEFAULT_SERIAL_PORT;
+ else
+ pos = e - arg;
+ } else if (!strncmp(arg + pos, "ttyS", 4)) {
static const int bases[] = { 0x3f8, 0x2f8 };
int idx = 0;
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 0350311906ae..2d93bdbc9ac0 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -34,7 +34,7 @@
#include <asm/ia32.h>
#undef WARN_OLD
-#undef CORE_DUMP /* probably broken */
+#undef CORE_DUMP /* definitely broken */
static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs);
static int load_aout_library(struct file *);
@@ -131,21 +131,15 @@ static void set_brk(unsigned long start, unsigned long end)
* macros to write out all the necessary info.
*/
-static int dump_write(struct file *file, const void *addr, int nr)
-{
- return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
+#include <linux/coredump.h>
#define DUMP_WRITE(addr, nr) \
if (!dump_write(file, (void *)(addr), (nr))) \
goto end_coredump;
-#define DUMP_SEEK(offset) \
- if (file->f_op->llseek) { \
- if (file->f_op->llseek(file, (offset), 0) != (offset)) \
- goto end_coredump; \
- } else \
- file->f_pos = (offset)
+#define DUMP_SEEK(offset) \
+ if (!dump_seek(file, offset)) \
+ goto end_coredump;
#define START_DATA() (u.u_tsize << PAGE_SHIFT)
#define START_STACK(u) (u.start_stack)
@@ -217,12 +211,6 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
dump_size = dump.u_ssize << PAGE_SHIFT;
DUMP_WRITE(dump_start, dump_size);
}
- /*
- * Finally dump the task struct. Not be used by gdb, but
- * could be useful
- */
- set_fs(KERNEL_DS);
- DUMP_WRITE(current, sizeof(*current));
end_coredump:
set_fs(fs);
return has_dumped;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index b86feabed69b..518bb99c3394 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -50,7 +50,12 @@
/*
* Reload arg registers from stack in case ptrace changed them.
* We don't reload %eax because syscall_trace_enter() returned
- * the value it wants us to use in the table lookup.
+ * the %rax value we should see. Instead, we just truncate that
+ * value to 32 bits again as we did on entry from user mode.
+ * If it's a new value set by user_regset during entry tracing,
+ * this matches the normal truncation of the user-mode value.
+ * If it's -1 to make us punt the syscall, then (u32)-1 is still
+ * an appropriately invalid value.
*/
.macro LOAD_ARGS32 offset, _r9=0
.if \_r9
@@ -60,6 +65,7 @@
movl \offset+48(%rsp),%edx
movl \offset+56(%rsp),%esi
movl \offset+64(%rsp),%edi
+ movl %eax,%eax /* zero extension */
.endm
.macro CFI_STARTPROC32 simple
@@ -153,7 +159,7 @@ ENTRY(ia32_sysenter_target)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
sysenter_do_call:
IA32_ARG_FIXUP
@@ -195,7 +201,7 @@ sysexit_from_sys_call:
movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
call audit_syscall_entry
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
movl %ebx,%edi /* reload 1st syscall arg */
movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
@@ -248,7 +254,7 @@ sysenter_tracesys:
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call
CFI_ENDPROC
@@ -314,7 +320,7 @@ ENTRY(ia32_cstar_target)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
CFI_REMEMBER_STATE
jnz cstar_tracesys
- cmpl $IA32_NR_syscalls-1,%eax
+ cmpq $IA32_NR_syscalls-1,%rax
ja ia32_badsys
cstar_do_call:
IA32_ARG_FIXUP 1
@@ -367,7 +373,7 @@ cstar_tracesys:
LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
RESTORE_REST
xchgl %ebp,%r9d
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
jmp cstar_do_call
END(ia32_cstar_target)
@@ -425,7 +431,7 @@ ENTRY(ia32_syscall)
orl $TS_COMPAT,TI_status(%r10)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz ia32_tracesys
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
ia32_do_call:
IA32_ARG_FIXUP
@@ -444,7 +450,7 @@ ia32_tracesys:
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
END(ia32_syscall)
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index d2544f1d705d..cb030374b90a 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -38,4 +38,10 @@ static inline void amd_iommu_stats_init(void) { }
#endif /* !CONFIG_AMD_IOMMU_STATS */
+static inline bool is_rd890_iommu(struct pci_dev *pdev)
+{
+ return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
+ (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
+}
+
#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 7014e88bc779..08616180deaf 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -368,6 +368,9 @@ struct amd_iommu {
/* capabilities of that IOMMU read from ACPI */
u32 cap;
+ /* flags read from acpi table */
+ u8 acpi_flags;
+
/*
* Capability pointer. There could be more than one IOMMU per PCI
* device function if there are more than one AMD IOMMU capability
@@ -411,6 +414,15 @@ struct amd_iommu {
/* default dma_ops domain for that IOMMU */
struct dma_ops_domain *default_dom;
+
+ /*
+ * This array is required to work around a potential BIOS bug.
+ * The BIOS may miss to restore parts of the PCI configuration
+ * space when the system resumes from S3. The result is that the
+ * IOMMU does not execute commands anymore which leads to system
+ * failure.
+ */
+ u32 cache_cfg[4];
};
/*
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 545776efeb16..bafd80defa43 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -309,7 +309,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
{
return ((1UL << (nr % BITS_PER_LONG)) &
- (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
+ (addr[nr / BITS_PER_LONG])) != 0;
}
static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 306160e58b48..1d9cd27c2920 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -205,7 +205,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
return (u32)(unsigned long)uptr;
}
-static inline void __user *compat_alloc_user_space(long len)
+static inline void __user *arch_compat_alloc_user_space(long len)
{
struct pt_regs *regs = task_pt_regs(current);
return (void __user *)regs->sp - len;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 781a50b29a49..3f76523589af 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,7 @@
#define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */
#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
@@ -296,6 +297,7 @@ extern const char * const x86_power_flags[32];
#endif /* CONFIG_X86_64 */
+#if __GNUC__ >= 4
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
* These are only valid after alternatives have run, but will statically
@@ -304,7 +306,7 @@ extern const char * const x86_power_flags[32];
*/
static __always_inline __pure bool __static_cpu_has(u16 bit)
{
-#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
+#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5
asm goto("1: jmp %l[t_no]\n"
"2:\n"
".section .altinstructions,\"a\"\n"
@@ -345,7 +347,6 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
#endif
}
-#if __GNUC__ >= 4
#define static_cpu_has(bit) \
( \
__builtin_constant_p(boot_cpu_has(bit)) ? \
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 004e6e25e913..1d5c08a1bdfd 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -68,7 +68,6 @@ extern unsigned long force_hpet_address;
extern u8 hpet_blockid;
extern int hpet_force_user;
extern u8 hpet_msi_disable;
-extern u8 hpet_readback_cmp;
extern int is_hpet_enabled(void);
extern int hpet_enable(void);
extern void hpet_disable(void);
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 528a11e8d3e3..824ca07860d0 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -20,7 +20,7 @@ struct arch_hw_breakpoint {
#include <linux/list.h>
/* Available HW breakpoint length encodings */
-#define X86_BREAKPOINT_LEN_X 0x00
+#define X86_BREAKPOINT_LEN_X 0x40
#define X86_BREAKPOINT_LEN_1 0x40
#define X86_BREAKPOINT_LEN_2 0x44
#define X86_BREAKPOINT_LEN_4 0x4c
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index f35eb45d6576..c4191b3b7056 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -26,11 +26,11 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
-void *
+void __iomem *
iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
void
-iounmap_atomic(void *kvaddr, enum km_type type);
+iounmap_atomic(void __iomem *kvaddr, enum km_type type);
int
iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot);
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 51cfd730ac5d..1f99ecfc48e1 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -152,9 +152,14 @@ struct x86_emulate_ops {
struct operand {
enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
unsigned int bytes;
- unsigned long orig_val, *ptr;
+ union {
+ unsigned long orig_val;
+ u64 orig_val64;
+ };
+ unsigned long *ptr;
union {
unsigned long val;
+ u64 val64;
char valptr[sizeof(unsigned long) + 2];
};
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 502e53f999cf..c52e2eb40a1e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -652,20 +652,6 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
return (struct kvm_mmu_page *)page_private(page);
}
-static inline u16 kvm_read_fs(void)
-{
- u16 seg;
- asm("mov %%fs, %0" : "=g"(seg));
- return seg;
-}
-
-static inline u16 kvm_read_gs(void)
-{
- u16 seg;
- asm("mov %%gs, %0" : "=g"(seg));
- return seg;
-}
-
static inline u16 kvm_read_ldt(void)
{
u16 ldt;
@@ -673,16 +659,6 @@ static inline u16 kvm_read_ldt(void)
return ldt;
}
-static inline void kvm_load_fs(u16 sel)
-{
- asm("mov %0, %%fs" : : "rm"(sel));
-}
-
-static inline void kvm_load_gs(u16 sel)
-{
- asm("mov %0, %%gs" : : "rm"(sel));
-}
-
static inline void kvm_load_ldt(u16 sel)
{
asm("lldt %0" : : "rm"(sel));
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 404a880ea325..d395540ff894 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -27,6 +27,9 @@ extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
int node);
extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
+#ifdef CONFIG_PCI
+
+#ifdef CONFIG_PCI_DOMAINS
static inline int pci_domain_nr(struct pci_bus *bus)
{
struct pci_sysdata *sd = bus->sysdata;
@@ -37,13 +40,12 @@ static inline int pci_proc_domain(struct pci_bus *bus)
{
return pci_domain_nr(bus);
}
-
+#endif
/* Can be used to override the logic in pci_scan_bus for skipping
already-configured bus numbers - to be used for buggy BIOSes
or architectures with incomplete PCI setup by the loader */
-#ifdef CONFIG_PCI
extern unsigned int pcibios_assign_all_busses(void);
extern int pci_legacy_init(void);
# ifdef CONFIG_ACPI
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 2984a25ff383..f686f49e8b7b 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -26,6 +26,7 @@ struct mm_struct;
struct vm_area_struct;
extern pgd_t swapper_pg_dir[1024];
+extern pgd_t trampoline_pg_dir[1024];
static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index feb2ff9bfc2d..f1d8b441fc77 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -23,8 +23,9 @@ long sys_iopl(unsigned int, struct pt_regs *);
/* kernel/process.c */
int sys_fork(struct pt_regs *);
int sys_vfork(struct pt_regs *);
-long sys_execve(const char __user *, char __user * __user *,
- char __user * __user *, struct pt_regs *);
+long sys_execve(const char __user *,
+ const char __user *const __user *,
+ const char __user *const __user *, struct pt_regs *);
long sys_clone(unsigned long, unsigned long, void __user *,
void __user *, struct pt_regs *);
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index cb507bb05d79..4dde797c0578 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -13,14 +13,17 @@ extern unsigned char *trampoline_base;
extern unsigned long init_rsp;
extern unsigned long initial_code;
+extern unsigned long initial_page_table;
extern unsigned long initial_gs;
#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
extern unsigned long setup_trampoline(void);
+extern void __init setup_trampoline_page_table(void);
extern void __init reserve_trampoline_memory(void);
#else
-static inline void reserve_trampoline_memory(void) {};
+static inline void setup_trampoline_page_table(void) {}
+static inline void reserve_trampoline_memory(void) {}
#endif /* CONFIG_X86_TRAMPOLINE */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index c0427295e8f5..1ca132fc0d03 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -59,5 +59,7 @@ extern void check_tsc_sync_source(int cpu);
extern void check_tsc_sync_target(void);
extern int notsc_setup(char *);
+extern void save_sched_clock_state(void);
+extern void restore_sched_clock_state(void);
#endif /* _ASM_X86_TSC_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0925676266bd..fedf32a8c3ec 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -11,6 +11,8 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
+CFLAGS_REMOVE_pvclock.o = -pg
+CFLAGS_REMOVE_kvmclock.o = -pg
CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index fb7a5f052e2b..fb16f17e59be 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -61,7 +61,7 @@ struct cstate_entry {
unsigned int ecx;
} states[ACPI_PROCESSOR_MAX_POWER];
};
-static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */
+static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */
static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index fa044e1e30a2..679b6450382b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1953,6 +1953,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
size_t size,
int dir)
{
+ dma_addr_t flush_addr;
dma_addr_t i, start;
unsigned int pages;
@@ -1960,6 +1961,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
(dma_addr + size > dma_dom->aperture_size))
return;
+ flush_addr = dma_addr;
pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
dma_addr &= PAGE_MASK;
start = dma_addr;
@@ -1974,7 +1976,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
dma_ops_free_addresses(dma_dom, dma_addr, pages);
if (amd_iommu_unmap_flush || dma_dom->need_flush) {
- iommu_flush_pages(&dma_dom->domain, dma_addr, size);
+ iommu_flush_pages(&dma_dom->domain, flush_addr, size);
dma_dom->need_flush = false;
}
}
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 3cc63e2b8dd4..5a170cbbbed8 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -632,6 +632,13 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
iommu->last_device = calc_devid(MMIO_GET_BUS(range),
MMIO_GET_LD(range));
iommu->evt_msi_num = MMIO_MSI_NUM(misc);
+
+ if (is_rd890_iommu(iommu->dev)) {
+ pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]);
+ pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]);
+ pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]);
+ pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]);
+ }
}
/*
@@ -649,29 +656,9 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
struct ivhd_entry *e;
/*
- * First set the recommended feature enable bits from ACPI
- * into the IOMMU control registers
+ * First save the recommended feature enable bits from ACPI
*/
- h->flags & IVHD_FLAG_HT_TUN_EN_MASK ?
- iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
- iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
-
- h->flags & IVHD_FLAG_PASSPW_EN_MASK ?
- iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
- iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
-
- h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
- iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
- iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
-
- h->flags & IVHD_FLAG_ISOC_EN_MASK ?
- iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
- iommu_feature_disable(iommu, CONTROL_ISOC_EN);
-
- /*
- * make IOMMU memory accesses cache coherent
- */
- iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
+ iommu->acpi_flags = h->flags;
/*
* Done. Now parse the device entries
@@ -1116,6 +1103,40 @@ static void init_device_table(void)
}
}
+static void iommu_init_flags(struct amd_iommu *iommu)
+{
+ iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
+ iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
+ iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
+
+ iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
+ iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
+ iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
+
+ iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
+ iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
+ iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
+
+ iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
+ iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
+ iommu_feature_disable(iommu, CONTROL_ISOC_EN);
+
+ /*
+ * make IOMMU memory accesses cache coherent
+ */
+ iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
+}
+
+static void iommu_apply_quirks(struct amd_iommu *iommu)
+{
+ if (is_rd890_iommu(iommu->dev)) {
+ pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]);
+ pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]);
+ pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]);
+ pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]);
+ }
+}
+
/*
* This function finally enables all IOMMUs found in the system after
* they have been initialized
@@ -1126,6 +1147,8 @@ static void enable_iommus(void)
for_each_iommu(iommu) {
iommu_disable(iommu);
+ iommu_apply_quirks(iommu);
+ iommu_init_flags(iommu);
iommu_set_device_table(iommu);
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4dc0084ec1b1..5c5b8f3dddb5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -306,14 +306,19 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
old_cfg = old_desc->chip_data;
- memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+ cfg->vector = old_cfg->vector;
+ cfg->move_in_progress = old_cfg->move_in_progress;
+ cpumask_copy(cfg->domain, old_cfg->domain);
+ cpumask_copy(cfg->old_domain, old_cfg->old_domain);
init_copy_irq_2_pin(old_cfg, cfg, node);
}
-static void free_irq_cfg(struct irq_cfg *old_cfg)
+static void free_irq_cfg(struct irq_cfg *cfg)
{
- kfree(old_cfg);
+ free_cpumask_var(cfg->domain);
+ free_cpumask_var(cfg->old_domain);
+ kfree(cfg);
}
void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
@@ -1728,6 +1733,8 @@ __apicdebuginit(void) print_IO_APIC(void)
struct irq_pin_list *entry;
cfg = desc->chip_data;
+ if (!cfg)
+ continue;
entry = cfg->irq_2_pin;
if (!entry)
continue;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 7b598b84c902..f744f54cb248 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -698,9 +698,11 @@ void __init uv_system_init(void)
for (j = 0; j < 64; j++) {
if (!test_bit(j, &present))
continue;
- uv_blade_info[blade].pnode = (i * 64 + j);
+ pnode = (i * 64 + j);
+ uv_blade_info[blade].pnode = pnode;
uv_blade_info[blade].nr_possible_cpus = 0;
uv_blade_info[blade].nr_online_cpus = 0;
+ max_pnode = max(pnode, max_pnode);
blade++;
}
}
@@ -738,7 +740,6 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid);
uv_node_to_blade[nid] = blade;
uv_cpu_to_blade[cpu] = blade;
- max_pnode = max(pnode, max_pnode);
}
/* Add blade/pnode info for nodes without cpus */
@@ -750,7 +751,6 @@ void __init uv_system_init(void)
pnode = (paddr >> m_val) & pnode_mask;
blade = boot_pnode_to_blade(pnode);
uv_node_to_blade[nid] = blade;
- max_pnode = max(pnode, max_pnode);
}
map_gru_high(max_pnode);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 60a57b13082d..ba5f62f45f01 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -669,7 +669,7 @@ bool cpu_has_amd_erratum(const int *erratum)
}
/* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 8) | cpu->x86_mask;
+ ms = (cpu->x86_model << 4) | cpu->x86_mask;
while ((range = *erratum++))
if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
(ms >= AMD_MODEL_RANGE_START(range)) &&
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 490dac63c2d2..f2f9ac7da25c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -545,7 +545,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
}
}
-static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
+void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 tfms, xlvl;
u32 ebx;
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 3624e8a0f71b..f668bb1f7d43 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -33,5 +33,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
*const __x86_cpu_dev_end[];
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+extern void get_cpu_cap(struct cpuinfo_x86 *c);
#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 994230d4dc4e..4f6f679f2799 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -368,16 +368,22 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
return -ENODEV;
out_obj = output.pointer;
- if (out_obj->type != ACPI_TYPE_BUFFER)
- return -ENODEV;
+ if (out_obj->type != ACPI_TYPE_BUFFER) {
+ ret = -ENODEV;
+ goto out_free;
+ }
errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
- if (errors)
- return -ENODEV;
+ if (errors) {
+ ret = -ENODEV;
+ goto out_free;
+ }
supported = *((u32 *)(out_obj->buffer.pointer + 4));
- if (!(supported & 0x1))
- return -ENODEV;
+ if (!(supported & 0x1)) {
+ ret = -ENODEV;
+ goto out_free;
+ }
out_free:
kfree(output.pointer);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 85f69cdeae10..b4389441efbb 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -39,6 +39,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
c->cpuid_level = cpuid_eax(0);
+ get_cpu_cap(c);
}
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 224392d8fe8c..39aaee5c1ab2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -141,6 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
address = (low & MASK_BLKPTR_LO) >> 21;
if (!address)
break;
+
address += MCG_XBLK_ADDR;
} else
++address;
@@ -148,12 +149,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
if (rdmsr_safe(address, &low, &high))
break;
- if (!(high & MASK_VALID_HI)) {
- if (block)
- continue;
- else
- break;
- }
+ if (!(high & MASK_VALID_HI))
+ continue;
if (!(high & MASK_CNTP_HI) ||
(high & MASK_LOCKED_HI))
@@ -530,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
err = -ENOMEM;
goto out;
}
- if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
+ if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
kfree(b);
err = -ENOMEM;
goto out;
@@ -543,7 +540,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
#ifndef CONFIG_SMP
cpumask_setall(b->cpus);
#else
- cpumask_copy(b->cpus, c->llc_shared_map);
+ cpumask_set_cpu(cpu, b->cpus);
#endif
per_cpu(threshold_banks, cpu)[bank] = b;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index c2a8b26d4fea..169d8804a9f8 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -202,10 +202,11 @@ static int therm_throt_process(bool new_event, int event, int level)
#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
-static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
+static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
+ unsigned int cpu)
{
int err;
- struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group);
if (err)
@@ -215,7 +216,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
err = sysfs_add_file_to_group(&sys_dev->kobj,
&attr_core_power_limit_count.attr,
thermal_attr_group.name);
- if (cpu_has(c, X86_FEATURE_PTS))
+ if (cpu_has(c, X86_FEATURE_PTS)) {
err = sysfs_add_file_to_group(&sys_dev->kobj,
&attr_package_throttle_count.attr,
thermal_attr_group.name);
@@ -223,6 +224,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
err = sysfs_add_file_to_group(&sys_dev->kobj,
&attr_package_power_limit_count.attr,
thermal_attr_group.name);
+ }
return err;
}
@@ -251,7 +253,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
mutex_lock(&therm_cpu_lock);
- err = thermal_throttle_add_dev(sys_dev);
+ err = thermal_throttle_add_dev(sys_dev, cpu);
mutex_unlock(&therm_cpu_lock);
WARN_ON(err);
break;
@@ -287,7 +289,7 @@ static __init int thermal_throttle_init_device(void)
#endif
/* connect live CPUs to sysfs */
for_each_online_cpu(cpu) {
- err = thermal_throttle_add_dev(get_cpu_sysdev(cpu));
+ err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu);
WARN_ON(err);
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f2da20fda02d..03a5b0385ad6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -102,6 +102,7 @@ struct cpu_hw_events {
*/
struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
int enabled;
int n_events;
@@ -1010,6 +1011,7 @@ static int x86_pmu_start(struct perf_event *event)
x86_perf_event_set_period(event);
cpuc->events[idx] = event;
__set_bit(idx, cpuc->active_mask);
+ __set_bit(idx, cpuc->running);
x86_pmu.enable(event);
perf_event_update_userpage(event);
@@ -1141,8 +1143,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- if (!test_bit(idx, cpuc->active_mask))
+ if (!test_bit(idx, cpuc->active_mask)) {
+ /*
+ * Though we deactivated the counter some cpus
+ * might still deliver spurious interrupts still
+ * in flight. Catch them:
+ */
+ if (__test_and_clear_bit(idx, cpuc->running))
+ handled++;
continue;
+ }
event = cpuc->events[idx];
hwc = &event->hw;
@@ -1154,7 +1164,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
/*
* event overflow
*/
- handled = 1;
+ handled++;
data.period = event->hw.last_period;
if (!x86_perf_event_set_period(event))
@@ -1200,12 +1210,20 @@ void perf_events_lapic_init(void)
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
+struct pmu_nmi_state {
+ unsigned int marked;
+ int handled;
+};
+
+static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
+
static int __kprobes
perf_event_nmi_handler(struct notifier_block *self,
unsigned long cmd, void *__args)
{
struct die_args *args = __args;
- struct pt_regs *regs;
+ unsigned int this_nmi;
+ int handled;
if (!atomic_read(&active_events))
return NOTIFY_DONE;
@@ -1214,22 +1232,47 @@ perf_event_nmi_handler(struct notifier_block *self,
case DIE_NMI:
case DIE_NMI_IPI:
break;
-
+ case DIE_NMIUNKNOWN:
+ this_nmi = percpu_read(irq_stat.__nmi_count);
+ if (this_nmi != __get_cpu_var(pmu_nmi).marked)
+ /* let the kernel handle the unknown nmi */
+ return NOTIFY_DONE;
+ /*
+ * This one is a PMU back-to-back nmi. Two events
+ * trigger 'simultaneously' raising two back-to-back
+ * NMIs. If the first NMI handles both, the latter
+ * will be empty and daze the CPU. So, we drop it to
+ * avoid false-positive 'unknown nmi' messages.
+ */
+ return NOTIFY_STOP;
default:
return NOTIFY_DONE;
}
- regs = args->regs;
-
apic_write(APIC_LVTPC, APIC_DM_NMI);
- /*
- * Can't rely on the handled return value to say it was our NMI, two
- * events could trigger 'simultaneously' raising two back-to-back NMIs.
- *
- * If the first NMI handles both, the latter will be empty and daze
- * the CPU.
- */
- x86_pmu.handle_irq(regs);
+
+ handled = x86_pmu.handle_irq(args->regs);
+ if (!handled)
+ return NOTIFY_DONE;
+
+ this_nmi = percpu_read(irq_stat.__nmi_count);
+ if ((handled > 1) ||
+ /* the next nmi could be a back-to-back nmi */
+ ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
+ (__get_cpu_var(pmu_nmi).handled > 1))) {
+ /*
+ * We could have two subsequent back-to-back nmis: The
+ * first handles more than one counter, the 2nd
+ * handles only one counter and the 3rd handles no
+ * counter.
+ *
+ * This is the 2nd nmi because the previous was
+ * handling more than one counter. We will mark the
+ * next (3rd) and then drop it if unhandled.
+ */
+ __get_cpu_var(pmu_nmi).marked = this_nmi + 1;
+ __get_cpu_var(pmu_nmi).handled = handled;
+ }
return NOTIFY_STOP;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 214ac860ebe0..ee05c90012d2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added)
* Intel Errata AAP53 (model 30)
* Intel Errata BD53 (model 44)
*
- * These chips need to be 'reset' when adding counters by programming
- * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
- * either in sequence on the same PMC or on different PMCs.
+ * The official story:
+ * These chips need to be 'reset' when adding counters by programming the
+ * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ * in sequence on the same PMC or on different PMCs.
+ *
+ * In practise it appears some of these events do in fact count, and
+ * we need to programm all 4 events.
*/
-static void intel_pmu_nhm_enable_all(int added)
+static void intel_pmu_nhm_workaround(void)
{
- if (added) {
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- int i;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ static const unsigned long nhm_magic[4] = {
+ 0x4300B5,
+ 0x4300D2,
+ 0x4300B1,
+ 0x4300B1
+ };
+ struct perf_event *event;
+ int i;
+
+ /*
+ * The Errata requires below steps:
+ * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+ * 2) Configure 4 PERFEVTSELx with the magic events and clear
+ * the corresponding PMCx;
+ * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
+ * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+ * 5) Clear 4 pairs of ERFEVTSELx and PMCx;
+ */
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
+ /*
+ * The real steps we choose are a little different from above.
+ * A) To reduce MSR operations, we don't run step 1) as they
+ * are already cleared before this function is called;
+ * B) Call x86_perf_event_update to save PMCx before configuring
+ * PERFEVTSELx with magic number;
+ * C) With step 5), we do clear only when the PERFEVTSELx is
+ * not used currently.
+ * D) Call x86_perf_event_set_period to restore PMCx;
+ */
+
+ /* We always operate 4 pairs of PERF Counters */
+ for (i = 0; i < 4; i++) {
+ event = cpuc->events[i];
+ if (event)
+ x86_perf_event_update(event);
+ }
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+ for (i = 0; i < 4; i++) {
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+ wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+ }
- for (i = 0; i < 3; i++) {
- struct perf_event *event = cpuc->events[i];
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
- if (!event)
- continue;
+ for (i = 0; i < 4; i++) {
+ event = cpuc->events[i];
+ if (event) {
+ x86_perf_event_set_period(event);
__x86_pmu_enable_event(&event->hw,
- ARCH_PERFMON_EVENTSEL_ENABLE);
- }
+ ARCH_PERFMON_EVENTSEL_ENABLE);
+ } else
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
}
+}
+
+static void intel_pmu_nhm_enable_all(int added)
+{
+ if (added)
+ intel_pmu_nhm_workaround();
intel_pmu_enable_all(added);
}
@@ -667,7 +712,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
int bit, loops;
- u64 ack, status;
+ u64 status;
+ int handled = 0;
perf_sample_data_init(&data, 0);
@@ -683,6 +729,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
loops = 0;
again:
+ intel_pmu_ack_status(status);
if (++loops > 100) {
WARN_ONCE(1, "perfevents: irq loop stuck!\n");
perf_event_print_debug();
@@ -691,19 +738,22 @@ again:
}
inc_irq_stat(apic_perf_irqs);
- ack = status;
intel_pmu_lbr_read();
/*
* PEBS overflow sets bit 62 in the global status register
*/
- if (__test_and_clear_bit(62, (unsigned long *)&status))
+ if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+ handled++;
x86_pmu.drain_pebs(regs);
+ }
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
+ handled++;
+
if (!test_bit(bit, cpuc->active_mask))
continue;
@@ -716,8 +766,6 @@ again:
x86_pmu_stop(event);
}
- intel_pmu_ack_status(ack);
-
/*
* Repeat if there is more work to be done:
*/
@@ -727,7 +775,7 @@ again:
done:
intel_pmu_enable_all(0);
- return 1;
+ return handled;
}
static struct event_constraint *
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index febb12cea795..249015173992 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -497,6 +497,8 @@ static int p4_hw_config(struct perf_event *event)
event->hw.config |= event->attr.config &
(p4_config_pack_escr(P4_ESCR_MASK_HT) |
p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
+
+ event->hw.config &= ~P4_CCCR_FORCE_OVF;
}
rc = x86_setup_perfctr(event);
@@ -658,8 +660,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
int overflow;
- if (!test_bit(idx, cpuc->active_mask))
+ if (!test_bit(idx, cpuc->active_mask)) {
+ /* catch in-flight IRQs */
+ if (__test_and_clear_bit(idx, cpuc->running))
+ handled++;
continue;
+ }
event = cpuc->events[idx];
hwc = &event->hw;
@@ -690,7 +696,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
inc_irq_stat(apic_perf_irqs);
}
- return handled > 0;
+ return handled;
}
/*
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 34b4dad6f0b8..d49079515122 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
const struct cpuid_bit *cb;
static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
+ { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 },
{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 },
{ X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 },
{ X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 },
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index e5cc7e82e60d..ebdb85cf2686 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -18,7 +18,6 @@
#include <asm/apic.h>
#include <asm/iommu.h>
#include <asm/gart.h>
-#include <asm/hpet.h>
static void __init fix_hypertransport_config(int num, int slot, int func)
{
@@ -192,21 +191,6 @@ static void __init ati_bugs_contd(int num, int slot, int func)
}
#endif
-/*
- * Force the read back of the CMP register in hpet_next_event()
- * to work around the problem that the CMP register write seems to be
- * delayed. See hpet_next_event() for details.
- *
- * We do this on all SMBUS incarnations for now until we have more
- * information about the affected chipsets.
- */
-static void __init ati_hpet_bugs(int num, int slot, int func)
-{
-#ifdef CONFIG_HPET_TIMER
- hpet_readback_cmp = 1;
-#endif
-}
-
#define QFLAG_APPLY_ONCE 0x1
#define QFLAG_APPLIED 0x2
#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -236,8 +220,6 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
- { PCI_VENDOR_ID_ATI, PCI_ANY_ID,
- PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs },
{}
};
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index ff4c453e13f3..fa8c1b8e09fb 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -334,7 +334,7 @@ ENTRY(startup_32_smp)
/*
* Enable paging
*/
- movl $pa(swapper_pg_dir),%eax
+ movl pa(initial_page_table), %eax
movl %eax,%cr3 /* set the page table pointer.. */
movl %cr0,%eax
orl $X86_CR0_PG,%eax
@@ -614,6 +614,8 @@ ignore_int:
.align 4
ENTRY(initial_code)
.long i386_start_kernel
+ENTRY(initial_page_table)
+ .long pa(swapper_pg_dir)
/*
* BSS section
@@ -629,6 +631,10 @@ ENTRY(swapper_pg_dir)
#endif
swapper_pg_fixmap:
.fill 1024,4,0
+#ifdef CONFIG_X86_TRAMPOLINE
+ENTRY(trampoline_pg_dir)
+ .fill 1024,4,0
+#endif
ENTRY(empty_zero_page)
.fill 4096,1,0
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 351f9c0fea1f..7494999141b3 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -35,7 +35,6 @@
unsigned long hpet_address;
u8 hpet_blockid; /* OS timer block num */
u8 hpet_msi_disable;
-u8 hpet_readback_cmp;
#ifdef CONFIG_PCI_MSI
static unsigned long hpet_num_timers;
@@ -395,23 +394,27 @@ static int hpet_next_event(unsigned long delta,
* at that point and we would wait for the next hpet interrupt
* forever. We found out that reading the CMP register back
* forces the transfer so we can rely on the comparison with
- * the counter register below.
+ * the counter register below. If the read back from the
+ * compare register does not match the value we programmed
+ * then we might have a real hardware problem. We can not do
+ * much about it here, but at least alert the user/admin with
+ * a prominent warning.
*
- * That works fine on those ATI chipsets, but on newer Intel
- * chipsets (ICH9...) this triggers due to an erratum: Reading
- * the comparator immediately following a write is returning
- * the old value.
+ * An erratum on some chipsets (ICH9,..), results in
+ * comparator read immediately following a write returning old
+ * value. Workaround for this is to read this value second
+ * time, when first read returns old value.
*
- * We restrict the read back to the affected ATI chipsets (set
- * by quirks) and also run it with hpet=verbose for debugging
- * purposes.
+ * In fact the write to the comparator register is delayed up
+ * to two HPET cycles so the workaround we tried to restrict
+ * the readback to those known to be borked ATI chipsets
+ * failed miserably. So we give up on optimizations forever
+ * and penalize all HPET incarnations unconditionally.
*/
- if (hpet_readback_cmp || hpet_verbose) {
- u32 cmp = hpet_readl(HPET_Tn_CMP(timer));
-
- if (cmp != cnt)
+ if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) {
+ if (hpet_readl(HPET_Tn_CMP(timer)) != cnt)
printk_once(KERN_WARNING
- "hpet: compare register read back failed.\n");
+ "hpet: compare register read back failed.\n");
}
return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
@@ -503,7 +506,7 @@ static int hpet_assign_irq(struct hpet_dev *dev)
{
unsigned int irq;
- irq = create_irq();
+ irq = create_irq_nr(0, -1);
if (!irq)
return -EINVAL;
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a474ec37c32f..ff15c9dcc25d 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -206,11 +206,27 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp)
int arch_bp_generic_fields(int x86_len, int x86_type,
int *gen_len, int *gen_type)
{
- /* Len */
- switch (x86_len) {
- case X86_BREAKPOINT_LEN_X:
+ /* Type */
+ switch (x86_type) {
+ case X86_BREAKPOINT_EXECUTE:
+ if (x86_len != X86_BREAKPOINT_LEN_X)
+ return -EINVAL;
+
+ *gen_type = HW_BREAKPOINT_X;
*gen_len = sizeof(long);
+ return 0;
+ case X86_BREAKPOINT_WRITE:
+ *gen_type = HW_BREAKPOINT_W;
break;
+ case X86_BREAKPOINT_RW:
+ *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Len */
+ switch (x86_len) {
case X86_BREAKPOINT_LEN_1:
*gen_len = HW_BREAKPOINT_LEN_1;
break;
@@ -229,21 +245,6 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
return -EINVAL;
}
- /* Type */
- switch (x86_type) {
- case X86_BREAKPOINT_EXECUTE:
- *gen_type = HW_BREAKPOINT_X;
- break;
- case X86_BREAKPOINT_WRITE:
- *gen_type = HW_BREAKPOINT_W;
- break;
- case X86_BREAKPOINT_RW:
- *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
- break;
- default:
- return -EINVAL;
- }
-
return 0;
}
@@ -316,9 +317,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
ret = -EINVAL;
switch (info->len) {
- case X86_BREAKPOINT_LEN_X:
- align = sizeof(long) -1;
- break;
case X86_BREAKPOINT_LEN_1:
align = 0;
break;
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 1f11f5ce668f..a46cb3522c0c 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -40,6 +40,7 @@
static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int xstate_size;
+EXPORT_SYMBOL_GPL(xstate_size);
unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
static struct i387_fxsave_struct fx_scratch __cpuinitdata;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index ef10940e1af0..852b81967a37 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -194,7 +194,7 @@ static struct hw_breakpoint {
unsigned long addr;
int len;
int type;
- struct perf_event **pev;
+ struct perf_event * __percpu *pev;
} breakinfo[HBP_NUM];
static unsigned long early_dr7;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1bfb6cf4dd55..770ebfb349e9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -709,6 +709,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
struct hlist_node *node, *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+ kprobe_opcode_t *correct_ret_addr = NULL;
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
@@ -740,14 +741,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
/* another task is sharing our hash bucket */
continue;
+ orig_ret_address = (unsigned long)ri->ret_addr;
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+ correct_ret_addr = ri->ret_addr;
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
if (ri->rp && ri->rp->handler) {
__get_cpu_var(current_kprobe) = &ri->rp->kp;
get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+ ri->ret_addr = correct_ret_addr;
ri->rp->handler(ri, regs);
__get_cpu_var(current_kprobe) = NULL;
}
- orig_ret_address = (unsigned long)ri->ret_addr;
recycle_rp_inst(ri, &empty_rp);
if (orig_ret_address != trampoline_address)
@@ -759,8 +780,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
break;
}
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
-
kretprobe_hash_unlock(current, &flags);
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index e0bc186d7501..1c355c550960 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -239,11 +239,10 @@ int module_finalize(const Elf_Ehdr *hdr,
apply_paravirt(pseg, pseg + para->sh_size);
}
- return module_bug_finalize(hdr, sechdrs, me);
+ return 0;
}
void module_arch_cleanup(struct module *mod)
{
alternatives_smp_module_del(mod);
- module_bug_cleanup(mod);
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 64ecaf0af9af..57d1868a86aa 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -301,8 +301,9 @@ EXPORT_SYMBOL(kernel_thread);
/*
* sys_execve() executes a new program.
*/
-long sys_execve(const char __user *name, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs *regs)
+long sys_execve(const char __user *name,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp, struct pt_regs *regs)
{
long error;
char *filename;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b008e7883207..c3a4fbb2b996 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1014,6 +1014,8 @@ void __init setup_arch(char **cmdline_p)
paging_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir);
+ setup_trampoline_page_table();
+
tboot_probe();
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a5e928b0cb5f..8b3bfc4dd708 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -73,7 +73,6 @@
#ifdef CONFIG_X86_32
u8 apicid_2_node[MAX_APICID];
-static int low_mappings;
#endif
/* State of each CPU */
@@ -91,6 +90,25 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 };
static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
+
+/*
+ * We need this for trampoline_base protection from concurrent accesses when
+ * off- and onlining cores wildly.
+ */
+static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
+
+void cpu_hotplug_driver_lock()
+{
+ mutex_lock(&x86_cpu_hotplug_driver_mutex);
+}
+
+void cpu_hotplug_driver_unlock()
+{
+ mutex_unlock(&x86_cpu_hotplug_driver_mutex);
+}
+
+ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
+ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
#else
static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
@@ -281,6 +299,18 @@ notrace static void __cpuinit start_secondary(void *unused)
* fragile that we want to limit the things done here to the
* most necessary things.
*/
+
+#ifdef CONFIG_X86_32
+ /*
+ * Switch away from the trampoline page-table
+ *
+ * Do this before cpu_init() because it needs to access per-cpu
+ * data which may not be mapped in the trampoline page-table.
+ */
+ load_cr3(swapper_pg_dir);
+ __flush_tlb_all();
+#endif
+
vmi_bringup();
cpu_init();
preempt_disable();
@@ -299,12 +329,6 @@ notrace static void __cpuinit start_secondary(void *unused)
legacy_pic->chip->unmask(0);
}
-#ifdef CONFIG_X86_32
- while (low_mappings)
- cpu_relax();
- __flush_tlb_all();
-#endif
-
/* This must be done before setting cpu_online_mask */
set_cpu_sibling_map(raw_smp_processor_id());
wmb();
@@ -750,6 +774,7 @@ do_rest:
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
+ initial_page_table = __pa(&trampoline_pg_dir);
#else
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
@@ -897,20 +922,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
-#ifdef CONFIG_X86_32
- /* init low mem mapping */
- clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
- flush_tlb_all();
- low_mappings = 1;
-
err = do_boot_cpu(apicid, cpu);
- zap_low_mappings(false);
- low_mappings = 0;
-#else
- err = do_boot_cpu(apicid, cpu);
-#endif
if (err) {
pr_debug("do_boot_cpu failed %d\n", err);
return -EIO;
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 196552bb412c..d5e06624e34a 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -28,7 +28,9 @@
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
long __res;
asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx"
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index c652ef62742d..e2a595257390 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -1,6 +1,7 @@
#include <linux/io.h>
#include <asm/trampoline.h>
+#include <asm/pgtable.h>
#include <asm/e820.h>
#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
@@ -37,3 +38,19 @@ unsigned long __trampinit setup_trampoline(void)
memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
return virt_to_phys(trampoline_base);
}
+
+void __init setup_trampoline_page_table(void)
+{
+#ifdef CONFIG_X86_32
+ /* Copy kernel address range */
+ clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
+
+ /* Initialize low mappings */
+ clone_pgd_range(trampoline_pg_dir,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ min_t(unsigned long, KERNEL_PGD_PTRS,
+ KERNEL_PGD_BOUNDARY));
+#endif
+}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index ce8e50239332..26a863a9c2a8 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -626,6 +626,44 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
local_irq_restore(flags);
}
+static unsigned long long cyc2ns_suspend;
+
+void save_sched_clock_state(void)
+{
+ if (!sched_clock_stable)
+ return;
+
+ cyc2ns_suspend = sched_clock();
+}
+
+/*
+ * Even on processors with invariant TSC, TSC gets reset in some the
+ * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to
+ * arbitrary value (still sync'd across cpu's) during resume from such sleep
+ * states. To cope up with this, recompute the cyc2ns_offset for each cpu so
+ * that sched_clock() continues from the point where it was left off during
+ * suspend.
+ */
+void restore_sched_clock_state(void)
+{
+ unsigned long long offset;
+ unsigned long flags;
+ int cpu;
+
+ if (!sched_clock_stable)
+ return;
+
+ local_irq_save(flags);
+
+ __get_cpu_var(cyc2ns_offset) = 0;
+ offset = cyc2ns_suspend - sched_clock();
+
+ for_each_possible_cpu(cpu)
+ per_cpu(cyc2ns_offset, cpu) = offset;
+
+ local_irq_restore(flags);
+}
+
#ifdef CONFIG_CPU_FREQ
/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b38bd8b92aa6..66ca98aafdd6 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1870,17 +1870,16 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
struct decode_cache *c = &ctxt->decode;
- u64 old = c->dst.orig_val;
+ u64 old = c->dst.orig_val64;
if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
-
c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
ctxt->eflags &= ~EFLG_ZF;
} else {
- c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
- (u32) c->regs[VCPU_REGS_RBX];
+ c->dst.val64 = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
+ (u32) c->regs[VCPU_REGS_RBX];
ctxt->eflags |= EFLG_ZF;
}
@@ -2616,7 +2615,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->src.valptr, c->src.bytes);
if (rc != X86EMUL_CONTINUE)
goto done;
- c->src.orig_val = c->src.val;
+ c->src.orig_val64 = c->src.val64;
}
if (c->src2.type == OP_MEM) {
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 0fd6378981f4..ddeb2314b522 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -697,6 +697,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
pit->wq = create_singlethread_workqueue("kvm-pit-wq");
if (!pit->wq) {
mutex_unlock(&pit->pit_state.lock);
+ kvm_free_irq_source_id(kvm, pit->irq_source_id);
kfree(pit);
return NULL;
}
@@ -742,7 +743,7 @@ fail:
kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
kvm_free_irq_source_id(kvm, pit->irq_source_id);
-
+ destroy_workqueue(pit->wq);
kfree(pit);
return NULL;
}
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 8d10c063d7f2..4b7b73ce2098 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -64,6 +64,9 @@ static void pic_unlock(struct kvm_pic *s)
if (!found)
found = s->kvm->bsp_vcpu;
+ if (!found)
+ return;
+
kvm_vcpu_kick(found);
}
}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ffed06871c5c..63c314502993 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -43,7 +43,6 @@ struct kvm_kpic_state {
u8 irr; /* interrupt request register */
u8 imr; /* interrupt mask register */
u8 isr; /* interrupt service register */
- u8 isr_ack; /* interrupt ack detection */
u8 priority_add; /* highest irq priority */
u8 irq_base;
u8 read_reg_select;
@@ -56,6 +55,7 @@ struct kvm_kpic_state {
u8 init4; /* true if 4 byte init */
u8 elcr; /* PIIX edge/trigger selection */
u8 elcr_mask;
+ u8 isr_ack; /* interrupt ack detection */
struct kvm_pic *pics_state;
};
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index bc5b9b8d4a33..8a3f9f64f86f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -766,7 +766,6 @@ static void init_vmcb(struct vcpu_svm *svm)
control->iopm_base_pa = iopm_base;
control->msrpm_base_pa = __pa(svm->msrpm);
- control->tsc_offset = 0;
control->int_ctl = V_INTR_MASKING_MASK;
init_seg(&save->es);
@@ -902,6 +901,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
init_vmcb(svm);
+ svm->vmcb->control.tsc_offset = 0-native_read_tsc();
err = fx_init(&svm->vcpu);
if (err)
@@ -3163,8 +3163,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
sync_lapic_to_cr8(vcpu);
save_host_msrs(vcpu);
- fs_selector = kvm_read_fs();
- gs_selector = kvm_read_gs();
+ savesegment(fs, fs_selector);
+ savesegment(gs, gs_selector);
ldt_selector = kvm_read_ldt();
svm->vmcb->save.cr2 = vcpu->arch.cr2;
/* required for live migration with NPT */
@@ -3251,10 +3251,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
- kvm_load_fs(fs_selector);
- kvm_load_gs(gs_selector);
- kvm_load_ldt(ldt_selector);
load_host_msrs(vcpu);
+ loadsegment(fs, fs_selector);
+#ifdef CONFIG_X86_64
+ load_gs_index(gs_selector);
+ wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
+#else
+ loadsegment(gs, gs_selector);
+#endif
+ kvm_load_ldt(ldt_selector);
reload_tss(vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 49b25eee25ac..7bddfab12013 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -803,7 +803,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
*/
vmx->host_state.ldt_sel = kvm_read_ldt();
vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
- vmx->host_state.fs_sel = kvm_read_fs();
+ savesegment(fs, vmx->host_state.fs_sel);
if (!(vmx->host_state.fs_sel & 7)) {
vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
vmx->host_state.fs_reload_needed = 0;
@@ -811,7 +811,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
vmcs_write16(HOST_FS_SELECTOR, 0);
vmx->host_state.fs_reload_needed = 1;
}
- vmx->host_state.gs_sel = kvm_read_gs();
+ savesegment(gs, vmx->host_state.gs_sel);
if (!(vmx->host_state.gs_sel & 7))
vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
else {
@@ -841,27 +841,21 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
static void __vmx_load_host_state(struct vcpu_vmx *vmx)
{
- unsigned long flags;
-
if (!vmx->host_state.loaded)
return;
++vmx->vcpu.stat.host_state_reload;
vmx->host_state.loaded = 0;
if (vmx->host_state.fs_reload_needed)
- kvm_load_fs(vmx->host_state.fs_sel);
+ loadsegment(fs, vmx->host_state.fs_sel);
if (vmx->host_state.gs_ldt_reload_needed) {
kvm_load_ldt(vmx->host_state.ldt_sel);
- /*
- * If we have to reload gs, we must take care to
- * preserve our gs base.
- */
- local_irq_save(flags);
- kvm_load_gs(vmx->host_state.gs_sel);
#ifdef CONFIG_X86_64
- wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
+ load_gs_index(vmx->host_state.gs_sel);
+ wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
+#else
+ loadsegment(gs, vmx->host_state.gs_sel);
#endif
- local_irq_restore(flags);
}
reload_tss();
#ifdef CONFIG_X86_64
@@ -2589,8 +2583,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
- vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */
- vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */
+ vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
+ vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
#ifdef CONFIG_X86_64
rdmsrl(MSR_FS_BASE, a);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 25f19078b321..3a09c625d526 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2387,7 +2387,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
if (cpu_has_xsave)
memcpy(guest_xsave->region,
&vcpu->arch.guest_fpu.state->xsave,
- sizeof(struct xsave_struct));
+ xstate_size);
else {
memcpy(guest_xsave->region,
&vcpu->arch.guest_fpu.state->fxsave,
@@ -2405,7 +2405,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
if (cpu_has_xsave)
memcpy(&vcpu->arch.guest_fpu.state->xsave,
- guest_xsave->region, sizeof(struct xsave_struct));
+ guest_xsave->region, xstate_size);
else {
if (xstate_bv & ~XSTATE_FPSSE)
return -EINVAL;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 9257510b4836..9d5f55848455 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -324,9 +324,8 @@ static void lguest_load_gdt(const struct desc_ptr *desc)
}
/*
- * For a single GDT entry which changes, we do the lazy thing: alter our GDT,
- * then tell the Host to reload the entire thing. This operation is so rare
- * that this naive implementation is reasonable.
+ * For a single GDT entry which changes, we simply change our copy and
+ * then tell the host about it.
*/
static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
const void *desc, int type)
@@ -338,9 +337,13 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
}
/*
- * OK, I lied. There are three "thread local storage" GDT entries which change
+ * There are three "thread local storage" GDT entries which change
* on every context switch (these three entries are how glibc implements
- * __thread variables). So we have a hypercall specifically for this case.
+ * __thread variables). As an optimization, we have a hypercall
+ * specifically for this case.
+ *
+ * Wouldn't it be nicer to have a general LOAD_GDT_ENTRIES hypercall
+ * which took a range of entries?
*/
static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
{
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 84e236ce76ba..72fc70cf6184 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -74,7 +74,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
/*
* Map 'pfn' using fixed map 'type' and protections 'prot'
*/
-void *
+void __iomem *
iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
{
/*
@@ -86,12 +86,12 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
prot = PAGE_KERNEL_UC_MINUS;
- return kmap_atomic_prot_pfn(pfn, type, prot);
+ return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, type, prot);
}
EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
void
-iounmap_atomic(void *kvaddr, enum km_type type)
+iounmap_atomic(void __iomem *kvaddr, enum km_type type)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index f9897f7a9ef1..9c0d0d399c30 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -420,9 +420,11 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return -1;
}
- for_each_node_mask(i, nodes_parsed)
- e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
- nodes[i].end >> PAGE_SHIFT);
+ for (i = 0; i < num_node_memblks; i++)
+ e820_register_active_regions(memblk_nodeid[i],
+ node_memblk_range[i].start >> PAGE_SHIFT,
+ node_memblk_range[i].end >> PAGE_SHIFT);
+
/* for out of order entries in SRAT */
sort_node_map();
if (!nodes_cover_memory(nodes)) {
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index f6b48f6c5951..f1575c9a2572 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -568,8 +568,13 @@ static int __init init_sysfs(void)
int error;
error = sysdev_class_register(&oprofile_sysclass);
- if (!error)
- error = sysdev_register(&device_oprofile);
+ if (error)
+ return error;
+
+ error = sysdev_register(&device_oprofile);
+ if (error)
+ sysdev_class_unregister(&oprofile_sysclass);
+
return error;
}
@@ -580,8 +585,10 @@ static void exit_sysfs(void)
}
#else
-#define init_sysfs() do { } while (0)
-#define exit_sysfs() do { } while (0)
+
+static inline int init_sysfs(void) { return 0; }
+static inline void exit_sysfs(void) { }
+
#endif /* CONFIG_PM */
static int __init p4_init(char **cpu_type)
@@ -664,7 +671,10 @@ static int __init ppro_init(char **cpu_type)
case 14:
*cpu_type = "i386/core";
break;
- case 15: case 23:
+ case 0x0f:
+ case 0x16:
+ case 0x17:
+ case 0x1d:
*cpu_type = "i386/core_2";
break;
case 0x1a:
@@ -695,6 +705,8 @@ int __init op_nmi_init(struct oprofile_operations *ops)
char *cpu_type = NULL;
int ret = 0;
+ using_nmi = 0;
+
if (!cpu_has_apic)
return -ENODEV;
@@ -774,7 +786,10 @@ int __init op_nmi_init(struct oprofile_operations *ops)
mux_init(ops);
- init_sysfs();
+ ret = init_sysfs();
+ if (ret)
+ return ret;
+
using_nmi = 1;
printk(KERN_INFO "oprofile: using NMI interrupt.\n");
return 0;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index e7e8c5f54956..87bb35e34ef1 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -113,6 +113,7 @@ static void __save_processor_state(struct saved_context *ctxt)
void save_processor_state(void)
{
__save_processor_state(&saved_context);
+ save_sched_clock_state();
}
#ifdef CONFIG_X86_32
EXPORT_SYMBOL(save_processor_state);
@@ -229,6 +230,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
void restore_processor_state(void)
{
__restore_processor_state(&saved_context);
+ restore_sched_clock_state();
}
#ifdef CONFIG_X86_32
EXPORT_SYMBOL(restore_processor_state);
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index 554c002a1e1a..0f456386cce5 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -72,13 +72,17 @@ void __init xen_unplug_emulated_devices(void)
{
int r;
+ /* user explicitly requested no unplug */
+ if (xen_emul_unplug & XEN_UNPLUG_NEVER)
+ return;
/* check the version of the xen platform PCI device */
r = check_platform_magic();
/* If the version matches enable the Xen platform PCI driver.
- * Also enable the Xen platform PCI driver if the version is really old
- * and the user told us to ignore it. */
+ * Also enable the Xen platform PCI driver if the host does
+ * not support the unplug protocol (XEN_PLATFORM_ERR_MAGIC)
+ * but the user told us that unplugging is unnecessary. */
if (r && !(r == XEN_PLATFORM_ERR_MAGIC &&
- (xen_emul_unplug & XEN_UNPLUG_IGNORE)))
+ (xen_emul_unplug & XEN_UNPLUG_UNNECESSARY)))
return;
/* Set the default value of xen_emul_unplug depending on whether or
* not the Xen PV frontends and the Xen platform PCI driver have
@@ -99,7 +103,7 @@ void __init xen_unplug_emulated_devices(void)
}
}
/* Now unplug the emulated devices */
- if (!(xen_emul_unplug & XEN_UNPLUG_IGNORE))
+ if (!(xen_emul_unplug & XEN_UNPLUG_UNNECESSARY))
outw(xen_emul_unplug, XEN_IOPORT_UNPLUG);
xen_platform_pci_unplug = xen_emul_unplug;
}
@@ -125,8 +129,10 @@ static int __init parse_xen_emul_unplug(char *arg)
xen_emul_unplug |= XEN_UNPLUG_AUX_IDE_DISKS;
else if (!strncmp(p, "nics", l))
xen_emul_unplug |= XEN_UNPLUG_ALL_NICS;
- else if (!strncmp(p, "ignore", l))
- xen_emul_unplug |= XEN_UNPLUG_IGNORE;
+ else if (!strncmp(p, "unnecessary", l))
+ xen_emul_unplug |= XEN_UNPLUG_UNNECESSARY;
+ else if (!strncmp(p, "never", l))
+ xen_emul_unplug |= XEN_UNPLUG_NEVER;
else
printk(KERN_WARNING "unrecognised option '%s' "
"in parameter 'xen_emul_unplug'\n", p);
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 1a5353a753fc..b2bb5aa3b054 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -489,8 +489,9 @@ static void xen_hvm_setup_cpu_clockevents(void)
__init void xen_hvm_init_time_ops(void)
{
/* vector callback is needed otherwise we cannot receive interrupts
- * on cpu > 0 */
- if (!xen_have_vector_callback && num_present_cpus() > 1)
+ * on cpu > 0 and at this point we don't know how many cpus are
+ * available */
+ if (!xen_have_vector_callback)
return;
if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
printk(KERN_INFO "Xen doesn't support pvclock on HVM,"