55 files changed, 585 insertions, 290 deletions
diff --git a/arch/alpha/include/asm/hardirq.h b/arch/alpha/include/asm/hardirq.h
index d953e234daa8..88971460fa6c 100644
--- a/arch/alpha/include/asm/hardirq.h
+++ b/arch/alpha/include/asm/hardirq.h
@@ -14,17 +14,4 @@ typedef struct {
 
 void ack_bad_irq(unsigned int irq);
 
-#define HARDIRQ_BITS	12
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially nestable IRQ sources in the system
- * to nest on a single CPU. On Alpha, interrupts are masked at the CPU
- * by IPL as well as at the system level. We only have 8 IPLs (UNIX PALcode)
- * so we really only have 8 nestable IRQs, but allow some overhead
- */
-#if (1 << HARDIRQ_BITS) < 16
-#error HARDIRQ_BITS is too low!
-#endif
-
 #endif /* _ALPHA_HARDIRQ_H */
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index f238370c907d..8d0097f10208 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -93,8 +93,8 @@ common_shutdown_1(void *generic_ptr)
 	if (cpuid != boot_cpuid) {
 		flags |= 0x00040000UL; /* "remain halted" */
 		*pflags = flags;
-		cpu_clear(cpuid, cpu_present_map);
-		cpu_clear(cpuid, cpu_possible_map);
+		set_cpu_present(cpuid, false);
+		set_cpu_possible(cpuid, false);
 		halt();
 	}
 #endif
@@ -120,8 +120,8 @@ common_shutdown_1(void *generic_ptr)
 
 #ifdef CONFIG_SMP
 	/* Wait for the secondaries to halt. */
-	cpu_clear(boot_cpuid, cpu_present_map);
-	cpu_clear(boot_cpuid, cpu_possible_map);
+	set_cpu_present(boot_cpuid, false);
+	set_cpu_possible(boot_cpuid, false);
 	while (cpus_weight(cpu_present_map))
 		barrier();
 #endif
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 00f1dc3dfd5f..b1fe5674c3a1 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -120,12 +120,12 @@ void __cpuinit
 smp_callin(void)
 {
 	int cpuid = hard_smp_processor_id();
-	cpumask_t mask = cpu_online_map;
 
-	if (cpu_test_and_set(cpuid, mask)) {
+	if (cpu_online(cpuid)) {
 		printk("??, cpu 0x%x already present??\n", cpuid);
 		BUG();
 	}
+	set_cpu_online(cpuid, true);
 
 	/* Turn on machine checks.  */
 	wrmces(7);
@@ -436,8 +436,8 @@ setup_smp(void)
 				((char *)cpubase + i*hwrpb->processor_size);
 			if ((cpu->flags & 0x1cc) == 0x1cc) {
 				smp_num_probed++;
-				cpu_set(i, cpu_possible_map);
-				cpu_set(i, cpu_present_map);
+				set_cpu_possible(i, true);
+				set_cpu_present(i, true);
 				cpu->pal_revision = boot_cpu_palrev;
 			}
 
@@ -470,8 +470,8 @@ smp_prepare_cpus(unsigned int max_cpus)
 
 	/* Nothing to do on a UP box, or when told not to.  */
 	if (smp_num_probed == 1 || max_cpus == 0) {
-		cpu_possible_map = cpumask_of_cpu(boot_cpuid);
-		cpu_present_map = cpumask_of_cpu(boot_cpuid);
+		init_cpu_possible(cpumask_of(boot_cpuid));
+		init_cpu_present(cpumask_of(boot_cpuid));
 		printk(KERN_INFO "SMP mode deactivated.\n");
 		return;
 	}
diff --git a/arch/avr32/include/asm/hardirq.h b/arch/avr32/include/asm/hardirq.h
index 267354356f60..015bc75ea798 100644
--- a/arch/avr32/include/asm/hardirq.h
+++ b/arch/avr32/include/asm/hardirq.h
@@ -20,15 +20,4 @@ void ack_bad_irq(unsigned int irq);
 
 #endif /* __ASSEMBLY__ */
 
-#define HARDIRQ_BITS	12
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 #endif /* __ASM_AVR32_HARDIRQ_H */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6183aeccecf1..8b6a8a554afa 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -22,6 +22,9 @@ config IA64
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
+	select HAVE_FUNCTION_TRACER
 	select HAVE_DMA_ATTRS
 	select HAVE_KVM
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/ia64/include/asm/ftrace.h b/arch/ia64/include/asm/ftrace.h
new file mode 100644
index 000000000000..d20db3c2a656
--- /dev/null
+++ b/arch/ia64/include/asm/ftrace.h
@@ -0,0 +1,28 @@
+#ifndef _ASM_IA64_FTRACE_H
+#define _ASM_IA64_FTRACE_H
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define MCOUNT_INSN_SIZE        32 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
+#define mcount _mcount
+
+#include <asm/kprobes.h>
+/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */
+#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
+#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	/* second bundle, insn 2 */
+	return addr - 0x12;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* _ASM_IA64_FTRACE_H */
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
index 140e495b8e0e..d514cd9edb49 100644
--- a/arch/ia64/include/asm/hardirq.h
+++ b/arch/ia64/include/asm/hardirq.h
@@ -20,16 +20,6 @@
 
 #define local_softirq_pending()		(local_cpu_data->softirq_pending)
 
-#define HARDIRQ_BITS	14
-
-/*
- * The hardirq mask has to be large enough to have space for potentially all IRQ sources
- * in the system nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
 extern void __iomem *ipi_base_addr;
 
 void ack_bad_irq(unsigned int irq);
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index 68aa6da807c1..bfa86b6af7cd 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -25,6 +25,10 @@
 
 #include <linux/ioctl.h>
 
+/* Select x86 specific features in <linux/kvm.h> */
+#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_DEVICE_ASSIGNMENT
+
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
 
diff --git a/arch/ia64/include/asm/mmzone.h b/arch/ia64/include/asm/mmzone.h
index 34efe88eb849..f2ca32069b3f 100644
--- a/arch/ia64/include/asm/mmzone.h
+++ b/arch/ia64/include/asm/mmzone.h
@@ -31,10 +31,6 @@ static inline int pfn_to_nid(unsigned long pfn)
 #endif
 }
 
-#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
-extern int early_pfn_to_nid(unsigned long pfn);
-#endif
-
 #ifdef CONFIG_IA64_DIG /* DIG systems are small */
 # define MAX_PHYSNODE_ID	8
 # define NR_NODE_MEMBLKS	(MAX_NUMNODES * 8)
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index c381ea954892..ab6e7ec0bba3 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -2,6 +2,10 @@
 # Makefile for the linux kernel.
 #
 
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
 extra-y	:= head.o init_task.o vmlinux.lds
 
 obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
@@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index e5341e2c1175..7e3382b06d56 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -47,6 +47,7 @@
 #include <asm/processor.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
+#include <asm/ftrace.h>
 
 #include "minstate.h"
 
@@ -1404,6 +1405,105 @@ GLOBAL_ENTRY(unw_init_running)
 	br.ret.sptk.many rp
 END(unw_init_running)
 
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+GLOBAL_ENTRY(_mcount)
+	br ftrace_stub
+END(_mcount)
+
+.here:
+	br.ret.sptk.many b0
+
+GLOBAL_ENTRY(ftrace_caller)
+	alloc out0 = ar.pfs, 8, 0, 4, 0
+	mov out3 = r0
+	;;
+	mov out2 = b0
+	add r3 = 0x20, r3
+	mov out1 = r1;
+	br.call.sptk.many b0 = ftrace_patch_gp
+	//this might be called from module, so we must patch gp
+ftrace_patch_gp:
+	movl gp=__gp
+	mov b0 = r3
+	;;
+.global ftrace_call;
+ftrace_call:
+{
+	.mlx
+	nop.m 0x0
+	movl r3 = .here;;
+}
+	alloc loc0 = ar.pfs, 4, 4, 2, 0
+	;;
+	mov loc1 = b0
+	mov out0 = b0
+	mov loc2 = r8
+	mov loc3 = r15
+	;;
+	adds out0 = -MCOUNT_INSN_SIZE, out0
+	mov out1 = in2
+	mov b6 = r3
+
+	br.call.sptk.many b0 = b6
+	;;
+	mov ar.pfs = loc0
+	mov b0 = loc1
+	mov r8 = loc2
+	mov r15 = loc3
+	br ftrace_stub
+	;;
+END(ftrace_caller)
+
+#else
+GLOBAL_ENTRY(_mcount)
+	movl r2 = ftrace_stub
+	movl r3 = ftrace_trace_function;;
+	ld8 r3 = [r3];;
+	ld8 r3 = [r3];;
+	cmp.eq p7,p0 = r2, r3
+(p7)	br.sptk.many ftrace_stub
+	;;
+
+	alloc loc0 = ar.pfs, 4, 4, 2, 0
+	;;
+	mov loc1 = b0
+	mov out0 = b0
+	mov loc2 = r8
+	mov loc3 = r15
+	;;
+	adds out0 = -MCOUNT_INSN_SIZE, out0
+	mov out1 = in2
+	mov b6 = r3
+
+	br.call.sptk.many b0 = b6
+	;;
+	mov ar.pfs = loc0
+	mov b0 = loc1
+	mov r8 = loc2
+	mov r15 = loc3
+	br ftrace_stub
+	;;
+END(_mcount)
+#endif
+
+GLOBAL_ENTRY(ftrace_stub)
+	mov r3 = b0
+	movl r2 = _mcount_ret_helper
+	;;
+	mov b6 = r2
+	mov b7 = r3
+	br.ret.sptk.many b6
+
+_mcount_ret_helper:
+	mov b0 = r42
+	mov r1 = r41
+	mov ar.pfs = r40
+	br b7
+END(ftrace_stub)
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
 	.rodata
 	.align 8
 	.globl sys_call_table
diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
new file mode 100644
index 000000000000..7fc8c961b1f7
--- /dev/null
+++ b/arch/ia64/kernel/ftrace.c
@@ -0,0 +1,206 @@
+/*
+ * Dynamic function tracing support.
+ *
+ * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
+ *
+ * For licencing details, see COPYING.
+ *
+ * Defines low-level handling of mcount calls when the kernel
+ * is compiled with the -pg flag. When using dynamic ftrace, the
+ * mcount call-sites get patched lazily with NOP till they are
+ * enabled. All code mutation routines here take effect atomically.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/patch.h>
+
+/* In IA64, each function will be added below two bundles with -pg option */
+static unsigned char __attribute__((aligned(8)))
+ftrace_orig_code[MCOUNT_INSN_SIZE] = {
+	0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
+	0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
+	0x05, 0x00, 0xc4, 0x00,             /* mov r42=b0 */
+	0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
+	0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
+	0x08, 0x00, 0x00, 0x50              /* br.call.sptk.many b0 = _mcount;; */
+};
+
+struct ftrace_orig_insn {
+	u64 dummy1, dummy2, dummy3;
+	u64 dummy4:64-41+13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 sign:1;
+	u64 dummy6:4;
+};
+
+/* mcount stub will be converted below for nop */
+static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+	0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+	0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
+	0x00, 0x00, 0x04, 0x00
+};
+
+static unsigned char *ftrace_nop_replace(void)
+{
+	return ftrace_nop_code;
+}
+
+/*
+ * mcount stub will be converted below for call
+ * Note: Just the last instruction is changed against nop
+ * */
+static unsigned char __attribute__((aligned(8)))
+ftrace_call_code[MCOUNT_INSN_SIZE] = {
+	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+	0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+	0x00, 0x00, 0x04, 0x00,             /* nop.i 0x0 */
+	0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+	0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
+	0xf8, 0xff, 0xff, 0xc8
+};
+
+struct ftrace_call_insn {
+	u64 dummy1, dummy2;
+	u64 dummy3:48;
+	u64 imm39_l:16;
+	u64 imm39_h:23;
+	u64 dummy4:13;
+	u64 imm20:20;
+	u64 dummy5:3;
+	u64 i:1;
+	u64 dummy6:4;
+};
+
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+	struct ftrace_call_insn *code = (void *)ftrace_call_code;
+	unsigned long offset = addr - (ip + 0x10);
+
+	code->imm39_l = offset >> 24;
+	code->imm39_h = offset >> 40;
+	code->imm20 = offset >> 4;
+	code->i = offset >> 63;
+	return ftrace_call_code;
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+		   unsigned char *new_code, int do_check)
+{
+	unsigned char replaced[MCOUNT_INSN_SIZE];
+
+	/*
+	 * Note: Due to modules and __init, code can
+	 *  disappear and change, we need to protect against faulting
+	 *  as well as code changing. We do this by using the
+	 *  probe_kernel_* functions.
+	 *
+	 * No real locking needed, this code is run through
+	 * kstop_machine, or before SMP starts.
+	 */
+
+	if (!do_check)
+		goto skip_check;
+
+	/* read the text we want to modify */
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+		return -EINVAL;
+
+skip_check:
+	/* replace the text with the new text */
+	if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE))
+		return -EPERM;
+	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
+	unsigned long ip = rec->ip;
+
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+	if (rec->flags & FTRACE_FL_CONVERTED) {
+		struct ftrace_call_insn *call_insn, *tmp_call;
+
+		call_insn = (void *)ftrace_call_code;
+		tmp_call = (void *)replaced;
+		call_insn->imm39_l = tmp_call->imm39_l;
+		call_insn->imm39_h = tmp_call->imm39_h;
+		call_insn->imm20 = tmp_call->imm20;
+		call_insn->i = tmp_call->i;
+		if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
+			return -EINVAL;
+		return 0;
+	} else {
+		struct ftrace_orig_insn *call_insn, *tmp_call;
+
+		call_insn = (void *)ftrace_orig_code;
+		tmp_call = (void *)replaced;
+		call_insn->sign = tmp_call->sign;
+		call_insn->imm20 = tmp_call->imm20;
+		if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0)
+			return -EINVAL;
+		return 0;
+	}
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	int ret;
+	char *new;
+
+	ret = ftrace_make_nop_check(rec, addr);
+	if (ret)
+		return ret;
+	new = ftrace_nop_replace();
+	return ftrace_modify_code(rec->ip, NULL, new, 0);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned char *old, *new;
+
+	old=  ftrace_nop_replace();
+	new = ftrace_call_replace(ip, addr);
+	return ftrace_modify_code(ip, old, new, 1);
+}
+
+/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip;
+	unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
+
+	if (func == ftrace_stub)
+		return 0;
+	ip = ((struct fnptr *)func)->ip;
+
+	ia64_patch_imm64(addr + 2, ip);
+
+	flush_icache_range(addr, addr + 16);
+	return 0;
+}
+
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void *data)
+{
+	*(unsigned long *)data = 0;
+
+	return 0;
+}
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 6da1f20d7372..2d311864e359 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -112,3 +112,9 @@ EXPORT_SYMBOL_GPL(esi_call_phys);
 #endif
 extern char ia64_ivt[];
 EXPORT_SYMBOL(ia64_ivt);
+
+#include <asm/ftrace.h>
+#ifdef CONFIG_FUNCTION_TRACER
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(_mcount);
+#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 4e586f6110aa..28f982045f29 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1337,6 +1337,10 @@ static void kvm_release_vm_pages(struct kvm *kvm)
 	}
 }
 
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_iommu_unmap_guest(kvm);
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 552d07724207..230eae482f32 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -455,13 +455,18 @@ fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
 	if (!vmm_fpswa_interface)
 		return (fpswa_ret_t) {-1, 0, 0, 0};
 
-	/*
-	 * Just let fpswa driver to use hardware fp registers.
-	 * No fp register is valid in memory.
-	 */
 	memset(&fp_state, 0, sizeof(fp_state_t));
 
 	/*
+	 * compute fp_state.  only FP registers f6 - f11 are used by the
+	 * vmm, so set those bits in the mask and set the low volatile
+	 * pointer to point to these registers.
+	 */
+	fp_state.bitmask_low64 = 0xfc0;  /* bit6..bit11 */
+
+	fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
+
+   /*
 	 * unsigned long (*EFI_FPSWA) (
 	 *      unsigned long    trap_type,
 	 *      void             *Bundle,
@@ -545,10 +550,6 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
 		status = vmm_handle_fpu_swa(0, regs, isr);
 		if (!status)
 			return ;
-		else if (-EAGAIN == status) {
-			vcpu_decrement_iip(vcpu);
-			return ;
-		}
 		break;
 	}
 
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index b73bf1838e57..3efea7d0a351 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -58,7 +58,7 @@ paddr_to_nid(unsigned long paddr)
  * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
  * the section resides.
  */
-int early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn)
 {
 	int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
 
@@ -70,7 +70,7 @@ int early_pfn_to_nid(unsigned long pfn)
 			return node_memblk[i].nid;
 	}
 
-	return 0;
+	return -1;
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/powerpc/include/asm/pgtable-4k.h b/arch/powerpc/include/asm/pgtable-4k.h
index 6b18ba9d2d85..1dbca4e7de67 100644
--- a/arch/powerpc/include/asm/pgtable-4k.h
+++ b/arch/powerpc/include/asm/pgtable-4k.h
@@ -60,7 +60,7 @@
 /* It should be preserving the high 48 bits and then specifically */
 /* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \
-                         _PAGE_HPTEFLAGS)
+                         _PAGE_HPTEFLAGS | _PAGE_SPECIAL)
 
 /* Bits to mask out from a PMD to get to the PTE page */
 #define PMD_MASKED_BITS		0
diff --git a/arch/powerpc/include/asm/pgtable-64k.h b/arch/powerpc/include/asm/pgtable-64k.h
index 07b0d8f09cb6..7389003349a6 100644
--- a/arch/powerpc/include/asm/pgtable-64k.h
+++ b/arch/powerpc/include/asm/pgtable-64k.h
@@ -114,7 +114,7 @@ static inline struct subpage_prot_table *pgd_subpage_prot(pgd_t *pgd)
  * pgprot changes
  */
 #define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-                         _PAGE_ACCESSED)
+                         _PAGE_ACCESSED | _PAGE_SPECIAL)
 
 /* Bits to mask out from a PMD to get to the PTE page */
 #define PMD_MASKED_BITS		0x1ff
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
index f69a4d977729..820b5f0a35ce 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -429,7 +429,8 @@ extern int icache_44x_need_flush;
 #define PMD_PAGE_SIZE(pmd)	bad_call_to_PMD_PAGE_SIZE()
 #endif
 
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \
+			 _PAGE_SPECIAL)
 
 
 #define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 5af4e9b2dbe2..ada06924a423 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -646,11 +646,16 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
 		       unsigned int areg, struct pt_regs *regs,
 		       unsigned int flags, unsigned int length)
 {
-	char *ptr = (char *) &current->thread.TS_FPR(reg);
+	char *ptr;
 	int ret = 0;
 
 	flush_vsx_to_thread(current);
 
+	if (reg < 32)
+		ptr = (char *) &current->thread.TS_FPR(reg);
+	else
+		ptr = (char *) &current->thread.vr[reg - 32];
+
 	if (flags & ST)
 		ret = __copy_to_user(addr, ptr, length);
         else {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 2822c8ccfaaf..5f81256287f5 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -125,6 +125,10 @@ static void kvmppc_free_vcpus(struct kvm *kvm)
 	}
 }
 
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvmppc_free_vcpus(kvm);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 7393bd76d698..5ac08b8ab654 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -19,6 +19,7 @@
 #include <linux/notifier.h>
 #include <linux/lmb.h>
 #include <linux/of.h>
+#include <linux/pfn.h>
 #include <asm/sparsemem.h>
 #include <asm/prom.h>
 #include <asm/system.h>
@@ -882,7 +883,7 @@ static void mark_reserved_regions_for_nid(int nid)
 		unsigned long physbase = lmb.reserved.region[i].base;
 		unsigned long size = lmb.reserved.region[i].size;
 		unsigned long start_pfn = physbase >> PAGE_SHIFT;
-		unsigned long end_pfn = ((physbase + size) >> PAGE_SHIFT);
+		unsigned long end_pfn = PFN_UP(physbase + size);
 		struct node_active_region node_ar;
 		unsigned long node_end_pfn = node->node_start_pfn +
 					     node->node_spanned_pages;
@@ -908,7 +909,7 @@ static void mark_reserved_regions_for_nid(int nid)
 			 */
 			if (end_pfn > node_ar.end_pfn)
 				reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
-					- (start_pfn << PAGE_SHIFT);
+					- physbase;
 			/*
 			 * Only worry about *this* node, others may not
 			 * yet have valid NODE_DATA().
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 67de6bf3db3d..d281cc0bca71 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -328,7 +328,7 @@ static int __init ps3_mm_add_memory(void)
 	return result;
 }
 
-core_initcall(ps3_mm_add_memory);
+device_initcall(ps3_mm_add_memory);
 
 /*============================================================================*/
 /* dma routines                                                               */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index be8497186b96..0d33893e1e89 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -212,6 +212,10 @@ static void kvm_free_vcpus(struct kvm *kvm)
 	}
 }
 
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_free_vcpus(kvm);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9c39095b33fc..8fc9a847cf48 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 10d6cc3fd052..e1983fa025d2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -174,28 +174,8 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
-config MMIOTRACE
-	bool "Memory mapped IO tracing"
-	depends on DEBUG_KERNEL && PCI
-	select TRACING
-	help
-	  Mmiotrace traces Memory Mapped I/O access and is meant for
-	  debugging and reverse engineering. It is called from the ioremap
-	  implementation and works via page faults. Tracing is disabled by
-	  default and can be enabled at run-time.
-
-	  See Documentation/tracers/mmiotrace.txt.
-	  If you are not helping to develop drivers, say N.
-
-config MMIOTRACE_TEST
-	tristate "Test module for mmiotrace"
-	depends on MMIOTRACE && m
-	help
-	  This is a dumb module for testing mmiotrace. It is very dangerous
-	  as it will write garbage to IO memory starting at a given address.
-	  However, it should be safe to use on e.g. unused portion of VRAM.
-
-	  Say N, unless you absolutely know what you are doing.
+config HAVE_MMIOTRACE_SUPPORT
+	def_bool y
 
 #
 # IO delay types:
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index d2e3bf3608af..886c9402ec45 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -9,6 +9,13 @@
 #include <linux/types.h>
 #include <linux/ioctl.h>
 
+/* Select x86 specific features in <linux/kvm.h> */
+#define __KVM_HAVE_PIT
+#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_DEVICE_ASSIGNMENT
+#define __KVM_HAVE_MSI
+#define __KVM_HAVE_USER_NMI
+
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
 
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 07f1af494ca5..105fb90a0635 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void)
 	get_memcfg_numa_flat();
 }
 
-extern int early_pfn_to_nid(unsigned long pfn);
-
 extern void resume_map_numa_kva(pgd_t *pgd);
 
 #else /* !CONFIG_NUMA */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index a5b3817d4b9e..a29f48c2a322 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
 #define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn +	\
 				 NODE_DATA(nid)->node_spanned_pages)
 
-extern int early_pfn_to_nid(unsigned long pfn);
-
 #ifdef CONFIG_NUMA_EMU
 #define FAKE_NODE_MIN_SIZE	(64 * 1024 * 1024)
 #define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index e9873a2e8695..776579119a00 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t;
 typedef struct { pgprotval_t pgprot; } pgprot_t;
 
 extern int page_is_ram(unsigned long pagenr);
-extern int pagerange_is_ram(unsigned long start, unsigned long end);
 extern int devmem_is_allowed(unsigned long pagenr);
 extern void map_devmem(unsigned long pfn, unsigned long size,
 		       pgprot_t vma_prot);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c09a14127584..e299287e8e33 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void)
 	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
 }
 
-static inline void arch_flush_lazy_cpu_mode(void)
-{
-	if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
-		arch_leave_lazy_cpu_mode();
-		arch_enter_lazy_cpu_mode();
-	}
-}
-
+void arch_flush_lazy_cpu_mode(void);
 
 #define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
 static inline void arch_enter_lazy_mmu_mode(void)
@@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
 	PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
 }
 
-static inline void arch_flush_lazy_mmu_mode(void)
-{
-	if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
-		arch_leave_lazy_mmu_mode();
-		arch_enter_lazy_mmu_mode();
-	}
-}
+void arch_flush_lazy_mmu_mode(void);
 
 static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 				unsigned long phys, pgprot_t flags)
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 4b1c319d30c3..c5d737cdb365 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,7 +33,7 @@
 #include <linux/cpufreq.h>
 #include <linux/compiler.h>
 #include <linux/dmi.h>
-#include <linux/ftrace.h>
+#include <trace/power.h>
 
 #include <linux/acpi.h>
 #include <acpi/processor.h>
@@ -70,6 +70,8 @@ struct acpi_cpufreq_data {
 
 static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
 
+DEFINE_TRACE(power_mark);
+
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance *acpi_perf_data;
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index fb039cd345d8..6428aa17b40e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	data->cpu = pol->cpu;
 	data->currpstate = HW_PSTATE_INVALID;
 
-	rc = powernow_k8_cpu_init_acpi(data);
-	if (rc) {
+	if (powernow_k8_cpu_init_acpi(data)) {
 		/*
 		 * Use the PSB BIOS structure. This is only availabe on
 		 * an UP version, and is deprecated by AMD.
@@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 			       "ACPI maintainers and complain to your BIOS "
 			       "vendor.\n");
 #endif
-			goto err_out;
+			kfree(data);
+			return -ENODEV;
 		}
 		if (pol->cpu != 0) {
 			printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
 			       "CPU other than CPU0. Complain to your BIOS "
 			       "vendor.\n");
-			goto err_out;
+			kfree(data);
+			return -ENODEV;
 		}
 		rc = find_psb_table(data);
 		if (rc) {
-			goto err_out;
+			kfree(data);
+			return -ENODEV;
 		}
 		/* Take a crude guess here.
 		 * That guess was in microseconds, so multiply with 1000 */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c0852291b623..4325165753d8 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -15,6 +15,7 @@
 #include <linux/bug.h>
 #include <linux/nmi.h>
 #include <linux/sysfs.h>
+#include <linux/ftrace.h>
 
 #include <asm/stacktrace.h>
 
@@ -196,6 +197,11 @@ unsigned __kprobes long oops_begin(void)
 	int cpu;
 	unsigned long flags;
 
+	/* notify the hw-branch tracer so it may disable tracing and
+	   add the last trace to the trace buffer -
+	   the earlier this happens, the more useful the trace. */
+	trace_hw_branch_oops();
+
 	oops_enter();
 
 	/* racy, but better than risking deadlock. */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 76f7141e0f91..c2e057d9f88c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -82,7 +82,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  * are the same as what exists.
  */
 
-static atomic_t in_nmi = ATOMIC_INIT(0);
+static atomic_t nmi_running = ATOMIC_INIT(0);
 static int mod_code_status;		/* holds return value of text write */
 static int mod_code_write;		/* set when NMI should do the write */
 static void *mod_code_ip;		/* holds the IP to write to */
@@ -115,8 +115,8 @@ static void ftrace_mod_code(void)
 
 void ftrace_nmi_enter(void)
 {
-	atomic_inc(&in_nmi);
-	/* Must have in_nmi seen before reading write flag */
+	atomic_inc(&nmi_running);
+	/* Must have nmi_running seen before reading write flag */
 	smp_mb();
 	if (mod_code_write) {
 		ftrace_mod_code();
@@ -126,22 +126,21 @@ void ftrace_nmi_enter(void)
 
 void ftrace_nmi_exit(void)
 {
-	/* Finish all executions before clearing in_nmi */
+	/* Finish all executions before clearing nmi_running */
 	smp_wmb();
-	atomic_dec(&in_nmi);
+	atomic_dec(&nmi_running);
 }
 
 static void wait_for_nmi(void)
 {
-	int waited = 0;
+	if (!atomic_read(&nmi_running))
+		return;
 
-	while (atomic_read(&in_nmi)) {
-		waited = 1;
+	do {
 		cpu_relax();
-	}
+	} while (atomic_read(&nmi_running));
 
-	if (waited)
-		nmi_wait_count++;
+	nmi_wait_count++;
 }
 
 static int
@@ -368,25 +367,6 @@ int ftrace_disable_ftrace_graph_caller(void)
 	return ftrace_mod_jmp(ip, old_offset, new_offset);
 }
 
-#else /* CONFIG_DYNAMIC_FTRACE */
-
-/*
- * These functions are picked from those used on
- * this page for dynamic ftrace. They have been
- * simplified to ignore all traces in NMI context.
- */
-static atomic_t in_nmi;
-
-void ftrace_nmi_enter(void)
-{
-	atomic_inc(&in_nmi);
-}
-
-void ftrace_nmi_exit(void)
-{
-	atomic_dec(&in_nmi);
-}
-
 #endif /* !CONFIG_DYNAMIC_FTRACE */
 
 /*
@@ -403,7 +383,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 				&return_to_handler;
 
 	/* Nmi's are currently unsupported */
-	if (unlikely(atomic_read(&in_nmi)))
+	if (unlikely(in_nmi()))
 		return;
 
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
@@ -439,13 +419,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	if (unlikely(!__kernel_text_address(old))) {
-		ftrace_graph_stop();
-		*parent = old;
-		WARN_ON(1);
-		return;
-	}
-
 	calltime = cpu_clock(raw_smp_processor_id());
 
 	if (ftrace_push_return_trace(old, calltime,
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 388254f69a2a..a00545fe5cdd 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode,
 		now = hpet_readl(HPET_COUNTER);
 		cmp = now + (unsigned long) delta;
 		cfg = hpet_readl(HPET_Tn_CFG(timer));
+		/* Make sure we use edge triggered interrupts */
+		cfg &= ~HPET_TN_LEVEL;
 		cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
 		       HPET_TN_SETVAL | HPET_TN_32BIT;
 		hpet_writel(cfg, HPET_Tn_CFG(timer));
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 7a13fac63a1f..4006c522adc7 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -203,7 +203,7 @@ static void __init platform_detect(void)
 static void __init platform_detect(void)
 {
 	/* stopgap until OFW support is added to the kernel */
-	olpc_platform_info.boardrev = 0xc2;
+	olpc_platform_info.boardrev = olpc_board(0xc2);
 }
 #endif
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4c8fb608873..c6520a4e85d4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,6 +268,32 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 	return __get_cpu_var(paravirt_lazy_mode);
 }
 
+void arch_flush_lazy_mmu_mode(void)
+{
+	preempt_disable();
+
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+		WARN_ON(preempt_count() == 1);
+		arch_leave_lazy_mmu_mode();
+		arch_enter_lazy_mmu_mode();
+	}
+
+	preempt_enable();
+}
+
+void arch_flush_lazy_cpu_mode(void)
+{
+	preempt_disable();
+
+	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+		WARN_ON(preempt_count() == 1);
+		arch_leave_lazy_cpu_mode();
+		arch_enter_lazy_cpu_mode();
+	}
+
+	preempt_enable();
+}
+
 struct pv_info pv_info = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6d12f7e37f8c..23b328edc2b3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -8,7 +8,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/clockchips.h>
-#include <linux/ftrace.h>
+#include <trace/power.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 
@@ -19,6 +19,9 @@ EXPORT_SYMBOL(idle_nomwait);
 
 struct kmem_cache *task_xstate_cachep;
 
+DEFINE_TRACE(power_start);
+DEFINE_TRACE(power_end);
+
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	*dst = *src;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f82fb9..5a4c23d89892 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child)
 
 static void ptrace_bts_detach(struct task_struct *child)
 {
-	if (unlikely(child->bts)) {
-		ds_release_bts(child->bts);
-		child->bts = NULL;
-
-		ptrace_bts_free_buffer(child);
-	}
+	/*
+	 * Ptrace_detach() races with ptrace_untrace() in case
+	 * the child dies and is reaped by another thread.
+	 *
+	 * We only do the memory accounting at this point and
+	 * leave the buffer deallocation and the bts tracer
+	 * release to ptrace_bts_untrace() which will be called
+	 * later on with tasklist_lock held.
+	 */
+	release_locked_buffer(child->bts_buffer, child->bts_size);
 }
 #else
 static inline void ptrace_bts_fork(struct task_struct *tsk) {}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7932338d7cb3..a9e7548e1790 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs)
 		local_irq_enable();
 }
 
+static inline void conditional_cli(struct pt_regs *regs)
+{
+	if (regs->flags & X86_EFLAGS_IF)
+		local_irq_disable();
+}
+
 static inline void preempt_conditional_cli(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
@@ -626,8 +632,10 @@ clear_dr7:
 
 #ifdef CONFIG_X86_32
 debug_vm86:
+	/* reenable preemption: handle_vm86_trap() might sleep */
+	dec_preempt_count();
 	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
-	preempt_conditional_cli(regs);
+	conditional_cli(regs);
 	return;
 #endif
 
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bdee..c7da3683f4c5 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -55,7 +55,8 @@ config KVM_AMD
 
 config KVM_TRACE
 	bool "KVM trace support"
-	depends on KVM && MARKERS && SYSFS
+	depends on KVM && SYSFS
+	select MARKERS
 	select RELAY
 	select DEBUG_FS
 	default n
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index e665d1c623ca..72bd275a9b5c 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
 	hrtimer_add_expires_ns(&pt->timer, pt->period);
 	pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
 	if (pt->period)
-		ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer);
+		ps->channels[0].count_load_time = ktime_get();
 
 	return (pt->period == 0 ? 0 : 1);
 }
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index c019b8edcdb7..cf17ed52f6fb 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
 
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
-	kvm_apic_timer_intr_post(vcpu, vec);
-	/* TODO: PIT, RTC etc. */
-}
-EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
-
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	__kvm_migrate_apic_timer(vcpu);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2bf32a03ceec..82579ee538d0 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
 
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afac68c0815c..f0b67f2cdd69 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -35,6 +35,12 @@
 #include "kvm_cache_regs.h"
 #include "irq.h"
 
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
 #define PRId64 "d"
 #define PRIx64 "llx"
 #define PRIu64 "u"
@@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
 {
-	u64 counter_passed;
-	ktime_t passed, now;
+	ktime_t remaining;
+	s64 ns;
 	u32 tmcct;
 
 	ASSERT(apic != NULL);
 
-	now = apic->timer.dev.base->get_time();
-	tmcct = apic_get_reg(apic, APIC_TMICT);
-
 	/* if initial count is 0, current count should also be 0 */
-	if (tmcct == 0)
+	if (apic_get_reg(apic, APIC_TMICT) == 0)
 		return 0;
 
-	if (unlikely(ktime_to_ns(now) <=
-		ktime_to_ns(apic->timer.last_update))) {
-		/* Wrap around */
-		passed = ktime_add(( {
-				    (ktime_t) {
-				    .tv64 = KTIME_MAX -
-				    (apic->timer.last_update).tv64}; }
-				   ), now);
-		apic_debug("time elapsed\n");
-	} else
-		passed = ktime_sub(now, apic->timer.last_update);
-
-	counter_passed = div64_u64(ktime_to_ns(passed),
-				   (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
-
-	if (counter_passed > tmcct) {
-		if (unlikely(!apic_lvtt_period(apic))) {
-			/* one-shot timers stick at 0 until reset */
-			tmcct = 0;
-		} else {
-			/*
-			 * periodic timers reset to APIC_TMICT when they
-			 * hit 0. The while loop simulates this happening N
-			 * times. (counter_passed %= tmcct) would also work,
-			 * but might be slower or not work on 32-bit??
-			 */
-			while (counter_passed > tmcct)
-				counter_passed -= tmcct;
-			tmcct -= counter_passed;
-		}
-	} else {
-		tmcct -= counter_passed;
-	}
+	remaining = hrtimer_expires_remaining(&apic->timer.dev);
+	if (ktime_to_ns(remaining) < 0)
+		remaining = ktime_set(0, 0);
+
+	ns = mod_64(ktime_to_ns(remaining), apic->timer.period);
+	tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
 
 	return tmcct;
 }
@@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic)
 {
 	ktime_t now = apic->timer.dev.base->get_time();
 
-	apic->timer.last_update = now;
-
 	apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
 		    APIC_BUS_CYCLE_NS * apic->timer.divide_count;
 	atomic_set(&apic->timer.pending, 0);
@@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 	}
 }
 
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
-	struct kvm_lapic *apic = vcpu->arch.apic;
-
-	if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
-		apic->timer.last_update = ktime_add_ns(
-				apic->timer.last_update,
-				apic->timer.period);
-}
-
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 {
 	int vector = kvm_apic_has_interrupt(vcpu);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 81858881287e..45ab6ee71209 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -12,7 +12,6 @@ struct kvm_lapic {
 		atomic_t pending;
 		s64 period;	/* unit: ns */
 		u32 divide_count;
-		ktime_t last_update;
 		struct hrtimer dev;
 	} timer;
 	struct kvm_vcpu *vcpu;
@@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 
 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 83f11c7474a1..2d4477c71473 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
 	if (mt_mask) {
-		mt_mask = get_memory_type(vcpu, gfn) <<
-			  kvm_x86_ops->get_mt_mask_shift();
+		if (!kvm_is_mmio_pfn(pfn)) {
+			mt_mask = get_memory_type(vcpu, gfn) <<
+				kvm_x86_ops->get_mt_mask_shift();
+			mt_mask |= VMX_EPT_IGMT_BIT;
+		} else
+			mt_mask = MTRR_TYPE_UNCACHABLE <<
+				kvm_x86_ops->get_mt_mask_shift();
 		spte |= mt_mask;
 	}
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1452851ae258..a9e769e4e251 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
 	/* Okay, we can deliver the interrupt: grab it and update PIC state. */
 	intr_vector = kvm_cpu_get_interrupt(vcpu);
 	svm_inject_irq(svm, intr_vector);
-	kvm_timer_intr_post(vcpu, intr_vector);
 out:
 	update_cr8_intercept(vcpu);
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6259d7467648..7611af576829 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		data = vmcs_readl(GUEST_SYSENTER_ESP);
 		break;
 	default:
+		vmx_load_host_state(to_vmx(vcpu));
 		msr = find_msr_entry(to_vmx(vcpu), msr_index);
 		if (msr) {
 			data = msr->data;
@@ -3285,7 +3286,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 	}
 	if (vcpu->arch.interrupt.pending) {
 		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
 		if (kvm_cpu_has_interrupt(vcpu))
 			enable_irq_window(vcpu);
 	}
@@ -3687,8 +3687,7 @@ static int __init vmx_init(void)
 	if (vm_need_ept()) {
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
-			VMX_EPT_WRITABLE_MASK |
-			VMX_EPT_IGMT_BIT);
+			VMX_EPT_WRITABLE_MASK);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
 				VMX_EPT_EXECUTABLE_MASK,
 				VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cc17546a2406..758b7a155ae9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
-	case KVM_CAP_CLOCKSOURCE:
 	case KVM_CAP_PIT:
 	case KVM_CAP_NOP_IO_DELAY:
 	case KVM_CAP_MP_STATE:
@@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IOMMU:
 		r = iommu_found();
 		break;
+	case KVM_CAP_CLOCKSOURCE:
+		r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
+		break;
 	default:
 		r = 0;
 		break;
@@ -4127,9 +4129,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 }
 
-void kvm_arch_destroy_vm(struct kvm *kvm)
+void kvm_arch_sync_events(struct kvm *kvm)
 {
 	kvm_free_all_assigned_devices(kvm);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
 	kvm_iommu_unmap_guest(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index af750ab973b6..f45d5e29a72e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr)
 	return 0;
 }
 
-int pagerange_is_ram(unsigned long start, unsigned long end)
-{
-	int ram_page = 0, not_rampage = 0;
-	unsigned long page_nr;
-
-	for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
-	     ++page_nr) {
-		if (page_is_ram(page_nr))
-			ram_page = 1;
-		else
-			not_rampage = 1;
-
-		if (ram_page == not_rampage)
-			return -1;
-	}
-
-	return ram_page;
-}
-
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 71a14f89f89e..f3516da035d1 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
 	return shift;
 }
 
-int early_pfn_to_nid(unsigned long pfn)
+int __meminit  __early_pfn_to_nid(unsigned long pfn)
 {
 	return phys_to_nid(pfn << PAGE_SHIFT);
 }
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 84ba74820ad6..8ca0d8566fc8 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -575,7 +575,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
 		address = cpa->vaddr[cpa->curpage];
 	else
 		address = *cpa->vaddr;
-
 repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
@@ -812,6 +811,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
 	vm_unmap_aliases();
 
+	/*
+	 * If we're called with lazy mmu updates enabled, the
+	 * in-memory pte state may be stale.  Flush pending updates to
+	 * bring them up to date.
+	 */
+	arch_flush_lazy_mmu_mode();
+
 	cpa.vaddr = addr;
 	cpa.numpages = numpages;
 	cpa.mask_set = mask_set;
@@ -854,6 +860,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	} else
 		cpa_flush_all(cache);
 
+	/*
+	 * If we've been called with lazy mmu updates enabled, then
+	 * make sure that everything gets flushed out before we
+	 * return.
+	 */
+	arch_flush_lazy_mmu_mode();
+
 out:
 	return ret;
 }
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7b61036427df..aebbf67a79d0 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -211,6 +211,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
 static struct memtype *cached_entry;
 static u64 cached_start;
 
+static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
+{
+	int ram_page = 0, not_rampage = 0;
+	unsigned long page_nr;
+
+	for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
+	     ++page_nr) {
+		/*
+		 * For legacy reasons, physical address range in the legacy ISA
+		 * region is tracked as non-RAM. This will allow users of
+		 * /dev/mem to map portions of legacy ISA region, even when
+		 * some of those portions are listed(or not even listed) with
+		 * different e820 types(RAM/reserved/..)
+		 */
+		if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
+		    page_is_ram(page_nr))
+			ram_page = 1;
+		else
+			not_rampage = 1;
+
+		if (ram_page == not_rampage)
+			return -1;
+	}
+
+	return ram_page;
+}
+
 /*
  * For RAM pages, mark the pages as non WB memory type using
  * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
@@ -336,20 +363,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	if (new_type)
 		*new_type = actual_type;
 
-	/*
-	 * For legacy reasons, some parts of the physical address range in the
-	 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
-	 * the e820 tables).  So we will track the memory attributes of this
-	 * legacy 1MB region using the linear memtype_list always.
-	 */
-	if (end >= ISA_END_ADDRESS) {
-		is_range_ram = pagerange_is_ram(start, end);
-		if (is_range_ram == 1)
-			return reserve_ram_pages_type(start, end, req_type,
-						      new_type);
-		else if (is_range_ram < 0)
-			return -EINVAL;
-	}
+	is_range_ram = pat_pagerange_is_ram(start, end);
+	if (is_range_ram == 1)
+		return reserve_ram_pages_type(start, end, req_type,
+					      new_type);
+	else if (is_range_ram < 0)
+		return -EINVAL;
 
 	new  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
 	if (!new)
@@ -446,19 +465,11 @@ int free_memtype(u64 start, u64 end)
 	if (is_ISA_range(start, end - 1))
 		return 0;
 
-	/*
-	 * For legacy reasons, some parts of the physical address range in the
-	 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
-	 * the e820 tables).  So we will track the memory attributes of this
-	 * legacy 1MB region using the linear memtype_list always.
-	 */
-	if (end >= ISA_END_ADDRESS) {
-		is_range_ram = pagerange_is_ram(start, end);
-		if (is_range_ram == 1)
-			return free_ram_pages_type(start, end);
-		else if (is_range_ram < 0)
-			return -EINVAL;
-	}
+	is_range_ram = pat_pagerange_is_ram(start, end);
+	if (is_range_ram == 1)
+		return free_ram_pages_type(start, end);
+	else if (is_range_ram < 0)
+		return -EINVAL;
 
 	spin_lock(&memtype_lock);
 	list_for_each_entry(entry, &memtype_list, nd) {
@@ -626,17 +637,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
 	unsigned long flags;
 	unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
 
-	is_ram = pagerange_is_ram(paddr, paddr + size);
+	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
 
-	if (is_ram != 0) {
-		/*
-		 * For mapping RAM pages, drivers need to call
-		 * set_memory_[uc|wc|wb] directly, for reserve and free, before
-		 * setting up the PTE.
-		 */
-		WARN_ON_ONCE(1);
-		return 0;
-	}
+	/*
+	 * reserve_pfn_range() doesn't support RAM pages.
+	 */
+	if (is_ram != 0)
+		return -EINVAL;
 
 	ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
 	if (ret)
@@ -693,7 +700,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
 {
 	int is_ram;
 
-	is_ram = pagerange_is_ram(paddr, paddr + size);
+	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
 	if (is_ram == 0)
 		free_memtype(paddr, paddr + size);
 }