From c513867561eeb07d24a0bdda1a18a8f91921a301 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 14 Oct 2008 18:08:48 -0400
Subject: ftrace: do not enclose logic in WARN_ON

In ftrace, logic is defined in the WARN_ON_ONCE, which can become a
nop with some configs. This patch fixes it.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d073d981a730..8821ceabf51d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -62,6 +62,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		   unsigned char *new_code)
 {
 	unsigned char replaced[MCOUNT_INSN_SIZE];
+	int ret;
 
 	/*
 	 * Note: Due to modules and __init, code can
@@ -77,8 +78,9 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
 		return 2;
 
-	WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code,
-				    MCOUNT_INSN_SIZE));
+	ret = __copy_to_user_inatomic((char __user *)ip, new_code,
+					MCOUNT_INSN_SIZE);
+	WARN_ON_ONCE(ret);
 
 	sync_core();
 
-- 
cgit v1.2.3


From 606576ce816603d9fe1fb453a88bc6eea16ca709 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 6 Oct 2008 19:06:12 -0400
Subject: ftrace: rename FTRACE to FUNCTION_TRACER

Due to confusion between the ftrace infrastructure and the gcc profiling
tracer "ftrace", this patch renames the config options from FTRACE to
FUNCTION_TRACER.  The other two names that are offspring from FTRACE
DYNAMIC_FTRACE and FTRACE_MCOUNT_RECORD will stay the same.

This patch was generated mostly by script, and partially by hand.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Makefile                                 |  2 +-
 arch/arm/Kconfig                         |  4 ++--
 arch/arm/boot/compressed/Makefile        |  2 +-
 arch/arm/include/asm/ftrace.h            |  2 +-
 arch/arm/kernel/armksyms.c               |  2 +-
 arch/arm/kernel/entry-common.S           |  4 ++--
 arch/powerpc/Kconfig                     |  2 +-
 arch/powerpc/Makefile                    |  2 +-
 arch/powerpc/include/asm/ftrace.h        |  2 +-
 arch/powerpc/kernel/Makefile             |  2 +-
 arch/powerpc/kernel/entry_32.S           |  2 +-
 arch/powerpc/kernel/entry_64.S           |  2 +-
 arch/powerpc/kernel/ppc_ksyms.c          |  2 +-
 arch/powerpc/platforms/powermac/Makefile |  2 +-
 arch/sparc64/Kconfig                     |  2 +-
 arch/sparc64/Kconfig.debug               |  2 +-
 arch/sparc64/lib/mcount.S                |  4 ++--
 arch/x86/Kconfig                         |  2 +-
 arch/x86/kernel/Makefile                 |  2 +-
 arch/x86/kernel/entry_32.S               |  4 ++--
 arch/x86/kernel/entry_64.S               |  4 ++--
 arch/x86/kernel/i386_ksyms_32.c          |  2 +-
 arch/x86/kernel/x8664_ksyms_64.c         |  2 +-
 arch/x86/xen/Makefile                    |  2 +-
 include/asm-x86/ftrace.h                 |  4 ++--
 include/linux/ftrace.h                   | 12 ++++++------
 kernel/Makefile                          |  4 ++--
 kernel/sysctl.c                          |  2 +-
 kernel/trace/Kconfig                     | 17 +++++++++--------
 kernel/trace/Makefile                    |  6 +++---
 kernel/trace/trace.c                     |  2 +-
 kernel/trace/trace.h                     |  2 +-
 kernel/trace/trace_irqsoff.c             |  4 ++--
 kernel/trace/trace_sched_wakeup.c        |  4 ++--
 kernel/trace/trace_selftest.c            |  4 ++--
 lib/Makefile                             |  2 +-
 36 files changed, 61 insertions(+), 60 deletions(-)

(limited to 'arch/x86')

diff --git a/Makefile b/Makefile
index 16e3fbb968a8..b7eb70b13cad 100644
--- a/Makefile
+++ b/Makefile
@@ -536,7 +536,7 @@ KBUILD_CFLAGS	+= -g
 KBUILD_AFLAGS	+= -gdwarf-2
 endif
 
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 KBUILD_CFLAGS	+= -pg
 endif
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4853f9df37bd..c2f18ea40500 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -16,8 +16,8 @@ config ARM
 	select HAVE_ARCH_KGDB
 	select HAVE_KPROBES if (!XIP_KERNEL)
 	select HAVE_KRETPROBES if (HAVE_KPROBES)
-	select HAVE_FTRACE if (!XIP_KERNEL)
-	select HAVE_DYNAMIC_FTRACE if (HAVE_FTRACE)
+	select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
+	select HAVE_DYNAMIC_FTRACE if (HAVE_FUNCTION_TRACER)
 	select HAVE_GENERIC_DMA_COHERENT
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 7a03f2007882..c47f2a3f8f8f 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -70,7 +70,7 @@ SEDFLAGS	= s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
 targets       := vmlinux vmlinux.lds piggy.gz piggy.o font.o font.c \
 		 head.o misc.o $(OBJS)
 
-ifeq ($(CONFIG_FTRACE),y)
+ifeq ($(CONFIG_FUNCTION_TRACER),y)
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 584ef9a8e5a5..39c8bc1a006a 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_ARM_FTRACE
 #define _ASM_ARM_FTRACE
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 #define MCOUNT_ADDR		((long)(mcount))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
 
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 2357b1cf1cf9..c74f766ffc12 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -183,6 +183,6 @@ EXPORT_SYMBOL(_find_next_bit_be);
 
 EXPORT_SYMBOL(copy_page);
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 EXPORT_SYMBOL(mcount);
 #endif
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 3aa14dcc5bab..06269ea375c5 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -101,7 +101,7 @@ ENDPROC(ret_from_fork)
 #undef CALL
 #define CALL(x) .long x
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
 	stmdb sp!, {r0-r3, lr}
@@ -149,7 +149,7 @@ trace:
 ftrace_stub:
 	mov pc, lr
 
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
 
 /*=============================================================================
  * SWI handler
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 380baa1780e9..97d86702e2d5 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -112,7 +112,7 @@ config PPC
 	bool
 	default y
 	select HAVE_DYNAMIC_FTRACE
-	select HAVE_FTRACE
+	select HAVE_FUNCTION_TRACER
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 24dd1a37f8fb..1f0667069940 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -122,7 +122,7 @@ KBUILD_CFLAGS		+= -mcpu=powerpc
 endif
 
 # Work around a gcc code-gen bug with -fno-omit-frame-pointer.
-ifeq ($(CONFIG_FTRACE),y)
+ifeq ($(CONFIG_FUNCTION_TRACER),y)
 KBUILD_CFLAGS		+= -mno-sched-epilog
 endif
 
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index de921326cca8..b298f7a631e6 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_POWERPC_FTRACE
 #define _ASM_POWERPC_FTRACE
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 #define MCOUNT_ADDR		((long)(_mcount))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
 
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index fdb58253fa5b..92673b43858d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -12,7 +12,7 @@ CFLAGS_prom_init.o      += -fPIC
 CFLAGS_btext.o		+= -fPIC
 endif
 
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 # Do not trace early boot code
 CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
 CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 1cbbf7033641..7ecc0d1855c3 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -1158,7 +1158,7 @@ machine_check_in_rtas:
 
 #endif /* CONFIG_PPC_RTAS */
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 _GLOBAL(mcount)
 _GLOBAL(_mcount)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index fd8b4bae9b04..e6d52845854f 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -884,7 +884,7 @@ _GLOBAL(enter_prom)
 	mtlr    r0
         blr
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 _GLOBAL(mcount)
 _GLOBAL(_mcount)
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 8edc2359c419..260089dccfb0 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -68,7 +68,7 @@ EXPORT_SYMBOL(single_step_exception);
 EXPORT_SYMBOL(sys_sigreturn);
 #endif
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 EXPORT_SYMBOL(_mcount);
 #endif
 
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index be60d64be7ad..50f169392551 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -1,6 +1,6 @@
 CFLAGS_bootx_init.o  		+= -fPIC
 
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 # Do not trace early boot code
 CFLAGS_REMOVE_bootx_init.o = -pg -mno-sched-epilog
 endif
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 5446e2a499b1..d269400d2868 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -12,7 +12,7 @@ config SPARC64
 	bool
 	default y
 	select HAVE_DYNAMIC_FTRACE
-	select HAVE_FTRACE
+	select HAVE_FUNCTION_TRACER
 	select HAVE_IDE
 	select HAVE_LMB
 	select HAVE_ARCH_KGDB
diff --git a/arch/sparc64/Kconfig.debug b/arch/sparc64/Kconfig.debug
index d6d32d178fc8..c40515c06690 100644
--- a/arch/sparc64/Kconfig.debug
+++ b/arch/sparc64/Kconfig.debug
@@ -33,7 +33,7 @@ config DEBUG_PAGEALLOC
 
 config MCOUNT
 	bool
-	depends on STACK_DEBUG || FTRACE
+	depends on STACK_DEBUG || FUNCTION_TRACER
 	default y
 
 config FRAME_POINTER
diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S
index fad90ddb3a28..7ce9c65f3592 100644
--- a/arch/sparc64/lib/mcount.S
+++ b/arch/sparc64/lib/mcount.S
@@ -93,7 +93,7 @@ mcount:
 	 nop
 1:
 #endif
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 	mov		%o7, %o0
 	.globl		mcount_call
@@ -119,7 +119,7 @@ mcount_call:
 	.size		_mcount,.-_mcount
 	.size		mcount,.-mcount
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 	.globl		ftrace_stub
 	.type		ftrace_stub,#function
 ftrace_stub:
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 40ee80809562..290e21aa774d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,7 +28,7 @@ config X86
 	select HAVE_KRETPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
-	select HAVE_FTRACE
+	select HAVE_FUNCTION_TRACER
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0d41f0343dc0..ec3d30136bf0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,7 +6,7 @@ extra-y                := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 4e4269c73bb7..9d49facc21f2 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1149,7 +1149,7 @@ ENDPROC(xen_failsafe_callback)
 
 #endif	/* CONFIG_XEN */
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 ENTRY(mcount)
@@ -1204,7 +1204,7 @@ trace:
 	jmp ftrace_stub
 END(mcount)
 #endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
 
 .section .rodata,"a"
 #include "syscall_table_32.S"
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 09e7145484c5..b86f332c96a6 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -61,7 +61,7 @@
 
 	.code64
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
 	retq
@@ -138,7 +138,7 @@ trace:
 	jmp ftrace_stub
 END(mcount)
 #endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
 
 #ifndef CONFIG_PREEMPT
 #define retint_kernel retint_restore_args
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index dd7ebee446af..43cec6bdda63 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -5,7 +5,7 @@
 #include <asm/desc.h>
 #include <asm/ftrace.h>
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 /* mcount is defined in assembly */
 EXPORT_SYMBOL(mcount);
 #endif
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index b545f371b5f5..695e426aa354 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -12,7 +12,7 @@
 #include <asm/desc.h>
 #include <asm/ftrace.h>
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 /* mcount is defined in assembly */
 EXPORT_SYMBOL(mcount);
 #endif
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 313947940a1a..6dcefba7836f 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
 CFLAGS_REMOVE_spinlock.o = -pg
 CFLAGS_REMOVE_time.o = -pg
diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h
index 1bb6f9bbe1ab..233bb9b869c0 100644
--- a/include/asm-x86/ftrace.h
+++ b/include/asm-x86/ftrace.h
@@ -1,7 +1,7 @@
 #ifndef ASM_X86__FTRACE_H
 #define ASM_X86__FTRACE_H
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 #define MCOUNT_ADDR		((long)(mcount))
 #define MCOUNT_INSN_SIZE	5 /* sizeof mcount call */
 
@@ -19,6 +19,6 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 }
 #endif
 
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
 
 #endif /* ASM_X86__FTRACE_H */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a3d46151be19..0e9529589151 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -8,7 +8,7 @@
 #include <linux/types.h>
 #include <linux/kallsyms.h>
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 
 extern int ftrace_enabled;
 extern int
@@ -36,12 +36,12 @@ void clear_ftrace_function(void);
 
 extern void ftrace_stub(unsigned long a0, unsigned long a1);
 
-#else /* !CONFIG_FTRACE */
+#else /* !CONFIG_FUNCTION_TRACER */
 # define register_ftrace_function(ops) do { } while (0)
 # define unregister_ftrace_function(ops) do { } while (0)
 # define clear_ftrace_function(ops) do { } while (0)
 static inline void ftrace_kill_atomic(void) { }
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 # define FTRACE_HASHBITS	10
@@ -101,7 +101,7 @@ void ftrace_kill_atomic(void);
 
 static inline void tracer_disable(void)
 {
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 	ftrace_enabled = 0;
 #endif
 }
@@ -113,7 +113,7 @@ static inline void tracer_disable(void)
  */
 static inline int __ftrace_enabled_save(void)
 {
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 	int saved_ftrace_enabled = ftrace_enabled;
 	ftrace_enabled = 0;
 	return saved_ftrace_enabled;
@@ -124,7 +124,7 @@ static inline int __ftrace_enabled_save(void)
 
 static inline void __ftrace_enabled_restore(int enabled)
 {
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 	ftrace_enabled = enabled;
 #endif
 }
diff --git a/kernel/Makefile b/kernel/Makefile
index 8f9ce7ec21b6..85f588a9d0b1 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -13,7 +13,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 
 CFLAGS_REMOVE_sched.o = -mno-spe
 
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 # Do not trace debug files and internal ftrace files
 CFLAGS_REMOVE_lockdep.o = -pg
 CFLAGS_REMOVE_lockdep_proc.o = -pg
@@ -86,7 +86,7 @@ obj-$(CONFIG_MARKERS) += marker.o
 obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
 obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
-obj-$(CONFIG_FTRACE) += trace/
+obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 617d41e4d6a0..619eb9f3acd8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -464,7 +464,7 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "ftrace_enabled",
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5866edbc2ed1..3533c583df47 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,11 +1,12 @@
 #
-# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
+# Architectures that offer an FUNCTION_TRACER implementation should
+#  select HAVE_FUNCTION_TRACER:
 #
 
 config NOP_TRACER
 	bool
 
-config HAVE_FTRACE
+config HAVE_FUNCTION_TRACER
 	bool
 	select NOP_TRACER
 
@@ -28,9 +29,9 @@ config TRACING
 	select STACKTRACE
 	select TRACEPOINTS
 
-config FTRACE
+config FUNCTION_TRACER
 	bool "Kernel Function Tracer"
-	depends on HAVE_FTRACE
+	depends on HAVE_FUNCTION_TRACER
 	depends on DEBUG_KERNEL
 	select FRAME_POINTER
 	select TRACING
@@ -136,9 +137,9 @@ config BOOT_TRACER
 
 config STACK_TRACER
 	bool "Trace max stack"
-	depends on HAVE_FTRACE
+	depends on HAVE_FUNCTION_TRACER
 	depends on DEBUG_KERNEL
-	select FTRACE
+	select FUNCTION_TRACER
 	select STACKTRACE
 	help
 	  This special tracer records the maximum stack footprint of the
@@ -155,7 +156,7 @@ config STACK_TRACER
 
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
-	depends on FTRACE
+	depends on FUNCTION_TRACER
 	depends on HAVE_DYNAMIC_FTRACE
 	depends on DEBUG_KERNEL
 	default y
@@ -165,7 +166,7 @@ config DYNAMIC_FTRACE
 	 with a No-Op instruction) as they are called. A table is
 	 created to dynamically enable them again.
 
-	 This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
+	 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
 	 has native performance as long as no tracing is active.
 
 	 The changes to the code are done by a kernel thread that
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index a85dfba88ba0..c8228b1a49e9 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,7 +1,7 @@
 
 # Do not instrument the tracer itself:
 
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
 
@@ -10,13 +10,13 @@ CFLAGS_trace_selftest_dynamic.o = -pg
 obj-y += trace_selftest_dynamic.o
 endif
 
-obj-$(CONFIG_FTRACE) += libftrace.o
+obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
-obj-$(CONFIG_FTRACE) += trace_functions.o
+obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d345d649d073..aeb2f2505bc5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -851,7 +851,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
 	preempt_enable_notrace();
 }
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 static void
 function_trace_call(unsigned long ip, unsigned long parent_ip)
 {
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f1f99572cde7..6889ca48f1f1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -335,7 +335,7 @@ void update_max_tr_single(struct trace_array *tr,
 
 extern cycle_t ftrace_now(int cpu);
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 void tracing_start_function_trace(void);
 void tracing_stop_function_trace(void);
 #else
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index a7db7f040ae0..9c74071c10e0 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -63,7 +63,7 @@ irq_trace(void)
  */
 static __cacheline_aligned_in_smp	unsigned long max_sequence;
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 /*
  * irqsoff uses its own tracer function to keep the overhead down:
  */
@@ -104,7 +104,7 @@ static struct ftrace_ops trace_ops __read_mostly =
 {
 	.func = irqsoff_tracer_call,
 };
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
 
 /*
  * Should this new latency be reported/recorded?
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index fe4a252c2363..3ae93f16b565 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -31,7 +31,7 @@ static raw_spinlock_t wakeup_lock =
 
 static void __wakeup_reset(struct trace_array *tr);
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 /*
  * irqsoff uses its own tracer function to keep the overhead down:
  */
@@ -96,7 +96,7 @@ static struct ftrace_ops trace_ops __read_mostly =
 {
 	.func = wakeup_tracer_call,
 };
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
 
 /*
  * Should this new latency be reported/recorded?
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 09cf230d7eca..95815d26a041 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -70,7 +70,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
 	return ret;
 }
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
@@ -226,7 +226,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
 
 	return ret;
 }
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_IRQSOFF_TRACER
 int
diff --git a/lib/Makefile b/lib/Makefile
index 16feaab057b2..7cb65d85aeb0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -2,7 +2,7 @@
 # Makefile for some libs needed in the kernel.
 #
 
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
 endif
-- 
cgit v1.2.3


From 593eb8a2d63e95772a5f22d746f18a997c5ee463 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 23 Oct 2008 09:32:59 -0400
Subject: ftrace: return error on failed modified text.

Have the ftrace_modify_code return error values:

  -EFAULT on error of reading the address

  -EINVAL if what is read does not match what it expected

  -EPERM  if the write fails to update after a successful match.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 14 +++++++-------
 include/linux/ftrace.h   | 24 ++++++++++++++++++++++--
 kernel/trace/ftrace.c    | 21 +++++++++++++++------
 3 files changed, 44 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 8821ceabf51d..428291581cb2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -62,7 +62,6 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		   unsigned char *new_code)
 {
 	unsigned char replaced[MCOUNT_INSN_SIZE];
-	int ret;
 
 	/*
 	 * Note: Due to modules and __init, code can
@@ -72,15 +71,16 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	 * No real locking needed, this code is run through
 	 * kstop_machine, or before SMP starts.
 	 */
-	if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
-		return 1;
+	if (__copy_from_user_inatomic(replaced, (char __user *)ip,
+				      MCOUNT_INSN_SIZE))
+		return -EFAULT;
 
 	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
-		return 2;
+		return -EINVAL;
 
-	ret = __copy_to_user_inatomic((char __user *)ip, new_code,
-					MCOUNT_INSN_SIZE);
-	WARN_ON_ONCE(ret);
+	if (__copy_to_user_inatomic((char __user *)ip, new_code,
+				    MCOUNT_INSN_SIZE))
+		return -EPERM;
 
 	sync_core();
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0e9529589151..79fa10cbdcfb 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -72,13 +72,33 @@ extern unsigned char *ftrace_nop_replace(void);
 extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr);
 extern int ftrace_dyn_arch_init(void *data);
 extern int ftrace_mcount_set(unsigned long *data);
-extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
-			      unsigned char *new_code);
 extern int ftrace_update_ftrace_func(ftrace_func_t func);
 extern void ftrace_caller(void);
 extern void ftrace_call(void);
 extern void mcount_call(void);
 
+/**
+ * ftrace_modify_code - modify code segment
+ * @ip: the address of the code segment
+ * @old_code: the contents of what is expected to be there
+ * @new_code: the code to patch in
+ *
+ * This is a very sensitive operation and great care needs
+ * to be taken by the arch.  The operation should carefully
+ * read the location, check to see if what is read is indeed
+ * what we expect it to be, and then on success of the compare,
+ * it should write to the location.
+ *
+ * Return must be:
+ *  0 on success
+ *  -EFAULT on error reading the location
+ *  -EINVAL on a failed compare of the contents
+ *  -EPERM  on error writing to the location
+ * Any other value will be considered a failure.
+ */
+extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+			      unsigned char *new_code);
+
 extern int skip_trace(unsigned long ip);
 
 extern void ftrace_release(void *start, unsigned long size);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1f54a94189fe..b2de8de77356 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -596,22 +596,22 @@ ftrace_code_disable(struct dyn_ftrace *rec)
 {
 	unsigned long ip;
 	unsigned char *nop, *call;
-	int failed;
+	int ret;
 
 	ip = rec->ip;
 
 	nop = ftrace_nop_replace();
 	call = ftrace_call_replace(ip, mcount_addr);
 
-	failed = ftrace_modify_code(ip, call, nop);
-	if (failed) {
-		switch (failed) {
-		case 1:
+	ret = ftrace_modify_code(ip, call, nop);
+	if (ret) {
+		switch (ret) {
+		case -EFAULT:
 			WARN_ON_ONCE(1);
 			pr_info("ftrace faulted on modifying ");
 			print_ip_sym(ip);
 			break;
-		case 2:
+		case -EINVAL:
 			WARN_ON_ONCE(1);
 			pr_info("ftrace failed to modify ");
 			print_ip_sym(ip);
@@ -620,6 +620,15 @@ ftrace_code_disable(struct dyn_ftrace *rec)
 			print_ip_ins(" replace: ", nop);
 			printk(KERN_CONT "\n");
 			break;
+		case -EPERM:
+			WARN_ON_ONCE(1);
+			pr_info("ftrace faulted on writing ");
+			print_ip_sym(ip);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			pr_info("ftrace faulted on unknown error ");
+			print_ip_sym(ip);
 		}
 
 		rec->flags |= FTRACE_FL_FAILED;
-- 
cgit v1.2.3


From 76aefee57657428fb77cbd8624119c1a440bee44 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 23 Oct 2008 09:33:00 -0400
Subject: ftrace: comment arch ftrace code

Add comments to explain what is happening in the x86 arch ftrace code.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 428291581cb2..783455454d78 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -66,18 +66,23 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	/*
 	 * Note: Due to modules and __init, code can
 	 *  disappear and change, we need to protect against faulting
-	 *  as well as code changing.
+	 *  as well as code changing. We do this by using the
+	 *  __copy_*_user functions.
 	 *
 	 * No real locking needed, this code is run through
 	 * kstop_machine, or before SMP starts.
 	 */
+
+	/* read the text we want to modify */
 	if (__copy_from_user_inatomic(replaced, (char __user *)ip,
 				      MCOUNT_INSN_SIZE))
 		return -EFAULT;
 
+	/* Make sure it is what we expect it to be */
 	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
 		return -EINVAL;
 
+	/* replace the text with the new text */
 	if (__copy_to_user_inatomic((char __user *)ip, new_code,
 				    MCOUNT_INSN_SIZE))
 		return -EPERM;
-- 
cgit v1.2.3


From ab9a0918cbf0fa8883301838df8dbc8fc085ff50 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 23 Oct 2008 09:33:01 -0400
Subject: ftrace: use probe_kernel

Andrew Morton suggested using the proper API for reading and writing
kernel areas that might fault.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 783455454d78..da4fb0deecf7 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -67,15 +67,14 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	 * Note: Due to modules and __init, code can
 	 *  disappear and change, we need to protect against faulting
 	 *  as well as code changing. We do this by using the
-	 *  __copy_*_user functions.
+	 *  probe_kernel_* functions.
 	 *
 	 * No real locking needed, this code is run through
 	 * kstop_machine, or before SMP starts.
 	 */
 
 	/* read the text we want to modify */
-	if (__copy_from_user_inatomic(replaced, (char __user *)ip,
-				      MCOUNT_INSN_SIZE))
+	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
 		return -EFAULT;
 
 	/* Make sure it is what we expect it to be */
@@ -83,8 +82,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		return -EINVAL;
 
 	/* replace the text with the new text */
-	if (__copy_to_user_inatomic((char __user *)ip, new_code,
-				    MCOUNT_INSN_SIZE))
+	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
 		return -EPERM;
 
 	sync_core();
-- 
cgit v1.2.3


From 4d296c24326783bff1282ac72f310d8bac8df413 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 23 Oct 2008 09:33:06 -0400
Subject: ftrace: remove mcount set

The arch dependent function ftrace_mcount_set was only used by the daemon
start up code. This patch removes it.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/arm/kernel/ftrace.c     | 13 -------------
 arch/powerpc/kernel/ftrace.c | 17 -----------------
 arch/sparc64/kernel/ftrace.c | 18 ------------------
 arch/x86/kernel/ftrace.c     |  7 -------
 include/linux/ftrace.h       |  1 -
 kernel/trace/ftrace.c        |  9 ---------
 6 files changed, 65 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 76d50e6091bc..6c90479e8974 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -95,19 +95,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 	return ret;
 }
 
-int ftrace_mcount_set(unsigned long *data)
-{
-	unsigned long pc, old;
-	unsigned long *addr = data;
-	unsigned char *new;
-
-	pc = (unsigned long)&mcount_call;
-	memcpy(&old, &mcount_call, MCOUNT_INSN_SIZE);
-	new = ftrace_call_replace(pc, *addr);
-	*addr = ftrace_modify_code(pc, (unsigned char *)&old, new);
-	return 0;
-}
-
 /* run from kstop_machine */
 int __init ftrace_dyn_arch_init(void *data)
 {
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 3855ceb937b0..6b75522e8b34 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -126,23 +126,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)
 	return ret;
 }
 
-notrace int ftrace_mcount_set(unsigned long *data)
-{
-	unsigned long ip = (long)(&mcount_call);
-	unsigned long *addr = data;
-	unsigned char old[MCOUNT_INSN_SIZE], *new;
-
-	/*
-	 * Replace the mcount stub with a pointer to the
-	 * ip recorder function.
-	 */
-	memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
-	new = ftrace_call_replace(ip, *addr);
-	*addr = ftrace_modify_code(ip, old, new);
-
-	return 0;
-}
-
 int __init ftrace_dyn_arch_init(void *data)
 {
 	/* This is running in kstop_machine */
diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c
index 4298d0aee713..447942041a7c 100644
--- a/arch/sparc64/kernel/ftrace.c
+++ b/arch/sparc64/kernel/ftrace.c
@@ -69,24 +69,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)
 	return ftrace_modify_code(ip, old, new);
 }
 
-notrace int ftrace_mcount_set(unsigned long *data)
-{
-	unsigned long ip = (long)(&mcount_call);
-	unsigned long *addr = data;
-	unsigned char old[MCOUNT_INSN_SIZE], *new;
-
-	/*
-	 * Replace the mcount stub with a pointer to the
-	 * ip recorder function.
-	 */
-	memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
-	new = ftrace_call_replace(ip, *addr);
-	*addr = ftrace_modify_code(ip, old, new);
-
-	return 0;
-}
-
-
 int __init ftrace_dyn_arch_init(void *data)
 {
 	ftrace_mcount_set(data);
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index da4fb0deecf7..b399eed23538 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -103,13 +103,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)
 	return ret;
 }
 
-notrace int ftrace_mcount_set(unsigned long *data)
-{
-	/* mcount is initialized as a nop */
-	*data = 0;
-	return 0;
-}
-
 int __init ftrace_dyn_arch_init(void *data)
 {
 	extern const unsigned char ftrace_test_p6nop[];
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ac58e94668b7..1c4835f86911 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -71,7 +71,6 @@ extern int ftrace_ip_converted(unsigned long ip);
 extern unsigned char *ftrace_nop_replace(void);
 extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr);
 extern int ftrace_dyn_arch_init(void *data);
-extern int ftrace_mcount_set(unsigned long *data);
 extern int ftrace_update_ftrace_func(ftrace_func_t func);
 extern void ftrace_caller(void);
 extern void ftrace_call(void);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e758cab0836f..226fd9132d53 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -620,7 +620,6 @@ static int ftrace_update_code(void *ignore);
 
 static int __ftrace_modify_code(void *data)
 {
-	unsigned long addr;
 	int *command = data;
 
 	if (*command & FTRACE_ENABLE_CALLS) {
@@ -639,14 +638,6 @@ static int __ftrace_modify_code(void *data)
 	if (*command & FTRACE_UPDATE_TRACE_FUNC)
 		ftrace_update_ftrace_func(ftrace_trace_function);
 
-	if (*command & FTRACE_ENABLE_MCOUNT) {
-		addr = (unsigned long)ftrace_record_ip;
-		ftrace_mcount_set(&addr);
-	} else if (*command & FTRACE_DISABLE_MCOUNT) {
-		addr = (unsigned long)ftrace_stub;
-		ftrace_mcount_set(&addr);
-	}
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 15adc048986f6b54b6044f2b6fc4b48f49413e2f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 23 Oct 2008 09:33:08 -0400
Subject: ftrace, powerpc, sparc64, x86: remove notrace from arch ftrace file

The entire file of ftrace.c in the arch code needs to be marked
as notrace. It is much cleaner to do this from the Makefile with
CFLAGS_REMOVE_ftrace.o.

[ powerpc already had this in its Makefile. ]

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/ftrace.c | 10 +++++-----
 arch/sparc64/kernel/Makefile |  2 ++
 arch/sparc64/kernel/ftrace.c |  8 ++++----
 arch/x86/kernel/Makefile     |  1 +
 arch/x86/kernel/ftrace.c     | 10 +++++-----
 5 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 6b75522e8b34..f4b006ed0ab1 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -28,17 +28,17 @@ static unsigned int ftrace_nop = 0x60000000;
 #endif
 
 
-static unsigned int notrace ftrace_calc_offset(long ip, long addr)
+static unsigned int ftrace_calc_offset(long ip, long addr)
 {
 	return (int)(addr - ip);
 }
 
-notrace unsigned char *ftrace_nop_replace(void)
+unsigned char *ftrace_nop_replace(void)
 {
 	return (char *)&ftrace_nop;
 }
 
-notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 {
 	static unsigned int op;
 
@@ -68,7 +68,7 @@ notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 # define _ASM_PTR	" .long "
 #endif
 
-notrace int
+int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		   unsigned char *new_code)
 {
@@ -113,7 +113,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	return faulted;
 }
 
-notrace int ftrace_update_ftrace_func(ftrace_func_t func)
+int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
 	unsigned char old[MCOUNT_INSN_SIZE], *new;
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index c0b8009ab196..b3e0b986bef8 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -5,6 +5,8 @@
 EXTRA_AFLAGS := -ansi
 EXTRA_CFLAGS := -Werror
 
+CFLAGS_REMOVE_ftrace.o = -pg
+
 extra-y		:= head.o init_task.o vmlinux.lds
 
 obj-y		:= process.o setup.o cpu.o idprom.o reboot.o \
diff --git a/arch/sparc64/kernel/ftrace.c b/arch/sparc64/kernel/ftrace.c
index 447942041a7c..d0218e73f982 100644
--- a/arch/sparc64/kernel/ftrace.c
+++ b/arch/sparc64/kernel/ftrace.c
@@ -9,12 +9,12 @@
 
 static const u32 ftrace_nop = 0x01000000;
 
-notrace unsigned char *ftrace_nop_replace(void)
+unsigned char *ftrace_nop_replace(void)
 {
 	return (char *)&ftrace_nop;
 }
 
-notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 {
 	static u32 call;
 	s32 off;
@@ -25,7 +25,7 @@ notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 	return (unsigned char *) &call;
 }
 
-notrace int
+int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		   unsigned char *new_code)
 {
@@ -59,7 +59,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	return faulted;
 }
 
-notrace int ftrace_update_ftrace_func(ftrace_func_t func)
+int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
 	unsigned char old[MCOUNT_INSN_SIZE], *new;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f1283fe60723..e489ff9cb3e2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -11,6 +11,7 @@ ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_tsc.o = -pg
 CFLAGS_REMOVE_rtc.o = -pg
 CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
+CFLAGS_REMOVE_ftrace.o = -pg
 endif
 
 #
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index b399eed23538..b1e5e2244eca 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -33,17 +33,17 @@ union ftrace_code_union {
 };
 
 
-static int notrace ftrace_calc_offset(long ip, long addr)
+static int ftrace_calc_offset(long ip, long addr)
 {
 	return (int)(addr - ip);
 }
 
-notrace unsigned char *ftrace_nop_replace(void)
+unsigned char *ftrace_nop_replace(void)
 {
 	return (char *)ftrace_nop;
 }
 
-notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 {
 	static union ftrace_code_union calc;
 
@@ -57,7 +57,7 @@ notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 	return calc.code;
 }
 
-notrace int
+int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		   unsigned char *new_code)
 {
@@ -90,7 +90,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 	return 0;
 }
 
-notrace int ftrace_update_ftrace_func(ftrace_func_t func)
+int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
 	unsigned char old[MCOUNT_INSN_SIZE], *new;
-- 
cgit v1.2.3


From 75bebb7f0c2a709812cccb4d3151a21b012c5cad Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 23 Oct 2008 20:46:55 +0900
Subject: x86: use GFP_DMA for 24bit coherent_dma_mask

dma_alloc_coherent (include/asm-x86/dma-mapping.h) avoids GFP_DMA
allocation first and if the allocated address is not fit for the
device's coherent_dma_mask, then dma_alloc_coherent does GFP_DMA
allocation. This is because dma_alloc_coherent avoids precious GFP_DMA
zone if possible. This is also how the old dma_alloc_coherent
(arch/x86/kernel/pci-dma.c) works.

However, if the coherent_dma_mask of a device is 24bit, there is no
point to go into the above GFP_DMA retry mechanism. We had better use
GFP_DMA in the first place.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Tested-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/dma-mapping.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 4a5397bfce27..7f225a4b2a26 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -255,9 +255,11 @@ static inline unsigned long dma_alloc_coherent_mask(struct device *dev,
 
 static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
 {
-#ifdef CONFIG_X86_64
 	unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp);
 
+	if (dma_mask <= DMA_24BIT_MASK)
+		gfp |= GFP_DMA;
+#ifdef CONFIG_X86_64
 	if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA))
 		gfp |= GFP_DMA32;
 #endif
-- 
cgit v1.2.3


From 03967c5267b0e7312d1d55dc814d94cf190ca573 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 23 Oct 2008 23:14:29 +0900
Subject: x86: restore the old swiotlb alloc_coherent behavior

This restores the old swiotlb alloc_coherent behavior (before the
alloc_coherent rewrite):

  http://lkml.org/lkml/2008/8/12/200

The old alloc_coherent avoids GFP_DMA allocation first and if the
allocated address is not fit for the device's coherent_dma_mask, then
dma_alloc_coherent does GFP_DMA allocation. If it fails,
alloc_coherent calls swiotlb_alloc_coherent (in short, we rarely used
swiotlb_alloc_coherent).

After the alloc_coherent rewrite, dma_alloc_coherent
(include/asm-x86/dma-mapping.h) directly calls swiotlb_alloc_coherent.
It means that we possibly can't handle a device having dma_masks >
24bit < 32bits since swiotlb_alloc_coherent doesn't have the above
GFP_DMA retry mechanism.

This patch fixes x86's swiotlb alloc_coherent to use the GFP_DMA retry
mechanism, which dma_generic_alloc_coherent() provides now
(pci-nommu.c and GART IOMMU driver also use
dma_generic_alloc_coherent).

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-swiotlb_64.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index c4ce0332759e..3c539d111abb 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -18,9 +18,21 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
 	return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
 }
 
+static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+					dma_addr_t *dma_handle, gfp_t flags)
+{
+	void *vaddr;
+
+	vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags);
+	if (vaddr)
+		return vaddr;
+
+	return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
+}
+
 struct dma_mapping_ops swiotlb_dma_ops = {
 	.mapping_error = swiotlb_dma_mapping_error,
-	.alloc_coherent = swiotlb_alloc_coherent,
+	.alloc_coherent = x86_swiotlb_alloc_coherent,
 	.free_coherent = swiotlb_free_coherent,
 	.map_single = swiotlb_map_single_phys,
 	.unmap_single = swiotlb_unmap_single,
-- 
cgit v1.2.3


From 3b15e581981b3ad35809f56d8131d5c19b6da1bd Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Thu, 23 Oct 2008 16:51:00 -0700
Subject: x86/PCI: build failure at x86/kernel/pci-dma.c with !CONFIG_PCI

On Thu, Oct 23, 2008 at 04:09:52PM -0700, Alexander Beregalov wrote:
> arch/x86/kernel/built-in.o: In function `iommu_setup':
> pci-dma.c:(.init.text+0x36ad): undefined reference to `forbid_dac'
> pci-dma.c:(.init.text+0x36cc): undefined reference to `forbid_dac'
> pci-dma.c:(.init.text+0x3711): undefined reference to `forbid_dac

This patch partially reverts a patch to add IOMMU support to ia64.  The
forbid_dac variable was incorrectly moved to quirks.c, which isn't built
when PCI is disabled.

Tested-by: "Alexander Beregalov" <a.beregalov@gmail.com>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/ia64/include/asm/iommu.h |  1 -
 arch/ia64/kernel/pci-dma.c    |  7 -------
 arch/x86/include/asm/iommu.h  |  1 -
 arch/x86/kernel/pci-dma.c     | 16 ++++++++++++++++
 drivers/pci/quirks.c          | 14 --------------
 5 files changed, 16 insertions(+), 23 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
index 5fb2bb93de3b..0490794fe4aa 100644
--- a/arch/ia64/include/asm/iommu.h
+++ b/arch/ia64/include/asm/iommu.h
@@ -11,6 +11,5 @@ extern int force_iommu, no_iommu;
 extern int iommu_detected;
 extern void iommu_dma_init(void);
 extern void machvec_init(const char *name);
-extern int forbid_dac;
 
 #endif
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index 10a75b557650..031abbf9c875 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -89,13 +89,6 @@ int iommu_dma_supported(struct device *dev, u64 mask)
 {
 	struct dma_mapping_ops *ops = get_dma_ops(dev);
 
-#ifdef CONFIG_PCI
-	if (mask > 0xffffffff && forbid_dac > 0) {
-		dev_info(dev, "Disallowing DAC for device\n");
-		return 0;
-	}
-#endif
-
 	if (ops->dma_supported_op)
 		return ops->dma_supported_op(dev, mask);
 
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 98e28ea8cd16..e4a552d44465 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -7,7 +7,6 @@ extern struct dma_mapping_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 extern int dmar_disabled;
-extern int forbid_dac;
 
 extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
 
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1972266e8ba5..192624820217 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -9,6 +9,8 @@
 #include <asm/calgary.h>
 #include <asm/amd_iommu.h>
 
+static int forbid_dac __read_mostly;
+
 struct dma_mapping_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
@@ -291,3 +293,17 @@ void pci_iommu_shutdown(void)
 }
 /* Must execute after PCI subsystem */
 fs_initcall(pci_iommu_init);
+
+#ifdef CONFIG_PCI
+/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
+
+static __devinit void via_no_dac(struct pci_dev *dev)
+{
+	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
+		printk(KERN_INFO "PCI: VIA PCI bridge detected."
+				 "Disabling DAC.\n");
+		forbid_dac = 1;
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
+#endif
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 96cf8ecd04ce..bbf66ea8fd87 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -43,20 +43,6 @@ static void __devinit quirk_mellanox_tavor(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR,quirk_mellanox_tavor);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX,PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,quirk_mellanox_tavor);
 
-/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
-int forbid_dac __read_mostly;
-EXPORT_SYMBOL(forbid_dac);
-
-static __devinit void via_no_dac(struct pci_dev *dev)
-{
-	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
-		dev_info(&dev->dev,
-			"VIA PCI bridge detected. Disabling DAC.\n");
-		forbid_dac = 1;
-	}
-}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
-
 /* Deal with broken BIOS'es that neglect to enable passive release,
    which can cause problems in combination with the 82441FX/PPro MTRRs */
 static void quirk_passive_release(struct pci_dev *dev)
-- 
cgit v1.2.3


From 9f32d21c981bb638d0991ce5675a20337312066b Mon Sep 17 00:00:00 2001
From: Chris Lalancette <clalance@redhat.com>
Date: Thu, 23 Oct 2008 17:40:25 -0700
Subject: xen: fix Xen domU boot with batched mprotect

Impact: fix guest kernel boot crash on certain configs

Recent i686 2.6.27 kernels with a certain amount of memory (between
736 and 855MB) have a problem booting under a hypervisor that supports
batched mprotect (this includes the RHEL-5 Xen hypervisor as well as
any 3.3 or later Xen hypervisor).

The problem ends up being that xen_ptep_modify_prot_commit() is using
virt_to_machine to calculate which pfn to update.  However, this only
works for pages that are in the p2m list, and the pages coming from
change_pte_range() in mm/mprotect.c are kmap_atomic pages.  Because of
this, we can run into the situation where the lookup in the p2m table
returns an INVALID_MFN, which we then try to pass to the hypervisor,
which then (correctly) denies the request to a totally bogus pfn.

The right thing to do is to use arbitrary_virt_to_machine, so that we
can be sure we are modifying the right pfn.  This unfortunately
introduces a performance penalty because of a full page-table-walk,
but we can avoid that penalty for pages in the p2m list by checking if
virt_addr_valid is true, and if so, just doing the lookup in the p2m
table.

The attached patch implements this, and allows my 2.6.27 i686 based
guest with 768MB of memory to boot on a RHEL-5 hypervisor again.
Thanks to Jeremy for the suggestions about how to fix this particular
issue.

Signed-off-by: Chris Lalancette <clalance@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Chris Lalancette <clalance@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index d4d52f5a1cf7..aba77b2b7d18 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -246,11 +246,21 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 {
 	unsigned long address = (unsigned long)vaddr;
 	unsigned int level;
-	pte_t *pte = lookup_address(address, &level);
-	unsigned offset = address & ~PAGE_MASK;
+	pte_t *pte;
+	unsigned offset;
 
-	BUG_ON(pte == NULL);
+	/*
+	 * if the PFN is in the linear mapped vaddr range, we can just use
+	 * the (quick) virt_to_machine() p2m lookup
+	 */
+	if (virt_addr_valid(vaddr))
+		return virt_to_machine(vaddr);
 
+	/* otherwise we have to do a (slower) full page-table walk */
+
+	pte = lookup_address(address, &level);
+	BUG_ON(pte == NULL);
+	offset = address & ~PAGE_MASK;
 	return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
 }
 
@@ -410,7 +420,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 
 	xen_mc_batch();
 
-	u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
+	u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
 	u.val = pte_val_ma(pte);
 	xen_extend_mmu_update(&u);
 
-- 
cgit v1.2.3


From ef020ab0109aa5cd6eac2e93519b7641c9862828 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Thu, 23 Oct 2008 17:54:05 -0500
Subject: x86/uv: memory allocation at initialization

Impact: on SGI UV platforms, fix boot crash

UV initialization is currently called too late to call alloc_bootmem_pages().
The current sequence is:

 start_kernel()
   mem_init()
     free_all_bootmem()           <--- discard of bootmem
   rest_init()
     kernel_init()
       smp_prepare_cpus()
       native_smp_prepare_cpus()
         uv_system_init()         <--- uses alloc_bootmem_pages()

It should be calling kmalloc().

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/genx2apic_uv_x.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 680a06557c5e..2c7dbdb98278 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -15,7 +15,6 @@
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/sched.h>
-#include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/hardirq.h>
 #include <asm/smp.h>
@@ -398,16 +397,16 @@ void __init uv_system_init(void)
 	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = alloc_bootmem_pages(bytes);
+	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
 
 	get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
 
 	bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
-	uv_node_to_blade = alloc_bootmem_pages(bytes);
+	uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
 	memset(uv_node_to_blade, 255, bytes);
 
 	bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
-	uv_cpu_to_blade = alloc_bootmem_pages(bytes);
+	uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
 	memset(uv_cpu_to_blade, 255, bytes);
 
 	blade = 0;
-- 
cgit v1.2.3


From 8115f3f0c939c5db0fe3c6c6c58911fd3a205b1e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 24 Oct 2008 09:12:17 -0400
Subject: ftrace: use a real variable for ftrace_nop in x86

Impact: avoid section mismatch warning, clean up

The dynamic ftrace determines which nop is safe to use at start up.
When it finds a safe nop for patching, it sets a pointer called ftrace_nop
to point to the code. All call sites are then patched to this nop.

Later, when tracing is turned on, this ftrace_nop variable is again used
to compare the location to make sure it is a nop before we update it to
an mcount call. If this fails just once, a warning is printed and ftrace
is disabled.

Rakib Mullick noted that the code that sets up the nop is a .init section
where as the nop itself is in the .text section. This is needed because
the nop is used later on after boot up. The problem is that the test of the
nop jumps back to the setup code and causes a "section mismatch" warning.

Rakib first recommended to convert the nop to .init.text, but as stated
above, this would fail since that text is used later.

The real solution is to extend Rabik's patch, and to make the ftrace_nop
into an array, and just save the code from the assembly to this array.

Now the section can stay as an init section, and we have a nop to use
later on.

Reported-by: Rakib Mullick <rakib.mullick@gmail.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ftrace.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index b1e5e2244eca..50ea0ac8c9bf 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -21,8 +21,7 @@
 #include <asm/nops.h>
 
 
-/* Long is fine, even if it is only 4 bytes ;-) */
-static unsigned long *ftrace_nop;
+static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
 
 union ftrace_code_union {
 	char code[MCOUNT_INSN_SIZE];
@@ -40,7 +39,7 @@ static int ftrace_calc_offset(long ip, long addr)
 
 unsigned char *ftrace_nop_replace(void)
 {
-	return (char *)ftrace_nop;
+	return ftrace_nop;
 }
 
 unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
@@ -125,9 +124,6 @@ int __init ftrace_dyn_arch_init(void *data)
 	 * TODO: check the cpuid to determine the best nop.
 	 */
 	asm volatile (
-		"jmp ftrace_test_jmp\n"
-		/* This code needs to stay around */
-		".section .text, \"ax\"\n"
 		"ftrace_test_jmp:"
 		"jmp ftrace_test_p6nop\n"
 		"nop\n"
@@ -138,8 +134,6 @@ int __init ftrace_dyn_arch_init(void *data)
 		"jmp 1f\n"
 		"ftrace_test_nop5:"
 		".byte 0x66,0x66,0x66,0x66,0x90\n"
-		"jmp 1f\n"
-		".previous\n"
 		"1:"
 		".section .fixup, \"ax\"\n"
 		"2:	movl $1, %0\n"
@@ -154,15 +148,15 @@ int __init ftrace_dyn_arch_init(void *data)
 	switch (faulted) {
 	case 0:
 		pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
-		ftrace_nop = (unsigned long *)ftrace_test_p6nop;
+		memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
 		break;
 	case 1:
 		pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
-		ftrace_nop = (unsigned long *)ftrace_test_nop5;
+		memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
 		break;
 	case 2:
 		pr_info("ftrace: converting mcount calls to jmp . + 5\n");
-		ftrace_nop = (unsigned long *)ftrace_test_jmp;
+		memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
 		break;
 	}
 
-- 
cgit v1.2.3


From 3afa39493de510c33c56ddc76e6e1af7f87c5392 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 25 Oct 2008 22:58:21 -0700
Subject: x86: keep the /proc/meminfo page count correct

Impact: get correct page count in /proc/meminfo

found page count in /proc/meminfo is nor correct on 1G system in VirtualBox 2.0.4

# cat /proc/meminfo
MemTotal:        1017508 kB
MemFree:          822700 kB
Buffers:            1456 kB
Cached:            26632 kB
SwapCached:            0 kB
...
Hugepagesize:       2048 kB
DirectMap4k:      4032 kB
DirectMap2M:  18446744073709549568 kB

with this patch get:
...
DirectMap4k:      4032 kB
DirectMap2M:   1044480 kB

which is consistent to kernel_page_tables
---[ Low Kernel Mapping ]---
0xffff880000000000-0xffff880000001000           4K     RW     PCD     GLB x  pte
0xffff880000001000-0xffff88000009f000         632K     RW             GLB x  pte
0xffff88000009f000-0xffff8800000a0000           4K     RW     PCD     GLB x  pte
0xffff8800000a0000-0xffff880000200000        1408K     RW             GLB x  pte
0xffff880000200000-0xffff88003fe00000        1020M     RW         PSE GLB x  pmd
0xffff88003fe00000-0xffff88003fff0000        1984K     RW             GLB NX pte
0xffff88003fff0000-0xffff880040000000          64K                           pte
0xffff880040000000-0xffff888000000000         511G                           pud
0xffff888000000000-0xffffc20000000000       58880G                           pgd

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b8e461d49412..c7a4c5a9a21b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -350,8 +350,10 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
 		 * pagetable pages as RO. So assume someone who pre-setup
 		 * these mappings are more intelligent.
 		 */
-		if (pte_val(*pte))
+		if (pte_val(*pte)) {
+			pages++;
 			continue;
+		}
 
 		if (0)
 			printk("   pte=%p addr=%lx pte=%016lx\n",
@@ -418,8 +420,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 			 * not differ with respect to page frame and
 			 * attributes.
 			 */
-			if (page_size_mask & (1 << PG_LEVEL_2M))
+			if (page_size_mask & (1 << PG_LEVEL_2M)) {
+				pages++;
 				continue;
+			}
 			new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
 		}
 
@@ -499,8 +503,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 			 * not differ with respect to page frame and
 			 * attributes.
 			 */
-			if (page_size_mask & (1 << PG_LEVEL_1G))
+			if (page_size_mask & (1 << PG_LEVEL_1G)) {
+				pages++;
 				continue;
+			}
 			prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
 		}
 
-- 
cgit v1.2.3


From 60817c9b31ef7897d60bca2f384cbc316a3fdd8b Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Mon, 27 Oct 2008 13:03:18 -0700
Subject: x86, memory hotplug: remove wrong -1 in calling init_memory_mapping()

Impact: fix crash with memory hotplug

Shuahua Li found:

| I just did some experiments on a desktop for memory hotplug and this bug
| triggered a crash in my test.
|
| Yinghai's suggestion also fixed the bug.

We don't need to round it, just remove that extra -1

Signed-off-by: Yinghai <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index c7a4c5a9a21b..f79a02f64d10 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -837,7 +837,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
-	last_mapped_pfn = init_memory_mapping(start, start + size-1);
+	last_mapped_pfn = init_memory_mapping(start, start + size);
 	if (last_mapped_pfn > max_pfn_mapped)
 		max_pfn_mapped = last_mapped_pfn;
 
-- 
cgit v1.2.3


From 6ad9f15c94822c3f067a7d443f3b414e08b34460 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 15 Oct 2008 07:45:08 -0200
Subject: KVM: MMU: sync root on paravirt TLB flush

The pvmmu TLB flush handler should request a root sync, similarly to
a native read-write CR3.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 99c239c5c0ac..2a5e64881d9b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2634,6 +2634,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
 static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 {
 	kvm_x86_ops->tlb_flush(vcpu);
+	set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
 	return 1;
 }
 
-- 
cgit v1.2.3


From 5550af4df179e52753d3a43a788a113ad8cd95cd Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Wed, 15 Oct 2008 20:15:06 +0800
Subject: KVM: Fix guest shared interrupt with in-kernel irqchip

Every call of kvm_set_irq() should offer an irq_source_id, which is
allocated by kvm_request_irq_source_id(). Based on irq_source_id, we
identify the irq source and implement logical OR for shared level
interrupts.

The allocated irq_source_id can be freed by kvm_free_irq_source_id().

Currently, we support at most sizeof(unsigned long) different irq sources.

[Amit: - rebase to kvm.git HEAD
       - move definition of KVM_USERSPACE_IRQ_SOURCE_ID to common file
       - move kvm_request_irq_source_id to the update_irq ioctl]

[Xiantao: - Add kvm/ia64 stuff and make it work for kvm/ia64 guests]

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/include/asm/kvm_host.h |  3 +++
 arch/ia64/kvm/kvm-ia64.c         |  8 +++++---
 arch/x86/include/asm/kvm_host.h  |  3 +++
 arch/x86/kvm/i8254.c             | 11 +++++++++--
 arch/x86/kvm/i8254.h             |  1 +
 arch/x86/kvm/x86.c               |  6 +++++-
 include/linux/kvm_host.h         |  7 ++++++-
 virt/kvm/irq_comm.c              | 42 +++++++++++++++++++++++++++++++++++++---
 virt/kvm/kvm_main.c              | 12 ++++++++----
 9 files changed, 79 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 85db124d37f6..04c0b88f7b3a 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -417,6 +417,9 @@ struct kvm_arch {
 	struct list_head assigned_dev_head;
 	struct dmar_domain *intel_iommu_domain;
 	struct hlist_head irq_ack_notifier_list;
+
+	unsigned long irq_sources_bitmap;
+	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
 };
 
 union cpuid3_t {
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index a312c9e9b9ef..8a2b13ff0aff 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -778,6 +778,9 @@ static void kvm_init_vm(struct kvm *kvm)
 	kvm_build_io_pmt(kvm);
 
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+
+	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
+	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 }
 
 struct  kvm *kvm_arch_create_vm(void)
@@ -941,9 +944,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			mutex_lock(&kvm->lock);
-			kvm_ioapic_set_irq(kvm->arch.vioapic,
-						irq_event.irq,
-						irq_event.level);
+			kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+				    irq_event.irq, irq_event.level);
 			mutex_unlock(&kvm->lock);
 			r = 0;
 		}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 65679d006337..8346be87cfa1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -364,6 +364,9 @@ struct kvm_arch{
 
 	struct page *ept_identity_pagetable;
 	bool ept_identity_pagetable_done;
+
+	unsigned long irq_sources_bitmap;
+	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 11c6725fb798..8772dc946823 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -545,6 +545,12 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 	if (!pit)
 		return NULL;
 
+	mutex_lock(&kvm->lock);
+	pit->irq_source_id = kvm_request_irq_source_id(kvm);
+	mutex_unlock(&kvm->lock);
+	if (pit->irq_source_id < 0)
+		return NULL;
+
 	mutex_init(&pit->pit_state.lock);
 	mutex_lock(&pit->pit_state.lock);
 	spin_lock_init(&pit->pit_state.inject_lock);
@@ -587,6 +593,7 @@ void kvm_free_pit(struct kvm *kvm)
 		mutex_lock(&kvm->arch.vpit->pit_state.lock);
 		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
 		hrtimer_cancel(timer);
+		kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id);
 		mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 		kfree(kvm->arch.vpit);
 	}
@@ -595,8 +602,8 @@ void kvm_free_pit(struct kvm *kvm)
 static void __inject_pit_timer_intr(struct kvm *kvm)
 {
 	mutex_lock(&kvm->lock);
-	kvm_set_irq(kvm, 0, 1);
-	kvm_set_irq(kvm, 0, 0);
+	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
+	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
 	mutex_unlock(&kvm->lock);
 }
 
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index e436d4983aa1..4178022b97aa 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -44,6 +44,7 @@ struct kvm_pit {
 	struct kvm_io_device speaker_dev;
 	struct kvm *kvm;
 	struct kvm_kpit_state pit_state;
+	int irq_source_id;
 };
 
 #define KVM_PIT_BASE_ADDRESS	    0x40
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4f0677d1eae8..f1f8ff2f1fa2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1742,7 +1742,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			mutex_lock(&kvm->lock);
-			kvm_set_irq(kvm, irq_event.irq, irq_event.level);
+			kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+				    irq_event.irq, irq_event.level);
 			mutex_unlock(&kvm->lock);
 			r = 0;
 		}
@@ -4013,6 +4014,9 @@ struct  kvm *kvm_arch_create_vm(void)
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
+	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
+	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+
 	return kvm;
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3833c48fae3a..bb92be2153bc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -37,6 +37,8 @@
 #define KVM_REQ_UNHALT             6
 #define KVM_REQ_MMU_SYNC           7
 
+#define KVM_USERSPACE_IRQ_SOURCE_ID	0
+
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
@@ -306,15 +308,18 @@ struct kvm_assigned_dev_kernel {
 	int host_irq;
 	int guest_irq;
 	int irq_requested;
+	int irq_source_id;
 	struct pci_dev *dev;
 	struct kvm *kvm;
 };
-void kvm_set_irq(struct kvm *kvm, int irq, int level);
+void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 				     struct kvm_irq_ack_notifier *kian);
+int kvm_request_irq_source_id(struct kvm *kvm);
+void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
 #ifdef CONFIG_DMAR
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index d0169f5e6047..55ad76ee2d09 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -25,15 +25,23 @@
 #include "ioapic.h"
 
 /* This should be called with the kvm->lock mutex held */
-void kvm_set_irq(struct kvm *kvm, int irq, int level)
+void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 {
+	unsigned long *irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
+
+	/* Logical OR for level trig interrupt */
+	if (level)
+		set_bit(irq_source_id, irq_state);
+	else
+		clear_bit(irq_source_id, irq_state);
+
 	/* Not possible to detect if the guest uses the PIC or the
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
-	kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
+	kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state));
 #ifdef CONFIG_X86
-	kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
+	kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state));
 #endif
 }
 
@@ -58,3 +66,31 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 {
 	hlist_del(&kian->link);
 }
+
+/* The caller must hold kvm->lock mutex */
+int kvm_request_irq_source_id(struct kvm *kvm)
+{
+	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
+	int irq_source_id = find_first_zero_bit(bitmap,
+				sizeof(kvm->arch.irq_sources_bitmap));
+	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
+		printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
+		irq_source_id = -EFAULT;
+	} else
+		set_bit(irq_source_id, bitmap);
+	return irq_source_id;
+}
+
+void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
+{
+	int i;
+
+	if (irq_source_id <= 0 ||
+	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
+		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
+		return;
+	}
+	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
+		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
+	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cf0ab8ed3845..a87f45edfae8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -105,14 +105,12 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 	 */
 	mutex_lock(&assigned_dev->kvm->lock);
 	kvm_set_irq(assigned_dev->kvm,
+		    assigned_dev->irq_source_id,
 		    assigned_dev->guest_irq, 1);
 	mutex_unlock(&assigned_dev->kvm->lock);
 	kvm_put_kvm(assigned_dev->kvm);
 }
 
-/* FIXME: Implement the OR logic needed to make shared interrupts on
- * this line behave properly
- */
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev =
@@ -134,7 +132,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 
 	dev = container_of(kian, struct kvm_assigned_dev_kernel,
 			   ack_notifier);
-	kvm_set_irq(dev->kvm, dev->guest_irq, 0);
+	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
 	enable_irq(dev->host_irq);
 }
 
@@ -146,6 +144,7 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
 	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
+	kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
 
 	if (cancel_work_sync(&assigned_dev->interrupt_work))
 		/* We had pending work. That means we will have to take
@@ -215,6 +214,11 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 		match->ack_notifier.gsi = assigned_irq->guest_irq;
 		match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
 		kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
+		r = kvm_request_irq_source_id(kvm);
+		if (r < 0)
+			goto out_release;
+		else
+			match->irq_source_id = r;
 
 		/* Even though this is PCI, we don't want to use shared
 		 * interrupts. Sharing host devices with guest-assigned devices
-- 
cgit v1.2.3


From 531f6ed7de911e975352fbb2b228367121da630a Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Fri, 17 Oct 2008 09:09:27 +0200
Subject: x86, bts: improve help text for BTS config

Improve the help text of the X86_PTRACE_BTS config.
Make X86_DS invisible and depend on X86_PTRACE_BTS.

Reported-by: Roland Dreier <rdreier@cisco.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.cpu | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 0b7c4a3f0651..b815664fe370 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -513,19 +513,19 @@ config CPU_SUP_UMC_32
 	  If unsure, say N.
 
 config X86_DS
-	bool "Debug Store support"
-	default y
-	help
-	  Add support for Debug Store.
-	  This allows the kernel to provide a memory buffer to the hardware
-	  to store various profiling and tracing events.
+	def_bool X86_PTRACE_BTS
+	depends on X86_DEBUGCTLMSR
 
 config X86_PTRACE_BTS
-	bool "ptrace interface to Branch Trace Store"
+	bool "Branch Trace Store"
 	default y
-	depends on (X86_DS && X86_DEBUGCTLMSR)
+	depends on X86_DEBUGCTLMSR
 	help
-	  Add a ptrace interface to allow collecting an execution trace
-	  of the traced task.
-	  This collects control flow changes in a (cyclic) buffer and allows
-	  debuggers to fill in the gaps and show an execution trace of the debuggee.
+	  This adds a ptrace interface to the hardware's branch trace store.
+
+	  Debuggers may use it to collect an execution trace of the debugged
+	  application in order to answer the question 'how did I get here?'.
+	  Debuggers may trace user mode as well as kernel mode.
+
+	  Say Y unless there is no application development on this machine
+	  and you want to save a small amount of code size.
-- 
cgit v1.2.3


From 36b75da27bb51dc34e358d0b7487406132806c46 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Fri, 17 Oct 2008 15:30:37 +0200
Subject: x86: microcode patch loader author update

Removed one author's email address from module init message.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 936d8d55f230..82fb2809ce32 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -480,8 +480,8 @@ static int __init microcode_init(void)
 
 	printk(KERN_INFO
 	       "Microcode Update Driver: v" MICROCODE_VERSION
-	       " <tigran@aivazian.fsnet.co.uk>"
-	       " <peter.oruba@amd.com>\n");
+	       " <tigran@aivazian.fsnet.co.uk>,"
+	       " Peter Oruba\n");
 
 	return 0;
 }
-- 
cgit v1.2.3


From 3c52204bb90834bca8e9e78a3628d886ad6d4db5 Mon Sep 17 00:00:00 2001
From: Peter Oruba <peter.oruba@amd.com>
Date: Fri, 17 Oct 2008 15:30:38 +0200
Subject: x86: AMD microcode patch loader author update

Removed author's email address from MODULE_AUTHOR.

Signed-off-by: Peter Oruba <peter.oruba@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/microcode_amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 7a1f8eeac2c7..5f8e5d75a254 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -39,7 +39,7 @@
 #include <asm/microcode.h>
 
 MODULE_DESCRIPTION("AMD Microcode Update Driver");
-MODULE_AUTHOR("Peter Oruba <peter.oruba@amd.com>");
+MODULE_AUTHOR("Peter Oruba");
 MODULE_LICENSE("GPL v2");
 
 #define UCODE_MAGIC                0x00414d44
-- 
cgit v1.2.3


From 1281675e9c0d4d42d993697f4daab45ef22d49da Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 14 Oct 2008 18:59:17 -0700
Subject: x86: fix APIC_DEBUG with inquire_remote_apic

APIC_DEBUG is always 2.
need to update inquire_remote_apic to check apic_verbosity with
it instead.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/es7000/wakecpu.h            | 9 ++++-----
 arch/x86/include/asm/mach-default/mach_wakecpu.h | 9 ++++-----
 2 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h
index 3ffc5a7bf667..398493461913 100644
--- a/arch/x86/include/asm/es7000/wakecpu.h
+++ b/arch/x86/include/asm/es7000/wakecpu.h
@@ -50,10 +50,9 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
 {
 }
 
-#if APIC_DEBUG
- #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid)
-#else
- #define inquire_remote_apic(apicid) {}
-#endif
+#define inquire_remote_apic(apicid) do {		\
+		if (apic_verbosity >= APIC_DEBUG)	\
+			__inquire_remote_apic(apicid);	\
+	} while (0)
 
 #endif /* __ASM_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h
index d5c0b826a4ff..9d80db91e992 100644
--- a/arch/x86/include/asm/mach-default/mach_wakecpu.h
+++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h
@@ -33,10 +33,9 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
 {
 }
 
-#if APIC_DEBUG
- #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid)
-#else
- #define inquire_remote_apic(apicid) {}
-#endif
+#define inquire_remote_apic(apicid) do {		\
+		if (apic_verbosity >= APIC_DEBUG)	\
+			__inquire_remote_apic(apicid);	\
+	} while (0)
 
 #endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */
-- 
cgit v1.2.3


From e7706fc691513b0f06adb3de3d6ac04293180146 Mon Sep 17 00:00:00 2001
From: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
Date: Mon, 20 Oct 2008 13:51:52 +0900
Subject: x86, kdump: fix invalid access on i386 sparsemem

Impact: fix kdump crash on 32-bit sparsemem kernels

Since linux-2.6.27, kdump has failed on i386 sparsemem kernel.
1st-kernel gets a panic just before switching to 2nd-kernel.

The cause is that a kernel accesses invalid mem_section by
page_to_pfn(image->swap_page) at machine_kexec().
image->swap_page is allocated if kexec for hibernation, but
it is not allocated if kdump. So if kdump, a kernel should
not access the mem_section corresponding to image->swap_page.

The attached patch fixes this invalid access.

Signed-off-by: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
Cc: kexec-ml <kexec@lists.infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/machine_kexec_32.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 0732adba05ca..7a385746509a 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -162,7 +162,10 @@ void machine_kexec(struct kimage *image)
 	page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
 	page_list[PA_PTE_1] = __pa(kexec_pte1);
 	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
-	page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT);
+
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+						<< PAGE_SHIFT);
 
 	/* The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
-- 
cgit v1.2.3


From 11a6b0c933b55654a58afd84f63a5dde1607d78f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 14 Oct 2008 18:59:18 -0700
Subject: x86: 64 bit print out absent pages num too

so users are not confused with memhole causing big total ram

we don't need to worry about 32 bit, because memhole is always
above max_low_pfn.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index f79a02f64d10..ad38648bddbd 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -884,6 +884,7 @@ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
 void __init mem_init(void)
 {
 	long codesize, reservedpages, datasize, initsize;
+	unsigned long absent_pages;
 
 	start_periodic_check_for_corruption();
 
@@ -899,8 +900,9 @@ void __init mem_init(void)
 #else
 	totalram_pages = free_all_bootmem();
 #endif
-	reservedpages = max_pfn - totalram_pages -
-					absent_pages_in_range(0, max_pfn);
+
+	absent_pages = absent_pages_in_range(0, max_pfn);
+	reservedpages = max_pfn - totalram_pages - absent_pages;
 	after_bootmem = 1;
 
 	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
@@ -917,10 +919,11 @@ void __init mem_init(void)
 				 VSYSCALL_END - VSYSCALL_START);
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
-				"%ldk reserved, %ldk data, %ldk init)\n",
+			 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
 		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
 		max_pfn << (PAGE_SHIFT-10),
 		codesize >> 10,
+		absent_pages << (PAGE_SHIFT-10),
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10);
-- 
cgit v1.2.3


From 87c6f40128f92621698f97a62d2ead5184d1dd97 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 28 Oct 2008 16:13:54 +0100
Subject: x86, gart: fix gart detection for Fam11h CPUs

Impact: fix AMD Family 11h boot hangs / USB device problems

The AMD Fam11h CPUs have a K8 northbridge. This northbridge is different
from other family's because it lacks GART support (as I just learned).

But the kernel implicitly expects a GART if it finds an AMD northbridge.

Fix this by removing the Fam11h northbridge id from the scan list of K8
northbridges. This patch also changes the message in the GART driver
about missing K8 northbridges to tell that the GART is missing which is
the correct information in this case.

Reported-by: Jouni Malinen <jkmalinen@gmail.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/k8.c          | 1 -
 arch/x86/kernel/pci-gart_64.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c
index 304d8bad6559..cbc4332a77b2 100644
--- a/arch/x86/kernel/k8.c
+++ b/arch/x86/kernel/k8.c
@@ -18,7 +18,6 @@ static u32 *flush_words;
 struct pci_device_id k8_nb_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) },
 	{}
 };
 EXPORT_SYMBOL(k8_nb_ids);
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index e3f75bbcedea..a42b02b4df68 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -744,7 +744,7 @@ void __init gart_iommu_init(void)
 	long i;
 
 	if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
-		printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
+		printk(KERN_INFO "PCI-GART: No AMD GART found.\n");
 		return;
 	}
 
-- 
cgit v1.2.3


From f96f57d91c2df75011d1e260c23edca429f37361 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 28 Oct 2008 12:39:23 -0700
Subject: x86: fix init_memory_mapping for [dc000000 - e0000000) - v2

Impact: change over-mapping to precise mapping, fix /proc/meminfo output

v2: fix less than 1G ram system handling

when gart aperture is 0xdc000000 - 0xe0000000
it return 0xc0000000 - 0xe0000000

that is not right.

this patch fix that will get exact mapping

on 256g sytem with that aperture after patch
LBSuse:~ # cat /proc/meminfo
MemTotal:       264742432 kB
MemFree:        263920628 kB
Buffers:            1416 kB
Cached:            24468 kB
...
DirectMap4k:      5760 kB
DirectMap2M:   3205120 kB
DirectMap1G:  265289728 kB

it is consistent to
LBSuse:~ # cat /sys/kernel/debug/kernel_page_tables
..
---[ Low Kernel Mapping ]---
0xffff880000000000-0xffff880000200000           2M     RW             GLB x  pte
0xffff880000200000-0xffff880040000000        1022M     RW         PSE GLB x  pmd
0xffff880040000000-0xffff8800c0000000           2G     RW         PSE GLB NX pud
0xffff8800c0000000-0xffff8800d7e00000         382M     RW         PSE GLB NX pmd
0xffff8800d7e00000-0xffff8800d7fa0000        1664K     RW             GLB NX pte
0xffff8800d7fa0000-0xffff8800d8000000         384K                           pte
0xffff8800d8000000-0xffff8800dc000000          64M                           pmd
0xffff8800dc000000-0xffff8800e0000000          64M     RW         PSE GLB NX pmd
0xffff8800e0000000-0xffff880100000000         512M                           pmd
0xffff880100000000-0xffff880800000000          28G     RW         PSE GLB NX pud
0xffff880800000000-0xffff880824600000         582M     RW         PSE GLB NX pmd
0xffff880824600000-0xffff8808247f0000        1984K     RW             GLB NX pte
0xffff8808247f0000-0xffff880824800000          64K     RW     PCD     GLB NX pte
0xffff880824800000-0xffff880840000000         440M     RW         PSE GLB NX pmd
0xffff880840000000-0xffff884000000000         223G     RW         PSE GLB NX pud
0xffff884000000000-0xffff884028000000         640M     RW         PSE GLB NX pmd
0xffff884028000000-0xffff884040000000         384M                           pmd
0xffff884040000000-0xffff888000000000         255G                           pud
0xffff888000000000-0xffffc20000000000       58880G                           pgd

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c | 50 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ad38648bddbd..ebe1811e5b1e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -671,12 +671,13 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 	unsigned long last_map_addr = 0;
 	unsigned long page_size_mask = 0;
 	unsigned long start_pfn, end_pfn;
+	unsigned long pos;
 
 	struct map_range mr[NR_RANGE_MR];
 	int nr_range, i;
 	int use_pse, use_gbpages;
 
-	printk(KERN_INFO "init_memory_mapping\n");
+	printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
 
 	/*
 	 * Find space for the kernel direct mapping tables.
@@ -710,35 +711,50 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 
 	/* head if not big page alignment ?*/
 	start_pfn = start >> PAGE_SHIFT;
-	end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
+	pos = start_pfn << PAGE_SHIFT;
+	end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
 			<< (PMD_SHIFT - PAGE_SHIFT);
-	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+	if (start_pfn < end_pfn) {
+		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+		pos = end_pfn << PAGE_SHIFT;
+	}
 
 	/* big page (2M) range*/
-	start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
+	start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
 			 << (PMD_SHIFT - PAGE_SHIFT);
-	end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT)
+	end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
 			 << (PUD_SHIFT - PAGE_SHIFT);
-	if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
-		end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
-	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
-			page_size_mask & (1<<PG_LEVEL_2M));
+	if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
+		end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
+	if (start_pfn < end_pfn) {
+		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+				page_size_mask & (1<<PG_LEVEL_2M));
+		pos = end_pfn << PAGE_SHIFT;
+	}
 
 	/* big page (1G) range */
-	start_pfn = end_pfn;
-	end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
-	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+	start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
+			 << (PUD_SHIFT - PAGE_SHIFT);
+	end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
+	if (start_pfn < end_pfn) {
+		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
 				page_size_mask &
 				 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
+		pos = end_pfn << PAGE_SHIFT;
+	}
 
 	/* tail is not big page (1G) alignment */
-	start_pfn = end_pfn;
-	end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
-	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
-			page_size_mask & (1<<PG_LEVEL_2M));
+	start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
+			 << (PMD_SHIFT - PAGE_SHIFT);
+	end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+	if (start_pfn < end_pfn) {
+		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+				page_size_mask & (1<<PG_LEVEL_2M));
+		pos = end_pfn << PAGE_SHIFT;
+	}
 
 	/* tail is not big page (2M) alignment */
-	start_pfn = end_pfn;
+	start_pfn = pos>>PAGE_SHIFT;
 	end_pfn = end>>PAGE_SHIFT;
 	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
 
-- 
cgit v1.2.3


From 9352f5698db2c6d7f2789f6cd37e3996d49ac4b5 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 28 Oct 2008 23:05:22 -0700
Subject: x86: two trivial sparse annotations

Impact: fewer sparse warnings, no functional changes

arch/x86/kernel/vsmp_64.c:87:14: warning: incorrect type in argument 1 (different address spaces)
arch/x86/kernel/vsmp_64.c:87:14:    expected void const volatile [noderef] <asn:2>*addr
arch/x86/kernel/vsmp_64.c:87:14:    got void *[assigned] address
arch/x86/kernel/vsmp_64.c:88:22: warning: incorrect type in argument 1 (different address spaces)
arch/x86/kernel/vsmp_64.c:88:22:    expected void const volatile [noderef] <asn:2>*addr
arch/x86/kernel/vsmp_64.c:88:22:    got void *
arch/x86/kernel/vsmp_64.c:100:23: warning: incorrect type in argument 2 (different address spaces)
arch/x86/kernel/vsmp_64.c:100:23:    expected void volatile [noderef] <asn:2>*addr
arch/x86/kernel/vsmp_64.c:100:23:    got void *
arch/x86/kernel/vsmp_64.c:101:23: warning: incorrect type in argument 1 (different address spaces)
arch/x86/kernel/vsmp_64.c:101:23:    expected void const volatile [noderef] <asn:2>*addr
arch/x86/kernel/vsmp_64.c:101:23:    got void *
arch/x86/mm/gup.c:235:6: warning: incorrect type in argument 1 (different base types)
arch/x86/mm/gup.c:235:6:    expected void const volatile [noderef] <asn:1>*<noident>
arch/x86/mm/gup.c:235:6:    got unsigned long [unsigned] [assigned] start

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/vsmp_64.c | 2 +-
 arch/x86/mm/gup.c         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 7766d36983fc..a688f3bfaec2 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -78,7 +78,7 @@ static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
 
 static void __init set_vsmp_pv_ops(void)
 {
-	void *address;
+	void __iomem *address;
 	unsigned int cap, ctl, cfg;
 
 	/* set vSMP magic bits to indicate vSMP capable kernel */
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 4ba373c5b8c8..be54176e9eb2 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -233,7 +233,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	len = (unsigned long) nr_pages << PAGE_SHIFT;
 	end = start + len;
 	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
-					start, len)))
+					(void __user *)start, len)))
 		goto slow_irqon;
 
 	/*
-- 
cgit v1.2.3


From 1d6cf1feb854c53c6d59e0d879603692b379e208 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 28 Oct 2008 22:46:04 -0700
Subject: x86: start annotating early ioremap pointers with __iomem

Impact: some new sparse warnings in e820.c etc, but no functional change.

As with regular ioremap, iounmap etc, annotate with __iomem.

Fixes the following sparse warnings, will produce some new ones
elsewhere in arch/x86 that will get worked out over time.

arch/x86/mm/ioremap.c:402:9: warning: cast removes address space of expression
arch/x86/mm/ioremap.c:406:10: warning: cast adds address space to expression (<asn:2>)
arch/x86/mm/ioremap.c:782:19: warning: Using plain integer as NULL pointer

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/io.h |  6 +++---
 arch/x86/mm/ioremap.c     | 22 +++++++++++-----------
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 5618a103f395..ac2abc88cd95 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -82,9 +82,9 @@ extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
 extern void early_ioremap_init(void);
 extern void early_ioremap_clear(void);
 extern void early_ioremap_reset(void);
-extern void *early_ioremap(unsigned long offset, unsigned long size);
-extern void *early_memremap(unsigned long offset, unsigned long size);
-extern void early_iounmap(void *addr, unsigned long size);
+extern void __iomem *early_ioremap(unsigned long offset, unsigned long size);
+extern void __iomem *early_memremap(unsigned long offset, unsigned long size);
+extern void early_iounmap(void __iomem *addr, unsigned long size);
 extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
 
 
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ae71e11eb3e5..d4c4307ff3e0 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -387,7 +387,7 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
 					unsigned long size)
 {
 	unsigned long flags;
-	void *ret;
+	void __iomem *ret;
 	int err;
 
 	/*
@@ -399,11 +399,11 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
 	if (err < 0)
 		return NULL;
 
-	ret = (void *) __ioremap_caller(phys_addr, size, flags,
-					__builtin_return_address(0));
+	ret = __ioremap_caller(phys_addr, size, flags,
+			       __builtin_return_address(0));
 
 	free_memtype(phys_addr, phys_addr + size);
-	return (void __iomem *)ret;
+	return ret;
 }
 
 void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
@@ -622,7 +622,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
 		__early_set_fixmap(idx, 0, __pgprot(0));
 }
 
-static void *prev_map[FIX_BTMAPS_SLOTS] __initdata;
+static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
 static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
 static int __init check_early_ioremap_leak(void)
 {
@@ -645,7 +645,7 @@ static int __init check_early_ioremap_leak(void)
 }
 late_initcall(check_early_ioremap_leak);
 
-static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
+static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
 {
 	unsigned long offset, last_addr;
 	unsigned int nrpages;
@@ -713,23 +713,23 @@ static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size,
 	if (early_ioremap_debug)
 		printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
 
-	prev_map[slot] = (void *) (offset + fix_to_virt(idx0));
+	prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0));
 	return prev_map[slot];
 }
 
 /* Remap an IO device */
-void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
+void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size)
 {
 	return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
 }
 
 /* Remap memory */
-void __init *early_memremap(unsigned long phys_addr, unsigned long size)
+void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size)
 {
 	return __early_ioremap(phys_addr, size, PAGE_KERNEL);
 }
 
-void __init early_iounmap(void *addr, unsigned long size)
+void __init early_iounmap(void __iomem *addr, unsigned long size)
 {
 	unsigned long virt_addr;
 	unsigned long offset;
@@ -779,7 +779,7 @@ void __init early_iounmap(void *addr, unsigned long size)
 		--idx;
 		--nrpages;
 	}
-	prev_map[slot] = 0;
+	prev_map[slot] = NULL;
 }
 
 void __this_fixmap_does_not_exist(void)
-- 
cgit v1.2.3


From fe8b868eccb9f85a0e231e35f0abac5b39bac801 Mon Sep 17 00:00:00 2001
From: Gary Hade <garyhade@us.ibm.com>
Date: Tue, 28 Oct 2008 16:43:14 -0700
Subject: x86: remove debug code from arch_add_memory()

Impact: remove incorrect WARN_ON(1)

Gets rid of dmesg spam created during physical memory hot-add which
will very likely confuse users.  The change removes what appears to
be debugging code which I assume was unintentionally included in:

  x86: arch/x86/mm/init_64.c printk fixes
  commit 10f22dde556d1ed41d55355d1fb8ad495f9810c8

Signed-off-by: Gary Hade <garyhade@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ebe1811e5b1e..9db01db6e3cd 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -858,7 +858,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 		max_pfn_mapped = last_mapped_pfn;
 
 	ret = __add_pages(zone, start_pfn, nr_pages);
-	WARN_ON(1);
+	WARN_ON_ONCE(ret);
 
 	return ret;
 }
-- 
cgit v1.2.3


From ab00fee30cddf975200b3c97aef25bea144a0d89 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Thu, 30 Oct 2008 10:37:21 +0000
Subject: i386/PAE: fix pud_page()

Impact: cleanup

To the unsuspecting user it is quite annoying that this broken and
inconsistent with x86-64 definition still exists.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/pgtable-3level.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index fb16cec702e4..52597aeadfff 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -120,13 +120,13 @@ static inline void pud_clear(pud_t *pudp)
 		write_cr3(pgd);
 }
 
-#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_PFN_MASK))
+#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
 
 #define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK))
 
 
 /* Find an entry in the second-level page table.. */
-#define pmd_offset(pud, address) ((pmd_t *)pud_page(*(pud)) +	\
+#define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) +	\
 				  pmd_index(address))
 
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From ae9b9403644f3ecc76867af042e7e1cfd5c099d0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 30 Oct 2008 17:43:57 +0100
Subject: AMD IOMMU: fix detection of NP capable IOMMUs

This patch changes the code to use IOMMU_CAP_NPCACHE as a shift and not
as a mask.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 3b346c6f5514..38e88d40ab10 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -50,7 +50,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 /* returns !0 if the IOMMU is caching non-present entries in its TLB */
 static int iommu_has_npcache(struct amd_iommu *iommu)
 {
-	return iommu->cap & IOMMU_CAP_NPCACHE;
+	return iommu->cap & (1UL << IOMMU_CAP_NPCACHE);
 }
 
 /****************************************************************************
-- 
cgit v1.2.3


From c17dad6905fc82d8f523399e5c3f014e81d61df6 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 29 Oct 2008 14:00:50 -0700
Subject: .gitignore updates

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/accounting/.gitignore   | 1 +
 Documentation/auxdisplay/.gitignore   | 1 +
 Documentation/connector/.gitignore    | 1 +
 Documentation/ia64/.gitignore         | 1 +
 Documentation/networking/.gitignore   | 1 +
 Documentation/pcmcia/.gitignore       | 1 +
 Documentation/spi/.gitignore          | 2 ++
 Documentation/video4linux/.gitignore  | 1 +
 Documentation/vm/.gitignore           | 1 +
 Documentation/watchdog/src/.gitignore | 2 ++
 arch/x86/boot/compressed/.gitignore   | 2 ++
 11 files changed, 14 insertions(+)
 create mode 100644 Documentation/accounting/.gitignore
 create mode 100644 Documentation/auxdisplay/.gitignore
 create mode 100644 Documentation/connector/.gitignore
 create mode 100644 Documentation/ia64/.gitignore
 create mode 100644 Documentation/networking/.gitignore
 create mode 100644 Documentation/pcmcia/.gitignore
 create mode 100644 Documentation/spi/.gitignore
 create mode 100644 Documentation/video4linux/.gitignore
 create mode 100644 Documentation/vm/.gitignore
 create mode 100644 Documentation/watchdog/src/.gitignore

(limited to 'arch/x86')

diff --git a/Documentation/accounting/.gitignore b/Documentation/accounting/.gitignore
new file mode 100644
index 000000000000..86485203c4ae
--- /dev/null
+++ b/Documentation/accounting/.gitignore
@@ -0,0 +1 @@
+getdelays
diff --git a/Documentation/auxdisplay/.gitignore b/Documentation/auxdisplay/.gitignore
new file mode 100644
index 000000000000..7af222860a96
--- /dev/null
+++ b/Documentation/auxdisplay/.gitignore
@@ -0,0 +1 @@
+cfag12864b-example
diff --git a/Documentation/connector/.gitignore b/Documentation/connector/.gitignore
new file mode 100644
index 000000000000..d2b9c32accd4
--- /dev/null
+++ b/Documentation/connector/.gitignore
@@ -0,0 +1 @@
+ucon
diff --git a/Documentation/ia64/.gitignore b/Documentation/ia64/.gitignore
new file mode 100644
index 000000000000..ab806edc8732
--- /dev/null
+++ b/Documentation/ia64/.gitignore
@@ -0,0 +1 @@
+aliasing-test
diff --git a/Documentation/networking/.gitignore b/Documentation/networking/.gitignore
new file mode 100644
index 000000000000..286a5680f490
--- /dev/null
+++ b/Documentation/networking/.gitignore
@@ -0,0 +1 @@
+ifenslave
diff --git a/Documentation/pcmcia/.gitignore b/Documentation/pcmcia/.gitignore
new file mode 100644
index 000000000000..53d081336757
--- /dev/null
+++ b/Documentation/pcmcia/.gitignore
@@ -0,0 +1 @@
+crc32hash
diff --git a/Documentation/spi/.gitignore b/Documentation/spi/.gitignore
new file mode 100644
index 000000000000..4280576397e8
--- /dev/null
+++ b/Documentation/spi/.gitignore
@@ -0,0 +1,2 @@
+spidev_fdx
+spidev_test
diff --git a/Documentation/video4linux/.gitignore b/Documentation/video4linux/.gitignore
new file mode 100644
index 000000000000..952703943e8e
--- /dev/null
+++ b/Documentation/video4linux/.gitignore
@@ -0,0 +1 @@
+v4lgrab
diff --git a/Documentation/vm/.gitignore b/Documentation/vm/.gitignore
new file mode 100644
index 000000000000..33e8a023df02
--- /dev/null
+++ b/Documentation/vm/.gitignore
@@ -0,0 +1 @@
+slabinfo
diff --git a/Documentation/watchdog/src/.gitignore b/Documentation/watchdog/src/.gitignore
new file mode 100644
index 000000000000..ac90997dba93
--- /dev/null
+++ b/Documentation/watchdog/src/.gitignore
@@ -0,0 +1,2 @@
+watchdog-simple
+watchdog-test
diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore
index be0ed065249b..63eff3b04d01 100644
--- a/arch/x86/boot/compressed/.gitignore
+++ b/arch/x86/boot/compressed/.gitignore
@@ -1 +1,3 @@
 relocs
+vmlinux.bin.all
+vmlinux.relocs
-- 
cgit v1.2.3


From c08b6acc9b996ba6231105cb12a4125c957e0c97 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Thu, 30 Oct 2008 11:33:19 -0700
Subject: x86, uv: fix compile error in uv_hub.h

Impact: include file dependency cleanup

Fix compile errors of files that include asm/uv/uv_hub.h but do
not include linux/timer.h.

[ such files are not mainline right now. ]

Signed-of-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uv/uv_hub.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index c6ad93e315c8..7a5782610b2b 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -13,6 +13,7 @@
 
 #include <linux/numa.h>
 #include <linux/percpu.h>
+#include <linux/timer.h>
 #include <asm/types.h>
 #include <asm/percpu.h>
 
-- 
cgit v1.2.3


From 08c33308575b370c89b4ed1198ece5f93145a2aa Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Thu, 30 Oct 2008 16:08:38 -0500
Subject: x86/voyager: fix boot breakage caused by x86: boot secondary cpus
 through initial_code

Impact: boot up secondary CPUs as well on x86/Voyager systems

This commit:

| commit 3e9704739daf46a8ba6593d749c67b5f7cd633d2
| Author: Glauber Costa <gcosta@redhat.com>
| Date:   Wed May 28 13:01:54 2008 -0300
|
|     x86: boot secondary cpus through initial_code

removed the use of initialize_secondary.  However, it didn't update
voyager, so the secondary cpus no longer boot.  Fix this by adding the
initial_code switch to voyager as well.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mach-voyager/voyager_smp.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 0f6e8a6523ae..9cd327a278ad 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -90,6 +90,7 @@ static void ack_vic_irq(unsigned int irq);
 static void vic_enable_cpi(void);
 static void do_boot_cpu(__u8 cpuid);
 static void do_quad_bootstrap(void);
+static void initialize_secondary(void);
 
 int hard_smp_processor_id(void);
 int safe_smp_processor_id(void);
@@ -650,6 +651,8 @@ void __init smp_boot_cpus(void)
 	 smp_tune_scheduling();
 	 */
 	smp_store_cpu_info(boot_cpu_id);
+	/* setup the jump vector */
+	initial_code = (unsigned long)initialize_secondary;
 	printk("CPU%d: ", boot_cpu_id);
 	print_cpu_info(&cpu_data(boot_cpu_id));
 
@@ -702,7 +705,7 @@ void __init smp_boot_cpus(void)
 
 /* Reload the secondary CPUs task structure (this function does not
  * return ) */
-void __init initialize_secondary(void)
+static void __init initialize_secondary(void)
 {
 #if 0
 	// AC kernels only
-- 
cgit v1.2.3


From 017d9d20d88cacb0a6a29f343b23c95e203f6645 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Thu, 30 Oct 2008 16:05:39 -0500
Subject: x86: use CONFIG_X86_SMP instead of CONFIG_SMP

Impact: fix x86/Voyager boot

CONFIG_SMP is used for features which work on *all* x86 boxes.
CONFIG_X86_SMP is used for standard PC like x86 boxes (for things like
multi core and apics)

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/addon_cpuid_features.c | 2 +-
 arch/x86/kernel/tsc.c                      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 0d9c993aa93e..ef8f831af823 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
  */
 void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_SMP
+#ifdef CONFIG_X86_SMP
 	unsigned int eax, ebx, ecx, edx, sub_index;
 	unsigned int ht_mask_width, core_plus_mask_width;
 	unsigned int core_select_mask, core_level_siblings;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 161bb850fc47..62348e4fd8d1 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -759,7 +759,7 @@ __cpuinit int unsynchronized_tsc(void)
 	if (!cpu_has_tsc || tsc_unstable)
 		return 1;
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_X86_SMP
 	if (apic_is_clustered_box())
 		return 1;
 #endif
-- 
cgit v1.2.3


From ee477524b461324ed8fc950f451c3671dc79f12e Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Thu, 30 Oct 2008 16:28:35 -0500
Subject: x86/voyager: fix compile breakage casued by x86: move
 prefill_possible_map calling early

Impact: fix build failure on x86/Voyager

Before:

| commit 329513a35d1a2b6b28d54f5c2c0dde4face8200b
| Author: Yinghai Lu <yhlu.kernel@gmail.com>
| Date:   Wed Jul 2 18:54:40 2008 -0700
|
|     x86: move prefill_possible_map calling early

prefill_possible_mask() was hidden under CONFIG_HOTPLUG_CPU rendering
it invisitble to voyager.  Since this commit it's exposed, but not
provided by the voyager subarch, so add a dummy stub to fix the link
breakage.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mach-voyager/voyager_smp.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 9cd327a278ad..01285af5782c 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -345,6 +345,12 @@ static void do_quad_bootstrap(void)
 	}
 }
 
+void prefill_possible_map(void)
+{
+	/* This is empty on voyager because we need a much
+	 * earlier detection which is done in find_smp_config */
+}
+
 /* Set up all the basic stuff: read the SMP config and make all the
  * SMP information reflect only the boot cpu.  All others will be
  * brought on-line later. */
-- 
cgit v1.2.3


From 9e41bff2708e420e61e6b89a54c15232857069b1 Mon Sep 17 00:00:00 2001
From: Ravikiran G Thirumalai <kiran@scalex86.org>
Date: Thu, 30 Oct 2008 13:59:21 -0700
Subject: x86: fix /dev/mem mmap breakage when PAT is disabled

Impact: allow /dev/mem mmaps on non-PAT CPUs/platforms

Fix mmap to /dev/mem when CONFIG_X86_PAT is off and CONFIG_STRICT_DEVMEM is
off

mmap to /dev/mem on kernel memory has been failing since the
introduction of PAT (CONFIG_STRICT_DEVMEM=n case).   Seems like
the check to avoid cache aliasing with PAT is kicking in even
when PAT is disabled. The bug seems to have crept in 2.6.26.

This patch makes sure that the mmap to regular
kernel memory succeeds if CONFIG_STRICT_DEVMEM=n and
PAT is disabled, and the checks to avoid cache aliasing
still happens if PAT is enabled.

Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Tested-by: Tim Sirianni <tim@scalemp.com>
Cc: <stable@kernel.org>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pat.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 738fd0f24958..eb1bf000d12e 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -481,12 +481,16 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 	return 1;
 }
 #else
+/* This check is needed to avoid cache aliasing when PAT is enabled */
 static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 {
 	u64 from = ((u64)pfn) << PAGE_SHIFT;
 	u64 to = from + size;
 	u64 cursor = from;
 
+	if (!pat_enabled)
+		return 1;
+
 	while (cursor < to) {
 		if (!devmem_is_allowed(pfn)) {
 			printk(KERN_INFO
-- 
cgit v1.2.3


From b3572e361b6b2ac5e724bc4bb932b7774b720b95 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Thu, 30 Oct 2008 16:00:59 -0500
Subject: x86/voyager: fix compile breakage caused by
 dc1e35c6e95e8923cf1d3510438b63c600fee1e2

Impact: build fix on x86/Voyager

Given commits like this:

| Author: Suresh Siddha <suresh.b.siddha@intel.com>
| Date:   Tue Jul 29 10:29:19 2008 -0700
|
|     x86, xsave: enable xsave/xrstor on cpus with xsave support

Which deliberately expose boot cpu dependence to pieces of the system,
I think it's time to explicitly have a variable for it to prevent this
continual misassumption that the boot CPU is zero.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig             | 4 ++++
 arch/x86/include/asm/smp.h   | 6 ++++++
 arch/x86/kernel/cpu/common.c | 2 +-
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 350bee1d54dc..2a40c4c6dd7d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -231,6 +231,10 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
+config X86_HAS_BOOT_CPU_ID
+	def_bool y
+	depends on X86_VOYAGER
+
 config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 2766021aef80..d12811ce51d9 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -225,5 +225,11 @@ static inline int hard_smp_processor_id(void)
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
+#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
+extern unsigned char boot_cpu_id;
+#else
+#define boot_cpu_id	0
+#endif
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 25581dcb280e..93e9393ea64a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1134,7 +1134,7 @@ void __cpuinit cpu_init(void)
 	/*
 	 * Boot processor to setup the FP and extended state context info.
 	 */
-	if (!smp_processor_id())
+	if (smp_processor_id() == boot_cpu_id)
 		init_thread_xstate();
 
 	xsave_init();
-- 
cgit v1.2.3


From bfcb4c1becf93b1592f4a03a4d6e00a3ab89d5ec Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Thu, 30 Oct 2008 16:13:37 -0500
Subject: x86/voyager: fix missing cpu_index initialisation

Impact: fix /proc/cpuinfo output on x86/Voyager

Ever since

| commit 92cb7612aee39642d109b8d935ad265e602c0563
| Author: Mike Travis <travis@sgi.com>
| Date:   Fri Oct 19 20:35:04 2007 +0200
|
|     x86: convert cpuinfo_x86 array to a per_cpu array

We've had an extra field in cpuinfo_x86 which is cpu_index.
Unfortunately, voyager has never initialised this, although the only
noticeable impact seems to be that /proc/cpuinfo shows all zeros for
the processor ids.

Anyway, fix this by initialising the boot CPU properly and setting the
index when the secondaries update.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/common.c        | 2 ++
 arch/x86/mach-voyager/voyager_smp.c | 1 +
 2 files changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 93e9393ea64a..da8f15ac7a6d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -549,6 +549,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 		this_cpu->c_early_init(c);
 
 	validate_pat_support(c);
+
+	c->cpu_index = boot_cpu_id;
 }
 
 void __init early_cpu_init(void)
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 01285af5782c..7f4c6af14351 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -420,6 +420,7 @@ void __init smp_store_cpu_info(int id)
 	struct cpuinfo_x86 *c = &cpu_data(id);
 
 	*c = boot_cpu_data;
+	c->cpu_index = id;
 
 	identify_secondary_cpu(c);
 }
-- 
cgit v1.2.3


From 1c4acdb467f8a6704855a5670ff3d82e3c18eb0b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 31 Oct 2008 00:43:03 +0100
Subject: x86: cpu_index build fix

fix:

 arch/x86/kernel/cpu/common.c: In function 'early_identify_cpu':
 arch/x86/kernel/cpu/common.c:553: error: 'struct cpuinfo_x86' has no member named 'cpu_index'

as cpu_index is only available on SMP.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/common.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index da8f15ac7a6d..003a65395bd5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -550,7 +550,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 
 	validate_pat_support(c);
 
+#ifdef CONFIG_SMP
 	c->cpu_index = boot_cpu_id;
+#endif
 }
 
 void __init early_cpu_init(void)
-- 
cgit v1.2.3


From ad5173ff8a387191dbacf889becb92c59aba5d59 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 31 Oct 2008 11:24:27 -0500
Subject: lguest: fix early_ioremap.

dmi_scan_machine breaks under lguest:
	lguest: unhandled trap 14 at 0xc04edeae (0xffa00000)

This is because we use current_cr3 for the read_cr3() paravirt
function, and it isn't set until the first cr3 change.  We got away
with it until this happened.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/x86/lguest/boot.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 48ee4f9435f4..4e22fa08d629 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -367,10 +367,9 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
  * lazily after a task switch, and Linux uses that gratefully, but wouldn't a
  * name like "FPUTRAP bit" be a little less cryptic?
  *
- * We store cr0 (and cr3) locally, because the Host never changes it.  The
- * Guest sometimes wants to read it and we'd prefer not to bother the Host
- * unnecessarily. */
-static unsigned long current_cr0, current_cr3;
+ * We store cr0 locally because the Host never changes it.  The Guest sometimes
+ * wants to read it and we'd prefer not to bother the Host unnecessarily. */
+static unsigned long current_cr0;
 static void lguest_write_cr0(unsigned long val)
 {
 	lazy_hcall(LHCALL_TS, val & X86_CR0_TS, 0, 0);
@@ -399,17 +398,23 @@ static unsigned long lguest_read_cr2(void)
 	return lguest_data.cr2;
 }
 
+/* See lguest_set_pte() below. */
+static bool cr3_changed = false;
+
 /* cr3 is the current toplevel pagetable page: the principle is the same as
- * cr0.  Keep a local copy, and tell the Host when it changes. */
+ * cr0.  Keep a local copy, and tell the Host when it changes.  The only
+ * difference is that our local copy is in lguest_data because the Host needs
+ * to set it upon our initial hypercall. */
 static void lguest_write_cr3(unsigned long cr3)
 {
+	lguest_data.pgdir = cr3;
 	lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0);
-	current_cr3 = cr3;
+	cr3_changed = true;
 }
 
 static unsigned long lguest_read_cr3(void)
 {
-	return current_cr3;
+	return lguest_data.pgdir;
 }
 
 /* cr4 is used to enable and disable PGE, but we don't care. */
@@ -498,13 +503,13 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
  * to forget all of them.  Fortunately, this is very rare.
  *
  * ... except in early boot when the kernel sets up the initial pagetables,
- * which makes booting astonishingly slow.  So we don't even tell the Host
- * anything changed until we've done the first page table switch. */
+ * which makes booting astonishingly slow: 1.83 seconds!  So we don't even tell
+ * the Host anything changed until we've done the first page table switch,
+ * which brings boot back to 0.25 seconds. */
 static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 {
 	*ptep = pteval;
-	/* Don't bother with hypercall before initial setup. */
-	if (current_cr3)
+	if (cr3_changed)
 		lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
 }
 
@@ -521,7 +526,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 static void lguest_flush_tlb_single(unsigned long addr)
 {
 	/* Simply set it to zero: if it was not, it will fault back in. */
-	lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0);
+	lazy_hcall(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0);
 }
 
 /* This is what happens after the Guest has removed a large number of entries.
-- 
cgit v1.2.3


From 526e5ab200ce483dcdf146806f4936bd58daa800 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 31 Oct 2008 11:24:27 -0500
Subject: lguest: fix irq vectors.

	do_IRQ: cannot handle IRQ -1 vector 0x20 cpu 0
	------------[ cut here ]------------
	kernel BUG at arch/x86/kernel/irq_32.c:219!

We're not ISA: we have a 1:1 mapping from vectors to irqs.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/x86/lguest/boot.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 4e22fa08d629..a5d8e1ace1cf 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -586,6 +586,9 @@ static void __init lguest_init_IRQ(void)
 
 	for (i = 0; i < LGUEST_IRQS; i++) {
 		int vector = FIRST_EXTERNAL_VECTOR + i;
+		/* Some systems map "vectors" to interrupts weirdly.  Lguest has
+		 * a straightforward 1 to 1 mapping, so force that here. */
+		__get_cpu_var(vector_irq)[vector] = i;
 		if (vector != SYSCALL_VECTOR) {
 			set_intr_gate(vector, interrupt[vector]);
 			set_irq_chip_and_handler_name(i, &lguest_irq_controller,
-- 
cgit v1.2.3


From b342797c1e5116a130841527b47dfaa462ed0968 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 31 Oct 2008 09:31:38 +0100
Subject: x86: build fix

Impact: build fix on certain UP configs

fix:

 arch/x86/kernel/cpu/common.c: In function 'cpu_init':
 arch/x86/kernel/cpu/common.c:1141: error: 'boot_cpu_id' undeclared (first use in this function)
 arch/x86/kernel/cpu/common.c:1141: error: (Each undeclared identifier is reported only once
 arch/x86/kernel/cpu/common.c:1141: error: for each function it appears in.)

Pull in asm/smp.h on UP, so that we get the definition of
boot_cpu_id.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/common.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 003a65395bd5..b9c9ea0217a9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -20,6 +20,7 @@
 #include <asm/pat.h>
 #include <asm/asm.h>
 #include <asm/numa.h>
+#include <asm/smp.h>
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/mpspec.h>
 #include <asm/apic.h>
-- 
cgit v1.2.3


From fd9409343521eac22b6ed51686128a643c7c976b Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@keithp.com>
Date: Thu, 30 Oct 2008 19:37:09 -0700
Subject: x86: add iomap_atomic*()/iounmap_atomic() on 32-bit using fixmaps

Impact: introduce new APIs, separate kmap code from CONFIG_HIGHMEM

This takes the code used for CONFIG_HIGHMEM memory mappings except that
it's designed for dynamic IO resource mapping.

These fixmaps are available even with CONFIG_HIGHMEM turned off.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/fixmap.h    |  4 +++
 arch/x86/include/asm/fixmap_32.h |  4 ---
 arch/x86/include/asm/highmem.h   |  5 +---
 arch/x86/mm/Makefile             |  2 +-
 arch/x86/mm/init_32.c            |  3 +-
 arch/x86/mm/iomap_32.c           | 59 ++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/iomap.h          | 30 ++++++++++++++++++++
 7 files changed, 96 insertions(+), 11 deletions(-)
 create mode 100644 arch/x86/mm/iomap_32.c
 create mode 100644 include/asm-x86/iomap.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 8668a94f850e..23696d44a0af 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -9,6 +9,10 @@
 
 extern int fixmaps_set;
 
+extern pte_t *kmap_pte;
+extern pgprot_t kmap_prot;
+extern pte_t *pkmap_page_table;
+
 void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
 void native_set_fixmap(enum fixed_addresses idx,
 		       unsigned long phys, pgprot_t flags);
diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h
index 09f29ab5c139..c7115c1d7217 100644
--- a/arch/x86/include/asm/fixmap_32.h
+++ b/arch/x86/include/asm/fixmap_32.h
@@ -28,10 +28,8 @@ extern unsigned long __FIXADDR_TOP;
 #include <asm/acpi.h>
 #include <asm/apicdef.h>
 #include <asm/page.h>
-#ifdef CONFIG_HIGHMEM
 #include <linux/threads.h>
 #include <asm/kmap_types.h>
-#endif
 
 /*
  * Here we define all the compile-time 'special' virtual
@@ -75,10 +73,8 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_CYCLONE_TIMER
 	FIX_CYCLONE_TIMER, /*cyclone timer register*/
 #endif
-#ifdef CONFIG_HIGHMEM
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
-#endif
 #ifdef CONFIG_PCI_MMCONFIG
 	FIX_PCIE_MCFG,
 #endif
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index a3b3b7c3027b..bf9276bea660 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -25,14 +25,11 @@
 #include <asm/kmap_types.h>
 #include <asm/tlbflush.h>
 #include <asm/paravirt.h>
+#include <asm/fixmap.h>
 
 /* declarations for highmem.c */
 extern unsigned long highstart_pfn, highend_pfn;
 
-extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
-extern pte_t *pkmap_page_table;
-
 /*
  * Right now we initialize only a single pte table. It can be extended
  * easily, subsequent pte tables have to be allocated in one physical
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 59f89b434b45..fea4565ff576 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,7 +1,7 @@
 obj-y	:=  init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
 	    pat.o pgtable.o gup.o
 
-obj-$(CONFIG_X86_32)		+= pgtable_32.o
+obj-$(CONFIG_X86_32)		+= pgtable_32.o iomap_32.o
 
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_X86_PTDUMP)	+= dump_pagetables.o
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8396868e82c5..c483f4242079 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -334,7 +334,6 @@ int devmem_is_allowed(unsigned long pagenr)
 	return 0;
 }
 
-#ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
 
@@ -357,6 +356,7 @@ static void __init kmap_init(void)
 	kmap_prot = PAGE_KERNEL;
 }
 
+#ifdef CONFIG_HIGHMEM
 static void __init permanent_kmaps_init(pgd_t *pgd_base)
 {
 	unsigned long vaddr;
@@ -436,7 +436,6 @@ static void __init set_highmem_pages_init(void)
 #endif /* !CONFIG_NUMA */
 
 #else
-# define kmap_init()				do { } while (0)
 # define permanent_kmaps_init(pgd_base)		do { } while (0)
 # define set_highmem_pages_init()	do { } while (0)
 #endif /* CONFIG_HIGHMEM */
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
new file mode 100644
index 000000000000..d0151d8ce452
--- /dev/null
+++ b/arch/x86/mm/iomap_32.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2008 Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <asm/iomap.h>
+#include <linux/module.h>
+
+/* Map 'pfn' using fixed map 'type' and protections 'prot'
+ */
+void *
+iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr;
+
+	pagefault_disable();
+
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+	set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
+	arch_flush_lazy_mmu_mode();
+
+	return (void*) vaddr;
+}
+EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
+
+void
+iounmap_atomic(void *kvaddr, enum km_type type)
+{
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+	/*
+	 * Force other mappings to Oops if they'll try to access this pte
+	 * without first remap it.  Keeping stale mappings around is a bad idea
+	 * also, in case the page changes cacheability attributes or becomes
+	 * a protected page in a hypervisor.
+	 */
+	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+		kpte_clear_flush(kmap_pte-idx, vaddr);
+
+	arch_flush_lazy_mmu_mode();
+	pagefault_enable();
+}
+EXPORT_SYMBOL_GPL(iounmap_atomic);
diff --git a/include/asm-x86/iomap.h b/include/asm-x86/iomap.h
new file mode 100644
index 000000000000..c1f06289b14b
--- /dev/null
+++ b/include/asm-x86/iomap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2008 Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+void *
+iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+
+void
+iounmap_atomic(void *kvaddr, enum km_type type);
-- 
cgit v1.2.3


From 2576c9991758e431b73e374f6019d6e1e12a8d36 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue, 7 Oct 2008 13:33:12 -0700
Subject: x86: fix AMDC1E and XTOPOLOGY conflict in cpufeature

Impact: fix xsave slowdown regression

Fix two features from conflicting in feature bits.

Fixes this performance regression:

   Subject: cpu2000(both float and int) 13% regression with 2.6.28-rc1
   http://lkml.org/lkml/2008/10/28/36

Reported-by: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Bisected-by: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpufeature.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index f73e95d75b45..cfdf8c2c5c31 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -91,7 +91,7 @@
 #define X86_FEATURE_11AP	(3*32+19) /* "" Bad local APIC aka 11AP */
 #define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_AMDC1E	(3*32+21) /* AMD C1E detected */
-#define X86_FEATURE_XTOPOLOGY	(3*32+21) /* cpu topology enum extensions */
+#define X86_FEATURE_XTOPOLOGY	(3*32+22) /* cpu topology enum extensions */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
-- 
cgit v1.2.3


From 1f98757776eafe31065be9118db6051afcf8643c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 1 Nov 2008 10:17:22 -0700
Subject: x86: Clean up late e820 resource allocation

This makes the late e820 resources use 'insert_resource_expand_to_fit()'
instead of doing a 'reserve_region_with_split()', and also avoids
marking them as IORESOURCE_BUSY.

This results in us being perfectly happy to use pre-existing PCI
resources even if they were marked as being in a reserved region, while
still avoiding any _new_ allocations in the reserved regions.  It also
makes for a simpler and more accurate resource tree.

Example resource allocation from Jonathan Corbet, who has firmware that
has an e820 reserved entry that covered a big range (e0000000-fed003ff),
and that had various PCI resources in it set up by firmware.

With old kernels, the reserved range would force us to re-allocate all
pre-existing PCI resources, and his reserved range would end up looking
like this:

	e0000000-fed003ff : reserved
	  fec00000-fec00fff : IOAPIC 0
	  fed00000-fed003ff : HPET 0

where only the pre-allocated special regions (IOAPIC and HPET) were kept
around.

With 2.6.28-rc2, which uses 'reserve_region_with_split()', Jonathan's
resource tree looked like this:

	e0000000-fe7fffff : reserved
	fe800000-fe8fffff : PCI Bus 0000:01
	 fe800000-fe8fffff : reserved
	fe900000-fe9d9aff : reserved
	fe9d9b00-fe9d9bff : 0000:00:1f.3
	 fe9d9b00-fe9d9bff : reserved
	fe9d9c00-fe9d9fff : 0000:00:1a.7
	 fe9d9c00-fe9d9fff : reserved
	fe9da000-fe9dafff : 0000:00:03.3
	 fe9da000-fe9dafff : reserved
	fe9db000-fe9dbfff : 0000:00:19.0
	 fe9db000-fe9dbfff : reserved
	fe9dc000-fe9dffff : 0000:00:1b.0
	 fe9dc000-fe9dffff : reserved
	fe9e0000-fe9fffff : 0000:00:19.0
	 fe9e0000-fe9fffff : reserved
	fea00000-fea7ffff : 0000:00:02.0
	 fea00000-fea7ffff : reserved
	fea80000-feafffff : 0000:00:02.1
	 fea80000-feafffff : reserved
	feb00000-febfffff : 0000:00:02.0
	 feb00000-febfffff : reserved
	fec00000-fed003ff : reserved
	 fec00000-fec00fff : IOAPIC 0
	 fed00000-fed003ff : HPET 0

and because the reserved entry had been split and moved into the
individual resources, and because it used the IORESOURCE_BUSY flag, the
drivers that actually wanted to _use_ those resources couldn't actually
attach to them:

	e1000e 0000:00:19.0: BAR 0: can't reserve mem region [0xfe9e0000-0xfe9fffff]
	HDA Intel 0000:00:1b.0: BAR 0: can't reserve mem region [0xfe9dc000-0xfe9dffff]

with this patch, the resource tree instead becomes

	e0000000-fed003ff : reserved
	  fe800000-fe8fffff : PCI Bus 0000:01
	  fe9d9b00-fe9d9bff : 0000:00:1f.3
	  fe9d9c00-fe9d9fff : 0000:00:1a.7
	    fe9d9c00-fe9d9fff : ehci_hcd
	  fe9da000-fe9dafff : 0000:00:03.3
	  fe9db000-fe9dbfff : 0000:00:19.0
	    fe9db000-fe9dbfff : e1000e
	  fe9dc000-fe9dffff : 0000:00:1b.0
	    fe9dc000-fe9dffff : ICH HD audio
	  fe9e0000-fe9fffff : 0000:00:19.0
	    fe9e0000-fe9fffff : e1000e
	  fea00000-fea7ffff : 0000:00:02.0
	  fea80000-feafffff : 0000:00:02.1
	  feb00000-febfffff : 0000:00:02.0
	  fec00000-fec00fff : IOAPIC 0
	  fed00000-fed003ff : HPET 0

ie the one reserved region now ends up surrounding all the PCI resources
that were allocated inside of it by firmware, and because it is not
marked BUSY, drivers have no problem attaching to the pre-allocated
resources.

Reported-and-tested-by: Jonathan Corbet <corbet@lwn.net>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Robert Hancock <hancockr@shaw.ca>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/e820.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ce97bf3bed12..7aafeb5263ef 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1290,15 +1290,17 @@ void __init e820_reserve_resources(void)
 		res->start = e820.map[i].addr;
 		res->end = end;
 
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		res->flags = IORESOURCE_MEM;
 
 		/*
 		 * don't register the region that could be conflicted with
 		 * pci device BAR resource and insert them later in
 		 * pcibios_resource_survey()
 		 */
-		if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20))
+		if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
+			res->flags |= IORESOURCE_BUSY;
 			insert_resource(&iomem_resource, res);
+		}
 		res++;
 	}
 
@@ -1318,7 +1320,7 @@ void __init e820_reserve_resources_late(void)
 	res = e820_res;
 	for (i = 0; i < e820.nr_map; i++) {
 		if (!res->parent && res->end)
-			reserve_region_with_split(&iomem_resource, res->start, res->end, res->name);
+			insert_resource_expand_to_fit(&iomem_resource, res);
 		res++;
 	}
 }
-- 
cgit v1.2.3


From 73557af5bf32c3db973050de1fb73423e8fc873e Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Fri, 31 Oct 2008 13:59:49 -0400
Subject: x86, voyager: fix smp_intr_init() compile breakage

Impact: fix x86/Voyager build

Looks like this became static on the rest of x86.  Fix it up by adding
an external definition to mach-voyager/setup.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/voyager.h      | 1 +
 arch/x86/mach-voyager/setup.c       | 2 +-
 arch/x86/mach-voyager/voyager_smp.c | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h
index 9c811d2e6f91..b3e647307625 100644
--- a/arch/x86/include/asm/voyager.h
+++ b/arch/x86/include/asm/voyager.h
@@ -520,6 +520,7 @@ extern void voyager_restart(void);
 extern void voyager_cat_power_off(void);
 extern void voyager_cat_do_common_interrupt(void);
 extern void voyager_handle_nmi(void);
+extern void voyager_smp_intr_init(void);
 /* Commands for the following are */
 #define	VOYAGER_PSI_READ	0
 #define VOYAGER_PSI_WRITE	1
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index 6bbdd633864c..a580b9562e76 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -27,7 +27,7 @@ static struct irqaction irq2 = {
 void __init intr_init_hook(void)
 {
 #ifdef CONFIG_SMP
-	smp_intr_init();
+	voyager_smp_intr_init();
 #endif
 
 	setup_irq(2, &irq2);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 7f4c6af14351..0e331652681e 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -1258,7 +1258,7 @@ static void handle_vic_irq(unsigned int irq, struct irq_desc *desc)
 #define QIC_SET_GATE(cpi, vector) \
 	set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector))
 
-void __init smp_intr_init(void)
+void __init voyager_smp_intr_init(void)
 {
 	int i;
 
-- 
cgit v1.2.3


From e5beae16901795223d677f15aa2fe192976278ee Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@keithp.com>
Date: Mon, 3 Nov 2008 18:21:45 +0100
Subject: io mapping: clean up #ifdefs

Impact: cleanup

clean up ifdefs: change #ifdef CONFIG_X86_32/64 to
CONFIG_HAVE_ATOMIC_IOMAP.

flip around the #ifdef sections to clean up the structure.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig           |  4 ++++
 include/linux/io-mapping.h | 43 +++++++++++++++++++++++++------------------
 2 files changed, 29 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6f20718d3156..e60c59b81bdd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1894,6 +1894,10 @@ config SYSVIPC_COMPAT
 endmenu
 
 
+config HAVE_ATOMIC_IOMAP
+	def_bool y
+	depends on X86_32
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 1b566993db6e..82df31726a54 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -33,86 +33,93 @@
 /* this struct isn't actually defined anywhere */
 struct io_mapping;
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_HAVE_ATOMIC_IOMAP
+
+/*
+ * For small address space machines, mapping large objects
+ * into the kernel virtual space isn't practical. Where
+ * available, use fixmap support to dynamically map pages
+ * of the object at run time.
+ */
 
-/* Create the io_mapping object*/
 static inline struct io_mapping *
 io_mapping_create_wc(unsigned long base, unsigned long size)
 {
-	return (struct io_mapping *) ioremap_wc(base, size);
+	return (struct io_mapping *) base;
 }
 
 static inline void
 io_mapping_free(struct io_mapping *mapping)
 {
-	iounmap(mapping);
 }
 
 /* Atomic map/unmap */
 static inline void *
 io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset)
 {
-	return ((char *) mapping) + offset;
+	offset += (unsigned long) mapping;
+	return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0,
+				     __pgprot(__PAGE_KERNEL_WC));
 }
 
 static inline void
 io_mapping_unmap_atomic(void *vaddr)
 {
+	iounmap_atomic(vaddr, KM_USER0);
 }
 
-/* Non-atomic map/unmap */
 static inline void *
 io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset)
 {
-	return ((char *) mapping) + offset;
+	offset += (unsigned long) mapping;
+	return ioremap_wc(offset, PAGE_SIZE);
 }
 
 static inline void
 io_mapping_unmap(void *vaddr)
 {
+	iounmap(vaddr);
 }
 
-#endif /* CONFIG_X86_64 */
+#else
 
-#ifdef CONFIG_X86_32
+/* Create the io_mapping object*/
 static inline struct io_mapping *
 io_mapping_create_wc(unsigned long base, unsigned long size)
 {
-	return (struct io_mapping *) base;
+	return (struct io_mapping *) ioremap_wc(base, size);
 }
 
 static inline void
 io_mapping_free(struct io_mapping *mapping)
 {
+	iounmap(mapping);
 }
 
 /* Atomic map/unmap */
 static inline void *
 io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset)
 {
-	offset += (unsigned long) mapping;
-	return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0,
-				     __pgprot(__PAGE_KERNEL_WC));
+	return ((char *) mapping) + offset;
 }
 
 static inline void
 io_mapping_unmap_atomic(void *vaddr)
 {
-	iounmap_atomic(vaddr, KM_USER0);
 }
 
+/* Non-atomic map/unmap */
 static inline void *
 io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset)
 {
-	offset += (unsigned long) mapping;
-	return ioremap_wc(offset, PAGE_SIZE);
+	return ((char *) mapping) + offset;
 }
 
 static inline void
 io_mapping_unmap(void *vaddr)
 {
-	iounmap(vaddr);
 }
-#endif /* CONFIG_X86_32 */
+
+#endif /* HAVE_ATOMIC_IOMAP */
 
 #endif /* _LINUX_IO_MAPPING_H */
-- 
cgit v1.2.3


From 70de9a97049e0ba79dc040868564408d5ce697f9 Mon Sep 17 00:00:00 2001
From: Alok Kataria <akataria@vmware.com>
Date: Mon, 3 Nov 2008 11:18:47 -0800
Subject: x86: don't use tsc_khz to calculate lpj if notsc is passed

Impact: fix udelay when "notsc" boot parameter is passed

With notsc passed on commandline, tsc may not be used for
udelays, make sure that we do not use tsc_khz to calculate
the lpj value in such cases.

Reported-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Alok N Kataria <akataria@vmware.com>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/tsc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 62348e4fd8d1..2ef80e301925 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -813,10 +813,6 @@ void __init tsc_init(void)
 		cpu_khz = calibrate_cpu();
 #endif
 
-	lpj = ((u64)tsc_khz * 1000);
-	do_div(lpj, HZ);
-	lpj_fine = lpj;
-
 	printk("Detected %lu.%03lu MHz processor.\n",
 			(unsigned long)cpu_khz / 1000,
 			(unsigned long)cpu_khz % 1000);
@@ -836,6 +832,10 @@ void __init tsc_init(void)
 	/* now allow native_sched_clock() to use rdtsc */
 	tsc_disabled = 0;
 
+	lpj = ((u64)tsc_khz * 1000);
+	do_div(lpj, HZ);
+	lpj_fine = lpj;
+
 	use_tsc_delay();
 	/* Check and install the TSC clocksource */
 	dmi_check_system(bad_tsc_dmi_table);
-- 
cgit v1.2.3


From 9fcd18c9e63e325dbd2b4c726623f760788d5aa8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 5 Nov 2008 16:52:08 +0100
Subject: sched: re-tune balancing

Impact: improve wakeup affinity on NUMA systems, tweak SMP systems

Given the fixes+tweaks to the wakeup-buddy code, re-tweak the domain
balancing defaults on NUMA and SMP systems.

Turn on SD_WAKE_AFFINE which was off on x86 NUMA - there's no reason
why we would not want to have wakeup affinity across nodes as well.
(we already do this in the standard NUMA template.)

lat_ctx on a NUMA box is particularly happy about this change:

before:

 |   phoenix:~/l> ./lat_ctx -s 0 2
 |   "size=0k ovr=2.60
 |   2 5.70

after:

 |   phoenix:~/l> ./lat_ctx -s 0 2
 |   "size=0k ovr=2.65
 |   2 2.07

a 2.75x speedup.

pipe-test is similarly happy about it too:

 |  phoenix:~/sched-tests> ./pipe-test
 |   18.26 usecs/loop.
 |   14.70 usecs/loop.
 |   14.38 usecs/loop.
 |   10.55 usecs/loop.              # +WAKE_AFFINE on domain0+domain1
 |   8.63 usecs/loop.
 |   8.59 usecs/loop.
 |   9.03 usecs/loop.
 |   8.94 usecs/loop.
 |   8.96 usecs/loop.
 |   8.63 usecs/loop.

Also:

 - disable SD_BALANCE_NEWIDLE on NUMA and SMP domains (keep it for siblings)
 - enable SD_WAKE_BALANCE on SMP domains

Sysbench+postgresql improves all around the board, quite significantly:

           .28-rc3-11474e2c  .28-rc3-11474e2c-tune
-------------------------------------------------
    1:             571              688    +17.08%
    2:            1236             1206    -2.55%
    4:            2381             2642    +9.89%
    8:            4958             5164    +3.99%
   16:            9580             9574    -0.07%
   32:            7128             8118    +12.20%
   64:            7342             8266    +11.18%
  128:            7342             8064    +8.95%
  256:            7519             7884    +4.62%
  512:            7350             7731    +4.93%
-------------------------------------------------
  SUM:           55412            59341    +6.62%

So it's a win both for the runup portion, the peak area and the tail.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/topology.h | 7 ++++---
 include/linux/topology.h        | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 90ac7718469a..4850e4b02b61 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -154,7 +154,7 @@ extern unsigned long node_remap_size[];
 
 #endif
 
-/* sched_domains SD_NODE_INIT for NUMAQ machines */
+/* sched_domains SD_NODE_INIT for NUMA machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
 	.min_interval		= 8,			\
 	.max_interval		= 32,			\
@@ -169,8 +169,9 @@ extern unsigned long node_remap_size[];
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_EXEC	\
 				| SD_BALANCE_FORK	\
-				| SD_SERIALIZE		\
-				| SD_WAKE_BALANCE,	\
+				| SD_WAKE_AFFINE	\
+				| SD_WAKE_BALANCE	\
+				| SD_SERIALIZE,		\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 }
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 2158fc0d5a56..34a7ee0ebed2 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -146,10 +146,10 @@ void arch_update_cpu_topology(void);
 	.wake_idx		= 1,			\
 	.forkexec_idx		= 1,			\
 	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_NEWIDLE	\
-				| SD_BALANCE_FORK	\
 				| SD_BALANCE_EXEC	\
+				| SD_BALANCE_FORK	\
 				| SD_WAKE_AFFINE	\
+				| SD_WAKE_BALANCE	\
 				| BALANCE_FOR_PKG_POWER,\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
-- 
cgit v1.2.3


From c78d0cf2925bffae8a6f00e7d9b8e971b0392edd Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 5 Nov 2008 12:04:46 +0000
Subject: x86: don't allow nr_irqs > NR_IRQS

Impact: fix boot hang on 32-bit systems with more than 224 IO-APIC pins

On some 32-bit systems with a lot of IO-APICs probe_nr_irqs() can
return a value larger than NR_IRQS. This will lead to probe_irq_on()
overrunning the irq_desc array.

I hit this when running net-next-2.6 (close to 2.6.28-rc3) on a
Supermicro dual Xeon system.  NR_IRQS is 224 but probe_nr_irqs() detects
5 IOAPICs and returns 240.  Here are the log messages:

Tue Nov  4 16:53:47 2008 ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0])
Tue Nov  4 16:53:47 2008 IOAPIC[0]: apic_id 1, version 32, address 0xfec00000, GSI 0-23
Tue Nov  4 16:53:47 2008 ACPI: IOAPIC (id[0x02] address[0xfec81000] gsi_base[24])
Tue Nov  4 16:53:47 2008 IOAPIC[1]: apic_id 2, version 32, address 0xfec81000, GSI 24-47
Tue Nov  4 16:53:47 2008 ACPI: IOAPIC (id[0x03] address[0xfec81400] gsi_base[48])
Tue Nov  4 16:53:47 2008 IOAPIC[2]: apic_id 3, version 32, address 0xfec81400, GSI 48-71
Tue Nov  4 16:53:47 2008 ACPI: IOAPIC (id[0x04] address[0xfec82000] gsi_base[72])
Tue Nov  4 16:53:47 2008 IOAPIC[3]: apic_id 4, version 32, address 0xfec82000, GSI 72-95
Tue Nov  4 16:53:47 2008 ACPI: IOAPIC (id[0x05] address[0xfec82400] gsi_base[96])
Tue Nov  4 16:53:47 2008 IOAPIC[4]: apic_id 5, version 32, address 0xfec82400, GSI 96-119
Tue Nov  4 16:53:47 2008 ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 high edge)
Tue Nov  4 16:53:47 2008 ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
Tue Nov  4 16:53:47 2008 Enabling APIC mode:  Flat.  Using 5 I/O APICs

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index b764d7429c61..7a3f2028e2eb 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3611,6 +3611,8 @@ int __init probe_nr_irqs(void)
 	/* something wrong ? */
 	if (nr < nr_min)
 		nr = nr_min;
+	if (WARN_ON(nr > NR_IRQS))
+		nr = NR_IRQS;
 
 	return nr;
 }
-- 
cgit v1.2.3


From 1b4897688011cd05e07f00dcfe6af3331eb36a3c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 4 Nov 2008 14:10:13 -0800
Subject: x86: size NR_IRQS on 32-bit systems the same way as 64-bit

Impact: make NR_IRQS big enough for system with lots of apic/pins

If lots of IO_APIC's are there (or can be there), size the same way
as 64-bit, depending on MAX_IO_APICS and NR_CPUS.

This fixes the boot problem reported by Ben Hutchings on a 32-bit
server with 5 IO-APICs and 240 IO-APIC pins.

Signed-off-by: Yinghai <yinghai@kernel.org>
Tested-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/irq_vectors.h | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index d843ed0e9b2e..503aadc4ad35 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -101,30 +101,22 @@
 #define LAST_VM86_IRQ		15
 #define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
 
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_PARAVIRT) && !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER)
 # if NR_CPUS < MAX_IO_APICS
 #  define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
 # else
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
 
-#elif !defined(CONFIG_X86_VOYAGER)
+#elif defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) || defined(CONFIG_X86_VOYAGER)
 
-# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS)
-
-#  define NR_IRQS		224
-
-# else /* IO_APIC || PARAVIRT */
-
-#  define NR_IRQS		16
-
-# endif
+# define NR_IRQS		224
 
-#else /* !VISWS && !VOYAGER */
+#else /* IO_APIC || PARAVIRT */
 
-# define NR_IRQS		224
+# define NR_IRQS		16
 
-#endif /* VISWS */
+#endif
 
 /* Voyager specific defines */
 /* These define the CPIs we use in linux */
-- 
cgit v1.2.3


From da85f865b1dcec0853c48b763ed312441ce0c7df Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Wed, 5 Nov 2008 13:37:27 -0600
Subject: x86: mention ACPI in top-level Kconfig menu

Impact: clarify menuconfig text

Mention ACPI in the top-level menu to give a clue as to where
it lives. This matches what ia64 does.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6f20718d3156..5d6aa4013dc7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1494,7 +1494,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
 	def_bool X86_64
 	depends on NUMA
 
-menu "Power management options"
+menu "Power management and ACPI options"
 	depends on !X86_VOYAGER
 
 config ARCH_HIBERNATION_HEADER
-- 
cgit v1.2.3


From 7db282fa67b58daff8a57f9e1c93d4474b5908ff Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 5 Nov 2008 23:36:48 -0800
Subject: x86: remove VISWS and PARAVIRT around NR_IRQS puzzle

Impact: fix warning message when PARAVIRT is set in config

Remove stale #ifdef components from our IRQ sizing logic.
x86/Voyager is the only holdout.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/irq_vectors.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 503aadc4ad35..0005adb0f941 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -101,18 +101,18 @@
 #define LAST_VM86_IRQ		15
 #define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
 
-#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_PARAVIRT) && !defined(CONFIG_X86_VISWS) && !defined(CONFIG_X86_VOYAGER)
+#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
 # if NR_CPUS < MAX_IO_APICS
 #  define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
 # else
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
 
-#elif defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) || defined(CONFIG_X86_VOYAGER)
+#elif defined(CONFIG_X86_VOYAGER)
 
 # define NR_IRQS		224
 
-#else /* IO_APIC || PARAVIRT */
+#else /* IO_APIC || VOYAGER */
 
 # define NR_IRQS		16
 
-- 
cgit v1.2.3


From d6f0f39b7d05e62b347c4352d070e4afb3ade4b5 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 4 Nov 2008 13:53:04 -0800
Subject: x86: add smp_mb() before sending INVALIDATE_TLB_VECTOR

Impact: fix rare x2apic hang

On x86, x2apic mode accesses for sending IPI's don't have serializing
semantics. If the IPI receivner refers(in lock-free fashion) to some
memory setup by the sender, the need for smp_mb() before sending the
IPI becomes critical in x2apic mode.

Add the smp_mb() in native_flush_tlb_others() before sending the IPI.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/tlb_32.c | 6 ++++++
 arch/x86/kernel/tlb_64.c | 5 +++++
 2 files changed, 11 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index e00534b33534..f4049f3513b6 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -154,6 +154,12 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	flush_mm = mm;
 	flush_va = va;
 	cpus_or(flush_cpumask, cpumask, flush_cpumask);
+
+	/*
+	 * Make the above memory operations globally visible before
+	 * sending the IPI.
+	 */
+	smp_mb();
 	/*
 	 * We have to send the IPI only to
 	 * CPUs affected.
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index dcbf7a1159ea..8f919ca69494 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -182,6 +182,11 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	f->flush_va = va;
 	cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
 
+	/*
+	 * Make the above memory operations globally visible before
+	 * sending the IPI.
+	 */
+	smp_mb();
 	/*
 	 * We have to send the IPI only to
 	 * CPUs affected.
-- 
cgit v1.2.3


From 80be308dfa3798c7bad0fc81760b2faf83870e91 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 6 Nov 2008 14:59:05 +0100
Subject: AMD IOMMU: fix lazy IO/TLB flushing in unmap path

Lazy flushing needs to take care of the unmap path too which is not yet
implemented and leads to stale IO/TLB entries. This is fixed by this
patch.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 38e88d40ab10..4755bbc7ae5b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -526,6 +526,9 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 {
 	address >>= PAGE_SHIFT;
 	iommu_area_free(dom->bitmap, address, pages);
+
+	if (address + pages >= dom->next_bit)
+		dom->need_flush = true;
 }
 
 /****************************************************************************
@@ -981,8 +984,10 @@ static void __unmap_single(struct amd_iommu *iommu,
 
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
-	if (amd_iommu_unmap_flush)
+	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
 		iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
+		dma_dom->need_flush = false;
+	}
 }
 
 /*
-- 
cgit v1.2.3


From b9c3bfc24e1088d260de4091b2b41808c7398355 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Thu, 6 Nov 2008 12:05:40 +0000
Subject: x86: align DirectMap in /proc/meminfo

Impact: right-align /proc/meminfo consistent with other fields

When the split-LRU patches added Inactive(anon) and Inactive(file) lines
to /proc/meminfo, all counts were moved two columns rightwards to fit in.
Now move x86's DirectMap lines two columns rightwards to line up.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f1dc1b75d166..e89d24815f26 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -67,18 +67,18 @@ static void split_page_count(int level)
 
 void arch_report_meminfo(struct seq_file *m)
 {
-	seq_printf(m, "DirectMap4k:  %8lu kB\n",
+	seq_printf(m, "DirectMap4k:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_4K] << 2);
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-	seq_printf(m, "DirectMap2M:  %8lu kB\n",
+	seq_printf(m, "DirectMap2M:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_2M] << 11);
 #else
-	seq_printf(m, "DirectMap4M:  %8lu kB\n",
+	seq_printf(m, "DirectMap4M:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_2M] << 12);
 #endif
 #ifdef CONFIG_X86_64
 	if (direct_gbpages)
-		seq_printf(m, "DirectMap1G:  %8lu kB\n",
+		seq_printf(m, "DirectMap1G:    %8lu kB\n",
 			direct_pages_count[PG_LEVEL_1G] << 20);
 #endif
 }
-- 
cgit v1.2.3


From 8d00450d296dedec9ada38d43b83e79cca6fd5a3 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Tue, 4 Nov 2008 12:52:44 -0200
Subject: Revert "x86: default to reboot via ACPI"

This reverts commit c7ffa6c26277b403920e2255d10df849bd613380.

the assumptio of this change was that this would not break
any existing machine. Andrey Borzenkov reported troubles with
the ACPI reboot method: the system would hang on reboot, necessiating
a power cycle. Probably more systems are affected as well.

Also, there are patches queued up for v2.6.29 to disable virtualization
on emergency_restart() - which was the original motivation of
this change.

Reported-by: Andrey Borzenkov <arvidjaar@mail.ru>
Bisected-by: Andrey Borzenkov <arvidjaar@mail.ru>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
Acked-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/reboot.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f4c93f1cfc19..724adfc63cb9 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -29,11 +29,7 @@ EXPORT_SYMBOL(pm_power_off);
 
 static const struct desc_ptr no_idt = {};
 static int reboot_mode;
-/*
- * Keyboard reset and triple fault may result in INIT, not RESET, which
- * doesn't work when we're in vmx root mode.  Try ACPI first.
- */
-enum reboot_type reboot_type = BOOT_ACPI;
+enum reboot_type reboot_type = BOOT_KBD;
 int reboot_force;
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
-- 
cgit v1.2.3


From 47cb2ed9df2789fc4a3fe1201e475078f93c4839 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 6 Nov 2008 13:48:24 -0800
Subject: x86, xen: fix use of pgd_page now that it really does return a page

Impact: fix 32-bit Xen guest boot crash

On 32-bit PAE, pud_page, for no good reason, didn't really return a
struct page *.  Since Jan Beulich's fix "i386/PAE: fix pud_page()",
pud_page does return a struct page *.

Because PAE has 3 pagetable levels, the pud level is folded into the
pgd level, so pgd_page() is the same as pud_page(), and now returns
a struct page *.  Update the xen/mmu.c code which uses pgd_page()
accordingly.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index aba77b2b7d18..49697d86c6a5 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -874,7 +874,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 #else /* CONFIG_X86_32 */
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is pinnable */
-	xen_pin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+	xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
 		     PT_PMD);
 #endif
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
@@ -991,7 +991,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
 
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
-	xen_unpin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+	xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
 		       PT_PMD);
 #endif
 
-- 
cgit v1.2.3


From d05fdf316067cd311d5e7add08da26ded8a58080 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 28 Oct 2008 19:23:06 +1100
Subject: xen: make sure stray alias mappings are gone before pinning

Xen requires that all mappings of pagetable pages are read-only, so
that they can't be updated illegally.  As a result, if a page is being
turned into a pagetable page, we need to make sure all its mappings
are RO.

If the page had been used for ioremap or vmalloc, it may still have
left over mappings as a result of not having been lazily unmapped.
This change makes sure we explicitly mop them all up before pinning
the page.

Unlike aliases created by kmap, the there can be vmalloc aliases even
for non-high pages, so we must do the flush unconditionally.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Linux Memory Management List <linux-mm@kvack.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/enlighten.c | 5 +++--
 arch/x86/xen/mmu.c       | 9 ++++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b61534c7a4c4..5e4686d70f62 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -863,15 +863,16 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
 	if (PagePinned(virt_to_page(mm->pgd))) {
 		SetPagePinned(page);
 
+		vm_unmap_aliases();
 		if (!PageHighMem(page)) {
 			make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
 			if (level == PT_PTE && USE_SPLIT_PTLOCKS)
 				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
-		} else
+		} else {
 			/* make sure there are no stray mappings of
 			   this page */
 			kmap_flush_unused();
-			vm_unmap_aliases();
+		}
 	}
 }
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index aba77b2b7d18..89f3b6edc65a 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -850,13 +850,16 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
    read-only, and can be pinned. */
 static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 {
+	vm_unmap_aliases();
+
 	xen_mc_batch();
 
-	if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
-		/* re-enable interrupts for kmap_flush_unused */
+	 if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
+		/* re-enable interrupts for flushing */
 		xen_mc_issue(0);
+
 		kmap_flush_unused();
-		vm_unmap_aliases();
+
 		xen_mc_batch();
 	}
 
-- 
cgit v1.2.3


From 7c64ade53a6f977d73f16243865c42ceae999aea Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 7 Nov 2008 14:02:49 +0100
Subject: oprofile: Fix p6 counter overflow check

Fix the counter overflow check for CPUs with counter width > 32

I had a similar change in a different patch that I didn't submit
and I didn't notice the problem earlier because it was always
tested together.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_ppro.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 0620d6d45f7d..3f1b81a83e2e 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -27,8 +27,7 @@ static int num_counters = 2;
 static int counter_width = 32;
 
 #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1))))
+#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
 
 #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
 #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
@@ -124,14 +123,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 static int ppro_check_ctrs(struct pt_regs * const regs,
 			   struct op_msrs const * const msrs)
 {
-	unsigned int low, high;
+	u64 val;
 	int i;
 
 	for (i = 0 ; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
-		CTR_READ(low, high, msrs, i);
-		if (CTR_OVERFLOWED(low)) {
+		rdmsrl(msrs->counters[i].addr, val);
+		if (CTR_OVERFLOWED(val)) {
 			oprofile_add_sample(regs, i);
 			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 		}
-- 
cgit v1.2.3


From 0d12cdd5f883f508d33b85c1bae98fa28987c8c7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 8 Nov 2008 16:19:55 +0100
Subject: sched: improve sched_clock() performance

in scheduler-intense workloads native_read_tsc() overhead accounts for
20% of the system overhead:

 659567 system_call                              41222.9375
 686796 schedule                                 435.7843
 718382 __switch_to                              665.1685
 823875 switch_mm                                4526.7857
 1883122 native_read_tsc                          55385.9412
 9761990 total                                      2.8468

this is large part due to the rdtsc_barrier() that is done before
and after reading the TSC.

But sched_clock() is not a precise clock in the GTOD sense, using such
barriers is completely pointless. So remove the barriers and only use
them in vget_cycles().

This improves lat_ctx performance by about 5%.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/msr.h | 2 --
 arch/x86/include/asm/tsc.h | 8 +++++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 46be2fa7ac26..c2a812ebde89 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -108,9 +108,7 @@ static __always_inline unsigned long long __native_read_tsc(void)
 {
 	DECLARE_ARGS(val, low, high);
 
-	rdtsc_barrier();
 	asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
-	rdtsc_barrier();
 
 	return EAX_EDX_VAL(val, low, high);
 }
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 38ae163cc91b..9cd83a8e40d5 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -34,6 +34,8 @@ static inline cycles_t get_cycles(void)
 
 static __always_inline cycles_t vget_cycles(void)
 {
+	cycles_t cycles;
+
 	/*
 	 * We only do VDSOs on TSC capable CPUs, so this shouldnt
 	 * access boot_cpu_data (which is not VDSO-safe):
@@ -42,7 +44,11 @@ static __always_inline cycles_t vget_cycles(void)
 	if (!cpu_has_tsc)
 		return 0;
 #endif
-	return (cycles_t)__native_read_tsc();
+	rdtsc_barrier();
+	cycles = (cycles_t)__native_read_tsc();
+	rdtsc_barrier();
+
+	return cycles;
 }
 
 extern void tsc_init(void);
-- 
cgit v1.2.3


From 7cbaef9c83e58bbd4bdd534b09052b6c5ec457d5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 8 Nov 2008 17:05:38 +0100
Subject: sched: optimize sched_clock() a bit

sched_clock() uses cycles_2_ns() needlessly - which is an irq-disabling
variant of __cycles_2_ns().

Most of the time sched_clock() is called with irqs disabled already.
The few places that call it with irqs enabled need to be updated.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/tsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 2ef80e301925..424093b157d3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -55,7 +55,7 @@ u64 native_sched_clock(void)
 	rdtscll(this_offset);
 
 	/* return the value in ns */
-	return cycles_2_ns(this_offset);
+	return __cycles_2_ns(this_offset);
 }
 
 /* We need to define a real function for sched_clock, to override the
-- 
cgit v1.2.3


From 3044646148cdfa83a311bf1c146a70e550280159 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 9 Nov 2008 10:07:58 -0800
Subject: x86: move iomap.h to the new include location

a new file was accidentally added to include/asm-x86;
move it to the new arch/x86/include/asm location

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
---
 arch/x86/include/asm/iomap.h | 30 ++++++++++++++++++++++++++++++
 include/asm-x86/iomap.h      | 30 ------------------------------
 2 files changed, 30 insertions(+), 30 deletions(-)
 create mode 100644 arch/x86/include/asm/iomap.h
 delete mode 100644 include/asm-x86/iomap.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
new file mode 100644
index 000000000000..c1f06289b14b
--- /dev/null
+++ b/arch/x86/include/asm/iomap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2008 Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+void *
+iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+
+void
+iounmap_atomic(void *kvaddr, enum km_type type);
diff --git a/include/asm-x86/iomap.h b/include/asm-x86/iomap.h
deleted file mode 100644
index c1f06289b14b..000000000000
--- a/include/asm-x86/iomap.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright © 2008 Ingo Molnar
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-
-void *
-iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
-
-void
-iounmap_atomic(void *kvaddr, enum km_type type);
-- 
cgit v1.2.3


From 1de5b0854623d30d01d72cd4ea323eb5f39d1f16 Mon Sep 17 00:00:00 2001
From: Matt Fleming <mjf@gentoo.org>
Date: Sun, 2 Nov 2008 16:04:18 +0000
Subject: x86: HPET: convert WARN_ON to WARN_ON_ONCE

It is possible to flood the console with call traces if the WARN_ON
condition is true because of the frequency with which this function is
called.

Signed-off-by: Matt Fleming <mjf@gentoo.org>
Cc: mingo@elte.hu
Cc: venkatesh.pallipadi@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 77017e834cf7..f10f9461a43d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta,
 	 * what we wrote hit the chip before we compare it to the
 	 * counter.
 	 */
-	WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt);
+	WARN_ON_ONCE((u32)hpet_readl(HPET_T0_CMP) != cnt);
 
 	return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
 }
-- 
cgit v1.2.3


From 89d77a1eb60be916d85d9394bedbfa2037af89c5 Mon Sep 17 00:00:00 2001
From: Matt Fleming <mjf@gentoo.org>
Date: Sun, 2 Nov 2008 16:04:20 +0000
Subject: x86: HPET: read from HPET_Tn_CMP() not HPET_T0_CMP

In hpet_next_event() we check that the value we just wrote to
HPET_Tn_CMP(timer) has reached the chip. Currently, we're checking that
the value we wrote to HPET_Tn_CMP(timer) is in HPET_T0_CMP, which, if
timer is anything other than timer 0, is likely to fail.

Signed-off-by: Matt Fleming <mjf@gentoo.org>
Cc: mingo@elte.hu
Cc: venkatesh.pallipadi@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index f10f9461a43d..cfe6aa56f71b 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta,
 	 * what we wrote hit the chip before we compare it to the
 	 * counter.
 	 */
-	WARN_ON_ONCE((u32)hpet_readl(HPET_T0_CMP) != cnt);
+	WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt);
 
 	return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
 }
-- 
cgit v1.2.3


From 5ceb1a04187553e08c6ab60d30cee7c454ee139a Mon Sep 17 00:00:00 2001
From: Matt Fleming <mjf@gentoo.org>
Date: Sun, 2 Nov 2008 22:23:13 +0000
Subject: x86: HPET: enter hpet_interrupt_handler with interrupts disabled

Some functions that may be called from this handler require that
interrupts are disabled. Also, combining IRQF_DISABLED and
IRQF_SHARED does not reliably disable interrupts in a handler, so
remove IRQF_SHARED from the irq flags (this irq is not shared anyway).

Signed-off-by: Matt Fleming <mjf@gentoo.org>
Cc: mingo@elte.hu
Cc: venkatesh.pallipadi@intel.com
Cc: "Will Newton" <will.newton@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index cfe6aa56f71b..067d8de913f6 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -445,7 +445,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
 {
 
 	if (request_irq(dev->irq, hpet_interrupt_handler,
-			IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev))
+			IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev))
 		return -1;
 
 	disable_irq(dev->irq);
-- 
cgit v1.2.3


From 4694516d1987303dd83bfd0efdd36fa5b65d701b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 10 Nov 2008 21:52:47 +0100
Subject: x86: Make NUMA on 32-bit depend on BROKEN

While investigating the failure of hibernation on 32-bit x86 with
CONFIG_NUMA set, as described in this message
http://marc.info/?l=linux-kernel&m=122634118116226&w=4
I asked some people for help and I was told that it wasn't really
worth the effort, because CONFIG_NUMA was generally broken on 32-bit
x86 systems and it shouldn't be used in such configs.  For this
reason, make CONFIG_NUMA depend on BROKEN instead of EXPERIMENTAL on
x86-32.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Pavel Machek <pavel@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4cf0ab13d187..93224b569187 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -957,7 +957,7 @@ config ARCH_PHYS_ADDR_T_64BIT
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
 	depends on SMP
-	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
+	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && BROKEN)
 	default n if X86_PC
 	default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
 	help
-- 
cgit v1.2.3


From 6cd10f8db385ba547811baa5b26f672fdff232e6 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sun, 9 Nov 2008 11:53:14 -0600
Subject: x86, voyager: fix smp generic helper voyager breakage

Impact: build/boot fix for x86/Voyager

This change:

| commit 3d4422332711ef48ef0f132f1fcbfcbd56c7f3d1
| Author: Jens Axboe <jens.axboe@oracle.com>
| Date:   Thu Jun 26 11:21:34 2008 +0200
|
|     Add generic helpers for arch IPI function calls

didn't wire up the voyager smp call function correctly, so do that
here.  Also make CONFIG_USE_GENERIC_SMP_HELPERS a def_bool y again,
since we now use the generic helpers for every x86 architecture.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Jens Axboe <Jens.Axboe@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                    |  5 ++++-
 arch/x86/mach-voyager/voyager_smp.c | 16 ++++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4cf0ab13d187..ac22bb7719f7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -167,9 +167,12 @@ config GENERIC_PENDING_IRQ
 config X86_SMP
 	bool
 	depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
-	select USE_GENERIC_SMP_HELPERS
 	default y
 
+config USE_GENERIC_SMP_HELPERS
+	def_bool y
+	depends on SMP
+
 config X86_32_SMP
 	def_bool y
 	depends on X86_32 && SMP
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 0e331652681e..52145007bd7e 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -7,6 +7,7 @@
  * This file provides all the same external entries as smp.c but uses
  * the voyager hal to provide the functionality
  */
+#include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
@@ -1790,6 +1791,17 @@ void __init smp_setup_processor_id(void)
 	x86_write_percpu(cpu_number, hard_smp_processor_id());
 }
 
+static void voyager_send_call_func(cpumask_t callmask)
+{
+	__u32 mask = cpus_addr(callmask)[0] & ~(1 << smp_processor_id());
+	send_CPI(mask, VIC_CALL_FUNCTION_CPI);
+}
+
+static void voyager_send_call_func_single(int cpu)
+{
+	send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI);
+}
+
 struct smp_ops smp_ops = {
 	.smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu,
 	.smp_prepare_cpus = voyager_smp_prepare_cpus,
@@ -1799,6 +1811,6 @@ struct smp_ops smp_ops = {
 	.smp_send_stop = voyager_smp_send_stop,
 	.smp_send_reschedule = voyager_smp_send_reschedule,
 
-	.send_call_func_ipi = native_send_call_func_ipi,
-	.send_call_func_single_ipi = native_send_call_func_single_ipi,
+	.send_call_func_ipi = voyager_send_call_func,
+	.send_call_func_single_ipi = voyager_send_call_func_single,
 };
-- 
cgit v1.2.3


From c41ef344de212bd918f7765af21b5008628c03e0 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 28 Oct 2008 18:16:58 -0200
Subject: KVM: MMU: increase per-vcpu rmap cache alloc size

The page fault path can use two rmap_desc structures, if:

- walk_addr's dirty pte update allocates one rmap_desc.
- mmu_lock is dropped, sptes are zapped resulting in rmap_desc being
freed.
- fetch->mmu_set_spte allocates another rmap_desc.

Increase to 4 for safety.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2a5e64881d9b..f1983d9477cd 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -314,7 +314,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 	if (r)
 		goto out;
 	r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
-				   rmap_desc_cache, 1);
+				   rmap_desc_cache, 4);
 	if (r)
 		goto out;
 	r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
-- 
cgit v1.2.3


From a29a2af378f3f6362b68e126e2541c8bde885ead Mon Sep 17 00:00:00 2001
From: Rakib Mullick <rakib.mullick@gmail.com>
Date: Wed, 29 Oct 2008 14:13:39 -0700
Subject: x86: KVM guest: fix section mismatch warning in kvmclock.c

WARNING: arch/x86/kernel/built-in.o(.text+0x1722c): Section mismatch
in reference from the function kvm_setup_secondary_clock() to the
function .devinit.text:setup_secondary_APIC_clock()
The function kvm_setup_secondary_clock() references
the function __devinit setup_secondary_APIC_clock().
This is often because kvm_setup_secondary_clock lacks a __devinit
annotation or the annotation of setup_secondary_APIC_clock is wrong.

Signed-off-by: Md.Rakib H. Mullick <rakib.mullick@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kernel/kvmclock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 774ac4991568..1c9cc431ea4f 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt)
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-static void kvm_setup_secondary_clock(void)
+static void __devinit kvm_setup_secondary_clock(void)
 {
 	/*
 	 * Now that the first cpu already had this clocksource initialized,
-- 
cgit v1.2.3


From ca93e992fdfdc6569ac2845d7560eeb5de4a4e0b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 4 Nov 2008 11:25:17 +0200
Subject: KVM: Require the PCI subsystem

PCI device assignment makes calls to pci code, so require it to be built
into the kernel.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/ia64/kvm/Kconfig | 2 ++
 arch/x86/kvm/Kconfig  | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 8e99fed6b3fd..f833a0b4188d 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -20,6 +20,8 @@ if VIRTUALIZATION
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM && EXPERIMENTAL
+	# for device assignment:
+	depends on PCI
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	---help---
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ce3251ce5504..b81125f0bdee 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -20,6 +20,8 @@ if VIRTUALIZATION
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
+	# for device assignment:
+	depends on PCI
 	select PREEMPT_NOTIFIERS
 	select MMU_NOTIFIER
 	select ANON_INODES
-- 
cgit v1.2.3


From 928d4bf747e9c290b690ff515d8f81e8ee226d97 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 6 Nov 2008 14:55:45 +0800
Subject: KVM: VMX: Set IGMT bit in EPT entry

There is a potential issue that, when guest using pagetable without vmexit when
EPT enabled, guest would use PAT/PCD/PWT bits to index PAT msr for it's memory,
which would be inconsistent with host side and would cause host MCE due to
inconsistent cache attribute.

The patch set IGMT bit in EPT entry to ignore guest PAT and use WB as default
memory type to protect host (notice that all memory mapped by KVM should be WB).

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 3 ++-
 arch/x86/kvm/vmx.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2643b430d83a..d06b4dc0e2ea 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3564,7 +3564,8 @@ static int __init vmx_init(void)
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
 			VMX_EPT_WRITABLE_MASK |
-			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
+			VMX_EPT_IGMT_BIT);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
 				VMX_EPT_EXECUTABLE_MASK);
 		kvm_enable_tdp();
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 3e010d21fdd7..ec5edc339da6 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -352,6 +352,7 @@ enum vmcs_field {
 #define VMX_EPT_READABLE_MASK			0x1ull
 #define VMX_EPT_WRITABLE_MASK			0x2ull
 #define VMX_EPT_EXECUTABLE_MASK			0x4ull
+#define VMX_EPT_IGMT_BIT    			(1ull << 6)
 
 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
 
-- 
cgit v1.2.3


From e17d1dc0863767bab8fde4ba9be92c7f79e7fe50 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 11 Nov 2008 13:09:36 +0200
Subject: KVM: Fix pit memory leak if unable to allocate irq source id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported-By: Daniel Marjamäki <danielm77@spray.se>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/i8254.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 8772dc946823..59ebd37ad79e 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -548,8 +548,10 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 	mutex_lock(&kvm->lock);
 	pit->irq_source_id = kvm_request_irq_source_id(kvm);
 	mutex_unlock(&kvm->lock);
-	if (pit->irq_source_id < 0)
+	if (pit->irq_source_id < 0) {
+		kfree(pit);
 		return NULL;
+	}
 
 	mutex_init(&pit->pit_state.lock);
 	mutex_lock(&pit->pit_state.lock);
-- 
cgit v1.2.3


From 32836259ff25ce97010569706cd33ba94de81d62 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Wed, 5 Nov 2008 16:17:52 -0700
Subject: ACPI: pci_link: remove acpi_irq_balance_set() interface

This removes the acpi_irq_balance_set() interface from the PCI
interrupt link driver.

x86 used acpi_irq_balance_set() to tell the PCI interrupt link
driver to configure links to minimize IRQ sharing.  But the link
driver can easily figure out whether to turn on IRQ balancing
based on the IRQ model (PIC/IOAPIC/etc), so we can get rid of
that external interface.

It's better for the driver to figure this out at init-time.  If
we set it externally via the x86 code, the interface reduces
modularity, and we depend on the fact that acpi_process_madt()
happens before we process the kernel command line.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/include/asm/acpi.h |  1 -
 arch/x86/kernel/acpi/boot.c |  1 -
 drivers/acpi/pci_link.c     | 11 +++++++++--
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 8d676d8ecde9..9830681446ad 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -113,7 +113,6 @@ static inline void acpi_disable_pci(void)
 	acpi_pci_disabled = 1;
 	acpi_noirq_set();
 }
-extern int acpi_irq_balance_set(char *str);
 
 /* routines for saving/restoring kernel state */
 extern int acpi_save_state_mem(void);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c1f76abae9e..4c51a2f8fd31 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1343,7 +1343,6 @@ static void __init acpi_process_madt(void)
 			error = acpi_parse_madt_ioapic_entries();
 			if (!error) {
 				acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
-				acpi_irq_balance_set(NULL);
 				acpi_ioapic = 1;
 
 				smp_found_config = 1;
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index fcfdef7b4fdd..e52ad91ce2dc 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -531,7 +531,7 @@ int __init acpi_irq_penalty_init(void)
 	return 0;
 }
 
-static int acpi_irq_balance;	/* 0: static, 1: balance */
+static int acpi_irq_balance = -1;	/* 0: static, 1: balance */
 
 static int acpi_pci_link_allocate(struct acpi_pci_link *link)
 {
@@ -950,10 +950,17 @@ device_initcall(irqrouter_init_sysfs);
 
 static int __init acpi_pci_link_init(void)
 {
-
 	if (acpi_noirq)
 		return 0;
 
+	if (acpi_irq_balance == -1) {
+		/* no command line switch: enable balancing in IOAPIC mode */
+		if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC)
+			acpi_irq_balance = 1;
+		else
+			acpi_irq_balance = 0;
+	}
+
 	acpi_link.count = 0;
 	INIT_LIST_HEAD(&acpi_link.entries);
 
-- 
cgit v1.2.3


From 97a70e548bd97d5a46ae9d44f24aafcc013fd701 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 12 Nov 2008 23:22:35 +0100
Subject: x86, hibernate: fix breakage on x86_32 with CONFIG_NUMA set

Impact: fix crash during hibernation on 32-bit NUMA

The NUMA code on x86_32 creates special memory mapping that allows
each node's pgdat to be located in this node's memory.  For this
purpose it allocates a memory area at the end of each node's memory
and maps this area so that it is accessible with virtual addresses
belonging to low memory.  As a result, if there is high memory,
these NUMA-allocated areas are physically located in high memory,
although they are mapped to low memory addresses.

Our hibernation code does not take that into account and for this
reason hibernation fails on all x86_32 systems with CONFIG_NUMA=y and
with high memory present.  Fix this by adding a special mapping for
the NUMA-allocated memory areas to the temporary page tables created
during the last phase of resume.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mmzone_32.h |  4 ++++
 arch/x86/mm/numa_32.c            | 35 +++++++++++++++++++++++++++++++++++
 arch/x86/power/hibernate_32.c    |  4 ++++
 3 files changed, 43 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 485bdf059ffb..07f1af494ca5 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -34,10 +34,14 @@ static inline void get_memcfg_numa(void)
 
 extern int early_pfn_to_nid(unsigned long pfn);
 
+extern void resume_map_numa_kva(pgd_t *pgd);
+
 #else /* !CONFIG_NUMA */
 
 #define get_memcfg_numa get_memcfg_numa_flat
 
+static inline void resume_map_numa_kva(pgd_t *pgd) {}
+
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_DISCONTIGMEM
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 847c164725f4..8518c678d83f 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -222,6 +222,41 @@ static void __init remap_numa_kva(void)
 	}
 }
 
+#ifdef CONFIG_HIBERNATION
+/**
+ * resume_map_numa_kva - add KVA mapping to the temporary page tables created
+ *                       during resume from hibernation
+ * @pgd_base - temporary resume page directory
+ */
+void resume_map_numa_kva(pgd_t *pgd_base)
+{
+	int node;
+
+	for_each_online_node(node) {
+		unsigned long start_va, start_pfn, size, pfn;
+
+		start_va = (unsigned long)node_remap_start_vaddr[node];
+		start_pfn = node_remap_start_pfn[node];
+		size = node_remap_size[node];
+
+		printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node);
+
+		for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
+			unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
+			pgd_t *pgd = pgd_base + pgd_index(vaddr);
+			pud_t *pud = pud_offset(pgd, vaddr);
+			pmd_t *pmd = pmd_offset(pud, vaddr);
+
+			set_pmd(pmd, pfn_pmd(start_pfn + pfn,
+						PAGE_KERNEL_LARGE_EXEC));
+
+			printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
+				__FUNCTION__, vaddr, start_pfn + pfn);
+		}
+	}
+}
+#endif
+
 static unsigned long calculate_numa_remap_pages(void)
 {
 	int nid;
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index f2b6e3f11bfc..81197c62d5b3 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -12,6 +12,7 @@
 #include <asm/system.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
+#include <asm/mmzone.h>
 
 /* Defined in hibernate_asm_32.S */
 extern int restore_image(void);
@@ -127,6 +128,9 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
 			}
 		}
 	}
+
+	resume_map_numa_kva(pgd_base);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 604d20554883cf03f888440d58ea7c6d36899839 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 12 Nov 2008 23:26:14 +0100
Subject: x86: make NUMA on 32-bit depend on EXPERIMENTAL again

My previous patch to make CONFIG_NUMA on x86_32 depend on BROKEN
turned out to be unnecessary, after all, since the source of the
hibernation vs CONFIG_NUMA problem turned out to be the fact that
we didn't take the NUMA KVA remapping into account in the
hibernation code.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 93224b569187..4cf0ab13d187 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -957,7 +957,7 @@ config ARCH_PHYS_ADDR_T_64BIT
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
 	depends on SMP
-	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && BROKEN)
+	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
 	default n if X86_PC
 	default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
 	help
-- 
cgit v1.2.3


From 52168e60f7d86d83124903098ac8c2dba93cd1c4 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Fri, 14 Nov 2008 13:47:31 +0000
Subject: Revert "x86: blacklist DMAR on Intel G31/G33 chipsets"

This reverts commit e51af6630848406fc97adbd71443818cdcda297b, which was
wrongly hoovered up and submitted about a month after a better fix had
already been merged.

The better fix is commit cbda1ba898647aeb4ee770b803c922f595e97731
("PCI/iommu: blacklist DMAR on Intel G31/G33 chipsets"), where we do
this blacklisting based on the DMI identification for the offending
motherboard, since sometimes this chipset (or at least a chipset with
the same PCI ID) apparently _does_ actually have an IOMMU.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/iommu.h   |  1 -
 arch/x86/kernel/early-quirks.c | 18 ------------------
 2 files changed, 19 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index e4a552d44465..0b500c5b6446 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -6,7 +6,6 @@ extern void no_iommu_init(void);
 extern struct dma_mapping_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
-extern int dmar_disabled;
 
 extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
 
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 3ce029ffaa55..1b894b72c0f5 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -188,20 +188,6 @@ static void __init ati_bugs_contd(int num, int slot, int func)
 }
 #endif
 
-#ifdef CONFIG_DMAR
-static void __init intel_g33_dmar(int num, int slot, int func)
-{
-	struct acpi_table_header *dmar_tbl;
-	acpi_status status;
-
-	status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
-	if (ACPI_SUCCESS(status)) {
-		printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n");
-		dmar_disabled = 1;
-	}
-}
-#endif
-
 #define QFLAG_APPLY_ONCE 	0x1
 #define QFLAG_APPLIED		0x2
 #define QFLAG_DONE		(QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -225,10 +211,6 @@ static struct chipset early_qrk[] __initdata = {
 	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
 	{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
 	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
-#ifdef CONFIG_DMAR
-	{ PCI_VENDOR_ID_INTEL, 0x29c0,
-	  PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar },
-#endif
 	{}
 };
 
-- 
cgit v1.2.3


From d1f1e9c01006b4b050e090055c75278f80c2a5c5 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@googlemail.com>
Date: Sat, 15 Nov 2008 11:00:17 +0100
Subject: x86, bts: fix unlock problem in ds.c

Fix a problem where ds_request() returned an error without releasing the
ds lock.

Reported-by: Stephane Eranian <eranian@gmail.com>
Signed-off-by: Markus Metzger <markus.t.metzger@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ds.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 2b69994fd3a8..ac1d5b0586ba 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -384,8 +384,9 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
 
 	spin_lock(&ds_lock);
 
+	error = -EPERM;
 	if (!check_tracer(task))
-		return -EPERM;
+		goto out_unlock;
 
 	error = -ENOMEM;
 	context = ds_alloc_context(task);
-- 
cgit v1.2.3


From d3c6aa1e69f705ac3ab64584101b1d38435b1353 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sun, 16 Nov 2008 00:49:31 -0800
Subject: x86: fix es7000 compiling

Impact: fix es7000 build

  CC      arch/x86/kernel/es7000_32.o
arch/x86/kernel/es7000_32.c: In function find_unisys_acpi_oem_table:
arch/x86/kernel/es7000_32.c:255: error: implicit declaration of function acpi_get_table_with_size
arch/x86/kernel/es7000_32.c:261: error: implicit declaration of function early_acpi_os_unmap_memory
arch/x86/kernel/es7000_32.c: In function unmap_unisys_acpi_oem_table:
arch/x86/kernel/es7000_32.c:277: error: implicit declaration of function __acpi_unmap_table
make[1]: *** [arch/x86/kernel/es7000_32.o] Error 1

we applied one patch out of order...

| commit a73aaedd95703bd49f4c3f9df06fb7b7373ba905
| Author: Yinghai Lu <yhlu.kernel@gmail.com>
| Date:   Sun Sep 14 02:33:14 2008 -0700
|
|    x86: check dsdt before find oem table for es7000, v2
|
|    v2: use __acpi_unmap_table()

that patch need:

	x86: use early_ioremap in __acpi_map_table
	x86: always explicitly map acpi memory
	acpi: remove final __acpi_map_table mapping before setting acpi_gbl_permanent_mmap
	acpi/x86: introduce __apci_map_table, v4

submitted to the ACPI tree but not upstream yet.

fix it until those patches applied, need to revert this one

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/es7000_32.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index f454c78fcef6..0aa2c443d600 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -250,31 +250,24 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
 {
 	struct acpi_table_header *header = NULL;
 	int i = 0;
-	acpi_size tbl_size;
 
-	while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) {
+	while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) {
 		if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
 			struct oem_table *t = (struct oem_table *)header;
 
 			oem_addrX = t->OEMTableAddr;
 			oem_size = t->OEMTableSize;
-			early_acpi_os_unmap_memory(header, tbl_size);
 
 			*oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
 								    oem_size);
 			return 0;
 		}
-		early_acpi_os_unmap_memory(header, tbl_size);
 	}
 	return -1;
 }
 
 void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
 {
-	if (!oem_addr)
-		return;
-
-	__acpi_unmap_table((char *)oem_addr, oem_size);
 }
 #endif
 
-- 
cgit v1.2.3


From 93ce99e849433ede4ce8b410b749dc0cad1100b2 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Mon, 17 Nov 2008 14:43:58 -0800
Subject: x86: add rdtsc barrier to TSC sync check

Impact: fix incorrectly marked unstable TSC clock

Patch (commit 0d12cdd "sched: improve sched_clock() performance") has
a regression on one of the test systems here.

With the patch, I see:

 checking TSC synchronization [CPU#0 -> CPU#1]:
 Measured 28 cycles TSC warp between CPUs, turning off TSC clock.
 Marking TSC unstable due to check_tsc_sync_source failed

Whereas, without the patch syncs pass fine on all CPUs:

 checking TSC synchronization [CPU#0 -> CPU#1]: passed.

Due to this, TSC is marked unstable, when it is not actually unstable.
This is because syncs in check_tsc_wrap() goes away due to this commit.

As per the discussion on this thread, correct way to fix this is to add
explicit syncs as below?

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/tsc_sync.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9ffb01c31c40..1c0dfbca87c1 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -46,7 +46,9 @@ static __cpuinit void check_tsc_warp(void)
 	cycles_t start, now, prev, end;
 	int i;
 
+	rdtsc_barrier();
 	start = get_cycles();
+	rdtsc_barrier();
 	/*
 	 * The measurement runs for 20 msecs:
 	 */
@@ -61,7 +63,9 @@ static __cpuinit void check_tsc_warp(void)
 		 */
 		__raw_spin_lock(&sync_lock);
 		prev = last_tsc;
+		rdtsc_barrier();
 		now = get_cycles();
+		rdtsc_barrier();
 		last_tsc = now;
 		__raw_spin_unlock(&sync_lock);
 
-- 
cgit v1.2.3


From 10db4ef7b9a65b86e4d047671a1886f4c101a859 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 18 Nov 2008 15:23:08 +0100
Subject: x86, PEBS/DS: fix code flow in ds_request()

this compiler warning:

  arch/x86/kernel/ds.c: In function 'ds_request':
  arch/x86/kernel/ds.c:368: warning: 'context' may be used uninitialized in this function

Shows that the code flow in ds_request() is buggy - it goes into
the unlock+release-context path even when the context is not allocated
yet.

First allocate the context, then do the other checks.

Also, take care with GFP allocations under the ds_lock spinlock.

Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ds.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index ac1d5b0586ba..d1a121443bde 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -236,17 +236,33 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
 	struct ds_context *context = *p_context;
 
 	if (!context) {
+		spin_unlock(&ds_lock);
+
 		context = kzalloc(sizeof(*context), GFP_KERNEL);
 
-		if (!context)
+		if (!context) {
+			spin_lock(&ds_lock);
 			return NULL;
+		}
 
 		context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
 		if (!context->ds) {
 			kfree(context);
+			spin_lock(&ds_lock);
 			return NULL;
 		}
 
+		spin_lock(&ds_lock);
+		/*
+		 * Check for race - another CPU could have allocated
+		 * it meanwhile:
+		 */
+		if (*p_context) {
+			kfree(context->ds);
+			kfree(context);
+			return *p_context;
+		}
+
 		*p_context = context;
 
 		context->this = p_context;
@@ -384,15 +400,15 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
 
 	spin_lock(&ds_lock);
 
-	error = -EPERM;
-	if (!check_tracer(task))
-		goto out_unlock;
-
 	error = -ENOMEM;
 	context = ds_alloc_context(task);
 	if (!context)
 		goto out_unlock;
 
+	error = -EPERM;
+	if (!check_tracer(task))
+		goto out_unlock;
+
 	error = -EALREADY;
 	if (context->owner[qual] == current)
 		goto out_unlock;
-- 
cgit v1.2.3


From e5e1f606ecbf67e52ebe2df5d14f8b94ec6544d0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 17 Nov 2008 15:07:17 +0100
Subject: AMD IOMMU: add parameter to disable device isolation

Impact: add a new AMD IOMMU kernel command line parameter

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 Documentation/kernel-parameters.txt | 2 ++
 arch/x86/kernel/amd_iommu_init.c    | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9fa6508892c2..b56ee02d2514 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -295,6 +295,8 @@ and is between 256 and 4096 characters. It is defined in the file
 			isolate - enable device isolation (each device, as far
 			          as possible, will get its own protection
 			          domain)
+			share - put every device behind one IOMMU into the
+				same protection domain
 			fullflush - enable flushing of IO/TLB entries when
 				    they are unmapped. Otherwise they are
 				    flushed before they will be reused, which
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 0cdcda35a05f..838a2e1d5bb2 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1213,6 +1213,8 @@ static int __init parse_amd_iommu_options(char *str)
 	for (; *str; ++str) {
 		if (strncmp(str, "isolate", 7) == 0)
 			amd_iommu_isolate = 1;
+		if (strncmp(str, "share", 5) == 0)
+			amd_iommu_isolate = 0;
 		if (strncmp(str, "fullflush", 11) == 0)
 			amd_iommu_unmap_flush = true;
 	}
-- 
cgit v1.2.3


From 3ce1f93c6d53c3f91c3846cf66b018276c8ac2e7 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 17 Nov 2008 15:09:20 +0100
Subject: AMD IOMMU: enable device isolation per default

Impact: makes device isolation the default for AMD IOMMU

Some device drivers showed double-free bugs of DMA memory while testing
them with AMD IOMMU. If all devices share the same protection domain
this can lead to data corruption and data loss. Prevent this by putting
each device into its own protection domain per default.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 Documentation/kernel-parameters.txt | 2 +-
 arch/x86/kernel/amd_iommu_init.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b56ee02d2514..f2e1e7fc0ffd 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -294,7 +294,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			Possible values are:
 			isolate - enable device isolation (each device, as far
 			          as possible, will get its own protection
-			          domain)
+			          domain) [default]
 			share - put every device behind one IOMMU into the
 				same protection domain
 			fullflush - enable flushing of IO/TLB entries when
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 838a2e1d5bb2..595edd2befc6 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -121,7 +121,7 @@ u16 amd_iommu_last_bdf;			/* largest PCI device id we have
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate;			/* if 1, device isolation is enabled */
+int amd_iommu_isolate = 1;		/* if 1, device isolation is enabled */
 bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
-- 
cgit v1.2.3


From 695b5676c727d80921a2dc8737d5b3322222db85 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 17 Nov 2008 15:16:43 +0100
Subject: AMD IOMMU: fix fullflush comparison length

Impact: fix comparison length for 'fullflush'

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 595edd2befc6..30ae2701b3df 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1215,7 +1215,7 @@ static int __init parse_amd_iommu_options(char *str)
 			amd_iommu_isolate = 1;
 		if (strncmp(str, "share", 5) == 0)
 			amd_iommu_isolate = 0;
-		if (strncmp(str, "fullflush", 11) == 0)
+		if (strncmp(str, "fullflush", 9) == 0)
 			amd_iommu_unmap_flush = true;
 	}
 
-- 
cgit v1.2.3


From 8501c45cc32c311ae755a2d5ac8c4a5f04908d42 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 17 Nov 2008 19:11:46 +0100
Subject: AMD IOMMU: check for next_bit also in unmapped area

Impact: fix possible use of stale IO/TLB entries

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 331b318304eb..e4899e0e8787 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -537,7 +537,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 	address >>= PAGE_SHIFT;
 	iommu_area_free(dom->bitmap, address, pages);
 
-	if (address + pages >= dom->next_bit)
+	if (address >= dom->next_bit)
 		dom->need_flush = true;
 }
 
-- 
cgit v1.2.3


From 0af40a4b1050c050e62eb1dc30b82d5ab22bf221 Mon Sep 17 00:00:00 2001
From: Philipp Kohlbecher <xt28@gmx.de>
Date: Sun, 16 Nov 2008 12:11:01 +0100
Subject: x86: more general identifier for Phoenix BIOS

Impact: widen the reach of the low-memory-protect DMI quirk

Phoenix BIOSes variously identify their vendor as "Phoenix Technologies,
LTD" or "Phoenix Technologies LTD" (without the comma.)

This patch makes the identification string in the bad_bios_dmi_table
more general (following a suggestion by Ingo Molnar), so that both
versions are handled.

Again, the patched file compiles cleanly and the patch has been tested
successfully on my machine.

Signed-off-by: Philipp Kohlbecher <xt28@gmx.de>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0fa6790c1dd3..9d5674f7b6cc 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -764,7 +764,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
 		.callback = dmi_low_memory_corruption,
 		.ident = "Phoenix BIOS",
 		.matches = {
-			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
+			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
 		},
 	},
 #endif
-- 
cgit v1.2.3


From de11defebf00007677fb7ee91d9b089b78786fbb Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 19 Nov 2008 15:36:14 -0800
Subject: reintroduce accept4

Introduce a new accept4() system call.  The addition of this system call
matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(),
inotify_init1(), epoll_create1(), pipe2()) which added new system calls
that differed from analogous traditional system calls in adding a flags
argument that can be used to access additional functionality.

The accept4() system call is exactly the same as accept(), except that
it adds a flags bit-mask argument.  Two flags are initially implemented.
(Most of the new system calls in 2.6.27 also had both of these flags.)

SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled
for the new file descriptor returned by accept4().  This is a useful
security feature to avoid leaking information in a multithreaded
program where one thread is doing an accept() at the same time as
another thread is doing a fork() plus exec().  More details here:
http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling",
Ulrich Drepper).

The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag
to be enabled on the new open file description created by accept4().
(This flag is merely a convenience, saving the use of additional calls
fcntl(F_GETFL) and fcntl (F_SETFL) to achieve the same result.

Here's a test program.  Works on x86-32.  Should work on x86-64, but
I (mtk) don't have a system to hand to test with.

It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies
that the appropriate flags are set on the file descriptor/open file
description returned by accept4().

I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.

/* test_accept4.c

  Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
       <mtk.manpages@gmail.com>

  Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

#define PORT_NUM 33333

#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)

/**********************************************************************/

/* The following is what we need until glibc gets a wrapper for
  accept4() */

/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC    O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK   O_NONBLOCK
#endif

#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif

static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
   printf("Calling accept4(): flags = %x", flags);
   if (flags != 0) {
       printf(" (");
       if (flags & SOCK_CLOEXEC)
           printf("SOCK_CLOEXEC");
       if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
           printf(" ");
       if (flags & SOCK_NONBLOCK)
           printf("SOCK_NONBLOCK");
       printf(")");
   }
   printf("\n");

#if USE_SOCKETCALL
   long args[6];

   args[0] = fd;
   args[1] = (long) sockaddr;
   args[2] = (long) addrlen;
   args[3] = flags;

   return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
   return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}

/**********************************************************************/

static int
do_test(int lfd, struct sockaddr_in *conn_addr,
       int closeonexec_flag, int nonblock_flag)
{
   int connfd, acceptfd;
   int fdf, flf, fdf_pass, flf_pass;
   struct sockaddr_in claddr;
   socklen_t addrlen;

   printf("=======================================\n");

   connfd = socket(AF_INET, SOCK_STREAM, 0);
   if (connfd == -1)
       die("socket");
   if (connect(connfd, (struct sockaddr *) conn_addr,
               sizeof(struct sockaddr_in)) == -1)
       die("connect");

   addrlen = sizeof(struct sockaddr_in);
   acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
                      closeonexec_flag | nonblock_flag);
   if (acceptfd == -1) {
       perror("accept4()");
       close(connfd);
       return 0;
   }

   fdf = fcntl(acceptfd, F_GETFD);
   if (fdf == -1)
       die("fcntl:F_GETFD");
   fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
              ((closeonexec_flag & SOCK_CLOEXEC) != 0);
   printf("Close-on-exec flag is %sset (%s); ",
           (fdf & FD_CLOEXEC) ? "" : "not ",
           fdf_pass ? "OK" : "failed");

   flf = fcntl(acceptfd, F_GETFL);
   if (flf == -1)
       die("fcntl:F_GETFD");
   flf_pass = ((flf & O_NONBLOCK) != 0) ==
              ((nonblock_flag & SOCK_NONBLOCK) !=0);
   printf("nonblock flag is %sset (%s)\n",
           (flf & O_NONBLOCK) ? "" : "not ",
           flf_pass ? "OK" : "failed");

   close(acceptfd);
   close(connfd);

   printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
   return fdf_pass && flf_pass;
}

static int
create_listening_socket(int port_num)
{
   struct sockaddr_in svaddr;
   int lfd;
   int optval;

   memset(&svaddr, 0, sizeof(struct sockaddr_in));
   svaddr.sin_family = AF_INET;
   svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
   svaddr.sin_port = htons(port_num);

   lfd = socket(AF_INET, SOCK_STREAM, 0);
   if (lfd == -1)
       die("socket");

   optval = 1;
   if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
                  sizeof(optval)) == -1)
       die("setsockopt");

   if (bind(lfd, (struct sockaddr *) &svaddr,
            sizeof(struct sockaddr_in)) == -1)
       die("bind");

   if (listen(lfd, 5) == -1)
       die("listen");

   return lfd;
}

int
main(int argc, char *argv[])
{
   struct sockaddr_in conn_addr;
   int lfd;
   int port_num;
   int passed;

   passed = 1;

   port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;

   memset(&conn_addr, 0, sizeof(struct sockaddr_in));
   conn_addr.sin_family = AF_INET;
   conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   conn_addr.sin_port = htons(port_num);

   lfd = create_listening_socket(port_num);

   if (!do_test(lfd, &conn_addr, 0, 0))
       passed = 0;
   if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
       passed = 0;
   if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
       passed = 0;
   if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
       passed = 0;

   close(lfd);

   exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}

[mtk.manpages@gmail.com: rewrote changelog, updated test program]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/unistd_64.h |  4 +-
 include/linux/net.h              |  6 +--
 include/linux/syscalls.h         |  3 +-
 kernel/sys_ni.c                  |  2 +-
 net/compat.c                     | 50 +++----------------------
 net/socket.c                     | 80 +++++-----------------------------------
 6 files changed, 21 insertions(+), 124 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 834b2c1d89fb..d2e415e6666f 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
 __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 #define __NR_timerfd_gettime			287
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
-#define __NR_paccept				288
-__SYSCALL(__NR_paccept, sys_paccept)
+#define __NR_accept4				288
+__SYSCALL(__NR_accept4, sys_accept4)
 #define __NR_signalfd4				289
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
 #define __NR_eventfd2				290
diff --git a/include/linux/net.h b/include/linux/net.h
index 6dc14a240042..4515efae4c39 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -40,7 +40,7 @@
 #define SYS_GETSOCKOPT	15		/* sys_getsockopt(2)		*/
 #define SYS_SENDMSG	16		/* sys_sendmsg(2)		*/
 #define SYS_RECVMSG	17		/* sys_recvmsg(2)		*/
-#define SYS_PACCEPT	18		/* sys_paccept(2)		*/
+#define SYS_ACCEPT4	18		/* sys_accept4(2)		*/
 
 typedef enum {
 	SS_FREE = 0,			/* not allocated		*/
@@ -100,7 +100,7 @@ enum sock_type {
  * remaining bits are used as flags. */
 #define SOCK_TYPE_MASK 0xf
 
-/* Flags for socket, socketpair, paccept */
+/* Flags for socket, socketpair, accept4 */
 #define SOCK_CLOEXEC	O_CLOEXEC
 #ifndef SOCK_NONBLOCK
 #define SOCK_NONBLOCK	O_NONBLOCK
@@ -223,8 +223,6 @@ extern int 	     sock_map_fd(struct socket *sock, int flags);
 extern struct socket *sockfd_lookup(int fd, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
 extern int	     net_ratelimit(void);
-extern long	     do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
-			       int __user *upeer_addrlen, int flags);
 
 #define net_random()		random32()
 #define net_srandom(seed)	srandom32((__force u32)seed)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d6ff145919ca..04fb47bfb920 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -410,8 +410,7 @@ asmlinkage long sys_getsockopt(int fd, int level, int optname,
 asmlinkage long sys_bind(int, struct sockaddr __user *, int);
 asmlinkage long sys_connect(int, struct sockaddr __user *, int);
 asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
-asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
-			    const __user sigset_t *, size_t, int);
+asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int);
 asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
 asmlinkage long sys_send(int, void __user *, size_t, unsigned);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index a77b27b11b04..e14a23281707 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
 cond_syscall(sys_bind);
 cond_syscall(sys_listen);
 cond_syscall(sys_accept);
-cond_syscall(sys_paccept);
+cond_syscall(sys_accept4);
 cond_syscall(sys_connect);
 cond_syscall(sys_getsockname);
 cond_syscall(sys_getpeername);
diff --git a/net/compat.c b/net/compat.c
index 6ce1a1cadcc0..a3a2ba0fac08 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
 static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
 				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
-				AL(6)};
+				AL(4)};
 #undef AL
 
 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns
 	return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
 }
 
-asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
-				   int __user *upeer_addrlen,
-				   const compat_sigset_t __user *sigmask,
-				   compat_size_t sigsetsize, int flags)
-{
-	compat_sigset_t ss32;
-	sigset_t ksigmask, sigsaved;
-	int ret;
-
-	if (sigmask) {
-		if (sigsetsize != sizeof(compat_sigset_t))
-			return -EINVAL;
-		if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
-			return -EFAULT;
-		sigset_from_compat(&ksigmask, &ss32);
-
-		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
-		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
-	}
-
-	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-
-	if (ret == -ERESTARTNOHAND) {
-		/*
-		 * Don't restore the signal mask yet. Let do_signal() deliver
-		 * the signal on the way back to userspace, before the signal
-		 * mask is restored.
-		 */
-		if (sigmask) {
-			memcpy(&current->saved_sigmask, &sigsaved,
-			       sizeof(sigsaved));
-			set_restore_sigmask();
-		}
-	} else if (sigmask)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
-	return ret;
-}
-
 asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 {
 	int ret;
 	u32 a[6];
 	u32 a0, a1;
 
-	if (call < SYS_SOCKET || call > SYS_PACCEPT)
+	if (call < SYS_SOCKET || call > SYS_ACCEPT4)
 		return -EINVAL;
 	if (copy_from_user(a, args, nas[call]))
 		return -EFAULT;
@@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 		ret = sys_listen(a0, a1);
 		break;
 	case SYS_ACCEPT:
-		ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
+		ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
 		break;
 	case SYS_GETSOCKNAME:
 		ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
@@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 	case SYS_RECVMSG:
 		ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
 		break;
-	case SYS_PACCEPT:
-		ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]),
-					 compat_ptr(a[3]), a[4], a[5]);
+	case SYS_ACCEPT4:
+		ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/net/socket.c b/net/socket.c
index 57550c3bcabe..92764d836891 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1426,8 +1426,8 @@ asmlinkage long sys_listen(int fd, int backlog)
  *	clean when we restucture accept also.
  */
 
-long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
-	       int __user *upeer_addrlen, int flags)
+asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
+			    int __user *upeer_addrlen, int flags)
 {
 	struct socket *sock, *newsock;
 	struct file *newfile;
@@ -1510,66 +1510,10 @@ out_fd:
 	goto out_put;
 }
 
-#if 0
-#ifdef HAVE_SET_RESTORE_SIGMASK
-asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
-			    int __user *upeer_addrlen,
-			    const sigset_t __user *sigmask,
-			    size_t sigsetsize, int flags)
-{
-	sigset_t ksigmask, sigsaved;
-	int ret;
-
-	if (sigmask) {
-		/* XXX: Don't preclude handling different sized sigset_t's.  */
-		if (sigsetsize != sizeof(sigset_t))
-			return -EINVAL;
-		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
-			return -EFAULT;
-
-		sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
-		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
-        }
-
-	ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-
-	if (ret < 0 && signal_pending(current)) {
-		/*
-		 * Don't restore the signal mask yet. Let do_signal() deliver
-		 * the signal on the way back to userspace, before the signal
-		 * mask is restored.
-		 */
-		if (sigmask) {
-			memcpy(&current->saved_sigmask, &sigsaved,
-			       sizeof(sigsaved));
-			set_restore_sigmask();
-		}
-	} else if (sigmask)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
-	return ret;
-}
-#else
-asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
-			    int __user *upeer_addrlen,
-			    const sigset_t __user *sigmask,
-			    size_t sigsetsize, int flags)
-{
-	/* The platform does not support restoring the signal mask in the
-	 * return path.  So we do not allow using paccept() with a signal
-	 * mask.  */
-	if (sigmask)
-		return -EINVAL;
-
-	return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-}
-#endif
-#endif
-
 asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 			   int __user *upeer_addrlen)
 {
-	return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
+	return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
 }
 
 /*
@@ -2096,7 +2040,7 @@ static const unsigned char nargs[19]={
 	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
 	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
 	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
-	AL(6)
+	AL(4)
 };
 
 #undef AL
@@ -2115,7 +2059,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 	unsigned long a0, a1;
 	int err;
 
-	if (call < 1 || call > SYS_PACCEPT)
+	if (call < 1 || call > SYS_ACCEPT4)
 		return -EINVAL;
 
 	/* copy_from_user should be SMP safe. */
@@ -2143,9 +2087,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 		err = sys_listen(a0, a1);
 		break;
 	case SYS_ACCEPT:
-		err =
-		    do_accept(a0, (struct sockaddr __user *)a1,
-			      (int __user *)a[2], 0);
+		err = sys_accept4(a0, (struct sockaddr __user *)a1,
+				  (int __user *)a[2], 0);
 		break;
 	case SYS_GETSOCKNAME:
 		err =
@@ -2192,12 +2135,9 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 	case SYS_RECVMSG:
 		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
 		break;
-	case SYS_PACCEPT:
-		err =
-		    sys_paccept(a0, (struct sockaddr __user *)a1,
-			        (int __user *)a[2],
-				(const sigset_t __user *) a[3],
-				a[4], a[5]);
+	case SYS_ACCEPT4:
+		err = sys_accept4(a0, (struct sockaddr __user *)a1,
+				  (int __user *)a[2], a[3]);
 		break;
 	default:
 		err = -EINVAL;
-- 
cgit v1.2.3