From 7760ec77ab2a9e48bdd0d13341446a8a51f0b9f1 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 7 Jan 2009 18:10:13 +0530
Subject: x86: smp.h remove obsolete function declaration

Impact: cleanup

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/smp.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 830b9fcb6427..83a4cc074315 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -22,7 +22,6 @@ extern cpumask_t cpu_callout_map;
 extern cpumask_t cpu_initialized;
 extern cpumask_t cpu_callin_map;
 
-extern void (*mtrr_hook)(void);
 extern void zap_low_mappings(void);
 
 extern int __cpuinit get_local_pda(int cpu);
-- 
cgit v1.2.3


From dacf7333571d770366bff74d10b56aa545434605 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 7 Jan 2009 17:26:35 +0530
Subject: x86: smp.h move zap_low_mappings declartion to tlbflush.h

Impact: cleanup, moving NON-SMP stuff from smp.h

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/smp.h      | 2 --
 arch/x86/include/asm/tlbflush.h | 2 ++
 arch/x86/mm/init_32.c           | 1 -
 3 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 83a4cc074315..64c9e848f137 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -22,8 +22,6 @@ extern cpumask_t cpu_callout_map;
 extern cpumask_t cpu_initialized;
 extern cpumask_t cpu_callin_map;
 
-extern void zap_low_mappings(void);
-
 extern int __cpuinit get_local_pda(int cpu);
 
 extern int smp_num_siblings;
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0e7bbb549116..aed0b700b837 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -175,4 +175,6 @@ static inline void flush_tlb_kernel_range(unsigned long start,
 	flush_tlb_all();
 }
 
+extern void zap_low_mappings(void);
+
 #endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f99a6c6c432e..a9dd0b7ad618 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,7 +49,6 @@
 #include <asm/paravirt.h>
 #include <asm/setup.h>
 #include <asm/cacheflush.h>
-#include <asm/smp.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
-- 
cgit v1.2.3


From 6e5385d44b2df05e50a8d07ba0e14d3e32685237 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 7 Jan 2009 18:11:35 +0530
Subject: x86: smp.h move prefill_possible_map declartion to cpu.h

Impact: cleanup, moving NON-SMP stuff from smp.h

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpu.h | 10 ++++++++++
 arch/x86/include/asm/smp.h |  6 ------
 arch/x86/kernel/setup.c    |  2 +-
 arch/x86/kernel/smpboot.c  |  1 -
 4 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index bae482df6039..29aa6d0752b9 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -7,6 +7,16 @@
 #include <linux/nodemask.h>
 #include <linux/percpu.h>
 
+#ifdef CONFIG_SMP
+
+extern void prefill_possible_map(void);
+
+#else /* CONFIG_SMP */
+
+static inline void prefill_possible_map(void) {}
+
+#endif /* CONFIG_SMP */
+
 struct x86_cpu {
 	struct cpu cpu;
 };
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 64c9e848f137..62bd3f68269a 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -138,8 +138,6 @@ void play_dead_common(void);
 void native_send_call_func_ipi(const struct cpumask *mask);
 void native_send_call_func_single_ipi(int cpu);
 
-extern void prefill_possible_map(void);
-
 void smp_store_cpu_info(int id);
 #define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)
 
@@ -148,10 +146,6 @@ static inline int num_booting_cpus(void)
 {
 	return cpus_weight(cpu_callout_map);
 }
-#else
-static inline void prefill_possible_map(void)
-{
-}
 #endif /* CONFIG_SMP */
 
 extern unsigned disabled_cpus __cpuinitdata;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ae0d8042cf69..f41c4486c270 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -89,7 +89,7 @@
 
 #include <asm/system.h>
 #include <asm/vsyscall.h>
-#include <asm/smp.h>
+#include <asm/cpu.h>
 #include <asm/desc.h>
 #include <asm/dma.h>
 #include <asm/iommu.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 07576bee03ef..f8c885bed18c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -53,7 +53,6 @@
 #include <asm/nmi.h>
 #include <asm/irq.h>
 #include <asm/idle.h>
-#include <asm/smp.h>
 #include <asm/trampoline.h>
 #include <asm/cpu.h>
 #include <asm/numa.h>
-- 
cgit v1.2.3


From f472cdba849cc3d838f3788469316e8572463a8c Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 7 Jan 2009 21:34:25 +0530
Subject: x86: smp.h move stack_processor_id declartion to cpu.h

Impact: cleanup

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpu.h   | 2 ++
 arch/x86/include/asm/smp.h   | 1 -
 arch/x86/kernel/cpu/common.c | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 29aa6d0752b9..f958e7e49c05 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -15,6 +15,8 @@ extern void prefill_possible_map(void);
 
 static inline void prefill_possible_map(void) {}
 
+#define stack_smp_processor_id()		0
+
 #endif /* CONFIG_SMP */
 
 struct x86_cpu {
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 62bd3f68269a..ed4af9a89cfd 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -173,7 +173,6 @@ extern int safe_smp_processor_id(void);
 #else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */
 #define cpu_physical_id(cpu)		boot_cpu_physical_apicid
 #define safe_smp_processor_id()		0
-#define stack_smp_processor_id() 	0
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3f95a40f718a..f7619a2eaffe 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -21,6 +21,7 @@
 #include <asm/asm.h>
 #include <asm/numa.h>
 #include <asm/smp.h>
+#include <asm/cpu.h>
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/mpspec.h>
 #include <asm/apic.h>
-- 
cgit v1.2.3


From 96b89dc6598a50e3aac8e2c6d826ae3795b7d030 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 7 Jan 2009 21:35:48 +0530
Subject: x86: smp.h move safe_smp_processor_id declartion to cpu.h

Impact: cleanup

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpu.h    | 1 +
 arch/x86/include/asm/smp.h    | 1 -
 arch/x86/kernel/crash.c       | 2 +-
 arch/x86/kernel/reboot.c      | 1 +
 arch/x86/mach-voyager/setup.c | 1 +
 5 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index f958e7e49c05..4c16888ffa3f 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -15,6 +15,7 @@ extern void prefill_possible_map(void);
 
 static inline void prefill_possible_map(void) {}
 
+#define safe_smp_processor_id()			0
 #define stack_smp_processor_id()		0
 
 #endif /* CONFIG_SMP */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index ed4af9a89cfd..c92b93594ab3 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -172,7 +172,6 @@ extern int safe_smp_processor_id(void);
 
 #else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */
 #define cpu_physical_id(cpu)		boot_cpu_physical_apicid
-#define safe_smp_processor_id()		0
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c689d19e35ab..11b93cabdf78 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -24,7 +24,7 @@
 #include <asm/apic.h>
 #include <asm/hpet.h>
 #include <linux/kdebug.h>
-#include <asm/smp.h>
+#include <asm/cpu.h>
 #include <asm/reboot.h>
 #include <asm/virtext.h>
 
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b46eb41643b..f8536fee5c12 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -14,6 +14,7 @@
 #include <asm/reboot.h>
 #include <asm/pci_x86.h>
 #include <asm/virtext.h>
+#include <asm/cpu.h>
 
 #ifdef CONFIG_X86_32
 # include <linux/dmi.h>
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index a580b9562e76..0ade62555ff3 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -9,6 +9,7 @@
 #include <asm/e820.h>
 #include <asm/io.h>
 #include <asm/setup.h>
+#include <asm/cpu.h>
 
 void __init pre_intr_init_hook(void)
 {
-- 
cgit v1.2.3


From af8968abf09fe5984bdd206e54e0eeb1dc1fa29c Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 7 Jan 2009 21:37:33 +0530
Subject: x86: smp.h move cpu_physical_id declartion to cpu.h

Impact: cleanup

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpu.h | 1 +
 arch/x86/include/asm/smp.h | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 4c16888ffa3f..89edafb93390 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -15,6 +15,7 @@ extern void prefill_possible_map(void);
 
 static inline void prefill_possible_map(void) {}
 
+#define cpu_physical_id(cpu)			boot_cpu_physical_apicid
 #define safe_smp_processor_id()			0
 #define stack_smp_processor_id()		0
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index c92b93594ab3..c975b6f83c68 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -170,8 +170,6 @@ extern int safe_smp_processor_id(void);
 })
 #define safe_smp_processor_id()		smp_processor_id()
 
-#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */
-#define cpu_physical_id(cpu)		boot_cpu_physical_apicid
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-- 
cgit v1.2.3


From 6d652ea1d056390a0c33db92b44ed219284b71af Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Wed, 7 Jan 2009 21:38:59 +0530
Subject: x86: smp.h move boot_cpu_id declartion to cpu.h

Impact: cleanup

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpu.h   | 7 +++++++
 arch/x86/include/asm/smp.h   | 6 ------
 arch/x86/kernel/io_apic.c    | 1 +
 drivers/pci/intr_remapping.c | 1 +
 4 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 89edafb93390..f03b23e32864 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -31,4 +31,11 @@ extern void arch_unregister_cpu(int);
 #endif
 
 DECLARE_PER_CPU(int, cpu_state);
+
+#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
+extern unsigned char boot_cpu_id;
+#else
+#define boot_cpu_id				0
+#endif
+
 #endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index c975b6f83c68..74ad9ef6ae02 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -212,11 +212,5 @@ static inline int hard_smp_processor_id(void)
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
-#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
-extern unsigned char boot_cpu_id;
-#else
-#define boot_cpu_id	0
-#endif
-
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 1c4a1302536c..109c91db2026 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -46,6 +46,7 @@
 #include <asm/idle.h>
 #include <asm/io.h>
 #include <asm/smp.h>
+#include <asm/cpu.h>
 #include <asm/desc.h>
 #include <asm/proto.h>
 #include <asm/acpi.h>
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index f78371b22529..5a57753ea9fc 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -6,6 +6,7 @@
 #include <linux/irq.h>
 #include <asm/io_apic.h>
 #include <asm/smp.h>
+#include <asm/cpu.h>
 #include <linux/intel-iommu.h>
 #include "intr_remapping.h"
 
-- 
cgit v1.2.3


From 41401db698cbb5d1869776bf336881db267e7d19 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Thu, 8 Jan 2009 15:42:46 +0530
Subject: x86: rename intel_mp_floating to mpf_intel

Impact: cleanup, solve 80 columns wrap problems

intel_mp_floating should be renamed to mpf_intel.

The reason: the 'f' in MPF already means 'floating'
which means MP Floating pointer structure -
no need to repeat that in the type name.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h |  3 ++-
 arch/x86/kernel/mpparse.c         | 12 ++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 59568bc4767f..187dc9201932 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -24,7 +24,8 @@
 # endif
 #endif
 
-struct intel_mp_floating {
+/* Intel MP Floating Pointer Structure */
+struct mpf_intel {
 	char mpf_signature[4];		/* "_MP_"			*/
 	unsigned int mpf_physptr;	/* Configuration table address	*/
 	unsigned char mpf_length;	/* Our length (paragraphs)	*/
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index c0601c2848a1..6cea941c4dbb 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -569,14 +569,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
 	}
 }
 
-static struct intel_mp_floating *mpf_found;
+static struct mpf_intel *mpf_found;
 
 /*
  * Scan the memory blocks for an SMP configuration block.
  */
 static void __init __get_smp_config(unsigned int early)
 {
-	struct intel_mp_floating *mpf = mpf_found;
+	struct mpf_intel *mpf = mpf_found;
 
 	if (!mpf)
 		return;
@@ -687,14 +687,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 				  unsigned reserve)
 {
 	unsigned int *bp = phys_to_virt(base);
-	struct intel_mp_floating *mpf;
+	struct mpf_intel *mpf;
 
 	apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
 			bp, length);
 	BUILD_BUG_ON(sizeof(*mpf) != 16);
 
 	while (length > 0) {
-		mpf = (struct intel_mp_floating *)bp;
+		mpf = (struct mpf_intel *)bp;
 		if ((*bp == SMP_MAGIC_IDENT) &&
 		    (mpf->mpf_length == 1) &&
 		    !mpf_checksum((unsigned char *)bp, 16) &&
@@ -1000,7 +1000,7 @@ static int __init update_mp_table(void)
 {
 	char str[16];
 	char oem[10];
-	struct intel_mp_floating *mpf;
+	struct mpf_intel *mpf;
 	struct mpc_table *mpc, *mpc_new;
 
 	if (!enable_update_mptable)
@@ -1052,7 +1052,7 @@ static int __init update_mp_table(void)
 		mpc = mpc_new;
 		/* check if we can modify that */
 		if (mpc_new_phys - mpf->mpf_physptr) {
-			struct intel_mp_floating *mpf_new;
+			struct mpf_intel *mpf_new;
 			/* steal 16 bytes from [0, 1k) */
 			printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
 			mpf_new = phys_to_virt(0x400 - 16);
-- 
cgit v1.2.3


From 1eb1b3b65dc3e3ffcc6a60e115c085c0c11c1077 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Thu, 8 Jan 2009 15:43:26 +0530
Subject: x86: rename all fields of mpf_intel mpf_X to X

Impact: cleanup, solve 80 columns wrap problems

It would be cleaner to rename all the mpf->mpf_X fields to
mpf->X - that alone would give 4 characters per usage site.
(we already know that it's an 'mpf' entity -
no need to duplicate that in the field too)

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mpspec_def.h | 20 ++++++++--------
 arch/x86/kernel/mpparse.c         | 50 +++++++++++++++++++--------------------
 2 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 187dc9201932..4a7f96d7c188 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -26,16 +26,16 @@
 
 /* Intel MP Floating Pointer Structure */
 struct mpf_intel {
-	char mpf_signature[4];		/* "_MP_"			*/
-	unsigned int mpf_physptr;	/* Configuration table address	*/
-	unsigned char mpf_length;	/* Our length (paragraphs)	*/
-	unsigned char mpf_specification;/* Specification version	*/
-	unsigned char mpf_checksum;	/* Checksum (makes sum 0)	*/
-	unsigned char mpf_feature1;	/* Standard or configuration ?	*/
-	unsigned char mpf_feature2;	/* Bit7 set for IMCR|PIC	*/
-	unsigned char mpf_feature3;	/* Unused (0)			*/
-	unsigned char mpf_feature4;	/* Unused (0)			*/
-	unsigned char mpf_feature5;	/* Unused (0)			*/
+	char signature[4];		/* "_MP_"			*/
+	unsigned int physptr;		/* Configuration table address	*/
+	unsigned char length;		/* Our length (paragraphs)	*/
+	unsigned char specification;	/* Specification version	*/
+	unsigned char checksum;		/* Checksum (makes sum 0)	*/
+	unsigned char feature1;		/* Standard or configuration ?	*/
+	unsigned char feature2;		/* Bit7 set for IMCR|PIC	*/
+	unsigned char feature3;		/* Unused (0)			*/
+	unsigned char feature4;		/* Unused (0)			*/
+	unsigned char feature5;		/* Unused (0)			*/
 };
 
 #define MPC_SIGNATURE "PCMP"
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6cea941c4dbb..8385d4e7e15d 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -597,9 +597,9 @@ static void __init __get_smp_config(unsigned int early)
 	}
 
 	printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
-	       mpf->mpf_specification);
+	       mpf->specification);
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
-	if (mpf->mpf_feature2 & (1 << 7)) {
+	if (mpf->feature2 & (1 << 7)) {
 		printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
 		pic_mode = 1;
 	} else {
@@ -610,7 +610,7 @@ static void __init __get_smp_config(unsigned int early)
 	/*
 	 * Now see if we need to read further.
 	 */
-	if (mpf->mpf_feature1 != 0) {
+	if (mpf->feature1 != 0) {
 		if (early) {
 			/*
 			 * local APIC has default address
@@ -620,16 +620,16 @@ static void __init __get_smp_config(unsigned int early)
 		}
 
 		printk(KERN_INFO "Default MP configuration #%d\n",
-		       mpf->mpf_feature1);
-		construct_default_ISA_mptable(mpf->mpf_feature1);
+		       mpf->feature1);
+		construct_default_ISA_mptable(mpf->feature1);
 
-	} else if (mpf->mpf_physptr) {
+	} else if (mpf->physptr) {
 
 		/*
 		 * Read the physical hardware table.  Anything here will
 		 * override the defaults.
 		 */
-		if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) {
+		if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) {
 #ifdef CONFIG_X86_LOCAL_APIC
 			smp_found_config = 0;
 #endif
@@ -696,10 +696,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 	while (length > 0) {
 		mpf = (struct mpf_intel *)bp;
 		if ((*bp == SMP_MAGIC_IDENT) &&
-		    (mpf->mpf_length == 1) &&
+		    (mpf->length == 1) &&
 		    !mpf_checksum((unsigned char *)bp, 16) &&
-		    ((mpf->mpf_specification == 1)
-		     || (mpf->mpf_specification == 4))) {
+		    ((mpf->specification == 1)
+		     || (mpf->specification == 4))) {
 #ifdef CONFIG_X86_LOCAL_APIC
 			smp_found_config = 1;
 #endif
@@ -712,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 				return 1;
 			reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
 					BOOTMEM_DEFAULT);
-			if (mpf->mpf_physptr) {
+			if (mpf->physptr) {
 				unsigned long size = PAGE_SIZE;
 #ifdef CONFIG_X86_32
 				/*
@@ -721,14 +721,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 				 * the bottom is mapped now.
 				 * PC-9800's MPC table places on the very last
 				 * of physical memory; so that simply reserving
-				 * PAGE_SIZE from mpg->mpf_physptr yields BUG()
+				 * PAGE_SIZE from mpf->physptr yields BUG()
 				 * in reserve_bootmem.
 				 */
 				unsigned long end = max_low_pfn * PAGE_SIZE;
-				if (mpf->mpf_physptr + size > end)
-					size = end - mpf->mpf_physptr;
+				if (mpf->physptr + size > end)
+					size = end - mpf->physptr;
 #endif
-				reserve_bootmem_generic(mpf->mpf_physptr, size,
+				reserve_bootmem_generic(mpf->physptr, size,
 						BOOTMEM_DEFAULT);
 			}
 
@@ -1013,19 +1013,19 @@ static int __init update_mp_table(void)
 	/*
 	 * Now see if we need to go further.
 	 */
-	if (mpf->mpf_feature1 != 0)
+	if (mpf->feature1 != 0)
 		return 0;
 
-	if (!mpf->mpf_physptr)
+	if (!mpf->physptr)
 		return 0;
 
-	mpc = phys_to_virt(mpf->mpf_physptr);
+	mpc = phys_to_virt(mpf->physptr);
 
 	if (!smp_check_mpc(mpc, oem, str))
 		return 0;
 
 	printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
-	printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
+	printk(KERN_INFO "physptr: %x\n", mpf->physptr);
 
 	if (mpc_new_phys && mpc->length > mpc_new_length) {
 		mpc_new_phys = 0;
@@ -1046,23 +1046,23 @@ static int __init update_mp_table(void)
 		}
 		printk(KERN_INFO "use in-positon replacing\n");
 	} else {
-		mpf->mpf_physptr = mpc_new_phys;
+		mpf->physptr = mpc_new_phys;
 		mpc_new = phys_to_virt(mpc_new_phys);
 		memcpy(mpc_new, mpc, mpc->length);
 		mpc = mpc_new;
 		/* check if we can modify that */
-		if (mpc_new_phys - mpf->mpf_physptr) {
+		if (mpc_new_phys - mpf->physptr) {
 			struct mpf_intel *mpf_new;
 			/* steal 16 bytes from [0, 1k) */
 			printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
 			mpf_new = phys_to_virt(0x400 - 16);
 			memcpy(mpf_new, mpf, 16);
 			mpf = mpf_new;
-			mpf->mpf_physptr = mpc_new_phys;
+			mpf->physptr = mpc_new_phys;
 		}
-		mpf->mpf_checksum = 0;
-		mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16);
-		printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr);
+		mpf->checksum = 0;
+		mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
+		printk(KERN_INFO "physptr new: %x\n", mpf->physptr);
 	}
 
 	/*
-- 
cgit v1.2.3


From 068790334cececc3d2d945617ccc585477da2e38 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Sat, 10 Jan 2009 12:17:37 +0530
Subject: x86: smp.h move cpu_callin_mask and cpu_callin_map declartion to
 cpumask.h

Impact: cleanup

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpumask.h | 19 +++++++++++++++++++
 arch/x86/include/asm/smp.h     |  3 ---
 arch/x86/kernel/cpu/common.c   |  1 +
 arch/x86/kernel/setup_percpu.c |  1 +
 arch/x86/kernel/smpboot.c      |  1 +
 5 files changed, 22 insertions(+), 3 deletions(-)
 create mode 100644 arch/x86/include/asm/cpumask.h

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
new file mode 100644
index 000000000000..308dddd56329
--- /dev/null
+++ b/arch/x86/include/asm/cpumask.h
@@ -0,0 +1,19 @@
+#ifndef _ASM_X86_CPUMASK_H
+#define _ASM_X86_CPUMASK_H
+#ifndef __ASSEMBLY__
+#include <linux/cpumask.h>
+
+#ifdef CONFIG_X86_64
+
+extern cpumask_var_t cpu_callin_mask;
+
+#else /* CONFIG_X86_32 */
+
+extern cpumask_t cpu_callin_map;
+
+#define cpu_callin_mask		((struct cpumask *)&cpu_callin_map)
+
+#endif /* CONFIG_X86_32 */
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_CPUMASK_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1963e27673c9..c35aa5c0dd11 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -20,19 +20,16 @@
 
 #ifdef CONFIG_X86_64
 
-extern cpumask_var_t cpu_callin_mask;
 extern cpumask_var_t cpu_callout_mask;
 extern cpumask_var_t cpu_initialized_mask;
 extern cpumask_var_t cpu_sibling_setup_mask;
 
 #else /* CONFIG_X86_32 */
 
-extern cpumask_t cpu_callin_map;
 extern cpumask_t cpu_callout_map;
 extern cpumask_t cpu_initialized;
 extern cpumask_t cpu_sibling_setup_map;
 
-#define cpu_callin_mask		((struct cpumask *)&cpu_callin_map)
 #define cpu_callout_mask	((struct cpumask *)&cpu_callout_map)
 #define cpu_initialized_mask	((struct cpumask *)&cpu_initialized)
 #define cpu_sibling_setup_mask	((struct cpumask *)&cpu_sibling_setup_map)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 14e543b6fd4f..f00258462444 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -22,6 +22,7 @@
 #include <asm/numa.h>
 #include <asm/smp.h>
 #include <asm/cpu.h>
+#include <asm/cpumask.h>
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/mpspec.h>
 #include <asm/apic.h>
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 55c46074eba0..bf63de72b643 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -13,6 +13,7 @@
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
 #include <asm/highmem.h>
+#include <asm/cpumask.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 unsigned int num_processors;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6c2b8444b830..84ac1cf46d87 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -55,6 +55,7 @@
 #include <asm/idle.h>
 #include <asm/trampoline.h>
 #include <asm/cpu.h>
+#include <asm/cpumask.h>
 #include <asm/numa.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
-- 
cgit v1.2.3


From fb8fd077fbf0de6662acfd240e8e6b25cf3202ca Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Sat, 10 Jan 2009 12:20:24 +0530
Subject: x86: smp.h move cpu_callout_mask and cpu_callout_map declartion to
 cpumask.h

Impact: cleanup

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpumask.h | 3 +++
 arch/x86/include/asm/smp.h     | 4 +---
 arch/x86/kernel/smpboot.c      | 1 -
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 308dddd56329..9933fcad3c82 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -6,12 +6,15 @@
 #ifdef CONFIG_X86_64
 
 extern cpumask_var_t cpu_callin_mask;
+extern cpumask_var_t cpu_callout_mask;
 
 #else /* CONFIG_X86_32 */
 
 extern cpumask_t cpu_callin_map;
+extern cpumask_t cpu_callout_map;
 
 #define cpu_callin_mask		((struct cpumask *)&cpu_callin_map)
+#define cpu_callout_mask	((struct cpumask *)&cpu_callout_map)
 
 #endif /* CONFIG_X86_32 */
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index c35aa5c0dd11..a3afec5cad0b 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -17,20 +17,18 @@
 #endif
 #include <asm/pda.h>
 #include <asm/thread_info.h>
+#include <asm/cpumask.h>
 
 #ifdef CONFIG_X86_64
 
-extern cpumask_var_t cpu_callout_mask;
 extern cpumask_var_t cpu_initialized_mask;
 extern cpumask_var_t cpu_sibling_setup_mask;
 
 #else /* CONFIG_X86_32 */
 
-extern cpumask_t cpu_callout_map;
 extern cpumask_t cpu_initialized;
 extern cpumask_t cpu_sibling_setup_map;
 
-#define cpu_callout_mask	((struct cpumask *)&cpu_callout_map)
 #define cpu_initialized_mask	((struct cpumask *)&cpu_initialized)
 #define cpu_sibling_setup_mask	((struct cpumask *)&cpu_sibling_setup_map)
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 84ac1cf46d87..6c2b8444b830 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -55,7 +55,6 @@
 #include <asm/idle.h>
 #include <asm/trampoline.h>
 #include <asm/cpu.h>
-#include <asm/cpumask.h>
 #include <asm/numa.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
-- 
cgit v1.2.3


From 493f6ca54e1ea59732dd334e35c5fe2d8e440b06 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Sat, 10 Jan 2009 12:48:22 +0530
Subject: x86: smp.h move cpu_initialized_mask and cpu_initialized declartion
 to cpumask.h

Impact: cleanup

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpumask.h | 3 +++
 arch/x86/include/asm/smp.h     | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 9933fcad3c82..d4cfd120b84d 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -7,14 +7,17 @@
 
 extern cpumask_var_t cpu_callin_mask;
 extern cpumask_var_t cpu_callout_mask;
+extern cpumask_var_t cpu_initialized_mask;
 
 #else /* CONFIG_X86_32 */
 
 extern cpumask_t cpu_callin_map;
 extern cpumask_t cpu_callout_map;
+extern cpumask_t cpu_initialized;
 
 #define cpu_callin_mask		((struct cpumask *)&cpu_callin_map)
 #define cpu_callout_mask	((struct cpumask *)&cpu_callout_map)
+#define cpu_initialized_mask	((struct cpumask *)&cpu_initialized)
 
 #endif /* CONFIG_X86_32 */
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index a3afec5cad0b..7d2a80319e82 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -21,15 +21,12 @@
 
 #ifdef CONFIG_X86_64
 
-extern cpumask_var_t cpu_initialized_mask;
 extern cpumask_var_t cpu_sibling_setup_mask;
 
 #else /* CONFIG_X86_32 */
 
-extern cpumask_t cpu_initialized;
 extern cpumask_t cpu_sibling_setup_map;
 
-#define cpu_initialized_mask	((struct cpumask *)&cpu_initialized)
 #define cpu_sibling_setup_mask	((struct cpumask *)&cpu_sibling_setup_map)
 
 #endif /* CONFIG_X86_32 */
-- 
cgit v1.2.3


From 52811d8c9beb67da6bc4b770de3c4134376788a1 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Sat, 10 Jan 2009 12:58:50 +0530
Subject: x86: smp.h move cpu_sibling_setup_mask and cpu_sibling_setup_map
 declartion to cpumask.h

Impact: cleanup

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/cpumask.h |  3 +++
 arch/x86/include/asm/smp.h     | 12 ------------
 2 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index d4cfd120b84d..26c6dad90479 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -8,16 +8,19 @@
 extern cpumask_var_t cpu_callin_mask;
 extern cpumask_var_t cpu_callout_mask;
 extern cpumask_var_t cpu_initialized_mask;
+extern cpumask_var_t cpu_sibling_setup_mask;
 
 #else /* CONFIG_X86_32 */
 
 extern cpumask_t cpu_callin_map;
 extern cpumask_t cpu_callout_map;
 extern cpumask_t cpu_initialized;
+extern cpumask_t cpu_sibling_setup_map;
 
 #define cpu_callin_mask		((struct cpumask *)&cpu_callin_map)
 #define cpu_callout_mask	((struct cpumask *)&cpu_callout_map)
 #define cpu_initialized_mask	((struct cpumask *)&cpu_initialized)
+#define cpu_sibling_setup_mask	((struct cpumask *)&cpu_sibling_setup_map)
 
 #endif /* CONFIG_X86_32 */
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 7d2a80319e82..a8cea7b09434 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -19,18 +19,6 @@
 #include <asm/thread_info.h>
 #include <asm/cpumask.h>
 
-#ifdef CONFIG_X86_64
-
-extern cpumask_var_t cpu_sibling_setup_mask;
-
-#else /* CONFIG_X86_32 */
-
-extern cpumask_t cpu_sibling_setup_map;
-
-#define cpu_sibling_setup_mask	((struct cpumask *)&cpu_sibling_setup_map)
-
-#endif /* CONFIG_X86_32 */
-
 extern int __cpuinit get_local_pda(int cpu);
 
 extern int smp_num_siblings;
-- 
cgit v1.2.3


From 4595f9620cda8a1e973588e743cf5f8436dd20c6 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 10 Jan 2009 21:58:09 -0800
Subject: x86: change flush_tlb_others to take a const struct cpumask

Impact: reduce stack usage, use new cpumask API.

This is made a little more tricky by uv_flush_tlb_others which
actually alters its argument, for an IPI to be sent to the remaining
cpus in the mask.

I solve this by allocating a cpumask_var_t for this case and falling back
to IPI should this fail.

To eliminate temporaries in the caller, all flush_tlb_others implementations
now do the this-cpu-elimination step themselves.

Note also the curious "cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask)"
which has been there since pre-git and yet f->flush_cpumask is always zero
at this point.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/include/asm/paravirt.h  |  8 +++--
 arch/x86/include/asm/tlbflush.h  |  8 ++---
 arch/x86/include/asm/uv/uv_bau.h |  3 +-
 arch/x86/kernel/tlb_32.c         | 67 +++++++++++++++++-----------------------
 arch/x86/kernel/tlb_64.c         | 61 +++++++++++++++++++-----------------
 arch/x86/kernel/tlb_uv.c         | 16 +++++-----
 arch/x86/xen/enlighten.c         | 31 +++++++------------
 7 files changed, 93 insertions(+), 101 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ba3e2ff6aedc..c26c6bf4da00 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -244,7 +244,8 @@ struct pv_mmu_ops {
 	void (*flush_tlb_user)(void);
 	void (*flush_tlb_kernel)(void);
 	void (*flush_tlb_single)(unsigned long addr);
-	void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
+	void (*flush_tlb_others)(const struct cpumask *cpus,
+				 struct mm_struct *mm,
 				 unsigned long va);
 
 	/* Hooks for allocating and freeing a pagetable top-level */
@@ -984,10 +985,11 @@ static inline void __flush_tlb_single(unsigned long addr)
 	PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
 }
 
-static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+static inline void flush_tlb_others(const struct cpumask *cpumask,
+				    struct mm_struct *mm,
 				    unsigned long va)
 {
-	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
+	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
 }
 
 static inline int paravirt_pgd_alloc(struct mm_struct *mm)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0e7bbb549116..f4e1b550ce61 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -113,7 +113,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 		__flush_tlb();
 }
 
-static inline void native_flush_tlb_others(const cpumask_t *cpumask,
+static inline void native_flush_tlb_others(const struct cpumask *cpumask,
 					   struct mm_struct *mm,
 					   unsigned long va)
 {
@@ -142,8 +142,8 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 	flush_tlb_mm(vma->vm_mm);
 }
 
-void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
-			     unsigned long va);
+void native_flush_tlb_others(const struct cpumask *cpumask,
+			     struct mm_struct *mm, unsigned long va);
 
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
@@ -166,7 +166,7 @@ static inline void reset_lazy_tlbstate(void)
 #endif	/* SMP */
 
 #ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(&mask, mm, va)
+#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(mask, mm, va)
 #endif
 
 static inline void flush_tlb_kernel_range(unsigned long start,
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 50423c7b56b2..74e6393bfddb 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -325,7 +325,8 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
 #define cpubit_isset(cpu, bau_local_cpumask) \
 	test_bit((cpu), (bau_local_cpumask).bits)
 
-extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
+extern int uv_flush_tlb_others(struct cpumask *,
+			       struct mm_struct *, unsigned long);
 extern void uv_bau_message_intr1(void);
 extern void uv_bau_timeout_intr1(void);
 
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index ce5054642247..ec53818f4e38 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -20,7 +20,7 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
  *	Optimizations Manfred Spraul <manfred@colorfullife.com>
  */
 
-static cpumask_t flush_cpumask;
+static cpumask_var_t flush_cpumask;
 static struct mm_struct *flush_mm;
 static unsigned long flush_va;
 static DEFINE_SPINLOCK(tlbstate_lock);
@@ -92,7 +92,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 
 	cpu = get_cpu();
 
-	if (!cpu_isset(cpu, flush_cpumask))
+	if (!cpumask_test_cpu(cpu, flush_cpumask))
 		goto out;
 		/*
 		 * This was a BUG() but until someone can quote me the
@@ -114,35 +114,22 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 	}
 	ack_APIC_irq();
 	smp_mb__before_clear_bit();
-	cpu_clear(cpu, flush_cpumask);
+	cpumask_clear_cpu(cpu, flush_cpumask);
 	smp_mb__after_clear_bit();
 out:
 	put_cpu_no_resched();
 	inc_irq_stat(irq_tlb_count);
 }
 
-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
-			     unsigned long va)
+void native_flush_tlb_others(const struct cpumask *cpumask,
+			     struct mm_struct *mm, unsigned long va)
 {
-	cpumask_t cpumask = *cpumaskp;
-
 	/*
-	 * A couple of (to be removed) sanity checks:
-	 *
-	 * - current CPU must not be in mask
 	 * - mask must exist :)
 	 */
-	BUG_ON(cpus_empty(cpumask));
-	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+	BUG_ON(cpumask_empty(cpumask));
 	BUG_ON(!mm);
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* If a CPU which we ran on has gone down, OK. */
-	cpus_and(cpumask, cpumask, cpu_online_map);
-	if (unlikely(cpus_empty(cpumask)))
-		return;
-#endif
-
 	/*
 	 * i'm not happy about this global shared spinlock in the
 	 * MM hot path, but we'll see how contended it is.
@@ -150,9 +137,17 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	 */
 	spin_lock(&tlbstate_lock);
 
+	cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
+#ifdef CONFIG_HOTPLUG_CPU
+	/* If a CPU which we ran on has gone down, OK. */
+	cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
+	if (unlikely(cpumask_empty(flush_cpumask))) {
+		spin_unlock(&tlbstate_lock);
+		return;
+	}
+#endif
 	flush_mm = mm;
 	flush_va = va;
-	cpus_or(flush_cpumask, cpumask, flush_cpumask);
 
 	/*
 	 * Make the above memory operations globally visible before
@@ -163,9 +158,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
+	send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
 
-	while (!cpus_empty(flush_cpumask))
+	while (!cpumask_empty(flush_cpumask))
 		/* nothing. lockup detection does not belong here */
 		cpu_relax();
 
@@ -177,25 +172,19 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 void flush_tlb_current_task(void)
 {
 	struct mm_struct *mm = current->mm;
-	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
 
 	local_flush_tlb();
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
 
 	if (current->active_mm == mm) {
 		if (current->mm)
@@ -203,8 +192,8 @@ void flush_tlb_mm(struct mm_struct *mm)
 		else
 			leave_mm(smp_processor_id());
 	}
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
 
 	preempt_enable();
 }
@@ -212,11 +201,8 @@ void flush_tlb_mm(struct mm_struct *mm)
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
 
 	if (current->active_mm == mm) {
 		if (current->mm)
@@ -225,9 +211,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 			leave_mm(smp_processor_id());
 	}
 
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, va);
-
+	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(&mm->cpu_vm_mask, mm, va);
 	preempt_enable();
 }
 EXPORT_SYMBOL(flush_tlb_page);
@@ -254,3 +239,9 @@ void reset_lazy_tlbstate(void)
 	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
 }
 
+static int init_flush_cpumask(void)
+{
+	alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
+	return 0;
+}
+early_initcall(init_flush_cpumask);
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index f8be6f1d2e48..38836aef51b4 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -43,10 +43,10 @@
 
 union smp_flush_state {
 	struct {
-		cpumask_t flush_cpumask;
 		struct mm_struct *flush_mm;
 		unsigned long flush_va;
 		spinlock_t tlbstate_lock;
+		DECLARE_BITMAP(flush_cpumask, NR_CPUS);
 	};
 	char pad[SMP_CACHE_BYTES];
 } ____cacheline_aligned;
@@ -131,7 +131,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 	sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
 	f = &per_cpu(flush_state, sender);
 
-	if (!cpu_isset(cpu, f->flush_cpumask))
+	if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
 		goto out;
 		/*
 		 * This was a BUG() but until someone can quote me the
@@ -153,19 +153,15 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 	}
 out:
 	ack_APIC_irq();
-	cpu_clear(cpu, f->flush_cpumask);
+	cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
 	inc_irq_stat(irq_tlb_count);
 }
 
-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
-			     unsigned long va)
+static void flush_tlb_others_ipi(const struct cpumask *cpumask,
+				 struct mm_struct *mm, unsigned long va)
 {
 	int sender;
 	union smp_flush_state *f;
-	cpumask_t cpumask = *cpumaskp;
-
-	if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
-		return;
 
 	/* Caller has disabled preemption */
 	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
@@ -180,7 +176,8 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 
 	f->flush_mm = mm;
 	f->flush_va = va;
-	cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
+	cpumask_andnot(to_cpumask(f->flush_cpumask),
+		       cpumask, cpumask_of(smp_processor_id()));
 
 	/*
 	 * Make the above memory operations globally visible before
@@ -191,9 +188,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
 
-	while (!cpus_empty(f->flush_cpumask))
+	while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
 		cpu_relax();
 
 	f->flush_mm = NULL;
@@ -201,6 +198,24 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	spin_unlock(&f->tlbstate_lock);
 }
 
+void native_flush_tlb_others(const struct cpumask *cpumask,
+			     struct mm_struct *mm, unsigned long va)
+{
+	if (is_uv_system()) {
+		cpumask_var_t after_uv_flush;
+
+		if (alloc_cpumask_var(&after_uv_flush, GFP_ATOMIC)) {
+			cpumask_andnot(after_uv_flush,
+				       cpumask, cpumask_of(smp_processor_id()));
+			if (!uv_flush_tlb_others(after_uv_flush, mm, va))
+				flush_tlb_others_ipi(after_uv_flush, mm, va);
+			free_cpumask_var(after_uv_flush);
+			return;
+		}
+	}
+	flush_tlb_others_ipi(cpumask, mm, va);
+}
+
 static int __cpuinit init_smp_flush(void)
 {
 	int i;
@@ -215,25 +230,18 @@ core_initcall(init_smp_flush);
 void flush_tlb_current_task(void)
 {
 	struct mm_struct *mm = current->mm;
-	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
 
 	local_flush_tlb();
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	cpumask_t cpu_mask;
-
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
 
 	if (current->active_mm == mm) {
 		if (current->mm)
@@ -241,8 +249,8 @@ void flush_tlb_mm(struct mm_struct *mm)
 		else
 			leave_mm(smp_processor_id());
 	}
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
 
 	preempt_enable();
 }
@@ -250,11 +258,8 @@ void flush_tlb_mm(struct mm_struct *mm)
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
 
 	if (current->active_mm == mm) {
 		if (current->mm)
@@ -263,8 +268,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 			leave_mm(smp_processor_id());
 	}
 
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, va);
+	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(&mm->cpu_vm_mask, mm, va);
 
 	preempt_enable();
 }
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index f885023167e0..690dcf1a27d4 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -212,11 +212,11 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
  * The cpumaskp mask contains the cpus the broadcast was sent to.
  *
  * Returns 1 if all remote flushing was done. The mask is zeroed.
- * Returns 0 if some remote flushing remains to be done. The mask is left
- * unchanged.
+ * Returns 0 if some remote flushing remains to be done. The mask will have
+ * some bits still set.
  */
 int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
-			   cpumask_t *cpumaskp)
+			   struct cpumask *cpumaskp)
 {
 	int completion_status = 0;
 	int right_shift;
@@ -263,13 +263,13 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
 	 * Success, so clear the remote cpu's from the mask so we don't
 	 * use the IPI method of shootdown on them.
 	 */
-	for_each_cpu_mask(bit, *cpumaskp) {
+	for_each_cpu(bit, cpumaskp) {
 		blade = uv_cpu_to_blade_id(bit);
 		if (blade == this_blade)
 			continue;
-		cpu_clear(bit, *cpumaskp);
+		cpumask_clear_cpu(bit, cpumaskp);
 	}
-	if (!cpus_empty(*cpumaskp))
+	if (!cpumask_empty(cpumaskp))
 		return 0;
 	return 1;
 }
@@ -296,7 +296,7 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
  * Returns 1 if all remote flushing was done.
  * Returns 0 if some remote flushing remains to be done.
  */
-int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
+int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
 			unsigned long va)
 {
 	int i;
@@ -315,7 +315,7 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
 	bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 
 	i = 0;
-	for_each_cpu_mask(bit, *cpumaskp) {
+	for_each_cpu(bit, cpumaskp) {
 		blade = uv_cpu_to_blade_id(bit);
 		BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
 		if (blade == this_blade) {
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bea215230b20..965539ec425f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -634,35 +634,27 @@ static void xen_flush_tlb_single(unsigned long addr)
 	preempt_enable();
 }
 
-static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
-				 unsigned long va)
+static void xen_flush_tlb_others(const struct cpumask *cpus,
+				 struct mm_struct *mm, unsigned long va)
 {
 	struct {
 		struct mmuext_op op;
-		cpumask_t mask;
+		DECLARE_BITMAP(mask, NR_CPUS);
 	} *args;
-	cpumask_t cpumask = *cpus;
 	struct multicall_space mcs;
 
-	/*
-	 * A couple of (to be removed) sanity checks:
-	 *
-	 * - current CPU must not be in mask
-	 * - mask must exist :)
-	 */
-	BUG_ON(cpus_empty(cpumask));
-	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+	BUG_ON(cpumask_empty(cpus));
 	BUG_ON(!mm);
 
-	/* If a CPU which we ran on has gone down, OK. */
-	cpus_and(cpumask, cpumask, cpu_online_map);
-	if (cpus_empty(cpumask))
-		return;
-
 	mcs = xen_mc_entry(sizeof(*args));
 	args = mcs.args;
-	args->mask = cpumask;
-	args->op.arg2.vcpumask = &args->mask;
+	args->op.arg2.vcpumask = to_cpumask(args->mask);
+
+	/* Remove us, and any offline CPUS. */
+	cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
+	if (unlikely(cpumask_empty(to_cpumask(args->mask))))
+		goto issue;
 
 	if (va == TLB_FLUSH_ALL) {
 		args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
@@ -673,6 +665,7 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
 
 	MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
 
+issue:
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 
-- 
cgit v1.2.3


From 9332fccdedf8e09448f3b69b624211ae879f6c45 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Sat, 10 Jan 2009 22:24:07 -0800
Subject: irq: initialize nr_irqs based on nr_cpu_ids

Impact: Reduce memory usage.

This is the second half of the changes to make the irq_desc_ptrs be
variable sized based on nr_cpu_ids.  This is done by adding a new
"max_nr_irqs" macro to irq_vectors.h (and a dummy in irqnr.h) to
return a max NR_IRQS value based on NR_CPUS or nr_cpu_ids.

This necessitated moving the define of MAX_IO_APICS to a separate
file (asm/apicnum.h) so it could be included without the baggage
of the other asm/apicdef.h declarations.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/include/asm/apicdef.h     |  8 ++------
 arch/x86/include/asm/apicnum.h     | 12 ++++++++++++
 arch/x86/include/asm/irq_vectors.h | 16 +++++++++++-----
 include/linux/irqnr.h              |  7 +++++++
 kernel/irq/handle.c                |  3 +++
 5 files changed, 35 insertions(+), 11 deletions(-)
 create mode 100644 arch/x86/include/asm/apicnum.h

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 63134e31e8b9..1a6454ef7f6c 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -132,12 +132,8 @@
 #define APIC_BASE_MSR	0x800
 #define X2APIC_ENABLE	(1UL << 10)
 
-#ifdef CONFIG_X86_32
-# define MAX_IO_APICS 64
-#else
-# define MAX_IO_APICS 128
-# define MAX_LOCAL_APIC 32768
-#endif
+/* get MAX_IO_APICS */
+#include <asm/apicnum.h>
 
 /*
  * All x86-64 systems are xAPIC compatible.
diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h
new file mode 100644
index 000000000000..82f613c607ce
--- /dev/null
+++ b/arch/x86/include/asm/apicnum.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_X86_APICNUM_H
+#define _ASM_X86_APICNUM_H
+
+/* define MAX_IO_APICS */
+#ifdef CONFIG_X86_32
+# define MAX_IO_APICS 64
+#else
+# define MAX_IO_APICS 128
+# define MAX_LOCAL_APIC 32768
+#endif
+
+#endif /* _ASM_X86_APICNUM_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index f7ff65032b9d..602361ad0e74 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -105,6 +105,8 @@
 
 #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
 
+#include <asm/apicnum.h>	/* need MAX_IO_APICS */
+
 #ifndef CONFIG_SPARSE_IRQ
 # if NR_CPUS < MAX_IO_APICS
 #  define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
@@ -112,11 +114,15 @@
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
 #else
-# if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
-#  define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
-# else
-#  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
-# endif
+
+/* defined as a macro so nr_irqs = max_nr_irqs(nr_cpu_ids) can be used */
+# define max_nr_irqs(nr_cpus)				\
+	((8 * nr_cpus) > (32 * MAX_IO_APICS) ?		\
+		(NR_VECTORS + (8 * NR_CPUS)) :		\
+		(NR_VECTORS + (32 * MAX_IO_APICS)))	\
+
+# define NR_IRQS max_nr_irqs(NR_CPUS)
+
 #endif
 
 #elif defined(CONFIG_X86_VOYAGER)
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 86af92e9e84c..de66e4e10406 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -20,11 +20,18 @@
 
 # define for_each_irq_desc_reverse(irq, desc)                          \
 	for (irq = nr_irqs - 1; irq >= 0; irq--)
+
 #else /* CONFIG_GENERIC_HARDIRQS */
 
+#include <asm/irq_vectors.h>	/* need possible max_nr_irqs() */
+
 extern int nr_irqs;
 extern struct irq_desc *irq_to_desc(unsigned int irq);
 
+# ifndef max_nr_irqs
+#  define max_nr_irqs(nr_cpus)	NR_IRQS
+# endif
+
 # define for_each_irq_desc(irq, desc)					\
 	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
 	     irq++, desc = irq_to_desc(irq))				\
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index d0b8f7e72790..ebba7a116f14 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -133,6 +133,9 @@ int __init early_irq_init(void)
 	int legacy_count;
 	int i;
 
+	/* initialize nr_irqs based on nr_cpu_ids */
+	nr_irqs = max_nr_irqs(nr_cpu_ids);
+
 	printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs);
 
 	desc = irq_desc_legacy;
-- 
cgit v1.2.3


From 92296c6d6e908c35fca287a21af27be814af9c75 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Sun, 11 Jan 2009 09:22:58 -0800
Subject: cpumask, irq: non-x86 build failures

Ingo Molnar wrote:

> All non-x86 architectures fail to build:
>
> In file included from /home/mingo/tip/include/linux/random.h:11,
>                  from /home/mingo/tip/include/linux/stackprotector.h:6,
>                  from /home/mingo/tip/init/main.c:17:
> /home/mingo/tip/include/linux/irqnr.h:26:63: error: asm/irq_vectors.h: No such file or directory

Do not include asm/irq_vectors.h in generic code - it's not available
on all architectures.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/apicdef.h | 8 ++++++--
 include/linux/irqnr.h          | 6 ------
 kernel/irq/handle.c            | 5 +++++
 3 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 1a6454ef7f6c..63134e31e8b9 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -132,8 +132,12 @@
 #define APIC_BASE_MSR	0x800
 #define X2APIC_ENABLE	(1UL << 10)
 
-/* get MAX_IO_APICS */
-#include <asm/apicnum.h>
+#ifdef CONFIG_X86_32
+# define MAX_IO_APICS 64
+#else
+# define MAX_IO_APICS 128
+# define MAX_LOCAL_APIC 32768
+#endif
 
 /*
  * All x86-64 systems are xAPIC compatible.
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index de66e4e10406..887477bc2ab0 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -23,15 +23,9 @@
 
 #else /* CONFIG_GENERIC_HARDIRQS */
 
-#include <asm/irq_vectors.h>	/* need possible max_nr_irqs() */
-
 extern int nr_irqs;
 extern struct irq_desc *irq_to_desc(unsigned int irq);
 
-# ifndef max_nr_irqs
-#  define max_nr_irqs(nr_cpus)	NR_IRQS
-# endif
-
 # define for_each_irq_desc(irq, desc)					\
 	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
 	     irq++, desc = irq_to_desc(irq))				\
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index b39f32ac8f80..04d3e46031e5 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -58,6 +58,11 @@ int nr_irqs = NR_IRQS;
 EXPORT_SYMBOL_GPL(nr_irqs);
 
 #ifdef CONFIG_SPARSE_IRQ
+
+#ifndef max_nr_irqs
+#define max_nr_irqs(nr_cpus)	NR_IRQS
+#endif
+
 static struct irq_desc irq_desc_init = {
 	.irq	    = -1,
 	.status	    = IRQ_DISABLED,
-- 
cgit v1.2.3


From 4a046d1754ee6ebb6f399696805ed61ea0444d4c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 12 Jan 2009 17:39:24 -0800
Subject: x86: arch_probe_nr_irqs

Impact: save RAM with large NR_CPUS, get smaller nr_irqs

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/include/asm/irq_vectors.h |  7 ++-----
 arch/x86/kernel/io_apic.c          | 16 ++++++++++++++++
 include/linux/interrupt.h          |  1 +
 kernel/irq/handle.c                |  9 ++-------
 kernel/softirq.c                   |  5 +++++
 5 files changed, 26 insertions(+), 12 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 602361ad0e74..a16a2ab2b429 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -115,14 +115,11 @@
 # endif
 #else
 
-/* defined as a macro so nr_irqs = max_nr_irqs(nr_cpu_ids) can be used */
-# define max_nr_irqs(nr_cpus)				\
-	((8 * nr_cpus) > (32 * MAX_IO_APICS) ?		\
+# define NR_IRQS					\
+	((8 * NR_CPUS) > (32 * MAX_IO_APICS) ?		\
 		(NR_VECTORS + (8 * NR_CPUS)) :		\
 		(NR_VECTORS + (32 * MAX_IO_APICS)))	\
 
-# define NR_IRQS max_nr_irqs(NR_CPUS)
-
 #endif
 
 #elif defined(CONFIG_X86_VOYAGER)
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index ae80638012de..157986916cd1 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3850,6 +3850,22 @@ void __init probe_nr_irqs_gsi(void)
 		nr_irqs_gsi = nr;
 }
 
+#ifdef CONFIG_SPARSE_IRQ
+int __init arch_probe_nr_irqs(void)
+{
+	int nr;
+
+	nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ?
+		(NR_VECTORS + (8 * nr_cpu_ids)) :
+		(NR_VECTORS + (32 * nr_ioapics)));
+
+	if (nr < nr_irqs && nr > nr_irqs_gsi)
+		nr_irqs = nr;
+
+	return 0;
+}
+#endif
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9127f6b51a39..472f11765f60 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -467,6 +467,7 @@ int show_interrupts(struct seq_file *p, void *v);
 struct irq_desc;
 
 extern int early_irq_init(void);
+extern int arch_probe_nr_irqs(void);
 extern int arch_early_irq_init(void);
 extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 04d3e46031e5..375d68cd5bf0 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -59,10 +59,6 @@ EXPORT_SYMBOL_GPL(nr_irqs);
 
 #ifdef CONFIG_SPARSE_IRQ
 
-#ifndef max_nr_irqs
-#define max_nr_irqs(nr_cpus)	NR_IRQS
-#endif
-
 static struct irq_desc irq_desc_init = {
 	.irq	    = -1,
 	.status	    = IRQ_DISABLED,
@@ -137,9 +133,8 @@ int __init early_irq_init(void)
 	int legacy_count;
 	int i;
 
-	/* initialize nr_irqs based on nr_cpu_ids */
-	nr_irqs = max_nr_irqs(nr_cpu_ids);
-
+	 /* initialize nr_irqs based on nr_cpu_ids */
+	arch_probe_nr_irqs();
 	printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs);
 
 	desc = irq_desc_legacy;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bdbe9de9cd8d..0365b4899a3d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -795,6 +795,11 @@ int __init __weak early_irq_init(void)
 	return 0;
 }
 
+int __init __weak arch_probe_nr_irqs(void)
+{
+	return 0;
+}
+
 int __init __weak arch_early_irq_init(void)
 {
 	return 0;
-- 
cgit v1.2.3


From b5ba7e6d1e7e2ac808afd21be1e56dc34caf20e6 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Mon, 12 Jan 2009 17:46:17 +0530
Subject: x86: replacing mp_config_ioapic with mpc_ioapic

Impact: cleanup, solve 80 columns wrap problems

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/io_apic.h | 10 +------
 arch/x86/kernel/acpi/boot.c    | 28 ++++++++++----------
 arch/x86/kernel/io_apic.c      | 60 ++++++++++++++++++++----------------------
 arch/x86/kernel/mpparse.c      | 12 ++++-----
 4 files changed, 50 insertions(+), 60 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 7a1f44ac1f17..5a56ae9b505a 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -120,14 +120,6 @@ extern int nr_ioapic_registers[MAX_IO_APICS];
 
 #define MP_MAX_IOAPIC_PIN 127
 
-struct mp_config_ioapic {
-	unsigned long mp_apicaddr;
-	unsigned int mp_apicid;
-	unsigned char mp_type;
-	unsigned char mp_apicver;
-	unsigned char mp_flags;
-};
-
 struct mp_config_intsrc {
 	unsigned int mp_dstapic;
 	unsigned char mp_type;
@@ -139,7 +131,7 @@ struct mp_config_intsrc {
 };
 
 /* I/O APIC entries */
-extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
+extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
 
 /* # of MP IRQ source entries */
 extern int mp_irq_entries;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d37593c2f438..2b27019e64f8 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -912,8 +912,8 @@ static u8 __init uniq_ioapic_id(u8 id)
 	DECLARE_BITMAP(used, 256);
 	bitmap_zero(used, 256);
 	for (i = 0; i < nr_ioapics; i++) {
-		struct mp_config_ioapic *ia = &mp_ioapics[i];
-		__set_bit(ia->mp_apicid, used);
+		struct mpc_ioapic *ia = &mp_ioapics[i];
+		__set_bit(ia->apicid, used);
 	}
 	if (!test_bit(id, used))
 		return id;
@@ -945,29 +945,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 
 	idx = nr_ioapics;
 
-	mp_ioapics[idx].mp_type = MP_IOAPIC;
-	mp_ioapics[idx].mp_flags = MPC_APIC_USABLE;
-	mp_ioapics[idx].mp_apicaddr = address;
+	mp_ioapics[idx].type = MP_IOAPIC;
+	mp_ioapics[idx].flags = MPC_APIC_USABLE;
+	mp_ioapics[idx].apicaddr = address;
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-	mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id);
+	mp_ioapics[idx].apicid = uniq_ioapic_id(id);
 #ifdef CONFIG_X86_32
-	mp_ioapics[idx].mp_apicver = io_apic_get_version(idx);
+	mp_ioapics[idx].apicver = io_apic_get_version(idx);
 #else
-	mp_ioapics[idx].mp_apicver = 0;
+	mp_ioapics[idx].apicver = 0;
 #endif
 	/*
 	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
 	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
 	 */
-	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid;
+	mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
 	mp_ioapic_routing[idx].gsi_base = gsi_base;
 	mp_ioapic_routing[idx].gsi_end = gsi_base +
 	    io_apic_get_redir_entries(idx);
 
-	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
-	       "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid,
-	       mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr,
+	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+	       "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
+	       mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
 	       mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
 
 	nr_ioapics++;
@@ -1026,7 +1026,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	mp_irq.mp_irqflag = (trigger << 2) | polarity;
 	mp_irq.mp_srcbus = MP_ISA_BUS;
 	mp_irq.mp_srcbusirq = bus_irq;	/* IRQ */
-	mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */
+	mp_irq.mp_dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
 	mp_irq.mp_dstirq = pin;	/* INTIN# */
 
 	save_mp_irq(&mp_irq);
@@ -1062,7 +1062,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	ioapic = mp_find_ioapic(0);
 	if (ioapic < 0)
 		return;
-	dstapic = mp_ioapics[ioapic].mp_apicid;
+	dstapic = mp_ioapics[ioapic].apicid;
 
 	/*
 	 * Use the default configuration for the IRQs 0-15.  Unless
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 109c91db2026..6c51ecdfbf49 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -83,7 +83,7 @@ static DEFINE_SPINLOCK(vector_lock);
 int nr_ioapic_registers[MAX_IO_APICS];
 
 /* I/O APIC entries */
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
+struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
 
 /* MP IRQ source entries */
@@ -387,7 +387,7 @@ struct io_apic {
 static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
 {
 	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-		+ (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
+		+ (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
 }
 
 static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -946,7 +946,7 @@ static int find_irq_entry(int apic, int pin, int type)
 
 	for (i = 0; i < mp_irq_entries; i++)
 		if (mp_irqs[i].mp_irqtype == type &&
-		    (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
+		    (mp_irqs[i].mp_dstapic == mp_ioapics[apic].apicid ||
 		     mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
 		    mp_irqs[i].mp_dstirq == pin)
 			return i;
@@ -988,7 +988,7 @@ static int __init find_isa_irq_apic(int irq, int type)
 	if (i < mp_irq_entries) {
 		int apic;
 		for(apic = 0; apic < nr_ioapics; apic++) {
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
+			if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic)
 				return apic;
 		}
 	}
@@ -1016,7 +1016,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 		int lbus = mp_irqs[i].mp_srcbus;
 
 		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
+			if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic ||
 			    mp_irqs[i].mp_dstapic == MP_APIC_ALL)
 				break;
 
@@ -1567,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
 		    "IRQ %d Mode:%i Active:%i)\n",
-		    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
+		    apic, mp_ioapics[apic].apicid, pin, cfg->vector,
 		    irq, trigger, polarity);
 
 
-	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+	if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry,
 			       dest, trigger, polarity, cfg->vector)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
-		       mp_ioapics[apic].mp_apicid, pin);
+		       mp_ioapics[apic].apicid, pin);
 		__clear_irq_vector(irq, cfg);
 		return;
 	}
@@ -1605,12 +1605,10 @@ static void __init setup_IO_APIC_irqs(void)
 					notcon = 1;
 					apic_printk(APIC_VERBOSE,
 						KERN_DEBUG " %d-%d",
-						mp_ioapics[apic].mp_apicid,
-						pin);
+						mp_ioapics[apic].apicid, pin);
 				} else
 					apic_printk(APIC_VERBOSE, " %d-%d",
-						mp_ioapics[apic].mp_apicid,
-						pin);
+						mp_ioapics[apic].apicid, pin);
 				continue;
 			}
 			if (notcon) {
@@ -1700,7 +1698,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
 	for (i = 0; i < nr_ioapics; i++)
 		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-		       mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
+		       mp_ioapics[i].apicid, nr_ioapic_registers[i]);
 
 	/*
 	 * We are a bit conservative about what we expect.  We have to
@@ -1720,7 +1718,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	printk("\n");
-	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
+	printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -2122,14 +2120,14 @@ static void __init setup_ioapic_ids_from_mpc(void)
 		reg_00.raw = io_apic_read(apic, 0);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 
-		old_id = mp_ioapics[apic].mp_apicid;
+		old_id = mp_ioapics[apic].apicid;
 
-		if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
+		if (mp_ioapics[apic].apicid >= get_physical_broadcast()) {
 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-				apic, mp_ioapics[apic].mp_apicid);
+				apic, mp_ioapics[apic].apicid);
 			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 				reg_00.bits.ID);
-			mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
+			mp_ioapics[apic].apicid = reg_00.bits.ID;
 		}
 
 		/*
@@ -2138,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
 		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 		 */
 		if (check_apicid_used(phys_id_present_map,
-					mp_ioapics[apic].mp_apicid)) {
+					mp_ioapics[apic].apicid)) {
 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-				apic, mp_ioapics[apic].mp_apicid);
+				apic, mp_ioapics[apic].apicid);
 			for (i = 0; i < get_physical_broadcast(); i++)
 				if (!physid_isset(i, phys_id_present_map))
 					break;
@@ -2149,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
 			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 				i);
 			physid_set(i, phys_id_present_map);
-			mp_ioapics[apic].mp_apicid = i;
+			mp_ioapics[apic].apicid = i;
 		} else {
 			physid_mask_t tmp;
-			tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
+			tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid);
 			apic_printk(APIC_VERBOSE, "Setting %d in the "
 					"phys_id_present_map\n",
-					mp_ioapics[apic].mp_apicid);
+					mp_ioapics[apic].apicid);
 			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 		}
 
@@ -2164,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
 		 * We need to adjust the IRQ routing table
 		 * if the ID changed.
 		 */
-		if (old_id != mp_ioapics[apic].mp_apicid)
+		if (old_id != mp_ioapics[apic].apicid)
 			for (i = 0; i < mp_irq_entries; i++)
 				if (mp_irqs[i].mp_dstapic == old_id)
 					mp_irqs[i].mp_dstapic
-						= mp_ioapics[apic].mp_apicid;
+						= mp_ioapics[apic].apicid;
 
 		/*
 		 * Read the right value from the MPC table and
@@ -2176,9 +2174,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
 		 */
 		apic_printk(APIC_VERBOSE, KERN_INFO
 			"...changing IO-APIC physical APIC ID to %d ...",
-			mp_ioapics[apic].mp_apicid);
+			mp_ioapics[apic].apicid);
 
-		reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
+		reg_00.bits.ID = mp_ioapics[apic].apicid;
 		spin_lock_irqsave(&ioapic_lock, flags);
 		io_apic_write(apic, 0, reg_00.raw);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2189,7 +2187,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
 		spin_lock_irqsave(&ioapic_lock, flags);
 		reg_00.raw = io_apic_read(apic, 0);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
-		if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
+		if (reg_00.bits.ID != mp_ioapics[apic].apicid)
 			printk("could not set ID!\n");
 		else
 			apic_printk(APIC_VERBOSE, " ok.\n");
@@ -3118,8 +3116,8 @@ static int ioapic_resume(struct sys_device *dev)
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	reg_00.raw = io_apic_read(dev->id, 0);
-	if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
-		reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
+	if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
+		reg_00.bits.ID = mp_ioapics[dev->id].apicid;
 		io_apic_write(dev->id, 0, reg_00.raw);
 	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -4101,7 +4099,7 @@ void __init ioapic_init_mappings(void)
 	ioapic_res = ioapic_setup_resources();
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
-			ioapic_phys = mp_ioapics[i].mp_apicaddr;
+			ioapic_phys = mp_ioapics[i].apicaddr;
 #ifdef CONFIG_X86_32
 			if (!ioapic_phys) {
 				printk(KERN_ERR
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 8385d4e7e15d..a86a65537433 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -143,11 +143,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m)
 	if (bad_ioapic(m->apicaddr))
 		return;
 
-	mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr;
-	mp_ioapics[nr_ioapics].mp_apicid = m->apicid;
-	mp_ioapics[nr_ioapics].mp_type = m->type;
-	mp_ioapics[nr_ioapics].mp_apicver = m->apicver;
-	mp_ioapics[nr_ioapics].mp_flags = m->flags;
+	mp_ioapics[nr_ioapics].apicaddr = m->apicaddr;
+	mp_ioapics[nr_ioapics].apicid = m->apicid;
+	mp_ioapics[nr_ioapics].type = m->type;
+	mp_ioapics[nr_ioapics].apicver = m->apicver;
+	mp_ioapics[nr_ioapics].flags = m->flags;
 	nr_ioapics++;
 }
 
@@ -416,7 +416,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
 	intsrc.type = MP_INTSRC;
 	intsrc.irqflag = 0;	/* conforming */
 	intsrc.srcbus = 0;
-	intsrc.dstapic = mp_ioapics[0].mp_apicid;
+	intsrc.dstapic = mp_ioapics[0].apicid;
 
 	intsrc.irqtype = mp_INT;
 
-- 
cgit v1.2.3


From c2c21745ecba23c74690a124bcd371f83bd71e45 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Date: Mon, 12 Jan 2009 17:47:22 +0530
Subject: x86: replacing mp_config_intsrc with mpc_intsrc

Impact: cleanup, solve 80 columns wrap problems

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/io_apic.h | 16 +---------
 arch/x86/kernel/acpi/boot.c    | 70 ++++++++++++++++++++----------------------
 arch/x86/kernel/io_apic.c      | 64 +++++++++++++++++++-------------------
 arch/x86/kernel/mpparse.c      | 68 ++++++++++++++++++++--------------------
 4 files changed, 101 insertions(+), 117 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 5a56ae9b505a..08ec793aa043 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -114,22 +114,8 @@ struct IR_IO_APIC_route_entry {
 extern int nr_ioapics;
 extern int nr_ioapic_registers[MAX_IO_APICS];
 
-/*
- * MP-BIOS irq configuration table structures:
- */
-
 #define MP_MAX_IOAPIC_PIN 127
 
-struct mp_config_intsrc {
-	unsigned int mp_dstapic;
-	unsigned char mp_type;
-	unsigned char mp_irqtype;
-	unsigned short mp_irqflag;
-	unsigned char mp_srcbus;
-	unsigned char mp_srcbusirq;
-	unsigned char mp_dstirq;
-};
-
 /* I/O APIC entries */
 extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
 
@@ -137,7 +123,7 @@ extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
 extern int mp_irq_entries;
 
 /* MP IRQ source entries */
-extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* non-0 if default (table-less) MP configuration */
 extern int mpc_default_type;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 2b27019e64f8..4cb5964f1499 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -973,19 +973,19 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 	nr_ioapics++;
 }
 
-static void assign_to_mp_irq(struct mp_config_intsrc *m,
-				    struct mp_config_intsrc *mp_irq)
+static void assign_to_mp_irq(struct mpc_intsrc *m,
+				    struct mpc_intsrc *mp_irq)
 {
-	memcpy(mp_irq, m, sizeof(struct mp_config_intsrc));
+	memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
 }
 
-static int mp_irq_cmp(struct mp_config_intsrc *mp_irq,
-				struct mp_config_intsrc *m)
+static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
+				struct mpc_intsrc *m)
 {
-	return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc));
+	return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
 }
 
-static void save_mp_irq(struct mp_config_intsrc *m)
+static void save_mp_irq(struct mpc_intsrc *m)
 {
 	int i;
 
@@ -1003,7 +1003,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 {
 	int ioapic;
 	int pin;
-	struct mp_config_intsrc mp_irq;
+	struct mpc_intsrc mp_irq;
 
 	/*
 	 * Convert 'gsi' to 'ioapic.pin'.
@@ -1021,13 +1021,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	if ((bus_irq == 0) && (trigger == 3))
 		trigger = 1;
 
-	mp_irq.mp_type = MP_INTSRC;
-	mp_irq.mp_irqtype = mp_INT;
-	mp_irq.mp_irqflag = (trigger << 2) | polarity;
-	mp_irq.mp_srcbus = MP_ISA_BUS;
-	mp_irq.mp_srcbusirq = bus_irq;	/* IRQ */
-	mp_irq.mp_dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
-	mp_irq.mp_dstirq = pin;	/* INTIN# */
+	mp_irq.type = MP_INTSRC;
+	mp_irq.irqtype = mp_INT;
+	mp_irq.irqflag = (trigger << 2) | polarity;
+	mp_irq.srcbus = MP_ISA_BUS;
+	mp_irq.srcbusirq = bus_irq;	/* IRQ */
+	mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
+	mp_irq.dstirq = pin;	/* INTIN# */
 
 	save_mp_irq(&mp_irq);
 }
@@ -1037,7 +1037,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	int i;
 	int ioapic;
 	unsigned int dstapic;
-	struct mp_config_intsrc mp_irq;
+	struct mpc_intsrc mp_irq;
 
 #if defined (CONFIG_MCA) || defined (CONFIG_EISA)
 	/*
@@ -1072,16 +1072,14 @@ void __init mp_config_acpi_legacy_irqs(void)
 		int idx;
 
 		for (idx = 0; idx < mp_irq_entries; idx++) {
-			struct mp_config_intsrc *irq = mp_irqs + idx;
+			struct mpc_intsrc *irq = mp_irqs + idx;
 
 			/* Do we already have a mapping for this ISA IRQ? */
-			if (irq->mp_srcbus == MP_ISA_BUS
-			    && irq->mp_srcbusirq == i)
+			if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i)
 				break;
 
 			/* Do we already have a mapping for this IOAPIC pin */
-			if (irq->mp_dstapic == dstapic &&
-			    irq->mp_dstirq == i)
+			if (irq->dstapic == dstapic && irq->dstirq == i)
 				break;
 		}
 
@@ -1090,13 +1088,13 @@ void __init mp_config_acpi_legacy_irqs(void)
 			continue;	/* IRQ already used */
 		}
 
-		mp_irq.mp_type = MP_INTSRC;
-		mp_irq.mp_irqflag = 0;	/* Conforming */
-		mp_irq.mp_srcbus = MP_ISA_BUS;
-		mp_irq.mp_dstapic = dstapic;
-		mp_irq.mp_irqtype = mp_INT;
-		mp_irq.mp_srcbusirq = i; /* Identity mapped */
-		mp_irq.mp_dstirq = i;
+		mp_irq.type = MP_INTSRC;
+		mp_irq.irqflag = 0;	/* Conforming */
+		mp_irq.srcbus = MP_ISA_BUS;
+		mp_irq.dstapic = dstapic;
+		mp_irq.irqtype = mp_INT;
+		mp_irq.srcbusirq = i; /* Identity mapped */
+		mp_irq.dstirq = i;
 
 		save_mp_irq(&mp_irq);
 	}
@@ -1207,22 +1205,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
 			u32 gsi, int triggering, int polarity)
 {
 #ifdef CONFIG_X86_MPPARSE
-	struct mp_config_intsrc mp_irq;
+	struct mpc_intsrc mp_irq;
 	int ioapic;
 
 	if (!acpi_ioapic)
 		return 0;
 
 	/* print the entry should happen on mptable identically */
-	mp_irq.mp_type = MP_INTSRC;
-	mp_irq.mp_irqtype = mp_INT;
-	mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+	mp_irq.type = MP_INTSRC;
+	mp_irq.irqtype = mp_INT;
+	mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
 				(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
-	mp_irq.mp_srcbus = number;
-	mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+	mp_irq.srcbus = number;
+	mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
 	ioapic = mp_find_ioapic(gsi);
-	mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id;
-	mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
+	mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
+	mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
 
 	save_mp_irq(&mp_irq);
 #endif
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 6c51ecdfbf49..79b8c0c72d34 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -87,7 +87,7 @@ struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
 
 /* MP IRQ source entries */
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* # of MP IRQ source entries */
 int mp_irq_entries;
@@ -945,10 +945,10 @@ static int find_irq_entry(int apic, int pin, int type)
 	int i;
 
 	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mp_irqtype == type &&
-		    (mp_irqs[i].mp_dstapic == mp_ioapics[apic].apicid ||
-		     mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
-		    mp_irqs[i].mp_dstirq == pin)
+		if (mp_irqs[i].irqtype == type &&
+		    (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
+		     mp_irqs[i].dstapic == MP_APIC_ALL) &&
+		    mp_irqs[i].dstirq == pin)
 			return i;
 
 	return -1;
@@ -962,13 +962,13 @@ static int __init find_isa_irq_pin(int irq, int type)
 	int i;
 
 	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
+		int lbus = mp_irqs[i].srcbus;
 
 		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mp_irqtype == type) &&
-		    (mp_irqs[i].mp_srcbusirq == irq))
+		    (mp_irqs[i].irqtype == type) &&
+		    (mp_irqs[i].srcbusirq == irq))
 
-			return mp_irqs[i].mp_dstirq;
+			return mp_irqs[i].dstirq;
 	}
 	return -1;
 }
@@ -978,17 +978,17 @@ static int __init find_isa_irq_apic(int irq, int type)
 	int i;
 
 	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
+		int lbus = mp_irqs[i].srcbus;
 
 		if (test_bit(lbus, mp_bus_not_pci) &&
-		    (mp_irqs[i].mp_irqtype == type) &&
-		    (mp_irqs[i].mp_srcbusirq == irq))
+		    (mp_irqs[i].irqtype == type) &&
+		    (mp_irqs[i].srcbusirq == irq))
 			break;
 	}
 	if (i < mp_irq_entries) {
 		int apic;
 		for(apic = 0; apic < nr_ioapics; apic++) {
-			if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic)
+			if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
 				return apic;
 		}
 	}
@@ -1013,23 +1013,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 		return -1;
 	}
 	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].mp_srcbus;
+		int lbus = mp_irqs[i].srcbus;
 
 		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic ||
-			    mp_irqs[i].mp_dstapic == MP_APIC_ALL)
+			if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+			    mp_irqs[i].dstapic == MP_APIC_ALL)
 				break;
 
 		if (!test_bit(lbus, mp_bus_not_pci) &&
-		    !mp_irqs[i].mp_irqtype &&
+		    !mp_irqs[i].irqtype &&
 		    (bus == lbus) &&
-		    (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
+		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
 
 			if (!(apic || IO_APIC_IRQ(irq)))
 				continue;
 
-			if (pin == (mp_irqs[i].mp_srcbusirq & 3))
+			if (pin == (mp_irqs[i].srcbusirq & 3))
 				return irq;
 			/*
 			 * Use the first all-but-pin matching entry as a
@@ -1072,7 +1072,7 @@ static int EISA_ELCR(unsigned int irq)
  * EISA conforming in the MP table, that means its trigger type must
  * be read in from the ELCR */
 
-#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
+#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].srcbusirq))
 #define default_EISA_polarity(idx)	default_ISA_polarity(idx)
 
 /* PCI interrupts are always polarity one level triggered,
@@ -1089,13 +1089,13 @@ static int EISA_ELCR(unsigned int irq)
 
 static int MPBIOS_polarity(int idx)
 {
-	int bus = mp_irqs[idx].mp_srcbus;
+	int bus = mp_irqs[idx].srcbus;
 	int polarity;
 
 	/*
 	 * Determine IRQ line polarity (high active or low active):
 	 */
-	switch (mp_irqs[idx].mp_irqflag & 3)
+	switch (mp_irqs[idx].irqflag & 3)
 	{
 		case 0: /* conforms, ie. bus-type dependent polarity */
 			if (test_bit(bus, mp_bus_not_pci))
@@ -1131,13 +1131,13 @@ static int MPBIOS_polarity(int idx)
 
 static int MPBIOS_trigger(int idx)
 {
-	int bus = mp_irqs[idx].mp_srcbus;
+	int bus = mp_irqs[idx].srcbus;
 	int trigger;
 
 	/*
 	 * Determine IRQ trigger mode (edge or level sensitive):
 	 */
-	switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
+	switch ((mp_irqs[idx].irqflag>>2) & 3)
 	{
 		case 0: /* conforms, ie. bus-type dependent */
 			if (test_bit(bus, mp_bus_not_pci))
@@ -1215,16 +1215,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq);
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
-	int bus = mp_irqs[idx].mp_srcbus;
+	int bus = mp_irqs[idx].srcbus;
 
 	/*
 	 * Debugging check, we are in big trouble if this message pops up!
 	 */
-	if (mp_irqs[idx].mp_dstirq != pin)
+	if (mp_irqs[idx].dstirq != pin)
 		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
 
 	if (test_bit(bus, mp_bus_not_pci)) {
-		irq = mp_irqs[idx].mp_srcbusirq;
+		irq = mp_irqs[idx].srcbusirq;
 	} else {
 		/*
 		 * PCI IRQs are mapped in order
@@ -2164,8 +2164,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
 		 */
 		if (old_id != mp_ioapics[apic].apicid)
 			for (i = 0; i < mp_irq_entries; i++)
-				if (mp_irqs[i].mp_dstapic == old_id)
-					mp_irqs[i].mp_dstapic
+				if (mp_irqs[i].dstapic == old_id)
+					mp_irqs[i].dstapic
 						= mp_ioapics[apic].apicid;
 
 		/*
@@ -3983,8 +3983,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 		return -1;
 
 	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].mp_irqtype == mp_INT &&
-		    mp_irqs[i].mp_srcbusirq == bus_irq)
+		if (mp_irqs[i].irqtype == mp_INT &&
+		    mp_irqs[i].srcbusirq == bus_irq)
 			break;
 	if (i >= mp_irq_entries)
 		return -1;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index a86a65537433..ad36377dc935 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -159,55 +159,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m)
 		m->srcbusirq, m->dstapic, m->dstirq);
 }
 
-static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
+static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
 {
 	apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
 		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
-		mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
-		(mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
-		mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
+		mp_irq->irqtype, mp_irq->irqflag & 3,
+		(mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
+		mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
 }
 
 static void __init assign_to_mp_irq(struct mpc_intsrc *m,
-				    struct mp_config_intsrc *mp_irq)
+				    struct mpc_intsrc *mp_irq)
 {
-	mp_irq->mp_dstapic = m->dstapic;
-	mp_irq->mp_type = m->type;
-	mp_irq->mp_irqtype = m->irqtype;
-	mp_irq->mp_irqflag = m->irqflag;
-	mp_irq->mp_srcbus = m->srcbus;
-	mp_irq->mp_srcbusirq = m->srcbusirq;
-	mp_irq->mp_dstirq = m->dstirq;
+	mp_irq->dstapic = m->dstapic;
+	mp_irq->type = m->type;
+	mp_irq->irqtype = m->irqtype;
+	mp_irq->irqflag = m->irqflag;
+	mp_irq->srcbus = m->srcbus;
+	mp_irq->srcbusirq = m->srcbusirq;
+	mp_irq->dstirq = m->dstirq;
 }
 
-static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
+static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
 					struct mpc_intsrc *m)
 {
-	m->dstapic = mp_irq->mp_dstapic;
-	m->type = mp_irq->mp_type;
-	m->irqtype = mp_irq->mp_irqtype;
-	m->irqflag = mp_irq->mp_irqflag;
-	m->srcbus = mp_irq->mp_srcbus;
-	m->srcbusirq = mp_irq->mp_srcbusirq;
-	m->dstirq = mp_irq->mp_dstirq;
+	m->dstapic = mp_irq->dstapic;
+	m->type = mp_irq->type;
+	m->irqtype = mp_irq->irqtype;
+	m->irqflag = mp_irq->irqflag;
+	m->srcbus = mp_irq->srcbus;
+	m->srcbusirq = mp_irq->srcbusirq;
+	m->dstirq = mp_irq->dstirq;
 }
 
-static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
+static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
 					struct mpc_intsrc *m)
 {
-	if (mp_irq->mp_dstapic != m->dstapic)
+	if (mp_irq->dstapic != m->dstapic)
 		return 1;
-	if (mp_irq->mp_type != m->type)
+	if (mp_irq->type != m->type)
 		return 2;
-	if (mp_irq->mp_irqtype != m->irqtype)
+	if (mp_irq->irqtype != m->irqtype)
 		return 3;
-	if (mp_irq->mp_irqflag != m->irqflag)
+	if (mp_irq->irqflag != m->irqflag)
 		return 4;
-	if (mp_irq->mp_srcbus != m->srcbus)
+	if (mp_irq->srcbus != m->srcbus)
 		return 5;
-	if (mp_irq->mp_srcbusirq != m->srcbusirq)
+	if (mp_irq->srcbusirq != m->srcbusirq)
 		return 6;
-	if (mp_irq->mp_dstirq != m->dstirq)
+	if (mp_irq->dstirq != m->dstirq)
 		return 7;
 
 	return 0;
@@ -808,15 +808,15 @@ static int  __init get_MP_intsrc_index(struct mpc_intsrc *m)
 	/* not legacy */
 
 	for (i = 0; i < mp_irq_entries; i++) {
-		if (mp_irqs[i].mp_irqtype != mp_INT)
+		if (mp_irqs[i].irqtype != mp_INT)
 			continue;
 
-		if (mp_irqs[i].mp_irqflag != 0x0f)
+		if (mp_irqs[i].irqflag != 0x0f)
 			continue;
 
-		if (mp_irqs[i].mp_srcbus != m->srcbus)
+		if (mp_irqs[i].srcbus != m->srcbus)
 			continue;
-		if (mp_irqs[i].mp_srcbusirq != m->srcbusirq)
+		if (mp_irqs[i].srcbusirq != m->srcbusirq)
 			continue;
 		if (irq_used[i]) {
 			/* already claimed */
@@ -921,10 +921,10 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
 		if (irq_used[i])
 			continue;
 
-		if (mp_irqs[i].mp_irqtype != mp_INT)
+		if (mp_irqs[i].irqtype != mp_INT)
 			continue;
 
-		if (mp_irqs[i].mp_irqflag != 0x0f)
+		if (mp_irqs[i].irqflag != 0x0f)
 			continue;
 
 		if (nr_m_spare > 0) {
-- 
cgit v1.2.3


From b665967979d0e990f196e7c4ba88e17c9ed9b781 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 12 Jan 2009 11:54:27 -0800
Subject: x86: make 32bit MAX_HARDIRQS_PER_CPU to be NR_VECTORS

Impact: clean up to be same as 64bit

32-bit is using per-cpu vector too, so don't use default NR_IRQS.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/hardirq_32.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
index cf7954d1405f..d4b5d731073f 100644
--- a/arch/x86/include/asm/hardirq_32.h
+++ b/arch/x86/include/asm/hardirq_32.h
@@ -19,6 +19,9 @@ typedef struct {
 
 DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
 
+/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
+#define MAX_HARDIRQS_PER_CPU NR_VECTORS
+
 #define __ARCH_IRQ_STAT
 #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
 
-- 
cgit v1.2.3


From f2a082711905312dc7b6675e913fee0c4689f7ae Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Thu, 15 Jan 2009 09:19:32 -0800
Subject: x86: fix build warning when CONFIG_NUMA not defined.

Impact: fix build warning

The macro cpu_to_node did not reference it's argument, and instead
simply returned a 0.  This causes a "unused variable" warning if
it's the only reference in a function (show_cache_disable).

Replace it with the more correct inline function.

Signed-off-by: Mike Travis <travis@sgi.com>
---
 arch/x86/include/asm/topology.h | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 4e2f2e0aab27..d0c68e291635 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -192,9 +192,20 @@ extern int __node_distance(int, int);
 
 #else /* !CONFIG_NUMA */
 
-#define numa_node_id()		0
-#define	cpu_to_node(cpu)	0
-#define	early_cpu_to_node(cpu)	0
+static inline int numa_node_id(void)
+{
+	return 0;
+}
+
+static inline int cpu_to_node(int cpu)
+{
+	return 0;
+}
+
+static inline int early_cpu_to_node(int cpu)
+{
+	return 0;
+}
 
 static inline const cpumask_t *cpumask_of_node(int node)
 {
-- 
cgit v1.2.3


From 7de6883faad71e3a253d55b9e1a47b89ebce0a31 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 Jan 2009 20:41:34 +0900
Subject: x86: fix pda_to_op()

There's no instruction to move a 64bit immediate into memory location.
Drop "i".

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/pda.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 2fbfff88df37..cbd3f48a8320 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -78,7 +78,7 @@ do {									\
 	case 8:								\
 		asm(op "q %1,%%gs:%c2":					\
 		    "+m" (_proxy_pda.field) :				\
-		    "ri" ((T__)val),					\
+		    "r" ((T__)val),					\
 		    "i"(pda_offset(field)));				\
 		break;							\
 	default:							\
-- 
cgit v1.2.3


From f10fcd47120e80f66665567dbe17f5071c7aef52 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 Jan 2009 20:41:34 +0900
Subject: x86: make early_per_cpu() a lvalue and use it

Make early_per_cpu() a lvalue as per_cpu() is and use it where
applicable.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/percpu.h   |  6 +++---
 arch/x86/include/asm/topology.h |  5 +----
 arch/x86/kernel/apic.c          | 13 ++-----------
 3 files changed, 6 insertions(+), 18 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ece72053ba63..df644f3e53e6 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -195,9 +195,9 @@ do {							\
 #define	early_per_cpu_ptr(_name) (_name##_early_ptr)
 #define	early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
 #define	early_per_cpu(_name, _cpu) 				\
-	(early_per_cpu_ptr(_name) ?				\
-		early_per_cpu_ptr(_name)[_cpu] :		\
-		per_cpu(_name, _cpu))
+	*(early_per_cpu_ptr(_name) ?				\
+		&early_per_cpu_ptr(_name)[_cpu] :		\
+		&per_cpu(_name, _cpu))
 
 #else	/* !CONFIG_SMP */
 #define	DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)		\
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 4e2f2e0aab27..87ca3fd86e88 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -102,10 +102,7 @@ static inline int cpu_to_node(int cpu)
 /* Same function but used if called before per_cpu areas are setup */
 static inline int early_cpu_to_node(int cpu)
 {
-	if (early_per_cpu_ptr(x86_cpu_to_node_map))
-		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-
-	return per_cpu(x86_cpu_to_node_map, cpu);
+	return early_per_cpu(x86_cpu_to_node_map, cpu);
 }
 
 /* Returns a pointer to the cpumask of CPUs on Node 'node'. */
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 38d6aab2358d..485787955834 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1877,17 +1877,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
 #endif
 
 #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
-	/* are we being called early in kernel startup? */
-	if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
-		u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-		u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-
-		cpu_to_apicid[cpu] = apicid;
-		bios_cpu_apicid[cpu] = apicid;
-	} else {
-		per_cpu(x86_cpu_to_apicid, cpu) = apicid;
-		per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
-	}
+	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
 #endif
 
 	set_cpu_possible(cpu, true);
-- 
cgit v1.2.3


From f32ff5388d86518c0375ccdb330d3b459b9c405e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 Jan 2009 20:41:35 +0900
Subject: x86: load pointer to pda into %gs while brining up a CPU

[ Based on original patch from Christoph Lameter and Mike Travis. ]

CPU startup code in head_64.S loaded address of a zero page into %gs
for temporary use till pda is loaded but address to the actual pda is
available at the point.  Load the real address directly instead.

This will help unifying percpu and pda handling later on.

This patch is mostly taken from Mike Travis' "x86_64: Fold pda into
per cpu area" patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/trampoline.h |  1 +
 arch/x86/kernel/acpi/sleep.c      |  1 +
 arch/x86/kernel/head64.c          |  4 ++--
 arch/x86/kernel/head_64.S         | 15 ++++++++++-----
 arch/x86/kernel/smpboot.c         |  1 +
 5 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index 780ba0ab94f9..90f06c25221d 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -13,6 +13,7 @@ extern unsigned char *trampoline_base;
 
 extern unsigned long init_rsp;
 extern unsigned long initial_code;
+extern unsigned long initial_gs;
 
 #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
 #define TRAMPOLINE_BASE 0x6000
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 707c1f6f95fa..9ff67f8dc2c0 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,7 @@ int acpi_save_state_mem(void)
 	stack_start.sp = temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
 			(unsigned long)get_cpu_gdt_table(smp_processor_id());
+	initial_gs = (unsigned long)cpu_pda(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
 	saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index bc2900ca82c7..76ffba2aa66d 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,8 +26,8 @@
 #include <asm/bios_ebda.h>
 #include <asm/trampoline.h>
 
-/* boot cpu pda */
-static struct x8664_pda _boot_cpu_pda;
+/* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */
+struct x8664_pda _boot_cpu_pda;
 
 #ifdef CONFIG_SMP
 /*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 7ee0363871e8..2f0ab0089883 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -243,12 +243,15 @@ ENTRY(secondary_startup_64)
 	movl %eax,%fs
 	movl %eax,%gs
 
-	/* 
-	 * Setup up a dummy PDA. this is just for some early bootup code
-	 * that does in_interrupt() 
-	 */ 
+	/* Set up %gs.
+	 *
+	 * %gs should point to the pda.  For initial boot, make %gs point
+	 * to the _boot_cpu_pda in data section.  For a secondary CPU,
+	 * initial_gs should be set to its pda address before the CPU runs
+	 * this code.
+	 */
 	movl	$MSR_GS_BASE,%ecx
-	movq	$empty_zero_page,%rax
+	movq	initial_gs(%rip),%rax
 	movq    %rax,%rdx
 	shrq	$32,%rdx
 	wrmsr	
@@ -274,6 +277,8 @@ ENTRY(secondary_startup_64)
 	.align	8
 	ENTRY(initial_code)
 	.quad	x86_64_start_kernel
+	ENTRY(initial_gs)
+	.quad	_boot_cpu_pda
 	__FINITDATA
 
 	ENTRY(stack_start)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 1a712da1dfa0..70d846628bbf 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -854,6 +854,7 @@ do_rest:
 #else
 	cpu_pda(cpu)->pcurrent = c_idle.idle;
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
+	initial_gs = (unsigned long)cpu_pda(cpu);
 #endif
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
-- 
cgit v1.2.3


From c8f3329a0ddd751241e96b4100df7eda14b2cbc6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 Jan 2009 20:41:35 +0900
Subject: x86: use static _cpu_pda array

_cpu_pda array first uses statically allocated storage in data.init
and then switches to allocated bootmem to conserve space.  However,
after folding pda area into percpu area, _cpu_pda array will be
removed completely.  Drop the reallocation part to simplify the code
for soon-to-follow changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/pda.h     |  3 ++-
 arch/x86/kernel/cpu/common.c   |  2 +-
 arch/x86/kernel/head64.c       | 12 ------------
 arch/x86/kernel/setup_percpu.c | 14 +++-----------
 4 files changed, 6 insertions(+), 25 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index cbd3f48a8320..2d5b49c3248e 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -5,6 +5,7 @@
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/cache.h>
+#include <linux/threads.h>
 #include <asm/page.h>
 
 /* Per processor datastructure. %gs points to it while the kernel runs */
@@ -39,7 +40,7 @@ struct x8664_pda {
 	unsigned irq_spurious_count;
 } ____cacheline_aligned_in_smp;
 
-extern struct x8664_pda **_cpu_pda;
+extern struct x8664_pda *_cpu_pda[NR_CPUS];
 extern void pda_init(int);
 
 #define cpu_pda(i) (_cpu_pda[i])
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f00258462444..c116c599326e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -879,7 +879,7 @@ static __init int setup_disablecpuid(char *arg)
 __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
-struct x8664_pda **_cpu_pda __read_mostly;
+struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(_cpu_pda);
 
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 76ffba2aa66d..462d0beccb6b 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -29,20 +29,8 @@
 /* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */
 struct x8664_pda _boot_cpu_pda;
 
-#ifdef CONFIG_SMP
-/*
- * We install an empty cpu_pda pointer table to indicate to early users
- * (numa_set_node) that the cpu_pda pointer table for cpus other than
- * the boot cpu is not yet setup.
- */
-static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
-#else
-static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
-#endif
-
 void __init x86_64_init_pda(void)
 {
-	_cpu_pda = __cpu_pda;
 	cpu_pda(0) = &_boot_cpu_pda;
 	cpu_pda(0)->data_offset =
 		(unsigned long)(__per_cpu_load - __per_cpu_start);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 44845842e722..73ab01b297c5 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -114,7 +114,6 @@ static inline void setup_cpu_pda_map(void) { }
 static void __init setup_cpu_pda_map(void)
 {
 	char *pda;
-	struct x8664_pda **new_cpu_pda;
 	unsigned long size;
 	int cpu;
 
@@ -122,28 +121,21 @@ static void __init setup_cpu_pda_map(void)
 
 	/* allocate cpu_pda array and pointer table */
 	{
-		unsigned long tsize = nr_cpu_ids * sizeof(void *);
 		unsigned long asize = size * (nr_cpu_ids - 1);
 
-		tsize = roundup(tsize, cache_line_size());
-		new_cpu_pda = alloc_bootmem(tsize + asize);
-		pda = (char *)new_cpu_pda + tsize;
+		pda = alloc_bootmem(asize);
 	}
 
 	/* initialize pointer table to static pda's */
 	for_each_possible_cpu(cpu) {
 		if (cpu == 0) {
 			/* leave boot cpu pda in place */
-			new_cpu_pda[0] = cpu_pda(0);
 			continue;
 		}
-		new_cpu_pda[cpu] = (struct x8664_pda *)pda;
-		new_cpu_pda[cpu]->in_bootmem = 1;
+		cpu_pda(cpu) = (struct x8664_pda *)pda;
+		cpu_pda(cpu)->in_bootmem = 1;
 		pda += size;
 	}
-
-	/* point to new pointer table */
-	_cpu_pda = new_cpu_pda;
 }
 
 #endif /* CONFIG_SMP && CONFIG_X86_64 */
-- 
cgit v1.2.3


From 1a51e3a0aed18767cf2762e95456ecfeb0bca5e6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 Jan 2009 20:41:35 +0900
Subject: x86: fold pda into percpu area on SMP

[ Based on original patch from Christoph Lameter and Mike Travis. ]

Currently pdas and percpu areas are allocated separately.  %gs points
to local pda and percpu area can be reached using pda->data_offset.
This patch folds pda into percpu area.

Due to strange gcc requirement, pda needs to be at the beginning of
the percpu area so that pda->stack_canary is at %gs:40.  To achieve
this, a new percpu output section macro - PERCPU_VADDR_PREALLOC() - is
added and used to reserve pda sized chunk at the start of the percpu
area.

After this change, for boot cpu, %gs first points to pda in the
data.init area and later during setup_per_cpu_areas() gets updated to
point to the actual pda.  This means that setup_per_cpu_areas() need
to reload %gs for CPU0 while clearing pda area for other cpus as cpu0
already has modified it when control reaches setup_per_cpu_areas().

This patch also removes now unnecessary get_local_pda() and its call
sites.

A lot of this patch is taken from Mike Travis' "x86_64: Fold pda into
per cpu area" patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/percpu.h     |   8 +++
 arch/x86/include/asm/smp.h        |   2 -
 arch/x86/kernel/asm-offsets_64.c  |   1 +
 arch/x86/kernel/cpu/common.c      |   6 +--
 arch/x86/kernel/head64.c          |   8 ++-
 arch/x86/kernel/head_64.S         |  15 ++++--
 arch/x86/kernel/setup_percpu.c    | 107 ++++++++++++++++----------------------
 arch/x86/kernel/smpboot.c         |  60 +--------------------
 arch/x86/kernel/vmlinux_64.lds.S  |   6 ++-
 arch/x86/xen/smp.c                |  10 ----
 include/asm-generic/vmlinux.lds.h |  25 ++++++++-
 11 files changed, 104 insertions(+), 144 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index df644f3e53e6..0ed77cf33f76 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -1,6 +1,14 @@
 #ifndef _ASM_X86_PERCPU_H
 #define _ASM_X86_PERCPU_H
 
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_X86_64
+extern void load_pda_offset(int cpu);
+#else
+static inline void load_pda_offset(int cpu) { }
+#endif
+#endif
+
 #ifdef CONFIG_X86_64
 #include <linux/compiler.h>
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index a8cea7b09434..127415402ea1 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -19,8 +19,6 @@
 #include <asm/thread_info.h>
 #include <asm/cpumask.h>
 
-extern int __cpuinit get_local_pda(int cpu);
-
 extern int smp_num_siblings;
 extern unsigned int num_processors;
 
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 1d41d3f1edbc..f8d1b047ef4f 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,6 +56,7 @@ int main(void)
 	ENTRY(cpunumber);
 	ENTRY(irqstackptr);
 	ENTRY(data_offset);
+	DEFINE(pda_size, sizeof(struct x8664_pda));
 	BLANK();
 #undef ENTRY
 #ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c116c599326e..7041acdf5579 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -893,10 +893,8 @@ void __cpuinit pda_init(int cpu)
 	/* Setup up data that may be needed in __get_free_pages early */
 	loadsegment(fs, 0);
 	loadsegment(gs, 0);
-	/* Memory clobbers used to order PDA accessed */
-	mb();
-	wrmsrl(MSR_GS_BASE, pda);
-	mb();
+
+	load_pda_offset(cpu);
 
 	pda->cpunumber = cpu;
 	pda->irqcount = -1;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 462d0beccb6b..1a311293f733 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,12 +26,18 @@
 #include <asm/bios_ebda.h>
 #include <asm/trampoline.h>
 
-/* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */
+#ifndef CONFIG_SMP
+/* boot cpu pda, referenced by head_64.S to initialize %gs on UP */
 struct x8664_pda _boot_cpu_pda;
+#endif
 
 void __init x86_64_init_pda(void)
 {
+#ifdef CONFIG_SMP
+	cpu_pda(0) = (void *)__per_cpu_load;
+#else
 	cpu_pda(0) = &_boot_cpu_pda;
+#endif
 	cpu_pda(0)->data_offset =
 		(unsigned long)(__per_cpu_load - __per_cpu_start);
 	pda_init(0);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 2f0ab0089883..7a995d0e9f78 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -245,10 +245,13 @@ ENTRY(secondary_startup_64)
 
 	/* Set up %gs.
 	 *
-	 * %gs should point to the pda.  For initial boot, make %gs point
-	 * to the _boot_cpu_pda in data section.  For a secondary CPU,
-	 * initial_gs should be set to its pda address before the CPU runs
-	 * this code.
+	 * On SMP, %gs should point to the per-cpu area.  For initial
+	 * boot, make %gs point to the init data section.  For a
+	 * secondary CPU,initial_gs should be set to its pda address
+	 * before the CPU runs this code.
+	 *
+	 * On UP, initial_gs points to _boot_cpu_pda and doesn't
+	 * change.
 	 */
 	movl	$MSR_GS_BASE,%ecx
 	movq	initial_gs(%rip),%rax
@@ -278,7 +281,11 @@ ENTRY(secondary_startup_64)
 	ENTRY(initial_code)
 	.quad	x86_64_start_kernel
 	ENTRY(initial_gs)
+#ifdef CONFIG_SMP
+	.quad	__per_cpu_load
+#else
 	.quad	_boot_cpu_pda
+#endif
 	__FINITDATA
 
 	ENTRY(stack_start)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 73ab01b297c5..63d462802272 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -13,6 +13,7 @@
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
 #include <asm/highmem.h>
+#include <asm/proto.h>
 #include <asm/cpumask.h>
 
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
@@ -65,6 +66,36 @@ static void __init setup_node_to_cpumask_map(void);
 static inline void setup_node_to_cpumask_map(void) { }
 #endif
 
+#ifdef CONFIG_X86_64
+void __cpuinit load_pda_offset(int cpu)
+{
+	/* Memory clobbers used to order pda/percpu accesses */
+	mb();
+	wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
+	mb();
+}
+
+#endif /* CONFIG_SMP && CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_64
+
+/* correctly size the local cpu masks */
+static void setup_cpu_local_masks(void)
+{
+	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
+	alloc_bootmem_cpumask_var(&cpu_callin_mask);
+	alloc_bootmem_cpumask_var(&cpu_callout_mask);
+	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
+#else /* CONFIG_X86_32 */
+
+static inline void setup_cpu_local_masks(void)
+{
+}
+
+#endif /* CONFIG_X86_32 */
+
 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
 /*
  * Copy data used in early init routines from the initial arrays to the
@@ -101,63 +132,7 @@ static void __init setup_per_cpu_maps(void)
  */
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
-static inline void setup_cpu_pda_map(void) { }
-
-#elif !defined(CONFIG_SMP)
-static inline void setup_cpu_pda_map(void) { }
-
-#else /* CONFIG_SMP && CONFIG_X86_64 */
-
-/*
- * Allocate cpu_pda pointer table and array via alloc_bootmem.
- */
-static void __init setup_cpu_pda_map(void)
-{
-	char *pda;
-	unsigned long size;
-	int cpu;
-
-	size = roundup(sizeof(struct x8664_pda), cache_line_size());
-
-	/* allocate cpu_pda array and pointer table */
-	{
-		unsigned long asize = size * (nr_cpu_ids - 1);
-
-		pda = alloc_bootmem(asize);
-	}
-
-	/* initialize pointer table to static pda's */
-	for_each_possible_cpu(cpu) {
-		if (cpu == 0) {
-			/* leave boot cpu pda in place */
-			continue;
-		}
-		cpu_pda(cpu) = (struct x8664_pda *)pda;
-		cpu_pda(cpu)->in_bootmem = 1;
-		pda += size;
-	}
-}
-
-#endif /* CONFIG_SMP && CONFIG_X86_64 */
-
-#ifdef CONFIG_X86_64
-
-/* correctly size the local cpu masks */
-static void setup_cpu_local_masks(void)
-{
-	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
-	alloc_bootmem_cpumask_var(&cpu_callin_mask);
-	alloc_bootmem_cpumask_var(&cpu_callout_mask);
-	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
-}
-
-#else /* CONFIG_X86_32 */
-
-static inline void setup_cpu_local_masks(void)
-{
-}
-
-#endif /* CONFIG_X86_32 */
+#endif
 
 /*
  * Great future plan:
@@ -171,9 +146,6 @@ void __init setup_per_cpu_areas(void)
 	int cpu;
 	unsigned long align = 1;
 
-	/* Setup cpu_pda map */
-	setup_cpu_pda_map();
-
 	/* Copy section for each CPU (we discard the original) */
 	old_size = PERCPU_ENOUGH_ROOM;
 	align = max_t(unsigned long, PAGE_SIZE, align);
@@ -204,8 +176,21 @@ void __init setup_per_cpu_areas(void)
 				cpu, node, __pa(ptr));
 		}
 #endif
-		per_cpu_offset(cpu) = ptr - __per_cpu_start;
+
 		memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
+#ifdef CONFIG_X86_64
+		cpu_pda(cpu) = (void *)ptr;
+
+		/*
+		 * CPU0 modified pda in the init data area, reload pda
+		 * offset for CPU0 and clear the area for others.
+		 */
+		if (cpu == 0)
+			load_pda_offset(0);
+		else
+			memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
+#endif
+		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 
 		DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
 	}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 70d846628bbf..f2f77ca494d4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -744,52 +744,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
 	complete(&c_idle->done);
 }
 
-#ifdef CONFIG_X86_64
-
-/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
-static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
-{
-	if (!after_bootmem)
-		free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
-}
-
-/*
- * Allocate node local memory for the AP pda.
- *
- * Must be called after the _cpu_pda pointer table is initialized.
- */
-int __cpuinit get_local_pda(int cpu)
-{
-	struct x8664_pda *oldpda, *newpda;
-	unsigned long size = sizeof(struct x8664_pda);
-	int node = cpu_to_node(cpu);
-
-	if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
-		return 0;
-
-	oldpda = cpu_pda(cpu);
-	newpda = kmalloc_node(size, GFP_ATOMIC, node);
-	if (!newpda) {
-		printk(KERN_ERR "Could not allocate node local PDA "
-			"for CPU %d on node %d\n", cpu, node);
-
-		if (oldpda)
-			return 0;	/* have a usable pda */
-		else
-			return -1;
-	}
-
-	if (oldpda) {
-		memcpy(newpda, oldpda, size);
-		free_bootmem_pda(oldpda);
-	}
-
-	newpda->in_bootmem = 0;
-	cpu_pda(cpu) = newpda;
-	return 0;
-}
-#endif /* CONFIG_X86_64 */
-
 static int __cpuinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -807,16 +761,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 	};
 	INIT_WORK(&c_idle.work, do_fork_idle);
 
-#ifdef CONFIG_X86_64
-	/* Allocate node local memory for AP pdas */
-	if (cpu > 0) {
-		boot_error = get_local_pda(cpu);
-		if (boot_error)
-			goto restore_state;
-			/* if can't get pda memory, can't start cpu */
-	}
-#endif
-
 	alternatives_smp_switch(1);
 
 	c_idle.idle = get_idle_for_cpu(cpu);
@@ -931,9 +875,7 @@ do_rest:
 				inquire_remote_apic(apicid);
 		}
 	}
-#ifdef CONFIG_X86_64
-restore_state:
-#endif
+
 	if (boot_error) {
 		/* Try to put things back the way they were before ... */
 		numa_remove_cpu(cpu); /* was set by numa_add_cpu */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index f50280db0dfe..962f21f1d4d7 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -5,6 +5,7 @@
 #define LOAD_OFFSET __START_KERNEL_map
 
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/asm-offsets.h>
 #include <asm/page.h>
 
 #undef i386	/* in case the preprocessor is a 32bit one */
@@ -215,10 +216,11 @@ SECTIONS
   /*
    * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
    * output PHDR, so the next output section - __data_nosave - should
-   * switch it back to data.init.
+   * switch it back to data.init.  Also, pda should be at the head of
+   * percpu area.  Preallocate it.
    */
   . = ALIGN(PAGE_SIZE);
-  PERCPU_VADDR(0, :percpu)
+  PERCPU_VADDR_PREALLOC(0, :percpu, pda_size)
 #else
   PERCPU(PAGE_SIZE)
 #endif
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c44e2069c7c7..83fa4236477d 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -283,16 +283,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 	struct task_struct *idle = idle_task(cpu);
 	int rc;
 
-#ifdef CONFIG_X86_64
-	/* Allocate node local memory for AP pdas */
-	WARN_ON(cpu == 0);
-	if (cpu > 0) {
-		rc = get_local_pda(cpu);
-		if (rc)
-			return rc;
-	}
-#endif
-
 #ifdef CONFIG_X86_32
 	init_gdt(cpu);
 	per_cpu(current_task, cpu) = idle;
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index fc2f55f2dcd6..e53319cf29cb 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -441,9 +441,10 @@
 	. = __per_cpu_load + SIZEOF(.data.percpu);
 
 /**
- * PERCPU_VADDR - define output section for percpu area
+ * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc
  * @vaddr: explicit base address (optional)
  * @phdr: destination PHDR (optional)
+ * @prealloc: the size of prealloc area
  *
  * Macro which expands to output section for percpu area.  If @vaddr
  * is not blank, it specifies explicit base address and all percpu
@@ -455,11 +456,33 @@
  * section in the linker script will go there too.  @phdr should have
  * a leading colon.
  *
+ * If @prealloc is non-zero, the specified number of bytes will be
+ * reserved at the start of percpu area.  As the prealloc area is
+ * likely to break alignment, this macro puts areas in increasing
+ * alignment order.
+ *
  * This macro defines three symbols, __per_cpu_load, __per_cpu_start
  * and __per_cpu_end.  The first one is the vaddr of loaded percpu
  * init data.  __per_cpu_start equals @vaddr and __per_cpu_end is the
  * end offset.
  */
+#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc)			\
+	PERCPU_PROLOG(vaddr)						\
+		. += prealloc;						\
+		*(.data.percpu)						\
+		*(.data.percpu.shared_aligned)				\
+		*(.data.percpu.page_aligned)				\
+	PERCPU_EPILOG(segment)
+
+/**
+ * PERCPU_VADDR - define output section for percpu area
+ * @vaddr: explicit base address (optional)
+ * @phdr: destination PHDR (optional)
+ *
+ * Macro which expands to output section for percpu area.  Mostly
+ * identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than
+ * using slighly different layout.
+ */
 #define PERCPU_VADDR(vaddr, phdr)					\
 	PERCPU_PROLOG(vaddr)						\
 		*(.data.percpu.page_aligned)				\
-- 
cgit v1.2.3


From 9939ddaff52787b2a7c1adf1b2afc95421aa0884 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 Jan 2009 20:41:35 +0900
Subject: x86: merge 64 and 32 SMP percpu handling

Now that pda is allocated as part of percpu, percpu doesn't need to be
accessed through pda.  Unify x86_64 SMP percpu access with x86_32 SMP
one.  Other than the segment register, operand size and the base of
percpu symbols, they behave identical now.

This patch replaces now unnecessary pda->data_offset with a dummy
field which is necessary to keep stack_canary at its place.  This
patch also moves per_cpu_offset initialization out of init_gdt() into
setup_per_cpu_areas().  Note that this change also necessitates
explicit per_cpu_offset initializations in voyager_smp.c.

With this change, x86_OP_percpu()'s are as efficient on x86_64 as on
x86_32 and also x86_64 can use assembly PER_CPU macros.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/pda.h          |   3 +-
 arch/x86/include/asm/percpu.h       | 127 +++++++++++-------------------------
 arch/x86/kernel/asm-offsets_64.c    |   1 -
 arch/x86/kernel/entry_64.S          |   7 +-
 arch/x86/kernel/head64.c            |   2 -
 arch/x86/kernel/setup_percpu.c      |  15 +++--
 arch/x86/kernel/smpcommon.c         |   3 +-
 arch/x86/mach-voyager/voyager_smp.c |   2 +
 8 files changed, 55 insertions(+), 105 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 2d5b49c3248e..e91558e37850 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -11,8 +11,7 @@
 /* Per processor datastructure. %gs points to it while the kernel runs */
 struct x8664_pda {
 	struct task_struct *pcurrent;	/* 0  Current process */
-	unsigned long data_offset;	/* 8 Per cpu data offset from linker
-					   address */
+	unsigned long dummy;
 	unsigned long kernelstack;	/* 16 top of kernel stack for current */
 	unsigned long oldrsp;		/* 24 user rsp for system call */
 	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0ed77cf33f76..556f84b9ea96 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -1,62 +1,13 @@
 #ifndef _ASM_X86_PERCPU_H
 #define _ASM_X86_PERCPU_H
 
-#ifndef __ASSEMBLY__
 #ifdef CONFIG_X86_64
-extern void load_pda_offset(int cpu);
+#define __percpu_seg		gs
+#define __percpu_mov_op		movq
 #else
-static inline void load_pda_offset(int cpu) { }
-#endif
-#endif
-
-#ifdef CONFIG_X86_64
-#include <linux/compiler.h>
-
-/* Same as asm-generic/percpu.h, except that we store the per cpu offset
-   in the PDA. Longer term the PDA and every per cpu variable
-   should be just put into a single section and referenced directly
-   from %gs */
-
-#ifdef CONFIG_SMP
-#include <asm/pda.h>
-
-#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
-#define __my_cpu_offset read_pda(data_offset)
-
-#define per_cpu_offset(x) (__per_cpu_offset(x))
-
+#define __percpu_seg		fs
+#define __percpu_mov_op		movl
 #endif
-#include <asm-generic/percpu.h>
-
-DECLARE_PER_CPU(struct x8664_pda, pda);
-
-/*
- * These are supposed to be implemented as a single instruction which
- * operates on the per-cpu data base segment.  x86-64 doesn't have
- * that yet, so this is a fairly inefficient workaround for the
- * meantime.  The single instruction is atomic with respect to
- * preemption and interrupts, so we need to explicitly disable
- * interrupts here to achieve the same effect.  However, because it
- * can be used from within interrupt-disable/enable, we can't actually
- * disable interrupts; disabling preemption is enough.
- */
-#define x86_read_percpu(var)						\
-	({								\
-		typeof(per_cpu_var(var)) __tmp;				\
-		preempt_disable();					\
-		__tmp = __get_cpu_var(var);				\
-		preempt_enable();					\
-		__tmp;							\
-	})
-
-#define x86_write_percpu(var, val)					\
-	do {								\
-		preempt_disable();					\
-		__get_cpu_var(var) = (val);				\
-		preempt_enable();					\
-	} while(0)
-
-#else /* CONFIG_X86_64 */
 
 #ifdef __ASSEMBLY__
 
@@ -73,42 +24,26 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
  *    PER_CPU(cpu_gdt_descr, %ebx)
  */
 #ifdef CONFIG_SMP
-#define PER_CPU(var, reg)				\
-	movl %fs:per_cpu__##this_cpu_off, reg;		\
+#define PER_CPU(var, reg)						\
+	__percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg;	\
 	lea per_cpu__##var(reg), reg
-#define PER_CPU_VAR(var)	%fs:per_cpu__##var
+#define PER_CPU_VAR(var)	%__percpu_seg:per_cpu__##var
 #else /* ! SMP */
-#define PER_CPU(var, reg)			\
-	movl $per_cpu__##var, reg
+#define PER_CPU(var, reg)						\
+	__percpu_mov_op $per_cpu__##var, reg
 #define PER_CPU_VAR(var)	per_cpu__##var
 #endif	/* SMP */
 
 #else /* ...!ASSEMBLY */
 
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- *    var - variable name
- *    cpu - 32bit register containing the current CPU number
- *
- * The resulting address is stored in the "cpu" argument.
- *
- * Example:
- *    PER_CPU(cpu_gdt_descr, %ebx)
- */
-#ifdef CONFIG_SMP
-
-#define __my_cpu_offset x86_read_percpu(this_cpu_off)
-
-/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
-#define __percpu_seg "%%fs:"
+#include <linux/stringify.h>
 
-#else  /* !SMP */
-
-#define __percpu_seg ""
-
-#endif	/* SMP */
+#ifdef CONFIG_SMP
+#define __percpu_seg_str	"%%"__stringify(__percpu_seg)":"
+#define __my_cpu_offset		x86_read_percpu(this_cpu_off)
+#else
+#define __percpu_seg_str
+#endif
 
 #include <asm-generic/percpu.h>
 
@@ -128,20 +63,25 @@ do {							\
 	}						\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm(op "b %1,"__percpu_seg"%0"		\
+		asm(op "b %1,"__percpu_seg_str"%0"	\
 		    : "+m" (var)			\
 		    : "ri" ((T__)val));			\
 		break;					\
 	case 2:						\
-		asm(op "w %1,"__percpu_seg"%0"		\
+		asm(op "w %1,"__percpu_seg_str"%0"	\
 		    : "+m" (var)			\
 		    : "ri" ((T__)val));			\
 		break;					\
 	case 4:						\
-		asm(op "l %1,"__percpu_seg"%0"		\
+		asm(op "l %1,"__percpu_seg_str"%0"	\
 		    : "+m" (var)			\
 		    : "ri" ((T__)val));			\
 		break;					\
+	case 8:						\
+		asm(op "q %1,"__percpu_seg_str"%0"	\
+		    : "+m" (var)			\
+		    : "r" ((T__)val));			\
+		break;					\
 	default: __bad_percpu_size();			\
 	}						\
 } while (0)
@@ -151,17 +91,22 @@ do {							\
 	typeof(var) ret__;				\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm(op "b "__percpu_seg"%1,%0"		\
+		asm(op "b "__percpu_seg_str"%1,%0"	\
 		    : "=r" (ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 2:						\
-		asm(op "w "__percpu_seg"%1,%0"		\
+		asm(op "w "__percpu_seg_str"%1,%0"	\
 		    : "=r" (ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 4:						\
-		asm(op "l "__percpu_seg"%1,%0"		\
+		asm(op "l "__percpu_seg_str"%1,%0"	\
+		    : "=r" (ret__)			\
+		    : "m" (var));			\
+		break;					\
+	case 8:						\
+		asm(op "q "__percpu_seg_str"%1,%0"	\
 		    : "=r" (ret__)			\
 		    : "m" (var));			\
 		break;					\
@@ -175,8 +120,14 @@ do {							\
 #define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
 #define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
 #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
+
+#ifdef CONFIG_X86_64
+extern void load_pda_offset(int cpu);
+#else
+static inline void load_pda_offset(int cpu) { }
+#endif
+
 #endif /* !__ASSEMBLY__ */
-#endif /* !CONFIG_X86_64 */
 
 #ifdef CONFIG_SMP
 
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f8d1b047ef4f..f4cc81bfbf89 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -55,7 +55,6 @@ int main(void)
 	ENTRY(irqcount);
 	ENTRY(cpunumber);
 	ENTRY(irqstackptr);
-	ENTRY(data_offset);
 	DEFINE(pda_size, sizeof(struct x8664_pda));
 	BLANK();
 #undef ENTRY
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e28c7a987793..4833f3a19650 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -52,6 +52,7 @@
 #include <asm/irqflags.h>
 #include <asm/paravirt.h>
 #include <asm/ftrace.h>
+#include <asm/percpu.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -1072,10 +1073,10 @@ ENTRY(\sym)
 	TRACE_IRQS_OFF
 	movq %rsp,%rdi		/* pt_regs pointer */
 	xorl %esi,%esi		/* no error code */
-	movq %gs:pda_data_offset, %rbp
-	subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+	PER_CPU(init_tss, %rbp)
+	subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
 	call \do_sym
-	addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+	addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
 	jmp paranoid_exit	/* %ebx: no swapgs flag */
 	CFI_ENDPROC
 END(\sym)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 1a311293f733..e99b661a97f4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -38,8 +38,6 @@ void __init x86_64_init_pda(void)
 #else
 	cpu_pda(0) = &_boot_cpu_pda;
 #endif
-	cpu_pda(0)->data_offset =
-		(unsigned long)(__per_cpu_load - __per_cpu_start);
 	pda_init(0);
 }
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 63d462802272..be1ff34db112 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -125,14 +125,14 @@ static void __init setup_per_cpu_maps(void)
 #endif
 }
 
-#ifdef CONFIG_X86_32
-/*
- * Great future not-so-futuristic plan: make i386 and x86_64 do it
- * the same way
- */
+#ifdef CONFIG_X86_64
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
+	[0] = (unsigned long)__per_cpu_load,
+};
+#else
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(__per_cpu_offset);
 #endif
+EXPORT_SYMBOL(__per_cpu_offset);
 
 /*
  * Great future plan:
@@ -178,6 +178,7 @@ void __init setup_per_cpu_areas(void)
 #endif
 
 		memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
+		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 #ifdef CONFIG_X86_64
 		cpu_pda(cpu) = (void *)ptr;
 
@@ -190,7 +191,7 @@ void __init setup_per_cpu_areas(void)
 		else
 			memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
 #endif
-		per_cpu_offset(cpu) = ptr - __per_cpu_start;
+		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
 
 		DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
 	}
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 397e309839dd..84395fabc410 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -4,10 +4,10 @@
 #include <linux/module.h>
 #include <asm/smp.h>
 
-#ifdef CONFIG_X86_32
 DEFINE_PER_CPU(unsigned long, this_cpu_off);
 EXPORT_PER_CPU_SYMBOL(this_cpu_off);
 
+#ifdef CONFIG_X86_32
 /*
  * Initialize the CPU's GDT.  This is either the boot CPU doing itself
  * (still using the master per-cpu area), or a CPU doing it for a
@@ -24,7 +24,6 @@ __cpuinit void init_gdt(int cpu)
 	write_gdt_entry(get_cpu_gdt_table(cpu),
 			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
 
-	per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
 	per_cpu(cpu_number, cpu) = cpu;
 }
 #endif
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 9840b7ec749a..1a48368acb09 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -531,6 +531,7 @@ static void __init do_boot_cpu(__u8 cpu)
 	stack_start.sp = (void *)idle->thread.sp;
 
 	init_gdt(cpu);
+	per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
 	per_cpu(current_task, cpu) = idle;
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	irq_ctx_init(cpu);
@@ -1748,6 +1749,7 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
 static void __cpuinit voyager_smp_prepare_boot_cpu(void)
 {
 	init_gdt(smp_processor_id());
+	per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
 	switch_to_new_gdt();
 
 	cpu_set(smp_processor_id(), cpu_online_map);
-- 
cgit v1.2.3


From b12d8db8fbfaed1e8222a15333a3645599636854 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 Jan 2009 20:41:35 +0900
Subject: x86: make pda a percpu variable

[ Based on original patch from Christoph Lameter and Mike Travis. ]

As pda is now allocated in percpu area, it can easily be made a proper
percpu variable.  Make it so by defining per cpu symbol from linker
script and declaring it in C code for SMP and simply defining it for
UP.  This change cleans up code and brings SMP and UP closer a bit.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/pda.h       |  5 +++--
 arch/x86/kernel/cpu/common.c     |  3 ---
 arch/x86/kernel/head64.c         | 10 ----------
 arch/x86/kernel/head_64.S        |  5 +++--
 arch/x86/kernel/setup_percpu.c   | 16 ++++++++++++++--
 arch/x86/kernel/vmlinux_64.lds.S |  4 +++-
 6 files changed, 23 insertions(+), 20 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index e91558e37850..66ae1043393d 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -7,6 +7,7 @@
 #include <linux/cache.h>
 #include <linux/threads.h>
 #include <asm/page.h>
+#include <asm/percpu.h>
 
 /* Per processor datastructure. %gs points to it while the kernel runs */
 struct x8664_pda {
@@ -39,10 +40,10 @@ struct x8664_pda {
 	unsigned irq_spurious_count;
 } ____cacheline_aligned_in_smp;
 
-extern struct x8664_pda *_cpu_pda[NR_CPUS];
+DECLARE_PER_CPU(struct x8664_pda, __pda);
 extern void pda_init(int);
 
-#define cpu_pda(i) (_cpu_pda[i])
+#define cpu_pda(cpu)		(&per_cpu(__pda, cpu))
 
 /*
  * There is no fast way to get the base address of the PDA, all the accesses
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7041acdf5579..c49498d40830 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -879,9 +879,6 @@ static __init int setup_disablecpuid(char *arg)
 __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
-struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
-
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
 static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index e99b661a97f4..71b6f6ec96a2 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,18 +26,8 @@
 #include <asm/bios_ebda.h>
 #include <asm/trampoline.h>
 
-#ifndef CONFIG_SMP
-/* boot cpu pda, referenced by head_64.S to initialize %gs on UP */
-struct x8664_pda _boot_cpu_pda;
-#endif
-
 void __init x86_64_init_pda(void)
 {
-#ifdef CONFIG_SMP
-	cpu_pda(0) = (void *)__per_cpu_load;
-#else
-	cpu_pda(0) = &_boot_cpu_pda;
-#endif
 	pda_init(0);
 }
 
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 7a995d0e9f78..c8ace880661b 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,6 +19,7 @@
 #include <asm/msr.h>
 #include <asm/cache.h>
 #include <asm/processor-flags.h>
+#include <asm/percpu.h>
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/asm-offsets.h>
@@ -250,7 +251,7 @@ ENTRY(secondary_startup_64)
 	 * secondary CPU,initial_gs should be set to its pda address
 	 * before the CPU runs this code.
 	 *
-	 * On UP, initial_gs points to _boot_cpu_pda and doesn't
+	 * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
 	 * change.
 	 */
 	movl	$MSR_GS_BASE,%ecx
@@ -284,7 +285,7 @@ ENTRY(secondary_startup_64)
 #ifdef CONFIG_SMP
 	.quad	__per_cpu_load
 #else
-	.quad	_boot_cpu_pda
+	.quad	PER_CPU_VAR(__pda)
 #endif
 	__FINITDATA
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index be1ff34db112..daeedf82c15f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -66,6 +66,16 @@ static void __init setup_node_to_cpumask_map(void);
 static inline void setup_node_to_cpumask_map(void) { }
 #endif
 
+/*
+ * Define load_pda_offset() and per-cpu __pda for x86_64.
+ * load_pda_offset() is responsible for loading the offset of pda into
+ * %gs.
+ *
+ * On SMP, pda offset also duals as percpu base address and thus it
+ * should be at the start of per-cpu area.  To achieve this, it's
+ * preallocated in vmlinux_64.lds.S directly instead of using
+ * DEFINE_PER_CPU().
+ */
 #ifdef CONFIG_X86_64
 void __cpuinit load_pda_offset(int cpu)
 {
@@ -74,6 +84,10 @@ void __cpuinit load_pda_offset(int cpu)
 	wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
 	mb();
 }
+#ifndef CONFIG_SMP
+DEFINE_PER_CPU(struct x8664_pda, __pda);
+EXPORT_PER_CPU_SYMBOL(__pda);
+#endif
 
 #endif /* CONFIG_SMP && CONFIG_X86_64 */
 
@@ -180,8 +194,6 @@ void __init setup_per_cpu_areas(void)
 		memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 #ifdef CONFIG_X86_64
-		cpu_pda(cpu) = (void *)ptr;
-
 		/*
 		 * CPU0 modified pda in the init data area, reload pda
 		 * offset for CPU0 and clear the area for others.
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 962f21f1d4d7..d2a0baa87d1b 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -217,10 +217,12 @@ SECTIONS
    * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
    * output PHDR, so the next output section - __data_nosave - should
    * switch it back to data.init.  Also, pda should be at the head of
-   * percpu area.  Preallocate it.
+   * percpu area.  Preallocate it and define the percpu offset symbol
+   * so that it can be accessed as a percpu variable.
    */
   . = ALIGN(PAGE_SIZE);
   PERCPU_VADDR_PREALLOC(0, :percpu, pda_size)
+  per_cpu____pda = __per_cpu_start;
 #else
   PERCPU(PAGE_SIZE)
 #endif
-- 
cgit v1.2.3


From 49357d19e4fb31e28796eaff83499e7584c26878 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 Jan 2009 20:41:35 +0900
Subject: x86: convert pda ops to wrappers around x86 percpu accessors

pda is now a percpu variable and there's no reason it can't use plain
x86 percpu accessors.  Add x86_test_and_clear_bit_percpu() and replace
pda op implementations with wrappers around x86 percpu accessors.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/pda.h       | 88 +++-------------------------------------
 arch/x86/include/asm/percpu.h    | 10 +++++
 arch/x86/kernel/vmlinux_64.lds.S |  1 -
 arch/x86/kernel/x8664_ksyms_64.c |  2 -
 4 files changed, 16 insertions(+), 85 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 66ae1043393d..e3d3a081d798 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -45,91 +45,15 @@ extern void pda_init(int);
 
 #define cpu_pda(cpu)		(&per_cpu(__pda, cpu))
 
-/*
- * There is no fast way to get the base address of the PDA, all the accesses
- * have to mention %fs/%gs.  So it needs to be done this Torvaldian way.
- */
-extern void __bad_pda_field(void) __attribute__((noreturn));
-
-/*
- * proxy_pda doesn't actually exist, but tell gcc it is accessed for
- * all PDA accesses so it gets read/write dependencies right.
- */
-extern struct x8664_pda _proxy_pda;
-
-#define pda_offset(field) offsetof(struct x8664_pda, field)
-
-#define pda_to_op(op, field, val)					\
-do {									\
-	typedef typeof(_proxy_pda.field) T__;				\
-	if (0) { T__ tmp__; tmp__ = (val); }	/* type checking */	\
-	switch (sizeof(_proxy_pda.field)) {				\
-	case 2:								\
-		asm(op "w %1,%%gs:%c2" :				\
-		    "+m" (_proxy_pda.field) :				\
-		    "ri" ((T__)val),					\
-		    "i"(pda_offset(field)));				\
-		break;							\
-	case 4:								\
-		asm(op "l %1,%%gs:%c2" :				\
-		    "+m" (_proxy_pda.field) :				\
-		    "ri" ((T__)val),					\
-		    "i" (pda_offset(field)));				\
-		break;							\
-	case 8:								\
-		asm(op "q %1,%%gs:%c2":					\
-		    "+m" (_proxy_pda.field) :				\
-		    "r" ((T__)val),					\
-		    "i"(pda_offset(field)));				\
-		break;							\
-	default:							\
-		__bad_pda_field();					\
-	}								\
-} while (0)
-
-#define pda_from_op(op, field)			\
-({						\
-	typeof(_proxy_pda.field) ret__;		\
-	switch (sizeof(_proxy_pda.field)) {	\
-	case 2:					\
-		asm(op "w %%gs:%c1,%0" :	\
-		    "=r" (ret__) :		\
-		    "i" (pda_offset(field)),	\
-		    "m" (_proxy_pda.field));	\
-		break;				\
-	case 4:					\
-		asm(op "l %%gs:%c1,%0":		\
-		    "=r" (ret__):		\
-		    "i" (pda_offset(field)),	\
-		    "m" (_proxy_pda.field));	\
-		break;				\
-	case 8:					\
-		asm(op "q %%gs:%c1,%0":		\
-		    "=r" (ret__) :		\
-		    "i" (pda_offset(field)),	\
-		    "m" (_proxy_pda.field));	\
-		break;				\
-	default:				\
-		__bad_pda_field();		\
-	}					\
-	ret__;					\
-})
-
-#define read_pda(field)		pda_from_op("mov", field)
-#define write_pda(field, val)	pda_to_op("mov", field, val)
-#define add_pda(field, val)	pda_to_op("add", field, val)
-#define sub_pda(field, val)	pda_to_op("sub", field, val)
-#define or_pda(field, val)	pda_to_op("or", field, val)
+#define read_pda(field)		x86_read_percpu(__pda.field)
+#define write_pda(field, val)	x86_write_percpu(__pda.field, val)
+#define add_pda(field, val)	x86_add_percpu(__pda.field, val)
+#define sub_pda(field, val)	x86_sub_percpu(__pda.field, val)
+#define or_pda(field, val)	x86_or_percpu(__pda.field, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define test_and_clear_bit_pda(bit, field)				\
-({									\
-	int old__;							\
-	asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0"			\
-		     : "=r" (old__), "+m" (_proxy_pda.field)		\
-		     : "dIr" (bit), "i" (pda_offset(field)) : "memory");\
-	old__;								\
-})
+	x86_test_and_clear_bit_percpu(bit, __pda.field)
 
 #endif
 
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 556f84b9ea96..328b31a429d7 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -121,6 +121,16 @@ do {							\
 #define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
 #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
 
+/* This is not atomic against other CPUs -- CPU preemption needs to be off */
+#define x86_test_and_clear_bit_percpu(bit, var)				\
+({									\
+	int old__;							\
+	asm volatile("btr %1,"__percpu_seg_str"%c2\n\tsbbl %0,%0"	\
+		     : "=r" (old__)					\
+		     : "dIr" (bit), "i" (&per_cpu__##var) : "memory");	\
+	old__;								\
+})
+
 #ifdef CONFIG_X86_64
 extern void load_pda_offset(int cpu);
 #else
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index d2a0baa87d1b..a09abb8fb97f 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -14,7 +14,6 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
 OUTPUT_ARCH(i386:x86-64)
 ENTRY(phys_startup_64)
 jiffies_64 = jiffies;
-_proxy_pda = 1;
 PHDRS {
 	text PT_LOAD FLAGS(5);	/* R_E */
 	data PT_LOAD FLAGS(7);	/* RWE */
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 695e426aa354..3909e3ba5ce3 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy);
 EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(init_level4_pgt);
 EXPORT_SYMBOL(load_gs_index);
-
-EXPORT_SYMBOL(_proxy_pda);
-- 
cgit v1.2.3


From 004aa322f855a765741d9437a98dd8fe2e4f32a6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 Jan 2009 20:41:35 +0900
Subject: x86: misc clean up after the percpu update

Do the following cleanups:

* kill x86_64_init_pda() which now is equivalent to pda_init()

* use per_cpu_offset() instead of cpu_pda() when initializing
  initial_gs

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/setup.h | 1 -
 arch/x86/kernel/acpi/sleep.c | 2 +-
 arch/x86/kernel/head64.c     | 7 +------
 arch/x86/kernel/smpboot.c    | 2 +-
 arch/x86/xen/enlighten.c     | 2 +-
 5 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ebe858cdc8a3..536949749bc2 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -100,7 +100,6 @@ extern unsigned long init_pg_tables_start;
 extern unsigned long init_pg_tables_end;
 
 #else
-void __init x86_64_init_pda(void);
 void __init x86_64_start_kernel(char *real_mode);
 void __init x86_64_start_reservations(char *real_mode_data);
 
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 9ff67f8dc2c0..4abff454c55b 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,7 +101,7 @@ int acpi_save_state_mem(void)
 	stack_start.sp = temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
 			(unsigned long)get_cpu_gdt_table(smp_processor_id());
-	initial_gs = (unsigned long)cpu_pda(smp_processor_id());
+	initial_gs = per_cpu_offset(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
 	saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 71b6f6ec96a2..af67d3227ea6 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,11 +26,6 @@
 #include <asm/bios_ebda.h>
 #include <asm/trampoline.h>
 
-void __init x86_64_init_pda(void)
-{
-	pda_init(0);
-}
-
 static void __init zap_identity_mappings(void)
 {
 	pgd_t *pgd = pgd_offset_k(0UL);
@@ -96,7 +91,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	if (console_loglevel == 10)
 		early_printk("Kernel alive\n");
 
-	x86_64_init_pda();
+	pda_init(0);
 
 	x86_64_start_reservations(real_mode_data);
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f2f77ca494d4..2f0e0f1090f6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -798,7 +798,7 @@ do_rest:
 #else
 	cpu_pda(cpu)->pcurrent = c_idle.idle;
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
-	initial_gs = (unsigned long)cpu_pda(cpu);
+	initial_gs = per_cpu_offset(cpu);
 #endif
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 965539ec425f..312414ef9365 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1645,7 +1645,7 @@ asmlinkage void __init xen_start_kernel(void)
 #ifdef CONFIG_X86_64
 	/* Disable until direct per-cpu data access. */
 	have_vcpu_info_placement = 0;
-	x86_64_init_pda();
+	pda_init(0);
 #endif
 
 	xen_smp_init();
-- 
cgit v1.2.3


From 6dbde3530850d4d8bfc1b6bd4006d92786a2787f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 15 Jan 2009 22:15:53 +0900
Subject: percpu: add optimized generic percpu accessors

It is an optimization and a cleanup, and adds the following new
generic percpu methods:

  percpu_read()
  percpu_write()
  percpu_add()
  percpu_sub()
  percpu_and()
  percpu_or()
  percpu_xor()

and implements support for them on x86. (other architectures will fall
back to a default implementation)

The advantage is that for example to read a local percpu variable,
instead of this sequence:

 return __get_cpu_var(var);

 ffffffff8102ca2b:	48 8b 14 fd 80 09 74 	mov    -0x7e8bf680(,%rdi,8),%rdx
 ffffffff8102ca32:	81
 ffffffff8102ca33:	48 c7 c0 d8 59 00 00 	mov    $0x59d8,%rax
 ffffffff8102ca3a:	48 8b 04 10          	mov    (%rax,%rdx,1),%rax

We can get a single instruction by using the optimized variants:

 return percpu_read(var);

 ffffffff8102ca3f:	65 48 8b 05 91 8f fd 	mov    %gs:0x7efd8f91(%rip),%rax

I also cleaned up the x86-specific APIs and made the x86 code use
these new generic percpu primitives.

tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out
    * added percpu_and() for completeness's sake
    * made generic percpu ops atomic against preemption

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/current.h        |  2 +-
 arch/x86/include/asm/irq_regs_32.h    |  4 +--
 arch/x86/include/asm/mmu_context_32.h | 12 ++++----
 arch/x86/include/asm/pda.h            | 10 +++----
 arch/x86/include/asm/percpu.h         | 24 ++++++++--------
 arch/x86/include/asm/smp.h            |  2 +-
 arch/x86/kernel/process_32.c          |  2 +-
 arch/x86/kernel/tlb_32.c              | 10 +++----
 arch/x86/mach-voyager/voyager_smp.c   |  4 +--
 arch/x86/xen/enlighten.c              | 14 +++++-----
 arch/x86/xen/irq.c                    |  8 +++---
 arch/x86/xen/mmu.c                    |  2 +-
 arch/x86/xen/multicalls.h             |  2 +-
 arch/x86/xen/smp.c                    |  2 +-
 include/asm-generic/percpu.h          | 52 +++++++++++++++++++++++++++++++++++
 15 files changed, 102 insertions(+), 48 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0930b4f8d672..0728480f5c56 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -10,7 +10,7 @@ struct task_struct;
 DECLARE_PER_CPU(struct task_struct *, current_task);
 static __always_inline struct task_struct *get_current(void)
 {
-	return x86_read_percpu(current_task);
+	return percpu_read(current_task);
 }
 
 #else /* X86_32 */
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
index 86afd7473457..d7ed33ee94e9 100644
--- a/arch/x86/include/asm/irq_regs_32.h
+++ b/arch/x86/include/asm/irq_regs_32.h
@@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
 
 static inline struct pt_regs *get_irq_regs(void)
 {
-	return x86_read_percpu(irq_regs);
+	return percpu_read(irq_regs);
 }
 
 static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
 	struct pt_regs *old_regs;
 
 	old_regs = get_irq_regs();
-	x86_write_percpu(irq_regs, new_regs);
+	percpu_write(irq_regs, new_regs);
 
 	return old_regs;
 }
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
index 7e98ce1d2c0e..08b53454f831 100644
--- a/arch/x86/include/asm/mmu_context_32.h
+++ b/arch/x86/include/asm/mmu_context_32.h
@@ -4,8 +4,8 @@
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 #ifdef CONFIG_SMP
-	if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
 #endif
 }
 
@@ -19,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev,
 		/* stop flush ipis for the previous mm */
 		cpu_clear(cpu, prev->cpu_vm_mask);
 #ifdef CONFIG_SMP
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
-		x86_write_percpu(cpu_tlbstate.active_mm, next);
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		percpu_write(cpu_tlbstate.active_mm, next);
 #endif
 		cpu_set(cpu, next->cpu_vm_mask);
 
@@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev,
 	}
 #ifdef CONFIG_SMP
 	else {
-		x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
 
 		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
 			/* We were in lazy tlb mode and leave_mm disabled
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index e3d3a081d798..47f274fe6953 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -45,11 +45,11 @@ extern void pda_init(int);
 
 #define cpu_pda(cpu)		(&per_cpu(__pda, cpu))
 
-#define read_pda(field)		x86_read_percpu(__pda.field)
-#define write_pda(field, val)	x86_write_percpu(__pda.field, val)
-#define add_pda(field, val)	x86_add_percpu(__pda.field, val)
-#define sub_pda(field, val)	x86_sub_percpu(__pda.field, val)
-#define or_pda(field, val)	x86_or_percpu(__pda.field, val)
+#define read_pda(field)		percpu_read(__pda.field)
+#define write_pda(field, val)	percpu_write(__pda.field, val)
+#define add_pda(field, val)	percpu_add(__pda.field, val)
+#define sub_pda(field, val)	percpu_sub(__pda.field, val)
+#define or_pda(field, val)	percpu_or(__pda.field, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define test_and_clear_bit_pda(bit, field)				\
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 328b31a429d7..03aa4b00a1c3 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -40,16 +40,11 @@
 
 #ifdef CONFIG_SMP
 #define __percpu_seg_str	"%%"__stringify(__percpu_seg)":"
-#define __my_cpu_offset		x86_read_percpu(this_cpu_off)
+#define __my_cpu_offset		percpu_read(this_cpu_off)
 #else
 #define __percpu_seg_str
 #endif
 
-#include <asm-generic/percpu.h>
-
-/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU(unsigned long, this_cpu_off);
-
 /* For arch-specific code, we can use direct single-insn ops (they
  * don't give an lvalue though). */
 extern void __bad_percpu_size(void);
@@ -115,11 +110,13 @@ do {							\
 	ret__;						\
 })
 
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
+#define percpu_read(var)	percpu_from_op("mov", per_cpu__##var)
+#define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
+#define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
+#define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)
+#define percpu_and(var, val)	percpu_to_op("and", per_cpu__##var, val)
+#define percpu_or(var, val)	percpu_to_op("or", per_cpu__##var, val)
+#define percpu_xor(var, val)	percpu_to_op("xor", per_cpu__##var, val)
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
 #define x86_test_and_clear_bit_percpu(bit, var)				\
@@ -131,6 +128,11 @@ do {							\
 	old__;								\
 })
 
+#include <asm-generic/percpu.h>
+
+/* We can use this directly for local CPU (faster). */
+DECLARE_PER_CPU(unsigned long, this_cpu_off);
+
 #ifdef CONFIG_X86_64
 extern void load_pda_offset(int cpu);
 #else
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 127415402ea1..c7bbbbe65d3f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -160,7 +160,7 @@ extern unsigned disabled_cpus __cpuinitdata;
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
+#define raw_smp_processor_id() (percpu_read(cpu_number))
 extern int safe_smp_processor_id(void);
 
 #elif defined(CONFIG_X86_64_SMP)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a546f55c77b4..77d546817d94 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -591,7 +591,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (prev->gs | next->gs)
 		loadsegment(gs, next->gs);
 
-	x86_write_percpu(current_task, next_p);
+	percpu_write(current_task, next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index ec53818f4e38..e65449d0f7d9 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -34,8 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock);
  */
 void leave_mm(int cpu)
 {
-	BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
-	cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
+	BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
+	cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
 	load_cr3(swapper_pg_dir);
 }
 EXPORT_SYMBOL_GPL(leave_mm);
@@ -103,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 		 * BUG();
 		 */
 
-	if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
-		if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
+	if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
+		if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
 			if (flush_va == TLB_FLUSH_ALL)
 				local_flush_tlb();
 			else
@@ -222,7 +222,7 @@ static void do_flush_tlb_all(void *info)
 	unsigned long cpu = smp_processor_id();
 
 	__flush_tlb_all();
-	if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
 		leave_mm(cpu);
 }
 
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 1a48368acb09..96f15b09a4c5 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -402,7 +402,7 @@ void __init find_smp_config(void)
 	     VOYAGER_SUS_IN_CONTROL_PORT);
 
 	current_thread_info()->cpu = boot_cpu_id;
-	x86_write_percpu(cpu_number, boot_cpu_id);
+	percpu_write(cpu_number, boot_cpu_id);
 }
 
 /*
@@ -1782,7 +1782,7 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus)
 void __init smp_setup_processor_id(void)
 {
 	current_thread_info()->cpu = hard_smp_processor_id();
-	x86_write_percpu(cpu_number, hard_smp_processor_id());
+	percpu_write(cpu_number, hard_smp_processor_id());
 }
 
 static void voyager_send_call_func(cpumask_t callmask)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 312414ef9365..75b94139e1f2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -695,17 +695,17 @@ static void xen_write_cr0(unsigned long cr0)
 
 static void xen_write_cr2(unsigned long cr2)
 {
-	x86_read_percpu(xen_vcpu)->arch.cr2 = cr2;
+	percpu_read(xen_vcpu)->arch.cr2 = cr2;
 }
 
 static unsigned long xen_read_cr2(void)
 {
-	return x86_read_percpu(xen_vcpu)->arch.cr2;
+	return percpu_read(xen_vcpu)->arch.cr2;
 }
 
 static unsigned long xen_read_cr2_direct(void)
 {
-	return x86_read_percpu(xen_vcpu_info.arch.cr2);
+	return percpu_read(xen_vcpu_info.arch.cr2);
 }
 
 static void xen_write_cr4(unsigned long cr4)
@@ -718,12 +718,12 @@ static void xen_write_cr4(unsigned long cr4)
 
 static unsigned long xen_read_cr3(void)
 {
-	return x86_read_percpu(xen_cr3);
+	return percpu_read(xen_cr3);
 }
 
 static void set_current_cr3(void *v)
 {
-	x86_write_percpu(xen_current_cr3, (unsigned long)v);
+	percpu_write(xen_current_cr3, (unsigned long)v);
 }
 
 static void __xen_write_cr3(bool kernel, unsigned long cr3)
@@ -748,7 +748,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
 	if (kernel) {
-		x86_write_percpu(xen_cr3, cr3);
+		percpu_write(xen_cr3, cr3);
 
 		/* Update xen_current_cr3 once the batch has actually
 		   been submitted. */
@@ -764,7 +764,7 @@ static void xen_write_cr3(unsigned long cr3)
 
 	/* Update while interrupts are disabled, so its atomic with
 	   respect to ipis */
-	x86_write_percpu(xen_cr3, cr3);
+	percpu_write(xen_cr3, cr3);
 
 	__xen_write_cr3(true, cr3);
 
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index bb042608c602..2e8271431e1a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -39,7 +39,7 @@ static unsigned long xen_save_fl(void)
 	struct vcpu_info *vcpu;
 	unsigned long flags;
 
-	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu = percpu_read(xen_vcpu);
 
 	/* flag has opposite sense of mask */
 	flags = !vcpu->evtchn_upcall_mask;
@@ -62,7 +62,7 @@ static void xen_restore_fl(unsigned long flags)
 	   make sure we're don't switch CPUs between getting the vcpu
 	   pointer and updating the mask. */
 	preempt_disable();
-	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu = percpu_read(xen_vcpu);
 	vcpu->evtchn_upcall_mask = flags;
 	preempt_enable_no_resched();
 
@@ -83,7 +83,7 @@ static void xen_irq_disable(void)
 	   make sure we're don't switch CPUs between getting the vcpu
 	   pointer and updating the mask. */
 	preempt_disable();
-	x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
+	percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
 	preempt_enable_no_resched();
 }
 
@@ -96,7 +96,7 @@ static void xen_irq_enable(void)
 	   the caller is confused and is trying to re-enable interrupts
 	   on an indeterminate processor. */
 
-	vcpu = x86_read_percpu(xen_vcpu);
+	vcpu = percpu_read(xen_vcpu);
 	vcpu->evtchn_upcall_mask = 0;
 
 	/* Doesn't matter if we get preempted here, because any
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 503c240e26c7..7bc7852cc5c4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1074,7 +1074,7 @@ static void drop_other_mm_ref(void *info)
 
 	/* If this cpu still has a stale cr3 reference, then make sure
 	   it has been flushed. */
-	if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) {
+	if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
 		load_cr3(swapper_pg_dir);
 		arch_flush_lazy_cpu_mode();
 	}
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 858938241616..e786fa7f2615 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -39,7 +39,7 @@ static inline void xen_mc_issue(unsigned mode)
 		xen_mc_flush();
 
 	/* restore flags saved in xen_mc_batch */
-	local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
+	local_irq_restore(percpu_read(xen_mc_irq_flags));
 }
 
 /* Set up a callback to be called when the current batch is flushed */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 83fa4236477d..3bfd6dd0b47c 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -78,7 +78,7 @@ static __cpuinit void cpu_bringup(void)
 	xen_setup_cpu_clockevents();
 
 	cpu_set(cpu, cpu_online_map);
-	x86_write_percpu(cpu_state, CPU_ONLINE);
+	percpu_write(cpu_state, CPU_ONLINE);
 	wmb();
 
 	/* We can take interrupts now: we're officially "up". */
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index b0e63c672ebd..00f45ff081a6 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -80,4 +80,56 @@ extern void setup_per_cpu_areas(void);
 #define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \
 					__typeof__(type) per_cpu_var(name)
 
+/*
+ * Optional methods for optimized non-lvalue per-cpu variable access.
+ *
+ * @var can be a percpu variable or a field of it and its size should
+ * equal char, int or long.  percpu_read() evaluates to a lvalue and
+ * all others to void.
+ *
+ * These operations are guaranteed to be atomic w.r.t. preemption.
+ * The generic versions use plain get/put_cpu_var().  Archs are
+ * encouraged to implement single-instruction alternatives which don't
+ * require preemption protection.
+ */
+#ifndef percpu_read
+# define percpu_read(var)						\
+  ({									\
+	typeof(per_cpu_var(var)) __tmp_var__;				\
+	__tmp_var__ = get_cpu_var(var);					\
+	put_cpu_var(var);						\
+	__tmp_var__;							\
+  })
+#endif
+
+#define __percpu_generic_to_op(var, val, op)				\
+do {									\
+	get_cpu_var(var) op val;					\
+	put_cpu_var(var);						\
+} while (0)
+
+#ifndef percpu_write
+# define percpu_write(var, val)		__percpu_generic_to_op(var, (val), =)
+#endif
+
+#ifndef percpu_add
+# define percpu_add(var, val)		__percpu_generic_to_op(var, (val), +=)
+#endif
+
+#ifndef percpu_sub
+# define percpu_sub(var, val)		__percpu_generic_to_op(var, (val), -=)
+#endif
+
+#ifndef percpu_and
+# define percpu_and(var, val)		__percpu_generic_to_op(var, (val), &=)
+#endif
+
+#ifndef percpu_or
+# define percpu_or(var, val)		__percpu_generic_to_op(var, (val), |=)
+#endif
+
+#ifndef percpu_xor
+# define percpu_xor(var, val)		__percpu_generic_to_op(var, (val), ^=)
+#endif
+
 #endif /* _ASM_GENERIC_PERCPU_H_ */
-- 
cgit v1.2.3


From 1b437c8c73a36daa471dd54a63c426d72af5723d Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 00:38:57 +0900
Subject: x86-64: Move irq stats from PDA to per-cpu and consolidate with
 32-bit.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/hardirq_64.h | 24 +++++++++++++++++++-----
 arch/x86/include/asm/pda.h        | 10 ----------
 arch/x86/kernel/irq.c             |  6 +-----
 arch/x86/kernel/irq_64.c          |  3 +++
 arch/x86/kernel/nmi.c             | 10 +---------
 arch/x86/xen/smp.c                | 18 +++---------------
 6 files changed, 27 insertions(+), 44 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
index b5a6b5d56704..a65bab20f6ce 100644
--- a/arch/x86/include/asm/hardirq_64.h
+++ b/arch/x86/include/asm/hardirq_64.h
@@ -3,22 +3,36 @@
 
 #include <linux/threads.h>
 #include <linux/irq.h>
-#include <asm/pda.h>
 #include <asm/apic.h>
 
+typedef struct {
+	unsigned int __softirq_pending;
+	unsigned int __nmi_count;	/* arch dependent */
+	unsigned int apic_timer_irqs;	/* arch dependent */
+	unsigned int irq0_irqs;
+	unsigned int irq_resched_count;
+	unsigned int irq_call_count;
+	unsigned int irq_tlb_count;
+	unsigned int irq_thermal_count;
+	unsigned int irq_spurious_count;
+	unsigned int irq_threshold_count;
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+
 /* We can have at most NR_VECTORS irqs routed to a cpu at a time */
 #define MAX_HARDIRQS_PER_CPU NR_VECTORS
 
 #define __ARCH_IRQ_STAT 1
 
-#define inc_irq_stat(member)	add_pda(member, 1)
+#define inc_irq_stat(member)	percpu_add(irq_stat.member, 1)
 
-#define local_softirq_pending() read_pda(__softirq_pending)
+#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
 
 #define __ARCH_SET_SOFTIRQ_PENDING 1
 
-#define set_softirq_pending(x) write_pda(__softirq_pending, (x))
-#define or_softirq_pending(x)  or_pda(__softirq_pending, (x))
+#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
+#define or_softirq_pending(x)  percpu_or(irq_stat.__softirq_pending, (x))
 
 extern void ack_bad_irq(unsigned int irq);
 
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 47f274fe6953..69a40757e217 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -25,19 +25,9 @@ struct x8664_pda {
 	char *irqstackptr;
 	short nodenumber;		/* number of current node (32k max) */
 	short in_bootmem;		/* pda lives in bootmem */
-	unsigned int __softirq_pending;
-	unsigned int __nmi_count;	/* number of NMI on this CPUs */
 	short mmu_state;
 	short isidle;
 	struct mm_struct *active_mm;
-	unsigned apic_timer_irqs;
-	unsigned irq0_irqs;
-	unsigned irq_resched_count;
-	unsigned irq_call_count;
-	unsigned irq_tlb_count;
-	unsigned irq_thermal_count;
-	unsigned irq_threshold_count;
-	unsigned irq_spurious_count;
 } ____cacheline_aligned_in_smp;
 
 DECLARE_PER_CPU(struct x8664_pda, __pda);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3973e2df7f87..8b30d0c2512c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq)
 #endif
 }
 
-#ifdef CONFIG_X86_32
-# define irq_stats(x)		(&per_cpu(irq_stat, x))
-#else
-# define irq_stats(x)		cpu_pda(x)
-#endif
+#define irq_stats(x)		(&per_cpu(irq_stat, x))
 /*
  * /proc/interrupts printing:
  */
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 0b21cb1ea11f..1db05247b47f 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -19,6 +19,9 @@
 #include <asm/io_apic.h>
 #include <asm/idle.h>
 
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+EXPORT_PER_CPU_SYMBOL(irq_stat);
+
 /*
  * Probabilistic stack overflow check:
  *
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 7228979f1e7f..23b6d9e6e4f5 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -61,11 +61,7 @@ static int endflag __initdata;
 
 static inline unsigned int get_nmi_count(int cpu)
 {
-#ifdef CONFIG_X86_64
-	return cpu_pda(cpu)->__nmi_count;
-#else
-	return nmi_count(cpu);
-#endif
+	return per_cpu(irq_stat, cpu).__nmi_count;
 }
 
 static inline int mce_in_progress(void)
@@ -82,12 +78,8 @@ static inline int mce_in_progress(void)
  */
 static inline unsigned int get_timer_irqs(int cpu)
 {
-#ifdef CONFIG_X86_64
-	return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
-#else
 	return per_cpu(irq_stat, cpu).apic_timer_irqs +
 		per_cpu(irq_stat, cpu).irq0_irqs;
-#endif
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 3bfd6dd0b47c..9ff3b0999cfb 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
  */
 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 {
-#ifdef CONFIG_X86_32
-	__get_cpu_var(irq_stat).irq_resched_count++;
-#else
-	add_pda(irq_resched_count, 1);
-#endif
+	inc_irq_stat(irq_resched_count);
 
 	return IRQ_HANDLED;
 }
@@ -435,11 +431,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
 {
 	irq_enter();
 	generic_smp_call_function_interrupt();
-#ifdef CONFIG_X86_32
-	__get_cpu_var(irq_stat).irq_call_count++;
-#else
-	add_pda(irq_call_count, 1);
-#endif
+	inc_irq_stat(irq_call_count);
 	irq_exit();
 
 	return IRQ_HANDLED;
@@ -449,11 +441,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 {
 	irq_enter();
 	generic_smp_call_function_single_interrupt();
-#ifdef CONFIG_X86_32
-	__get_cpu_var(irq_stat).irq_call_count++;
-#else
-	add_pda(irq_call_count, 1);
-#endif
+	inc_irq_stat(irq_call_count);
 	irq_exit();
 
 	return IRQ_HANDLED;
-- 
cgit v1.2.3


From 9eb912d1aa6b8106e06a73ea6702ec3dab0d6a1a Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 00:38:57 +0900
Subject: x86-64: Move TLB state from PDA to per-cpu and consolidate with
 32-bit.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/mmu_context_64.h | 16 +++++++---------
 arch/x86/include/asm/pda.h            |  2 --
 arch/x86/include/asm/tlbflush.h       |  7 ++-----
 arch/x86/kernel/cpu/common.c          |  2 --
 arch/x86/kernel/tlb_32.c              | 12 ++----------
 arch/x86/kernel/tlb_64.c              | 13 ++++++++-----
 arch/x86/xen/mmu.c                    |  6 +-----
 7 files changed, 20 insertions(+), 38 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h
index 677d36e9540a..c4572505ab3e 100644
--- a/arch/x86/include/asm/mmu_context_64.h
+++ b/arch/x86/include/asm/mmu_context_64.h
@@ -1,13 +1,11 @@
 #ifndef _ASM_X86_MMU_CONTEXT_64_H
 #define _ASM_X86_MMU_CONTEXT_64_H
 
-#include <asm/pda.h>
-
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
 #ifdef CONFIG_SMP
-	if (read_pda(mmu_state) == TLBSTATE_OK)
-		write_pda(mmu_state, TLBSTATE_LAZY);
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
 #endif
 }
 
@@ -19,8 +17,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		/* stop flush ipis for the previous mm */
 		cpu_clear(cpu, prev->cpu_vm_mask);
 #ifdef CONFIG_SMP
-		write_pda(mmu_state, TLBSTATE_OK);
-		write_pda(active_mm, next);
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		percpu_write(cpu_tlbstate.active_mm, next);
 #endif
 		cpu_set(cpu, next->cpu_vm_mask);
 		load_cr3(next->pgd);
@@ -30,9 +28,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	}
 #ifdef CONFIG_SMP
 	else {
-		write_pda(mmu_state, TLBSTATE_OK);
-		if (read_pda(active_mm) != next)
-			BUG();
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
+
 		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
 			/* We were in lazy tlb mode and leave_mm disabled
 			 * tlb flush IPI delivery. We must reload CR3
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 69a40757e217..8ee835ed10e1 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -25,9 +25,7 @@ struct x8664_pda {
 	char *irqstackptr;
 	short nodenumber;		/* number of current node (32k max) */
 	short in_bootmem;		/* pda lives in bootmem */
-	short mmu_state;
 	short isidle;
-	struct mm_struct *active_mm;
 } ____cacheline_aligned_in_smp;
 
 DECLARE_PER_CPU(struct x8664_pda, __pda);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 17feaa9c7e76..d3539f998f88 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -148,20 +148,17 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
 
-#ifdef CONFIG_X86_32
 struct tlb_state {
 	struct mm_struct *active_mm;
 	int state;
-	char __cacheline_padding[L1_CACHE_BYTES-8];
 };
 DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
 
-void reset_lazy_tlbstate(void);
-#else
 static inline void reset_lazy_tlbstate(void)
 {
+	percpu_write(cpu_tlbstate.state, 0);
+	percpu_write(cpu_tlbstate.active_mm, &init_mm);
 }
-#endif
 
 #endif	/* SMP */
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c49498d40830..3d0cc6f17116 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -897,8 +897,6 @@ void __cpuinit pda_init(int cpu)
 	pda->irqcount = -1;
 	pda->kernelstack = (unsigned long)stack_thread_info() -
 				 PDA_STACKOFFSET + THREAD_SIZE;
-	pda->active_mm = &init_mm;
-	pda->mmu_state = 0;
 
 	if (cpu == 0) {
 		/* others are initialized in smpboot.c */
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index e65449d0f7d9..abf0808d6fc4 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -4,8 +4,8 @@
 
 #include <asm/tlbflush.h>
 
-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
-			____cacheline_aligned = { &init_mm, 0, };
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
+			= { &init_mm, 0, };
 
 /* must come after the send_IPI functions above for inlining */
 #include <mach_ipi.h>
@@ -231,14 +231,6 @@ void flush_tlb_all(void)
 	on_each_cpu(do_flush_tlb_all, NULL, 1);
 }
 
-void reset_lazy_tlbstate(void)
-{
-	int cpu = raw_smp_processor_id();
-
-	per_cpu(cpu_tlbstate, cpu).state = 0;
-	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
-}
-
 static int init_flush_cpumask(void)
 {
 	alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 7f4141d3b661..e64a32c48825 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -18,6 +18,9 @@
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_bau.h>
 
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
+			= { &init_mm, 0, };
+
 #include <mach_ipi.h>
 /*
  *	Smarter SMP flushing macros.
@@ -62,9 +65,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
  */
 void leave_mm(int cpu)
 {
-	if (read_pda(mmu_state) == TLBSTATE_OK)
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
 		BUG();
-	cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
+	cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
 	load_cr3(swapper_pg_dir);
 }
 EXPORT_SYMBOL_GPL(leave_mm);
@@ -142,8 +145,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 		 * BUG();
 		 */
 
-	if (f->flush_mm == read_pda(active_mm)) {
-		if (read_pda(mmu_state) == TLBSTATE_OK) {
+	if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
+		if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
 			if (f->flush_va == TLB_FLUSH_ALL)
 				local_flush_tlb();
 			else
@@ -281,7 +284,7 @@ static void do_flush_tlb_all(void *info)
 	unsigned long cpu = smp_processor_id();
 
 	__flush_tlb_all();
-	if (read_pda(mmu_state) == TLBSTATE_LAZY)
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
 		leave_mm(cpu);
 }
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 7bc7852cc5c4..98cb9869eb24 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1063,11 +1063,7 @@ static void drop_other_mm_ref(void *info)
 	struct mm_struct *mm = info;
 	struct mm_struct *active_mm;
 
-#ifdef CONFIG_X86_64
-	active_mm = read_pda(active_mm);
-#else
-	active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
-#endif
+	active_mm = percpu_read(cpu_tlbstate.active_mm);
 
 	if (active_mm == mm)
 		leave_mm(smp_processor_id());
-- 
cgit v1.2.3


From 26f80bd6a9ab17bc8a60b6092e7c0d05c5927ce5 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 00:38:58 +0900
Subject: x86-64: Convert irqstacks to per-cpu

Move the irqstackptr variable from the PDA to per-cpu.  Make the
stacks themselves per-cpu, removing some specific allocation code.
Add a seperate flag (is_boot_cpu) to simplify the per-cpu boot
adjustments.

tj: * sprinkle some underbars around.

    * irq_stack_ptr is not used till traps_init(), no reason to
      initialize it early.  On SMP, just leaving it NULL till proper
      initialization in setup_per_cpu_areas() works.  Dropped
      is_boot_cpu and early irq_stack_ptr initialization.

    * do DECLARE/DEFINE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack)
      instead of (char, irq_stack[IRQ_STACK_SIZE]).

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/page_64.h   |  4 ++--
 arch/x86/include/asm/pda.h       |  1 -
 arch/x86/include/asm/processor.h |  3 +++
 arch/x86/kernel/asm-offsets_64.c |  1 -
 arch/x86/kernel/cpu/common.c     | 19 +++++++------------
 arch/x86/kernel/dumpstack_64.c   | 33 +++++++++++++++++----------------
 arch/x86/kernel/entry_64.S       |  6 +++---
 arch/x86/kernel/setup_percpu.c   |  4 +++-
 8 files changed, 35 insertions(+), 36 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 5ebca29f44f0..e27fdbe5f9e4 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -13,8 +13,8 @@
 #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
 #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
 
-#define IRQSTACK_ORDER 2
-#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
+#define IRQ_STACK_ORDER 2
+#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
 
 #define STACKFAULT_STACK 1
 #define DOUBLEFAULT_STACK 2
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 8ee835ed10e1..09965f7a2165 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -22,7 +22,6 @@ struct x8664_pda {
 					/* gcc-ABI: this canary MUST be at
 					   offset 40!!! */
 #endif
-	char *irqstackptr;
 	short nodenumber;		/* number of current node (32k max) */
 	short in_bootmem;		/* pda lives in bootmem */
 	short isidle;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 091cd8855f2e..f511246fa6cd 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -378,6 +378,9 @@ union thread_xstate {
 
 #ifdef CONFIG_X86_64
 DECLARE_PER_CPU(struct orig_ist, orig_ist);
+
+DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
+DECLARE_PER_CPU(char *, irq_stack_ptr);
 #endif
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f4cc81bfbf89..5b821fbdaf7b 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -54,7 +54,6 @@ int main(void)
 	ENTRY(pcurrent); 
 	ENTRY(irqcount);
 	ENTRY(cpunumber);
-	ENTRY(irqstackptr);
 	DEFINE(pda_size, sizeof(struct x8664_pda));
 	BLANK();
 #undef ENTRY
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3d0cc6f17116..496f0a01919b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -881,7 +881,13 @@ __setup("clearcpuid=", setup_disablecpuid);
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
-static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(char *, irq_stack_ptr);	/* will be set during per cpu init */
+#else
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+	per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
+#endif
 
 void __cpuinit pda_init(int cpu)
 {
@@ -901,18 +907,7 @@ void __cpuinit pda_init(int cpu)
 	if (cpu == 0) {
 		/* others are initialized in smpboot.c */
 		pda->pcurrent = &init_task;
-		pda->irqstackptr = boot_cpu_stack;
-		pda->irqstackptr += IRQSTACKSIZE - 64;
 	} else {
-		if (!pda->irqstackptr) {
-			pda->irqstackptr = (char *)
-				__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
-			if (!pda->irqstackptr)
-				panic("cannot allocate irqstack for cpu %d",
-				      cpu);
-			pda->irqstackptr += IRQSTACKSIZE - 64;
-		}
-
 		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
 			pda->nodenumber = cpu_to_node(cpu);
 	}
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index c302d0707048..28e26a4315df 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		const struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = get_cpu();
-	unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+	unsigned long *irq_stack_end =
+		(unsigned long *)per_cpu(irq_stack_ptr, cpu);
 	unsigned used = 0;
 	struct thread_info *tinfo;
 	int graph = 0;
@@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 			stack = (unsigned long *) estack_end[-2];
 			continue;
 		}
-		if (irqstack_end) {
-			unsigned long *irqstack;
-			irqstack = irqstack_end -
-				(IRQSTACKSIZE - 64) / sizeof(*irqstack);
+		if (irq_stack_end) {
+			unsigned long *irq_stack;
+			irq_stack = irq_stack_end -
+				(IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
 
-			if (stack >= irqstack && stack < irqstack_end) {
+			if (stack >= irq_stack && stack < irq_stack_end) {
 				if (ops->stack(data, "IRQ") < 0)
 					break;
 				bp = print_context_stack(tinfo, stack, bp,
-					ops, data, irqstack_end, &graph);
+					ops, data, irq_stack_end, &graph);
 				/*
 				 * We link to the next stack (which would be
 				 * the process stack normally) the last
 				 * pointer (index -1 to end) in the IRQ stack:
 				 */
-				stack = (unsigned long *) (irqstack_end[-1]);
-				irqstack_end = NULL;
+				stack = (unsigned long *) (irq_stack_end[-1]);
+				irq_stack_end = NULL;
 				ops->stack(data, "EOI");
 				continue;
 			}
@@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	unsigned long *stack;
 	int i;
 	const int cpu = smp_processor_id();
-	unsigned long *irqstack_end =
-		(unsigned long *) (cpu_pda(cpu)->irqstackptr);
-	unsigned long *irqstack =
-		(unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+	unsigned long *irq_stack_end =
+		(unsigned long *)(per_cpu(irq_stack_ptr, cpu));
+	unsigned long *irq_stack =
+		(unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
 
 	/*
 	 * debugging aid: "show_stack(NULL, NULL);" prints the
@@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
 	stack = sp;
 	for (i = 0; i < kstack_depth_to_print; i++) {
-		if (stack >= irqstack && stack <= irqstack_end) {
-			if (stack == irqstack_end) {
-				stack = (unsigned long *) (irqstack_end[-1]);
+		if (stack >= irq_stack && stack <= irq_stack_end) {
+			if (stack == irq_stack_end) {
+				stack = (unsigned long *) (irq_stack_end[-1]);
 				printk(" <EOI> ");
 			}
 		} else {
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 4833f3a19650..d22677a66438 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -345,7 +345,7 @@ ENTRY(save_args)
 1:	incl %gs:pda_irqcount
 	jne 2f
 	popq_cfi %rax			/* move return address... */
-	mov %gs:pda_irqstackptr,%rsp
+	mov PER_CPU_VAR(irq_stack_ptr),%rsp
 	EMPTY_FRAME 0
 	pushq_cfi %rax			/* ... to the new stack */
 	/*
@@ -1261,7 +1261,7 @@ ENTRY(call_softirq)
 	mov  %rsp,%rbp
 	CFI_DEF_CFA_REGISTER rbp
 	incl %gs:pda_irqcount
-	cmove %gs:pda_irqstackptr,%rsp
+	cmove PER_CPU_VAR(irq_stack_ptr),%rsp
 	push  %rbp			# backlink for old unwinder
 	call __do_softirq
 	leaveq
@@ -1300,7 +1300,7 @@ ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
 11:	incl %gs:pda_irqcount
 	movq %rsp,%rbp
 	CFI_DEF_CFA_REGISTER rbp
-	cmovzq %gs:pda_irqstackptr,%rsp
+	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
 	pushq %rbp			# backlink for old unwinder
 	call xen_evtchn_do_upcall
 	popq %rsp
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index b5c35af2011d..8b53ef83c611 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -192,7 +192,10 @@ void __init setup_per_cpu_areas(void)
 
 		memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
+		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
 #ifdef CONFIG_X86_64
+		per_cpu(irq_stack_ptr, cpu) =
+			(char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
 		/*
 		 * CPU0 modified pda in the init data area, reload pda
 		 * offset for CPU0 and clear the area for others.
@@ -202,7 +205,6 @@ void __init setup_per_cpu_areas(void)
 		else
 			memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
 #endif
-		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
 
 		DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
 	}
-- 
cgit v1.2.3


From ea9279066de44053d0c20ea855bc9f4706652d84 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 00:38:58 +0900
Subject: x86-64: Move cpu number from PDA to per-cpu and consolidate with
 32-bit.

tj: moved cpu_number definition out of CONFIG_HAVE_SETUP_PER_CPU_AREA
    for voyager.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/pda.h       |  2 +-
 arch/x86/include/asm/smp.h       |  4 +---
 arch/x86/kernel/asm-offsets_64.c |  1 -
 arch/x86/kernel/cpu/common.c     |  1 -
 arch/x86/kernel/process_32.c     |  3 ---
 arch/x86/kernel/setup_percpu.c   | 10 ++++++++++
 arch/x86/kernel/smpcommon.c      |  2 --
 7 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 09965f7a2165..668d5a5b6f70 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -16,7 +16,7 @@ struct x8664_pda {
 	unsigned long kernelstack;	/* 16 top of kernel stack for current */
 	unsigned long oldrsp;		/* 24 user rsp for system call */
 	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
-	unsigned int cpunumber;		/* 36 Logical CPU number */
+	unsigned int unused6;		/* 36 was cpunumber */
 #ifdef CONFIG_CC_STACKPROTECTOR
 	unsigned long stack_canary;	/* 40 stack canary value */
 					/* gcc-ABI: this canary MUST be at
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index c7bbbbe65d3f..68636e767a91 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -25,9 +25,7 @@ extern unsigned int num_processors;
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 DECLARE_PER_CPU(u16, cpu_llc_id);
-#ifdef CONFIG_X86_32
 DECLARE_PER_CPU(int, cpu_number);
-#endif
 
 static inline struct cpumask *cpu_sibling_mask(int cpu)
 {
@@ -164,7 +162,7 @@ extern unsigned disabled_cpus __cpuinitdata;
 extern int safe_smp_processor_id(void);
 
 #elif defined(CONFIG_X86_64_SMP)
-#define raw_smp_processor_id()	read_pda(cpunumber)
+#define raw_smp_processor_id() (percpu_read(cpu_number))
 
 #define stack_smp_processor_id()					\
 ({								\
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 5b821fbdaf7b..cae6697c0991 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -53,7 +53,6 @@ int main(void)
 	ENTRY(oldrsp); 
 	ENTRY(pcurrent); 
 	ENTRY(irqcount);
-	ENTRY(cpunumber);
 	DEFINE(pda_size, sizeof(struct x8664_pda));
 	BLANK();
 #undef ENTRY
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b6d7eec0be77..4221e920886d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -899,7 +899,6 @@ void __cpuinit pda_init(int cpu)
 
 	load_pda_offset(cpu);
 
-	pda->cpunumber = cpu;
 	pda->irqcount = -1;
 	pda->kernelstack = (unsigned long)stack_thread_info() -
 				 PDA_STACKOFFSET + THREAD_SIZE;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 77d546817d94..2c00a57ccb90 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
 
-DEFINE_PER_CPU(int, cpu_number);
-EXPORT_PER_CPU_SYMBOL(cpu_number);
-
 /*
  * Return saved PC of a blocked thread.
  */
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 8b53ef83c611..258497f93f4d 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -22,6 +22,15 @@
 # define DBG(x...)
 #endif
 
+/*
+ * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but
+ * voyager wants cpu_number too.
+ */
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(int, cpu_number);
+EXPORT_PER_CPU_SYMBOL(cpu_number);
+#endif
+
 #ifdef CONFIG_X86_LOCAL_APIC
 unsigned int num_processors;
 unsigned disabled_cpus __cpuinitdata;
@@ -193,6 +202,7 @@ void __init setup_per_cpu_areas(void)
 		memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
+		per_cpu(cpu_number, cpu) = cpu;
 #ifdef CONFIG_X86_64
 		per_cpu(irq_stack_ptr, cpu) =
 			(char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 7e157810062f..add36b4e37c9 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -28,7 +28,5 @@ __cpuinit void init_gdt(int cpu)
 
 	write_gdt_entry(get_cpu_gdt_table(cpu),
 			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
-
-	per_cpu(cpu_number, cpu) = cpu;
 }
 #endif
-- 
cgit v1.2.3


From c6f5e0acd5d12ee23f701f15889872e67b47caa6 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 00:38:58 +0900
Subject: x86-64: Move current task from PDA to per-cpu and consolidate with
 32-bit.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/current.h   | 24 +++---------------------
 arch/x86/include/asm/pda.h       |  4 ++--
 arch/x86/include/asm/system.h    |  4 ++--
 arch/x86/kernel/asm-offsets_64.c |  1 -
 arch/x86/kernel/cpu/common.c     |  5 +----
 arch/x86/kernel/dumpstack_64.c   |  2 +-
 arch/x86/kernel/process_64.c     |  5 ++++-
 arch/x86/kernel/smpboot.c        |  3 +--
 arch/x86/xen/smp.c               |  3 +--
 9 files changed, 15 insertions(+), 36 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0728480f5c56..c68c361697e1 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -1,39 +1,21 @@
 #ifndef _ASM_X86_CURRENT_H
 #define _ASM_X86_CURRENT_H
 
-#ifdef CONFIG_X86_32
 #include <linux/compiler.h>
 #include <asm/percpu.h>
 
+#ifndef __ASSEMBLY__
 struct task_struct;
 
 DECLARE_PER_CPU(struct task_struct *, current_task);
-static __always_inline struct task_struct *get_current(void)
-{
-	return percpu_read(current_task);
-}
-
-#else /* X86_32 */
-
-#ifndef __ASSEMBLY__
-#include <asm/pda.h>
-
-struct task_struct;
 
 static __always_inline struct task_struct *get_current(void)
 {
-	return read_pda(pcurrent);
+	return percpu_read(current_task);
 }
 
-#else /* __ASSEMBLY__ */
-
-#include <asm/asm-offsets.h>
-#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
+#define current get_current()
 
 #endif /* __ASSEMBLY__ */
 
-#endif /* X86_32 */
-
-#define current get_current()
-
 #endif /* _ASM_X86_CURRENT_H */
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 668d5a5b6f70..7209302d9227 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -11,8 +11,8 @@
 
 /* Per processor datastructure. %gs points to it while the kernel runs */
 struct x8664_pda {
-	struct task_struct *pcurrent;	/* 0  Current process */
-	unsigned long dummy;
+	unsigned long unused1;
+	unsigned long unused2;
 	unsigned long kernelstack;	/* 16 top of kernel stack for current */
 	unsigned long oldrsp;		/* 24 user rsp for system call */
 	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 8e626ea33a1a..4399aac680e9 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -94,7 +94,7 @@ do {									\
 	     "call __switch_to\n\t"					  \
 	     ".globl thread_return\n"					  \
 	     "thread_return:\n\t"					  \
-	     "movq %%gs:%P[pda_pcurrent],%%rsi\n\t"			  \
+	     "movq "__percpu_seg_str"%P[current_task],%%rsi\n\t"	  \
 	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
 	     LOCK_PREFIX "btr  %[tif_fork],%P[ti_flags](%%r8)\n\t"	  \
 	     "movq %%rax,%%rdi\n\t" 					  \
@@ -106,7 +106,7 @@ do {									\
 	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
 	       [tif_fork] "i" (TIF_FORK),			  	  \
 	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
-	       [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent))  \
+	       [current_task] "m" (per_cpu_var(current_task))		  \
 	     : "memory", "cc" __EXTRA_CLOBBER)
 #endif
 
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index cae6697c0991..4f7a210e1e58 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -51,7 +51,6 @@ int main(void)
 #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
 	ENTRY(kernelstack); 
 	ENTRY(oldrsp); 
-	ENTRY(pcurrent); 
 	ENTRY(irqcount);
 	DEFINE(pda_size, sizeof(struct x8664_pda));
 	BLANK();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4221e920886d..b50e38d16888 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -903,10 +903,7 @@ void __cpuinit pda_init(int cpu)
 	pda->kernelstack = (unsigned long)stack_thread_info() -
 				 PDA_STACKOFFSET + THREAD_SIZE;
 
-	if (cpu == 0) {
-		/* others are initialized in smpboot.c */
-		pda->pcurrent = &init_task;
-	} else {
+	if (cpu != 0) {
 		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
 			pda->nodenumber = cpu_to_node(cpu);
 	}
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 28e26a4315df..d35db5993fd6 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -242,7 +242,7 @@ void show_registers(struct pt_regs *regs)
 	int i;
 	unsigned long sp;
 	const int cpu = smp_processor_id();
-	struct task_struct *cur = cpu_pda(cpu)->pcurrent;
+	struct task_struct *cur = current;
 
 	sp = regs->sp;
 	printk("CPU %d ", cpu);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 416fb9282f4f..e00c31a4b3c0 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -57,6 +57,9 @@
 
 asmlinkage extern void ret_from_fork(void);
 
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
 
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -615,7 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	prev->usersp = read_pda(oldrsp);
 	write_pda(oldrsp, next->usersp);
-	write_pda(pcurrent, next_p);
+	percpu_write(current_task, next_p);
 
 	write_pda(kernelstack,
 		  (unsigned long)task_stack_page(next_p) +
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2f0e0f1090f6..5854be0fb804 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -790,13 +790,12 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 
 	set_idle_for_cpu(cpu, c_idle.idle);
 do_rest:
-#ifdef CONFIG_X86_32
 	per_cpu(current_task, cpu) = c_idle.idle;
+#ifdef CONFIG_X86_32
 	init_gdt(cpu);
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
 #else
-	cpu_pda(cpu)->pcurrent = c_idle.idle;
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
 #endif
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 9ff3b0999cfb..72c2eb9b64cd 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -279,12 +279,11 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 	struct task_struct *idle = idle_task(cpu);
 	int rc;
 
+	per_cpu(current_task, cpu) = idle;
 #ifdef CONFIG_X86_32
 	init_gdt(cpu);
-	per_cpu(current_task, cpu) = idle;
 	irq_ctx_init(cpu);
 #else
-	cpu_pda(cpu)->pcurrent = idle;
 	clear_tsk_thread_flag(idle, TIF_FORK);
 #endif
 	xen_setup_timer(cpu);
-- 
cgit v1.2.3


From 9af45651f1f7c89942e016a1a00a7ebddfa727f8 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 00:38:58 +0900
Subject: x86-64: Move kernelstack from PDA to per-cpu.

Also clean up PER_CPU_VAR usage in xen-asm_64.S

tj: * remove now unused stack_thread_info()
    * s/kernelstack/kernel_stack/
    * added FIXME comment in xen-asm_64.S

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/ia32/ia32entry.S          |  8 ++++----
 arch/x86/include/asm/pda.h         |  4 +---
 arch/x86/include/asm/thread_info.h | 20 ++++++++------------
 arch/x86/kernel/asm-offsets_64.c   |  1 -
 arch/x86/kernel/cpu/common.c       |  6 ++++--
 arch/x86/kernel/entry_64.S         |  4 ++--
 arch/x86/kernel/process_64.c       |  4 ++--
 arch/x86/kernel/smpboot.c          |  3 +++
 arch/x86/xen/xen-asm_64.S          | 23 +++++++++++------------
 9 files changed, 35 insertions(+), 38 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 256b00b61892..9c79b2477008 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target)
 	CFI_DEF_CFA	rsp,0
 	CFI_REGISTER	rsp,rbp
 	SWAPGS_UNSAFE_STACK
-	movq	%gs:pda_kernelstack, %rsp
-	addq	$(PDA_STACKOFFSET),%rsp	
+	movq	PER_CPU_VAR(kernel_stack), %rsp
+	addq	$(KERNEL_STACK_OFFSET),%rsp
 	/*
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs, here we enable it straight after entry:
@@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target)
 ENTRY(ia32_cstar_target)
 	CFI_STARTPROC32	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
+	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	SWAPGS_UNSAFE_STACK
 	movl	%esp,%r8d
 	CFI_REGISTER	rsp,r8
-	movq	%gs:pda_kernelstack,%rsp
+	movq	PER_CPU_VAR(kernel_stack),%rsp
 	/*
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs and here we enable it straight after entry:
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 7209302d9227..4d28ffba6e1b 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -13,7 +13,7 @@
 struct x8664_pda {
 	unsigned long unused1;
 	unsigned long unused2;
-	unsigned long kernelstack;	/* 16 top of kernel stack for current */
+	unsigned long unused3;
 	unsigned long oldrsp;		/* 24 user rsp for system call */
 	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
 	unsigned int unused6;		/* 36 was cpunumber */
@@ -44,6 +44,4 @@ extern void pda_init(int);
 
 #endif
 
-#define PDA_STACKOFFSET (5*8)
-
 #endif /* _ASM_X86_PDA_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 98789647baa9..b46f8ca007b5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -194,25 +194,21 @@ static inline struct thread_info *current_thread_info(void)
 
 #else /* X86_32 */
 
-#include <asm/pda.h>
+#include <asm/percpu.h>
+#define KERNEL_STACK_OFFSET (5*8)
 
 /*
  * macros/functions for gaining access to the thread information structure
  * preempt_count needs to be 1 initially, until the scheduler is functional.
  */
 #ifndef __ASSEMBLY__
-static inline struct thread_info *current_thread_info(void)
-{
-	struct thread_info *ti;
-	ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
-	return ti;
-}
+DECLARE_PER_CPU(unsigned long, kernel_stack);
 
-/* do not use in interrupt context */
-static inline struct thread_info *stack_thread_info(void)
+static inline struct thread_info *current_thread_info(void)
 {
 	struct thread_info *ti;
-	asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1)));
+	ti = (void *)(percpu_read(kernel_stack) +
+		      KERNEL_STACK_OFFSET - THREAD_SIZE);
 	return ti;
 }
 
@@ -220,8 +216,8 @@ static inline struct thread_info *stack_thread_info(void)
 
 /* how to get the thread information struct from ASM */
 #define GET_THREAD_INFO(reg) \
-	movq %gs:pda_kernelstack,reg ; \
-	subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
+	movq PER_CPU_VAR(kernel_stack),reg ; \
+	subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
 
 #endif
 
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4f7a210e1e58..cafff5f4a031 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -49,7 +49,6 @@ int main(void)
 	BLANK();
 #undef ENTRY
 #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
-	ENTRY(kernelstack); 
 	ENTRY(oldrsp); 
 	ENTRY(irqcount);
 	DEFINE(pda_size, sizeof(struct x8664_pda));
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b50e38d16888..06b6290088f4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -889,6 +889,10 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
 	per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
 #endif
 
+DEFINE_PER_CPU(unsigned long, kernel_stack) =
+	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(kernel_stack);
+
 void __cpuinit pda_init(int cpu)
 {
 	struct x8664_pda *pda = cpu_pda(cpu);
@@ -900,8 +904,6 @@ void __cpuinit pda_init(int cpu)
 	load_pda_offset(cpu);
 
 	pda->irqcount = -1;
-	pda->kernelstack = (unsigned long)stack_thread_info() -
-				 PDA_STACKOFFSET + THREAD_SIZE;
 
 	if (cpu != 0) {
 		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index d22677a66438..0dd45859a7a8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -468,7 +468,7 @@ END(ret_from_fork)
 ENTRY(system_call)
 	CFI_STARTPROC	simple
 	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
+	CFI_DEF_CFA	rsp,KERNEL_STACK_OFFSET
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	SWAPGS_UNSAFE_STACK
@@ -480,7 +480,7 @@ ENTRY(system_call)
 ENTRY(system_call_after_swapgs)
 
 	movq	%rsp,%gs:pda_oldrsp
-	movq	%gs:pda_kernelstack,%rsp
+	movq	PER_CPU_VAR(kernel_stack),%rsp
 	/*
 	 * No need to follow this irqs off/on section - it's straight
 	 * and short:
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e00c31a4b3c0..6c5f57602108 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -620,9 +620,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	write_pda(oldrsp, next->usersp);
 	percpu_write(current_task, next_p);
 
-	write_pda(kernelstack,
+	percpu_write(kernel_stack,
 		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - PDA_STACKOFFSET);
+		  THREAD_SIZE - KERNEL_STACK_OFFSET);
 #ifdef CONFIG_CC_STACKPROTECTOR
 	write_pda(stack_canary, next_p->stack_canary);
 	/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5854be0fb804..869b98840fd0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -798,6 +798,9 @@ do_rest:
 #else
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
+	per_cpu(kernel_stack, cpu) =
+		(unsigned long)task_stack_page(c_idle.idle) -
+		KERNEL_STACK_OFFSET + THREAD_SIZE;
 #endif
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 05794c566e87..5a23e8993678 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -17,6 +17,7 @@
 #include <asm/processor-flags.h>
 #include <asm/errno.h>
 #include <asm/segment.h>
+#include <asm/percpu.h>
 
 #include <xen/interface/xen.h>
 
@@ -28,12 +29,10 @@
 
 #if 1
 /*
-	x86-64 does not yet support direct access to percpu variables
-	via a segment override, so we just need to make sure this code
-	never gets used
+	FIXME: x86_64 now can support direct access to percpu variables
+	via a segment override.  Update xen accordingly.
  */
 #define BUG			ud2a
-#define PER_CPU_VAR(var, off)	0xdeadbeef
 #endif
 
 /*
@@ -45,14 +44,14 @@ ENTRY(xen_irq_enable_direct)
 	BUG
 
 	/* Unmask events */
-	movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+	movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 
 	/* Preempt here doesn't matter because that will deal with
 	   any pending interrupts.  The pending check may end up being
 	   run on the wrong CPU, but that doesn't hurt. */
 
 	/* Test for pending */
-	testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
 	jz 1f
 
 2:	call check_events
@@ -69,7 +68,7 @@ ENDPATCH(xen_irq_enable_direct)
 ENTRY(xen_irq_disable_direct)
 	BUG
 
-	movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+	movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 ENDPATCH(xen_irq_disable_direct)
 	ret
 	ENDPROC(xen_irq_disable_direct)
@@ -87,7 +86,7 @@ ENDPATCH(xen_irq_disable_direct)
 ENTRY(xen_save_fl_direct)
 	BUG
 
-	testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+	testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 	setz %ah
 	addb %ah,%ah
 ENDPATCH(xen_save_fl_direct)
@@ -107,13 +106,13 @@ ENTRY(xen_restore_fl_direct)
 	BUG
 
 	testb $X86_EFLAGS_IF>>8, %ah
-	setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
+	setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
 	/* Preempt here doesn't matter because that will deal with
 	   any pending interrupts.  The pending check may end up being
 	   run on the wrong CPU, but that doesn't hurt. */
 
 	/* check for unmasked and pending */
-	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending)
+	cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
 	jz 1f
 2:	call check_events
 1:
@@ -196,7 +195,7 @@ ENTRY(xen_sysret64)
 	/* We're already on the usermode stack at this point, but still
 	   with the kernel gs, so we can easily switch back */
 	movq %rsp, %gs:pda_oldrsp
-	movq %gs:pda_kernelstack,%rsp
+	movq PER_CPU_VAR(kernel_stack),%rsp
 
 	pushq $__USER_DS
 	pushq %gs:pda_oldrsp
@@ -213,7 +212,7 @@ ENTRY(xen_sysret32)
 	/* We're already on the usermode stack at this point, but still
 	   with the kernel gs, so we can easily switch back */
 	movq %rsp, %gs:pda_oldrsp
-	movq %gs:pda_kernelstack, %rsp
+	movq PER_CPU_VAR(kernel_stack), %rsp
 
 	pushq $__USER32_DS
 	pushq %gs:pda_oldrsp
-- 
cgit v1.2.3


From 3d1e42a7cf945e289d6ba26159aa0e2b0645401b Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 00:38:58 +0900
Subject: x86-64: Move oldrsp from PDA to per-cpu.

tj: * in asm-offsets_64.c, pda.h inclusion shouldn't be removed as pda
      is still referenced in the file
    * s/oldrsp/old_rsp/

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/pda.h       |  2 +-
 arch/x86/kernel/asm-offsets_64.c |  1 -
 arch/x86/kernel/entry_64.S       | 10 +++++-----
 arch/x86/kernel/process_64.c     |  8 +++++---
 arch/x86/xen/xen-asm_64.S        |  8 ++++----
 5 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 4d28ffba6e1b..ae23deb99559 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -14,7 +14,7 @@ struct x8664_pda {
 	unsigned long unused1;
 	unsigned long unused2;
 	unsigned long unused3;
-	unsigned long oldrsp;		/* 24 user rsp for system call */
+	unsigned long unused4;
 	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
 	unsigned int unused6;		/* 36 was cpunumber */
 #ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index cafff5f4a031..afda6deb8515 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -49,7 +49,6 @@ int main(void)
 	BLANK();
 #undef ENTRY
 #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
-	ENTRY(oldrsp); 
 	ENTRY(irqcount);
 	DEFINE(pda_size, sizeof(struct x8664_pda));
 	BLANK();
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 0dd45859a7a8..7c27da407da7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -210,7 +210,7 @@ ENTRY(native_usergs_sysret64)
 
 	/* %rsp:at FRAMEEND */
 	.macro FIXUP_TOP_OF_STACK tmp offset=0
-	movq %gs:pda_oldrsp,\tmp
+	movq PER_CPU_VAR(old_rsp),\tmp
 	movq \tmp,RSP+\offset(%rsp)
 	movq $__USER_DS,SS+\offset(%rsp)
 	movq $__USER_CS,CS+\offset(%rsp)
@@ -221,7 +221,7 @@ ENTRY(native_usergs_sysret64)
 
 	.macro RESTORE_TOP_OF_STACK tmp offset=0
 	movq RSP+\offset(%rsp),\tmp
-	movq \tmp,%gs:pda_oldrsp
+	movq \tmp,PER_CPU_VAR(old_rsp)
 	movq EFLAGS+\offset(%rsp),\tmp
 	movq \tmp,R11+\offset(%rsp)
 	.endm
@@ -479,7 +479,7 @@ ENTRY(system_call)
 	 */
 ENTRY(system_call_after_swapgs)
 
-	movq	%rsp,%gs:pda_oldrsp
+	movq	%rsp,PER_CPU_VAR(old_rsp)
 	movq	PER_CPU_VAR(kernel_stack),%rsp
 	/*
 	 * No need to follow this irqs off/on section - it's straight
@@ -523,7 +523,7 @@ sysret_check:
 	CFI_REGISTER	rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	/*CFI_REGISTER	rflags,r11*/
-	movq	%gs:pda_oldrsp, %rsp
+	movq	PER_CPU_VAR(old_rsp), %rsp
 	USERGS_SYSRET64
 
 	CFI_RESTORE_STATE
@@ -833,7 +833,7 @@ common_interrupt:
 	XCPT_FRAME
 	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
 	interrupt do_IRQ
-	/* 0(%rsp): oldrsp-ARGOFFSET */
+	/* 0(%rsp): old_rsp-ARGOFFSET */
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6c5f57602108..480128918926 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -60,6 +60,8 @@ asmlinkage extern void ret_from_fork(void);
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
 
+DEFINE_PER_CPU(unsigned long, old_rsp);
+
 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
 
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -395,7 +397,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	load_gs_index(0);
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
-	write_pda(oldrsp, new_sp);
+	percpu_write(old_rsp, new_sp);
 	regs->cs		= __USER_CS;
 	regs->ss		= __USER_DS;
 	regs->flags		= 0x200;
@@ -616,8 +618,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
-	prev->usersp = read_pda(oldrsp);
-	write_pda(oldrsp, next->usersp);
+	prev->usersp = percpu_read(old_rsp);
+	percpu_write(old_rsp, next->usersp);
 	percpu_write(current_task, next_p);
 
 	percpu_write(kernel_stack,
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 5a23e8993678..d6fc51f4ce85 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -194,11 +194,11 @@ RELOC(xen_sysexit, 1b+1)
 ENTRY(xen_sysret64)
 	/* We're already on the usermode stack at this point, but still
 	   with the kernel gs, so we can easily switch back */
-	movq %rsp, %gs:pda_oldrsp
+	movq %rsp, PER_CPU_VAR(old_rsp)
 	movq PER_CPU_VAR(kernel_stack),%rsp
 
 	pushq $__USER_DS
-	pushq %gs:pda_oldrsp
+	pushq PER_CPU_VAR(old_rsp)
 	pushq %r11
 	pushq $__USER_CS
 	pushq %rcx
@@ -211,11 +211,11 @@ RELOC(xen_sysret64, 1b+1)
 ENTRY(xen_sysret32)
 	/* We're already on the usermode stack at this point, but still
 	   with the kernel gs, so we can easily switch back */
-	movq %rsp, %gs:pda_oldrsp
+	movq %rsp, PER_CPU_VAR(old_rsp)
 	movq PER_CPU_VAR(kernel_stack), %rsp
 
 	pushq $__USER32_DS
-	pushq %gs:pda_oldrsp
+	pushq PER_CPU_VAR(old_rsp)
 	pushq %r11
 	pushq $__USER32_CS
 	pushq %rcx
-- 
cgit v1.2.3


From 5689553076c4a67b83426b076082c63085b7567a Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 00:38:58 +0900
Subject: x86-64: Move irqcount from PDA to per-cpu.

tj: s/irqcount/irq_count/

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/pda.h       |  2 +-
 arch/x86/kernel/asm-offsets_64.c |  1 -
 arch/x86/kernel/cpu/common.c     |  4 ++--
 arch/x86/kernel/entry_64.S       | 14 +++++++-------
 4 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index ae23deb99559..4527d70314d4 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -15,7 +15,7 @@ struct x8664_pda {
 	unsigned long unused2;
 	unsigned long unused3;
 	unsigned long unused4;
-	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
+	int unused5;
 	unsigned int unused6;		/* 36 was cpunumber */
 #ifdef CONFIG_CC_STACKPROTECTOR
 	unsigned long stack_canary;	/* 40 stack canary value */
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index afda6deb8515..64c834a39aa8 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -49,7 +49,6 @@ int main(void)
 	BLANK();
 #undef ENTRY
 #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
-	ENTRY(irqcount);
 	DEFINE(pda_size, sizeof(struct x8664_pda));
 	BLANK();
 #undef ENTRY
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 06b6290088f4..e2323ecce1d3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -893,6 +893,8 @@ DEFINE_PER_CPU(unsigned long, kernel_stack) =
 	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
+DEFINE_PER_CPU(unsigned int, irq_count) = -1;
+
 void __cpuinit pda_init(int cpu)
 {
 	struct x8664_pda *pda = cpu_pda(cpu);
@@ -903,8 +905,6 @@ void __cpuinit pda_init(int cpu)
 
 	load_pda_offset(cpu);
 
-	pda->irqcount = -1;
-
 	if (cpu != 0) {
 		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
 			pda->nodenumber = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7c27da407da7..c52b60919163 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -337,12 +337,12 @@ ENTRY(save_args)
 	je 1f
 	SWAPGS
 	/*
-	 * irqcount is used to check if a CPU is already on an interrupt stack
+	 * irq_count is used to check if a CPU is already on an interrupt stack
 	 * or not. While this is essentially redundant with preempt_count it is
 	 * a little cheaper to use a separate counter in the PDA (short of
 	 * moving irq_enter into assembly, which would be too much work)
 	 */
-1:	incl %gs:pda_irqcount
+1:	incl PER_CPU_VAR(irq_count)
 	jne 2f
 	popq_cfi %rax			/* move return address... */
 	mov PER_CPU_VAR(irq_stack_ptr),%rsp
@@ -837,7 +837,7 @@ common_interrupt:
 ret_from_intr:
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	decl %gs:pda_irqcount
+	decl PER_CPU_VAR(irq_count)
 	leaveq
 	CFI_DEF_CFA_REGISTER	rsp
 	CFI_ADJUST_CFA_OFFSET	-8
@@ -1260,14 +1260,14 @@ ENTRY(call_softirq)
 	CFI_REL_OFFSET rbp,0
 	mov  %rsp,%rbp
 	CFI_DEF_CFA_REGISTER rbp
-	incl %gs:pda_irqcount
+	incl PER_CPU_VAR(irq_count)
 	cmove PER_CPU_VAR(irq_stack_ptr),%rsp
 	push  %rbp			# backlink for old unwinder
 	call __do_softirq
 	leaveq
 	CFI_DEF_CFA_REGISTER	rsp
 	CFI_ADJUST_CFA_OFFSET   -8
-	decl %gs:pda_irqcount
+	decl PER_CPU_VAR(irq_count)
 	ret
 	CFI_ENDPROC
 END(call_softirq)
@@ -1297,7 +1297,7 @@ ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
 	movq %rdi, %rsp            # we don't return, adjust the stack frame
 	CFI_ENDPROC
 	DEFAULT_FRAME
-11:	incl %gs:pda_irqcount
+11:	incl PER_CPU_VAR(irq_count)
 	movq %rsp,%rbp
 	CFI_DEF_CFA_REGISTER rbp
 	cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
@@ -1305,7 +1305,7 @@ ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
 	call xen_evtchn_do_upcall
 	popq %rsp
 	CFI_DEF_CFA_REGISTER rsp
-	decl %gs:pda_irqcount
+	decl PER_CPU_VAR(irq_count)
 	jmp  error_exit
 	CFI_ENDPROC
 END(do_hypervisor_callback)
-- 
cgit v1.2.3


From e7a22c1ebcc1caa8178df1819d05128bb5b45ab9 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 00:38:59 +0900
Subject: x86-64: Move nodenumber from PDA to per-cpu.

tj: * s/nodenumber/node_number/
    * removed now unused pda variable from pda_init()

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/pda.h      |  1 -
 arch/x86/include/asm/topology.h |  3 ++-
 arch/x86/kernel/cpu/common.c    | 13 ++++++-------
 arch/x86/kernel/setup_percpu.c  |  4 +++-
 4 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 4527d70314d4..b30ef6bddc43 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -22,7 +22,6 @@ struct x8664_pda {
 					/* gcc-ABI: this canary MUST be at
 					   offset 40!!! */
 #endif
-	short nodenumber;		/* number of current node (32k max) */
 	short in_bootmem;		/* pda lives in bootmem */
 	short isidle;
 } ____cacheline_aligned_in_smp;
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 87ca3fd86e88..ffea1fe03a99 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -83,7 +83,8 @@ extern cpumask_t *node_to_cpumask_map;
 DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
 
 /* Returns the number of the current Node. */
-#define numa_node_id()		read_pda(nodenumber)
+DECLARE_PER_CPU(int, node_number);
+#define numa_node_id()		percpu_read(node_number)
 
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 extern int cpu_to_node(int cpu);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e2323ecce1d3..7976a6a0f65c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -897,18 +897,11 @@ DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
 void __cpuinit pda_init(int cpu)
 {
-	struct x8664_pda *pda = cpu_pda(cpu);
-
 	/* Setup up data that may be needed in __get_free_pages early */
 	loadsegment(fs, 0);
 	loadsegment(gs, 0);
 
 	load_pda_offset(cpu);
-
-	if (cpu != 0) {
-		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
-			pda->nodenumber = cpu_to_node(cpu);
-	}
 }
 
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
@@ -978,6 +971,12 @@ void __cpuinit cpu_init(void)
 	if (cpu != 0)
 		pda_init(cpu);
 
+#ifdef CONFIG_NUMA
+	if (cpu != 0 && percpu_read(node_number) == 0 &&
+	    cpu_to_node(cpu) != NUMA_NO_NODE)
+		percpu_write(node_number, cpu_to_node(cpu));
+#endif
+
 	me = current;
 
 	if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 258497f93f4d..efbafbbff584 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -53,6 +53,8 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
 #define	X86_64_NUMA	1	/* (used later) */
+DEFINE_PER_CPU(int, node_number) = 0;
+EXPORT_PER_CPU_SYMBOL(node_number);
 
 /*
  * Map cpu index to node index
@@ -283,7 +285,7 @@ void __cpuinit numa_set_node(int cpu, int node)
 	per_cpu(x86_cpu_to_node_map, cpu) = node;
 
 	if (node != NUMA_NO_NODE)
-		cpu_pda(cpu)->nodenumber = node;
+		per_cpu(node_number, cpu) = node;
 }
 
 void __cpuinit numa_clear_node(int cpu)
-- 
cgit v1.2.3


From c2558e0eba66b49993e619da66c95a50a97830a3 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 00:38:59 +0900
Subject: x86-64: Move isidle from PDA to per-cpu.

tj: s/isidle/is_idle/

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/pda.h   | 1 -
 arch/x86/kernel/process_64.c | 5 +++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index b30ef6bddc43..c31ca048a901 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -23,7 +23,6 @@ struct x8664_pda {
 					   offset 40!!! */
 #endif
 	short in_bootmem;		/* pda lives in bootmem */
-	short isidle;
 } ____cacheline_aligned_in_smp;
 
 DECLARE_PER_CPU(struct x8664_pda, __pda);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 480128918926..4523ff88a69d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -61,6 +61,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
 
 DEFINE_PER_CPU(unsigned long, old_rsp);
+static DEFINE_PER_CPU(unsigned char, is_idle);
 
 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
 
@@ -80,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
 
 void enter_idle(void)
 {
-	write_pda(isidle, 1);
+	percpu_write(is_idle, 1);
 	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
 }
 
 static void __exit_idle(void)
 {
-	if (test_and_clear_bit_pda(0, isidle) == 0)
+	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
 		return;
 	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
 }
-- 
cgit v1.2.3


From 87b264065880fa696c121dad8498a60524e0f6de Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 00:38:59 +0900
Subject: x86-64: Use absolute displacements for per-cpu accesses.

Accessing memory through %gs should not use rip-relative addressing.
Adding a P prefix for the argument tells gcc to not add (%rip) to
the memory references.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/percpu.h | 26 +++++++++++++-------------
 arch/x86/include/asm/system.h |  2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 03aa4b00a1c3..165d5272ece1 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -39,10 +39,10 @@
 #include <linux/stringify.h>
 
 #ifdef CONFIG_SMP
-#define __percpu_seg_str	"%%"__stringify(__percpu_seg)":"
+#define __percpu_arg(x)		"%%"__stringify(__percpu_seg)":%P" #x
 #define __my_cpu_offset		percpu_read(this_cpu_off)
 #else
-#define __percpu_seg_str
+#define __percpu_arg(x)		"%" #x
 #endif
 
 /* For arch-specific code, we can use direct single-insn ops (they
@@ -58,22 +58,22 @@ do {							\
 	}						\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm(op "b %1,"__percpu_seg_str"%0"	\
+		asm(op "b %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
 		    : "ri" ((T__)val));			\
 		break;					\
 	case 2:						\
-		asm(op "w %1,"__percpu_seg_str"%0"	\
+		asm(op "w %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
 		    : "ri" ((T__)val));			\
 		break;					\
 	case 4:						\
-		asm(op "l %1,"__percpu_seg_str"%0"	\
+		asm(op "l %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
 		    : "ri" ((T__)val));			\
 		break;					\
 	case 8:						\
-		asm(op "q %1,"__percpu_seg_str"%0"	\
+		asm(op "q %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
 		    : "r" ((T__)val));			\
 		break;					\
@@ -86,22 +86,22 @@ do {							\
 	typeof(var) ret__;				\
 	switch (sizeof(var)) {				\
 	case 1:						\
-		asm(op "b "__percpu_seg_str"%1,%0"	\
+		asm(op "b "__percpu_arg(1)",%0"		\
 		    : "=r" (ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 2:						\
-		asm(op "w "__percpu_seg_str"%1,%0"	\
+		asm(op "w "__percpu_arg(1)",%0"		\
 		    : "=r" (ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 4:						\
-		asm(op "l "__percpu_seg_str"%1,%0"	\
+		asm(op "l "__percpu_arg(1)",%0"		\
 		    : "=r" (ret__)			\
 		    : "m" (var));			\
 		break;					\
 	case 8:						\
-		asm(op "q "__percpu_seg_str"%1,%0"	\
+		asm(op "q "__percpu_arg(1)",%0"		\
 		    : "=r" (ret__)			\
 		    : "m" (var));			\
 		break;					\
@@ -122,9 +122,9 @@ do {							\
 #define x86_test_and_clear_bit_percpu(bit, var)				\
 ({									\
 	int old__;							\
-	asm volatile("btr %1,"__percpu_seg_str"%c2\n\tsbbl %0,%0"	\
-		     : "=r" (old__)					\
-		     : "dIr" (bit), "i" (&per_cpu__##var) : "memory");	\
+	asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0"		\
+		     : "=r" (old__), "+m" (per_cpu__##var)		\
+		     : "dIr" (bit));					\
 	old__;								\
 })
 
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 4399aac680e9..d1dc27dba36d 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -94,7 +94,7 @@ do {									\
 	     "call __switch_to\n\t"					  \
 	     ".globl thread_return\n"					  \
 	     "thread_return:\n\t"					  \
-	     "movq "__percpu_seg_str"%P[current_task],%%rsi\n\t"	  \
+	     "movq "__percpu_arg([current_task])",%%rsi\n\t"		  \
 	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
 	     LOCK_PREFIX "btr  %[tif_fork],%P[ti_flags](%%r8)\n\t"	  \
 	     "movq %%rax,%%rdi\n\t" 					  \
-- 
cgit v1.2.3


From c6e50f93db5bd0895ec7c7d1b6f3886c6e1f11b6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 20 Jan 2009 12:29:19 +0900
Subject: x86: cleanup stack protector

Impact: cleanup

Make the following cleanups.

* remove duplicate comment from boot_init_stack_canary() which fits
  better in the other place - cpu_idle().

* move stack_canary offset check from __switch_to() to
  boot_init_stack_canary().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/pda.h            |  2 --
 arch/x86/include/asm/stackprotector.h | 13 ++++++-------
 arch/x86/kernel/process_64.c          |  7 -------
 3 files changed, 6 insertions(+), 16 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 5976cd803e9a..4a8c9d382c93 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -40,6 +40,4 @@ extern void pda_init(int);
 
 #endif
 
-#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary)
-
 #endif /* _ASM_X86_PDA_H */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index c7f0d10bae7b..2383e5bb475c 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -16,13 +16,12 @@ static __always_inline void boot_init_stack_canary(void)
 	u64 tsc;
 
 	/*
-	 * If we're the non-boot CPU, nothing set the PDA stack
-	 * canary up for us - and if we are the boot CPU we have
-	 * a 0 stack canary. This is a good place for updating
-	 * it, as we wont ever return from this function (so the
-	 * invalid canaries already on the stack wont ever
-	 * trigger).
-	 *
+	 * Build time only check to make sure the stack_canary is at
+	 * offset 40 in the pda; this is a gcc ABI requirement
+	 */
+	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
+
+	/*
 	 * We both use the random pool and the current TSC as a source
 	 * of randomness. The TSC only matters for very early init,
 	 * there it already has some randomness on most systems. Later
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index aa89eabf09e0..088bc9a0f82c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -638,13 +638,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	percpu_write(kernel_stack,
 		  (unsigned long)task_stack_page(next_p) +
 		  THREAD_SIZE - KERNEL_STACK_OFFSET);
-#ifdef CONFIG_CC_STACKPROTECTOR
-	/*
-	 * Build time only check to make sure the stack_canary is at
-	 * offset 40 in the pda; this is a gcc ABI requirement
-	 */
-	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
-#endif
 
 	/*
 	 * Now maybe reload the debug registers and handle I/O bitmaps
-- 
cgit v1.2.3


From b4a8f7a262e79ecb0b39beb1449af524a78887f8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 20 Jan 2009 12:29:19 +0900
Subject: x86: conditionalize stack canary handling in hot path

Impact: no unnecessary stack canary swapping during context switch

There's no point in moving stack_canary around during context switch
if it's not enabled.  Conditionalize it.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/system.h | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 8cadfe9b1194..b77bd8bd3cc2 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -86,17 +86,28 @@ do {									\
 	, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
 	  "r12", "r13", "r14", "r15"
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+#define __switch_canary							  \
+	"movq %P[task_canary](%%rsi),%%r8\n\t"				  \
+	"movq %%r8,%%gs:%P[pda_canary]\n\t"
+#define __switch_canary_param						  \
+	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))  \
+	, [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))
+#else	/* CC_STACKPROTECTOR */
+#define __switch_canary
+#define __switch_canary_param
+#endif	/* CC_STACKPROTECTOR */
+
 /* Save restore flags to clear handle leaking NT */
 #define switch_to(prev, next, last) \
-	asm volatile(SAVE_CONTEXT						    \
+	asm volatile(SAVE_CONTEXT					  \
 	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
 	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
 	     "call __switch_to\n\t"					  \
 	     ".globl thread_return\n"					  \
 	     "thread_return:\n\t"					  \
 	     "movq "__percpu_arg([current_task])",%%rsi\n\t"		  \
-	     "movq %P[task_canary](%%rsi),%%r8\n\t"			  \
-	     "movq %%r8,%%gs:%P[pda_canary]\n\t"			  \
+	     __switch_canary						  \
 	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
 	     LOCK_PREFIX "btr  %[tif_fork],%P[ti_flags](%%r8)\n\t"	  \
 	     "movq %%rax,%%rdi\n\t" 					  \
@@ -108,9 +119,8 @@ do {									\
 	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
 	       [tif_fork] "i" (TIF_FORK),			  	  \
 	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
-	       [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\
-	       [current_task] "m" (per_cpu_var(current_task)),		  \
-	       [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\
+	       [current_task] "m" (per_cpu_var(current_task))		  \
+	       __switch_canary_param					  \
 	     : "memory", "cc" __EXTRA_CLOBBER)
 #endif
 
-- 
cgit v1.2.3


From 8ce031972b40da58c268caba8c5ea3c0856d7131 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 12:21:27 +0900
Subject: x86: remove pda_init()

Impact: cleanup

Copy the code to cpu_init() to satisfy the requirement that the cpu
be reinitialized.  Remove all other calls, since the segments are
already initialized in head_64.S.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/pda.h   |  1 -
 arch/x86/kernel/cpu/common.c | 15 +++------------
 arch/x86/kernel/head64.c     |  2 --
 arch/x86/xen/enlighten.c     |  1 -
 4 files changed, 3 insertions(+), 16 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 4a8c9d382c93..b473e952439a 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -24,7 +24,6 @@ struct x8664_pda {
 } ____cacheline_aligned_in_smp;
 
 DECLARE_PER_CPU(struct x8664_pda, __pda);
-extern void pda_init(int);
 
 #define cpu_pda(cpu)		(&per_cpu(__pda, cpu))
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7976a6a0f65c..f83a4d6160f0 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -895,15 +895,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
 DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
-void __cpuinit pda_init(int cpu)
-{
-	/* Setup up data that may be needed in __get_free_pages early */
-	loadsegment(fs, 0);
-	loadsegment(gs, 0);
-
-	load_pda_offset(cpu);
-}
-
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
 	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
 	__aligned(PAGE_SIZE);
@@ -967,9 +958,9 @@ void __cpuinit cpu_init(void)
 	struct task_struct *me;
 	int i;
 
-	/* CPU 0 is initialised in head64.c */
-	if (cpu != 0)
-		pda_init(cpu);
+	loadsegment(fs, 0);
+	loadsegment(gs, 0);
+	load_pda_offset(cpu);
 
 #ifdef CONFIG_NUMA
 	if (cpu != 0 && percpu_read(node_number) == 0 &&
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index af67d3227ea6..f5b272247690 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	if (console_loglevel == 10)
 		early_printk("Kernel alive\n");
 
-	pda_init(0);
-
 	x86_64_start_reservations(real_mode_data);
 }
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 75b94139e1f2..bef941f61451 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void)
 #ifdef CONFIG_X86_64
 	/* Disable until direct per-cpu data access. */
 	have_vcpu_info_placement = 0;
-	pda_init(0);
 #endif
 
 	xen_smp_init();
-- 
cgit v1.2.3


From 947e76cdc34c782fc947313d4331380686eebbad Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 19 Jan 2009 12:21:28 +0900
Subject: x86: move stack_canary into irq_stack

Impact: x86_64 percpu area layout change, irq_stack now at the beginning

Now that the PDA is empty except for the stack canary, it can be removed.
The irqstack is moved to the start of the per-cpu section.  If the stack
protector is enabled, the canary overlaps the bottom 48 bytes of the irqstack.

tj: * updated subject
    * dropped asm relocation of irq_stack_ptr
    * updated comments a bit
    * rebased on top of stack canary changes

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/pda.h            |  3 ---
 arch/x86/include/asm/percpu.h         |  6 ------
 arch/x86/include/asm/processor.h      | 23 ++++++++++++++++++++++-
 arch/x86/include/asm/stackprotector.h |  6 +++---
 arch/x86/include/asm/system.h         |  4 ++--
 arch/x86/kernel/asm-offsets_64.c      |  4 ----
 arch/x86/kernel/cpu/common.c          |  7 ++++---
 arch/x86/kernel/head_64.S             | 13 +++++--------
 arch/x86/kernel/setup_percpu.c        | 34 ++++------------------------------
 arch/x86/kernel/vmlinux_64.lds.S      |  8 ++++++--
 10 files changed, 46 insertions(+), 62 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index b473e952439a..ba46416634f0 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -17,9 +17,6 @@ struct x8664_pda {
 	unsigned long unused4;
 	int unused5;
 	unsigned int unused6;		/* 36 was cpunumber */
-	unsigned long stack_canary;	/* 40 stack canary value */
-					/* gcc-ABI: this canary MUST be at
-					   offset 40!!! */
 	short in_bootmem;		/* pda lives in bootmem */
 } ____cacheline_aligned_in_smp;
 
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 165d5272ece1..ce980db5e59d 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -133,12 +133,6 @@ do {							\
 /* We can use this directly for local CPU (faster). */
 DECLARE_PER_CPU(unsigned long, this_cpu_off);
 
-#ifdef CONFIG_X86_64
-extern void load_pda_offset(int cpu);
-#else
-static inline void load_pda_offset(int cpu) { }
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f511246fa6cd..48676b943b92 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -379,8 +379,29 @@ union thread_xstate {
 #ifdef CONFIG_X86_64
 DECLARE_PER_CPU(struct orig_ist, orig_ist);
 
-DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
+union irq_stack_union {
+	char irq_stack[IRQ_STACK_SIZE];
+	/*
+	 * GCC hardcodes the stack canary as %gs:40.  Since the
+	 * irq_stack is the object at %gs:0, we reserve the bottom
+	 * 48 bytes of the irq stack for the canary.
+	 */
+	struct {
+		char gs_base[40];
+		unsigned long stack_canary;
+	};
+};
+
+DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
 DECLARE_PER_CPU(char *, irq_stack_ptr);
+
+static inline void load_gs_base(int cpu)
+{
+	/* Memory clobbers used to order pda/percpu accesses */
+	mb();
+	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
+	mb();
+}
 #endif
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 2383e5bb475c..36a700acaf2b 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -2,7 +2,7 @@
 #define _ASM_STACKPROTECTOR_H 1
 
 #include <asm/tsc.h>
-#include <asm/pda.h>
+#include <asm/processor.h>
 
 /*
  * Initialize the stackprotector canary value.
@@ -19,7 +19,7 @@ static __always_inline void boot_init_stack_canary(void)
 	 * Build time only check to make sure the stack_canary is at
 	 * offset 40 in the pda; this is a gcc ABI requirement
 	 */
-	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
+	BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
 
 	/*
 	 * We both use the random pool and the current TSC as a source
@@ -32,7 +32,7 @@ static __always_inline void boot_init_stack_canary(void)
 	canary += tsc + (tsc << 32UL);
 
 	current->stack_canary = canary;
-	write_pda(stack_canary, canary);
+	percpu_write(irq_stack_union.stack_canary, canary);
 }
 
 #endif
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index b77bd8bd3cc2..52eb748a68af 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -89,10 +89,10 @@ do {									\
 #ifdef CONFIG_CC_STACKPROTECTOR
 #define __switch_canary							  \
 	"movq %P[task_canary](%%rsi),%%r8\n\t"				  \
-	"movq %%r8,%%gs:%P[pda_canary]\n\t"
+	"movq %%r8,%%gs:%P[gs_canary]\n\t"
 #define __switch_canary_param						  \
 	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))  \
-	, [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))
+	, [gs_canary] "i" (offsetof(union irq_stack_union, stack_canary))
 #else	/* CC_STACKPROTECTOR */
 #define __switch_canary
 #define __switch_canary_param
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 64c834a39aa8..94f9c8b39d20 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -48,10 +48,6 @@ int main(void)
 #endif
 	BLANK();
 #undef ENTRY
-#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
-	DEFINE(pda_size, sizeof(struct x8664_pda));
-	BLANK();
-#undef ENTRY
 #ifdef CONFIG_PARAVIRT
 	BLANK();
 	OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f83a4d6160f0..098934e72a16 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -881,12 +881,13 @@ __setup("clearcpuid=", setup_disablecpuid);
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
-DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
+DEFINE_PER_CPU_FIRST(union irq_stack_union,
+		     irq_stack_union) __aligned(PAGE_SIZE);
 #ifdef CONFIG_SMP
 DEFINE_PER_CPU(char *, irq_stack_ptr);	/* will be set during per cpu init */
 #else
 DEFINE_PER_CPU(char *, irq_stack_ptr) =
-	per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
+	per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
 #endif
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
@@ -960,7 +961,7 @@ void __cpuinit cpu_init(void)
 
 	loadsegment(fs, 0);
 	loadsegment(gs, 0);
-	load_pda_offset(cpu);
+	load_gs_base(cpu);
 
 #ifdef CONFIG_NUMA
 	if (cpu != 0 && percpu_read(node_number) == 0 &&
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 98ea26a2fca1..a0a2b5ca9b7d 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -242,13 +242,10 @@ ENTRY(secondary_startup_64)
 
 	/* Set up %gs.
 	 *
-	 * On SMP, %gs should point to the per-cpu area.  For initial
-	 * boot, make %gs point to the init data section.  For a
-	 * secondary CPU,initial_gs should be set to its pda address
-	 * before the CPU runs this code.
-	 *
-	 * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
-	 * change.
+	 * The base of %gs always points to the bottom of the irqstack
+	 * union.  If the stack protector canary is enabled, it is
+	 * located at %gs:40.  Note that, on SMP, the boot cpu uses
+	 * init data section till per cpu areas are set up.
 	 */
 	movl	$MSR_GS_BASE,%ecx
 	movq	initial_gs(%rip),%rax
@@ -281,7 +278,7 @@ ENTRY(secondary_startup_64)
 #ifdef CONFIG_SMP
 	.quad	__per_cpu_load
 #else
-	.quad	PER_CPU_VAR(__pda)
+	.quad	PER_CPU_VAR(irq_stack_union)
 #endif
 	__FINITDATA
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index efbafbbff584..90b8e154bb53 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void);
 static inline void setup_node_to_cpumask_map(void) { }
 #endif
 
-/*
- * Define load_pda_offset() and per-cpu __pda for x86_64.
- * load_pda_offset() is responsible for loading the offset of pda into
- * %gs.
- *
- * On SMP, pda offset also duals as percpu base address and thus it
- * should be at the start of per-cpu area.  To achieve this, it's
- * preallocated in vmlinux_64.lds.S directly instead of using
- * DEFINE_PER_CPU().
- */
-#ifdef CONFIG_X86_64
-void __cpuinit load_pda_offset(int cpu)
-{
-	/* Memory clobbers used to order pda/percpu accesses */
-	mb();
-	wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
-	mb();
-}
-#ifndef CONFIG_SMP
-DEFINE_PER_CPU(struct x8664_pda, __pda);
-#endif
-EXPORT_PER_CPU_SYMBOL(__pda);
-#endif /* CONFIG_SMP && CONFIG_X86_64 */
-
 #ifdef CONFIG_X86_64
 
 /* correctly size the local cpu masks */
@@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void)
 		per_cpu(cpu_number, cpu) = cpu;
 #ifdef CONFIG_X86_64
 		per_cpu(irq_stack_ptr, cpu) =
-			(char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
+			per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
 		/*
-		 * CPU0 modified pda in the init data area, reload pda
-		 * offset for CPU0 and clear the area for others.
+		 * Up to this point, CPU0 has been using .data.init
+		 * area.  Reload %gs offset for CPU0.
 		 */
 		if (cpu == 0)
-			load_pda_offset(0);
-		else
-			memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
+			load_gs_base(cpu);
 #endif
 
 		DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index a09abb8fb97f..c9740996430a 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -220,8 +220,7 @@ SECTIONS
    * so that it can be accessed as a percpu variable.
    */
   . = ALIGN(PAGE_SIZE);
-  PERCPU_VADDR_PREALLOC(0, :percpu, pda_size)
-  per_cpu____pda = __per_cpu_start;
+  PERCPU_VADDR(0, :percpu)
 #else
   PERCPU(PAGE_SIZE)
 #endif
@@ -262,3 +261,8 @@ SECTIONS
  */
 ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
 	"kernel image bigger than KERNEL_IMAGE_SIZE")
+
+#ifdef CONFIG_SMP
+ASSERT((per_cpu__irq_stack_union == 0),
+        "irq_stack_union is not at start of per-cpu area");
+#endif
-- 
cgit v1.2.3


From 0d974d4592708f85044751817da4b7016e1b0602 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Sun, 18 Jan 2009 19:52:25 -0500
Subject: x86: remove pda.h

Impact: cleanup

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/include/asm/pda.h        | 39 ---------------------------------------
 arch/x86/include/asm/pgtable_64.h |  1 -
 arch/x86/include/asm/smp.h        |  1 -
 arch/x86/kernel/asm-offsets_64.c  |  1 -
 arch/x86/kernel/cpu/common.c      |  1 -
 arch/x86/kernel/process_64.c      |  1 -
 arch/x86/kernel/traps.c           |  1 -
 7 files changed, 45 deletions(-)
 delete mode 100644 arch/x86/include/asm/pda.h

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
deleted file mode 100644
index ba46416634f0..000000000000
--- a/arch/x86/include/asm/pda.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef _ASM_X86_PDA_H
-#define _ASM_X86_PDA_H
-
-#ifndef __ASSEMBLY__
-#include <linux/stddef.h>
-#include <linux/types.h>
-#include <linux/cache.h>
-#include <linux/threads.h>
-#include <asm/page.h>
-#include <asm/percpu.h>
-
-/* Per processor datastructure. %gs points to it while the kernel runs */
-struct x8664_pda {
-	unsigned long unused1;
-	unsigned long unused2;
-	unsigned long unused3;
-	unsigned long unused4;
-	int unused5;
-	unsigned int unused6;		/* 36 was cpunumber */
-	short in_bootmem;		/* pda lives in bootmem */
-} ____cacheline_aligned_in_smp;
-
-DECLARE_PER_CPU(struct x8664_pda, __pda);
-
-#define cpu_pda(cpu)		(&per_cpu(__pda, cpu))
-
-#define read_pda(field)		percpu_read(__pda.field)
-#define write_pda(field, val)	percpu_write(__pda.field, val)
-#define add_pda(field, val)	percpu_add(__pda.field, val)
-#define sub_pda(field, val)	percpu_sub(__pda.field, val)
-#define or_pda(field, val)	percpu_or(__pda.field, val)
-
-/* This is not atomic against other CPUs -- CPU preemption needs to be off */
-#define test_and_clear_bit_pda(bit, field)				\
-	x86_test_and_clear_bit_percpu(bit, __pda.field)
-
-#endif
-
-#endif /* _ASM_X86_PDA_H */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ba09289accaa..1df9637dfda3 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -11,7 +11,6 @@
 #include <asm/processor.h>
 #include <linux/bitops.h>
 #include <linux/threads.h>
-#include <asm/pda.h>
 
 extern pud_t level3_kernel_pgt[512];
 extern pud_t level3_ident_pgt[512];
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 68636e767a91..45ef8a1b9d7c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -15,7 +15,6 @@
 #  include <asm/io_apic.h>
 # endif
 #endif
-#include <asm/pda.h>
 #include <asm/thread_info.h>
 #include <asm/cpumask.h>
 
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 94f9c8b39d20..8793ab33e2c1 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -11,7 +11,6 @@
 #include <linux/hardirq.h>
 #include <linux/suspend.h>
 #include <linux/kbuild.h>
-#include <asm/pda.h>
 #include <asm/processor.h>
 #include <asm/segment.h>
 #include <asm/thread_info.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 098934e72a16..3887fcf6e519 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -30,7 +30,6 @@
 #include <asm/genapic.h>
 #endif
 
-#include <asm/pda.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/desc.h>
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 088bc9a0f82c..c422eebb0c58 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -47,7 +47,6 @@
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/mmu_context.h>
-#include <asm/pda.h>
 #include <asm/prctl.h>
 #include <asm/desc.h>
 #include <asm/proto.h>
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98c2d055284b..ed5aee5f3fcc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -59,7 +59,6 @@
 #ifdef CONFIG_X86_64
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
-#include <asm/pda.h>
 #else
 #include <asm/processor-flags.h>
 #include <asm/arch_hooks.h>
-- 
cgit v1.2.3


From 67e68bde02fe783efc2ce2ca31bdb992f5235f8d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 21 Jan 2009 17:26:05 +0900
Subject: x86: update canary handling during switch

Impact: cleanup

In switch_to(), instead of taking offset to irq_stack_union.stack,
make it a proper percpu access using __percpu_arg() and per_cpu_var().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/system.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 52eb748a68af..2fcc70bc85f3 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -89,13 +89,15 @@ do {									\
 #ifdef CONFIG_CC_STACKPROTECTOR
 #define __switch_canary							  \
 	"movq %P[task_canary](%%rsi),%%r8\n\t"				  \
-	"movq %%r8,%%gs:%P[gs_canary]\n\t"
-#define __switch_canary_param						  \
-	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))  \
-	, [gs_canary] "i" (offsetof(union irq_stack_union, stack_canary))
+	"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
+#define __switch_canary_oparam						  \
+	, [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
+#define __switch_canary_iparam						  \
+	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
 #else	/* CC_STACKPROTECTOR */
 #define __switch_canary
-#define __switch_canary_param
+#define __switch_canary_oparam
+#define __switch_canary_iparam
 #endif	/* CC_STACKPROTECTOR */
 
 /* Save restore flags to clear handle leaking NT */
@@ -114,13 +116,14 @@ do {									\
 	     "jc   ret_from_fork\n\t"					  \
 	     RESTORE_CONTEXT						  \
 	     : "=a" (last)					  	  \
+	       __switch_canary_oparam					  \
 	     : [next] "S" (next), [prev] "D" (prev),			  \
 	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
 	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
 	       [tif_fork] "i" (TIF_FORK),			  	  \
 	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
 	       [current_task] "m" (per_cpu_var(current_task))		  \
-	       __switch_canary_param					  \
+	       __switch_canary_iparam					  \
 	     : "memory", "cc" __EXTRA_CLOBBER)
 #endif
 
-- 
cgit v1.2.3


From 299e26992a737804e13e74fdb97cdab470ed19ac Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Wed, 21 Jan 2009 17:26:05 +0900
Subject: x86: fix percpu_write with 64-bit constants

Impact: slightly better code generation for percpu_to_op()

The processor will sign-extend 32-bit immediate values in 64-bit
operations.  Use the 'e' constraint ("32-bit signed integer constant,
or a symbolic reference known to fit that range") for 64-bit constants.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/percpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ce980db5e59d..0b64af4f13ac 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -75,7 +75,7 @@ do {							\
 	case 8:						\
 		asm(op "q %1,"__percpu_arg(0)		\
 		    : "+m" (var)			\
-		    : "r" ((T__)val));			\
+		    : "re" ((T__)val));			\
 		break;					\
 	default: __bad_percpu_size();			\
 	}						\
-- 
cgit v1.2.3


From 6826c8ff07b5f95df0473a748a9831707079b940 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Wed, 21 Jan 2009 17:26:06 +0900
Subject: x86: merge mmu_context.h

Impact: cleanup

tj: * changed cpu to unsigned as was done on mmu_context_64.h as cpu
      id is officially unsigned int
    * added missing ';' to 32bit version of deactivate_mm()

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/mmu_context.h    | 63 ++++++++++++++++++++++++++++++++---
 arch/x86/include/asm/mmu_context_32.h | 55 ------------------------------
 arch/x86/include/asm/mmu_context_64.h | 52 -----------------------------
 3 files changed, 59 insertions(+), 111 deletions(-)
 delete mode 100644 arch/x86/include/asm/mmu_context_32.h
 delete mode 100644 arch/x86/include/asm/mmu_context_64.h

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8aeeb3fd73db..52948df9cd1d 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 void destroy_context(struct mm_struct *mm);
 
-#ifdef CONFIG_X86_32
-# include "mmu_context_32.h"
-#else
-# include "mmu_context_64.h"
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+#ifdef CONFIG_SMP
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+#endif
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	unsigned cpu = smp_processor_id();
+
+	if (likely(prev != next)) {
+		/* stop flush ipis for the previous mm */
+		cpu_clear(cpu, prev->cpu_vm_mask);
+#ifdef CONFIG_SMP
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		percpu_write(cpu_tlbstate.active_mm, next);
 #endif
+		cpu_set(cpu, next->cpu_vm_mask);
+
+		/* Re-load page tables */
+		load_cr3(next->pgd);
+
+		/*
+		 * load the LDT, if the LDT is different:
+		 */
+		if (unlikely(prev->context.ldt != next->context.ldt))
+			load_LDT_nolock(&next->context);
+	}
+#ifdef CONFIG_SMP
+	else {
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
+
+		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+			/* We were in lazy tlb mode and leave_mm disabled
+			 * tlb flush IPI delivery. We must reload CR3
+			 * to make sure to use no freed page tables.
+			 */
+			load_cr3(next->pgd);
+			load_LDT_nolock(&next->context);
+		}
+	}
+#endif
+}
 
 #define activate_mm(prev, next)			\
 do {						\
@@ -33,5 +76,17 @@ do {						\
 	switch_mm((prev), (next), NULL);	\
 } while (0);
 
+#ifdef CONFIG_X86_32
+#define deactivate_mm(tsk, mm)			\
+do {						\
+	loadsegment(gs, 0);			\
+} while (0)
+#else
+#define deactivate_mm(tsk, mm)			\
+do {						\
+	load_gs_index(0);			\
+	loadsegment(fs, 0);			\
+} while (0)
+#endif
 
 #endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
deleted file mode 100644
index 08b53454f831..000000000000
--- a/arch/x86/include/asm/mmu_context_32.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef _ASM_X86_MMU_CONTEXT_32_H
-#define _ASM_X86_MMU_CONTEXT_32_H
-
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-#ifdef CONFIG_SMP
-	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
-		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
-#endif
-}
-
-static inline void switch_mm(struct mm_struct *prev,
-			     struct mm_struct *next,
-			     struct task_struct *tsk)
-{
-	int cpu = smp_processor_id();
-
-	if (likely(prev != next)) {
-		/* stop flush ipis for the previous mm */
-		cpu_clear(cpu, prev->cpu_vm_mask);
-#ifdef CONFIG_SMP
-		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		percpu_write(cpu_tlbstate.active_mm, next);
-#endif
-		cpu_set(cpu, next->cpu_vm_mask);
-
-		/* Re-load page tables */
-		load_cr3(next->pgd);
-
-		/*
-		 * load the LDT, if the LDT is different:
-		 */
-		if (unlikely(prev->context.ldt != next->context.ldt))
-			load_LDT_nolock(&next->context);
-	}
-#ifdef CONFIG_SMP
-	else {
-		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
-
-		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
-			/* We were in lazy tlb mode and leave_mm disabled
-			 * tlb flush IPI delivery. We must reload %cr3.
-			 */
-			load_cr3(next->pgd);
-			load_LDT_nolock(&next->context);
-		}
-	}
-#endif
-}
-
-#define deactivate_mm(tsk, mm)			\
-	asm("movl %0,%%gs": :"r" (0));
-
-#endif /* _ASM_X86_MMU_CONTEXT_32_H */
diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h
deleted file mode 100644
index c4572505ab3e..000000000000
--- a/arch/x86/include/asm/mmu_context_64.h
+++ /dev/null
@@ -1,52 +0,0 @@
-#ifndef _ASM_X86_MMU_CONTEXT_64_H
-#define _ASM_X86_MMU_CONTEXT_64_H
-
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-#ifdef CONFIG_SMP
-	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
-		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
-#endif
-}
-
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
-			     struct task_struct *tsk)
-{
-	unsigned cpu = smp_processor_id();
-	if (likely(prev != next)) {
-		/* stop flush ipis for the previous mm */
-		cpu_clear(cpu, prev->cpu_vm_mask);
-#ifdef CONFIG_SMP
-		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		percpu_write(cpu_tlbstate.active_mm, next);
-#endif
-		cpu_set(cpu, next->cpu_vm_mask);
-		load_cr3(next->pgd);
-
-		if (unlikely(next->context.ldt != prev->context.ldt))
-			load_LDT_nolock(&next->context);
-	}
-#ifdef CONFIG_SMP
-	else {
-		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
-
-		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
-			/* We were in lazy tlb mode and leave_mm disabled
-			 * tlb flush IPI delivery. We must reload CR3
-			 * to make sure to use no freed page tables.
-			 */
-			load_cr3(next->pgd);
-			load_LDT_nolock(&next->context);
-		}
-	}
-#endif
-}
-
-#define deactivate_mm(tsk, mm)			\
-do {						\
-	load_gs_index(0);			\
-	asm volatile("movl %0,%%fs"::"r"(0));	\
-} while (0)
-
-#endif /* _ASM_X86_MMU_CONTEXT_64_H */
-- 
cgit v1.2.3


From d650a5148593b65a3c3f9a344f46b91b7dfe7713 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Wed, 21 Jan 2009 17:26:06 +0900
Subject: x86: merge irq_regs.h

Impact: cleanup, better irq_regs code generation for x86_64

Make 64-bit use the same optimizations as 32-bit.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/irq_regs.h    | 36 +++++++++++++++++++++++++++++++-----
 arch/x86/include/asm/irq_regs_32.h | 31 -------------------------------
 arch/x86/include/asm/irq_regs_64.h |  1 -
 arch/x86/kernel/irq_64.c           |  3 +++
 4 files changed, 34 insertions(+), 37 deletions(-)
 delete mode 100644 arch/x86/include/asm/irq_regs_32.h
 delete mode 100644 arch/x86/include/asm/irq_regs_64.h

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 89c898ab298b..77843225b7ea 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -1,5 +1,31 @@
-#ifdef CONFIG_X86_32
-# include "irq_regs_32.h"
-#else
-# include "irq_regs_64.h"
-#endif
+/*
+ * Per-cpu current frame pointer - the location of the last exception frame on
+ * the stack, stored in the per-cpu area.
+ *
+ * Jeremy Fitzhardinge <jeremy@goop.org>
+ */
+#ifndef _ASM_X86_IRQ_REGS_H
+#define _ASM_X86_IRQ_REGS_H
+
+#include <asm/percpu.h>
+
+#define ARCH_HAS_OWN_IRQ_REGS
+
+DECLARE_PER_CPU(struct pt_regs *, irq_regs);
+
+static inline struct pt_regs *get_irq_regs(void)
+{
+	return percpu_read(irq_regs);
+}
+
+static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
+{
+	struct pt_regs *old_regs;
+
+	old_regs = get_irq_regs();
+	percpu_write(irq_regs, new_regs);
+
+	return old_regs;
+}
+
+#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
deleted file mode 100644
index d7ed33ee94e9..000000000000
--- a/arch/x86/include/asm/irq_regs_32.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Per-cpu current frame pointer - the location of the last exception frame on
- * the stack, stored in the per-cpu area.
- *
- * Jeremy Fitzhardinge <jeremy@goop.org>
- */
-#ifndef _ASM_X86_IRQ_REGS_32_H
-#define _ASM_X86_IRQ_REGS_32_H
-
-#include <asm/percpu.h>
-
-#define ARCH_HAS_OWN_IRQ_REGS
-
-DECLARE_PER_CPU(struct pt_regs *, irq_regs);
-
-static inline struct pt_regs *get_irq_regs(void)
-{
-	return percpu_read(irq_regs);
-}
-
-static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
-{
-	struct pt_regs *old_regs;
-
-	old_regs = get_irq_regs();
-	percpu_write(irq_regs, new_regs);
-
-	return old_regs;
-}
-
-#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_regs_64.h b/arch/x86/include/asm/irq_regs_64.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/x86/include/asm/irq_regs_64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1db05247b47f..0b254de84083 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -22,6 +22,9 @@
 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 EXPORT_PER_CPU_SYMBOL(irq_stat);
 
+DEFINE_PER_CPU(struct pt_regs *, irq_regs);
+EXPORT_PER_CPU_SYMBOL(irq_regs);
+
 /*
  * Probabilistic stack overflow check:
  *
-- 
cgit v1.2.3


From bdbcdd48883940bbd8d17eb01172d58a261a413a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 21 Jan 2009 17:26:06 +0900
Subject: x86: uv cleanup

Impact: cleanup

Make the following uv related cleanups.

* collect visible uv related definitions and interfaces into uv/uv.h
  and use it.  this cleans up the messy situation where on 64bit, uv
  is defined properly, on 32bit generic it's dummy and on the rest
  undefined.  after this clean up, uv is defined on 64 and dummy on
  32.

* update uv_flush_tlb_others() such that it takes cpumask of
  to-be-flushed cpus as argument, instead of that minus self, and
  returns yet-to-be-flushed cpumask, instead of modifying the passed
  in parameter.  this interface change will ease dummy implementation
  of uv_flush_tlb_others() and makes uv tlb flush related stuff
  defined in tlb_uv proper.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/genapic_32.h |  7 ----
 arch/x86/include/asm/genapic_64.h |  6 ----
 arch/x86/include/asm/uv/uv.h      | 33 +++++++++++++++++++
 arch/x86/include/asm/uv/uv_bau.h  |  2 --
 arch/x86/kernel/cpu/common.c      |  1 +
 arch/x86/kernel/genx2apic_uv_x.c  |  1 +
 arch/x86/kernel/smpboot.c         |  1 +
 arch/x86/kernel/tlb_64.c          | 18 ++++-------
 arch/x86/kernel/tlb_uv.c          | 68 +++++++++++++++++++++++----------------
 9 files changed, 83 insertions(+), 54 deletions(-)
 create mode 100644 arch/x86/include/asm/uv/uv.h

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 2c05b737ee22..4334502d3664 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -138,11 +138,4 @@ struct genapic {
 extern struct genapic *genapic;
 extern void es7000_update_genapic_to_cluster(void);
 
-enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
-#define get_uv_system_type()		UV_NONE
-#define is_uv_system()			0
-#define uv_wakeup_secondary(a, b)	1
-#define uv_system_init()		do {} while (0)
-
-
 #endif /* _ASM_X86_GENAPIC_32_H */
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index adf32fb56aa6..7bb092c59055 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -51,15 +51,9 @@ extern struct genapic apic_x2apic_phys;
 extern int acpi_madt_oem_check(char *, char *);
 
 extern void apic_send_IPI_self(int vector);
-enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
-extern enum uv_system_type get_uv_system_type(void);
-extern int is_uv_system(void);
 
 extern struct genapic apic_x2apic_uv_x;
 DECLARE_PER_CPU(int, x2apic_extra_bits);
-extern void uv_cpu_init(void);
-extern void uv_system_init(void);
-extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
 
 extern void setup_apic_routing(void);
 
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
new file mode 100644
index 000000000000..dce5fe350134
--- /dev/null
+++ b/arch/x86/include/asm/uv/uv.h
@@ -0,0 +1,33 @@
+#ifndef _ASM_X86_UV_UV_H
+#define _ASM_X86_UV_UV_H
+
+enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
+
+#ifdef CONFIG_X86_64
+
+extern enum uv_system_type get_uv_system_type(void);
+extern int is_uv_system(void);
+extern void uv_cpu_init(void);
+extern void uv_system_init(void);
+extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
+extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
+						 struct mm_struct *mm,
+						 unsigned long va,
+						 unsigned int cpu);
+
+#else	/* X86_64 */
+
+static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
+static inline int is_uv_system(void)	{ return 0; }
+static inline void uv_cpu_init(void)	{ }
+static inline void uv_system_init(void)	{ }
+static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
+{ return 1; }
+static inline const struct cpumask *
+uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
+		    unsigned long va, unsigned int cpu)
+{ return cpumask; }
+
+#endif	/* X86_64 */
+
+#endif	/* _ASM_X86_UV_UV_H */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 74e6393bfddb..9b0e61bf7a88 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -325,8 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
 #define cpubit_isset(cpu, bau_local_cpumask) \
 	test_bit((cpu), (bau_local_cpumask).bits)
 
-extern int uv_flush_tlb_others(struct cpumask *,
-			       struct mm_struct *, unsigned long);
 extern void uv_bau_message_intr1(void);
 extern void uv_bau_timeout_intr1(void);
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fbebbcec001b..99904f288d6a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -28,6 +28,7 @@
 #include <asm/apic.h>
 #include <mach_apic.h>
 #include <asm/genapic.h>
+#include <asm/uv/uv.h>
 #endif
 
 #include <asm/pgtable.h>
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index b193e082f6ce..bfe36249145c 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -25,6 +25,7 @@
 #include <asm/ipi.h>
 #include <asm/genapic.h>
 #include <asm/pgtable.h>
+#include <asm/uv/uv.h>
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/bios.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 869b98840fd0..def770b57b5a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
 #include <asm/vmi.h>
 #include <asm/genapic.h>
 #include <asm/setup.h>
+#include <asm/uv/uv.h>
 #include <linux/mc146818rtc.h>
 
 #include <mach_apic.h>
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index e64a32c48825..b8bed841ad67 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -15,8 +15,7 @@
 #include <asm/proto.h>
 #include <asm/apicdef.h>
 #include <asm/idle.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_bau.h>
+#include <asm/uv/uv.h>
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
 			= { &init_mm, 0, };
@@ -206,16 +205,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 			     struct mm_struct *mm, unsigned long va)
 {
 	if (is_uv_system()) {
-		/* FIXME: could be an percpu_alloc'd thing */
-		static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
-		struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask);
+		unsigned int cpu;
 
-		cpumask_andnot(after_uv_flush, cpumask,
-			       cpumask_of(smp_processor_id()));
-		if (!uv_flush_tlb_others(after_uv_flush, mm, va))
-			flush_tlb_others_ipi(after_uv_flush, mm, va);
-
-		put_cpu_var(flush_tlb_uv_cpumask);
+		cpu = get_cpu();
+		cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
+		if (cpumask)
+			flush_tlb_others_ipi(cpumask, mm, va);
+		put_cpu();
 		return;
 	}
 	flush_tlb_others_ipi(cpumask, mm, va);
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 690dcf1a27d4..aae15dd72604 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 
 #include <asm/mmu_context.h>
+#include <asm/uv/uv.h>
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_bau.h>
@@ -209,14 +210,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
  *
  * Send a broadcast and wait for a broadcast message to complete.
  *
- * The cpumaskp mask contains the cpus the broadcast was sent to.
+ * The flush_mask contains the cpus the broadcast was sent to.
  *
- * Returns 1 if all remote flushing was done. The mask is zeroed.
- * Returns 0 if some remote flushing remains to be done. The mask will have
- * some bits still set.
+ * Returns NULL if all remote flushing was done. The mask is zeroed.
+ * Returns @flush_mask if some remote flushing remains to be done. The
+ * mask will have some bits still set.
  */
-int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
-			   struct cpumask *cpumaskp)
+const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
+					     struct bau_desc *bau_desc,
+					     struct cpumask *flush_mask)
 {
 	int completion_status = 0;
 	int right_shift;
@@ -263,59 +265,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
 	 * Success, so clear the remote cpu's from the mask so we don't
 	 * use the IPI method of shootdown on them.
 	 */
-	for_each_cpu(bit, cpumaskp) {
+	for_each_cpu(bit, flush_mask) {
 		blade = uv_cpu_to_blade_id(bit);
 		if (blade == this_blade)
 			continue;
-		cpumask_clear_cpu(bit, cpumaskp);
+		cpumask_clear_cpu(bit, flush_mask);
 	}
-	if (!cpumask_empty(cpumaskp))
-		return 0;
-	return 1;
+	if (!cpumask_empty(flush_mask))
+		return flush_mask;
+	return NULL;
 }
 
 /**
  * uv_flush_tlb_others - globally purge translation cache of a virtual
  * address or all TLB's
- * @cpumaskp: mask of all cpu's in which the address is to be removed
+ * @cpumask: mask of all cpu's in which the address is to be removed
  * @mm: mm_struct containing virtual address range
  * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
+ * @cpu: the current cpu
  *
  * This is the entry point for initiating any UV global TLB shootdown.
  *
  * Purges the translation caches of all specified processors of the given
  * virtual address, or purges all TLB's on specified processors.
  *
- * The caller has derived the cpumaskp from the mm_struct and has subtracted
- * the local cpu from the mask.  This function is called only if there
- * are bits set in the mask. (e.g. flush_tlb_page())
+ * The caller has derived the cpumask from the mm_struct.  This function
+ * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
  *
- * The cpumaskp is converted into a nodemask of the nodes containing
+ * The cpumask is converted into a nodemask of the nodes containing
  * the cpus.
  *
- * Returns 1 if all remote flushing was done.
- * Returns 0 if some remote flushing remains to be done.
+ * Note that this function should be called with preemption disabled.
+ *
+ * Returns NULL if all remote flushing was done.
+ * Returns pointer to cpumask if some remote flushing remains to be
+ * done.  The returned pointer is valid till preemption is re-enabled.
  */
-int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
-			unsigned long va)
+const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
+					  struct mm_struct *mm,
+					  unsigned long va, unsigned int cpu)
 {
+	static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
+	struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
 	int i;
 	int bit;
 	int blade;
-	int cpu;
+	int uv_cpu;
 	int this_blade;
 	int locals = 0;
 	struct bau_desc *bau_desc;
 
-	cpu = uv_blade_processor_id();
+	WARN_ON(!in_atomic());
+
+	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
+
+	uv_cpu = uv_blade_processor_id();
 	this_blade = uv_numa_blade_id();
 	bau_desc = __get_cpu_var(bau_control).descriptor_base;
-	bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu;
+	bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
 
 	bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 
 	i = 0;
-	for_each_cpu(bit, cpumaskp) {
+	for_each_cpu(bit, flush_mask) {
 		blade = uv_cpu_to_blade_id(bit);
 		BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
 		if (blade == this_blade) {
@@ -330,17 +342,17 @@ int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
 		 * no off_node flushing; return status for local node
 		 */
 		if (locals)
-			return 0;
+			return flush_mask;
 		else
-			return 1;
+			return NULL;
 	}
 	__get_cpu_var(ptcstats).requestor++;
 	__get_cpu_var(ptcstats).ntargeted += i;
 
 	bau_desc->payload.address = va;
-	bau_desc->payload.sending_cpu = smp_processor_id();
+	bau_desc->payload.sending_cpu = cpu;
 
-	return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp);
+	return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
 }
 
 /*
-- 
cgit v1.2.3


From 6dd01bedee6c3191643db303a1dc530bad56ec55 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 21 Jan 2009 17:26:06 +0900
Subject: x86: prepare for tlb merge

Impact: clean up, ipi vector number reordering for x86_32

Make the following changes to prepare for tlb merge.

* reorder x86_32 ip vectors

* adjust tlb_32.c and tlb_64.c such that their logics coincide exactly
	- on spurious invalidate ipi, tlb_32 acks the irq
	- tlb_64 now has proper memory barriers around clearing
          flush_cpumask (no change in generated code)

* unexport flush_tlb_page from tlb_32.c, there's no user

* use unsigned int for cpu id

* drop unnecessary includes from tlb_64.c

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/irq_vectors.h | 33 ++++++++++++++++-----------------
 arch/x86/kernel/tlb_32.c           | 10 +++++-----
 arch/x86/kernel/tlb_64.c           | 18 +++++++-----------
 3 files changed, 28 insertions(+), 33 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index a16a2ab2b429..4ee8f800504b 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -49,31 +49,30 @@
  *  some of the following vectors are 'rare', they are merged
  *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
  *  TLB, reschedule and local APIC vectors are performance-critical.
- *
- *  Vectors 0xf0-0xfa are free (reserved for future Linux use).
  */
 #ifdef CONFIG_X86_32
 
 # define SPURIOUS_APIC_VECTOR		0xff
 # define ERROR_APIC_VECTOR		0xfe
-# define INVALIDATE_TLB_VECTOR		0xfd
-# define RESCHEDULE_VECTOR		0xfc
-# define CALL_FUNCTION_VECTOR		0xfb
-# define CALL_FUNCTION_SINGLE_VECTOR	0xfa
-# define THERMAL_APIC_VECTOR		0xf0
+# define RESCHEDULE_VECTOR		0xfd
+# define CALL_FUNCTION_VECTOR		0xfc
+# define CALL_FUNCTION_SINGLE_VECTOR	0xfb
+# define THERMAL_APIC_VECTOR		0xfa
+/* 0xf1 - 0xf9 : free */
+# define INVALIDATE_TLB_VECTOR		0xf0
 
 #else
 
-#define SPURIOUS_APIC_VECTOR		0xff
-#define ERROR_APIC_VECTOR		0xfe
-#define RESCHEDULE_VECTOR		0xfd
-#define CALL_FUNCTION_VECTOR		0xfc
-#define CALL_FUNCTION_SINGLE_VECTOR	0xfb
-#define THERMAL_APIC_VECTOR		0xfa
-#define THRESHOLD_APIC_VECTOR		0xf9
-#define UV_BAU_MESSAGE			0xf8
-#define INVALIDATE_TLB_VECTOR_END	0xf7
-#define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
+# define SPURIOUS_APIC_VECTOR		0xff
+# define ERROR_APIC_VECTOR		0xfe
+# define RESCHEDULE_VECTOR		0xfd
+# define CALL_FUNCTION_VECTOR		0xfc
+# define CALL_FUNCTION_SINGLE_VECTOR	0xfb
+# define THERMAL_APIC_VECTOR		0xfa
+# define THRESHOLD_APIC_VECTOR		0xf9
+# define UV_BAU_MESSAGE			0xf8
+# define INVALIDATE_TLB_VECTOR_END	0xf7
+# define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
 
 #define NUM_INVALIDATE_TLB_VECTORS	8
 
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index abf0808d6fc4..93fcb05c7d43 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -84,13 +84,15 @@ EXPORT_SYMBOL_GPL(leave_mm);
  *
  * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
  * 2) Leave the mm if we are in the lazy tlb mode.
+ *
+ * Interrupts are disabled.
  */
 
 void smp_invalidate_interrupt(struct pt_regs *regs)
 {
-	unsigned long cpu;
+	unsigned int cpu;
 
-	cpu = get_cpu();
+	cpu = smp_processor_id();
 
 	if (!cpumask_test_cpu(cpu, flush_cpumask))
 		goto out;
@@ -112,12 +114,11 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 		} else
 			leave_mm(cpu);
 	}
+out:
 	ack_APIC_irq();
 	smp_mb__before_clear_bit();
 	cpumask_clear_cpu(cpu, flush_cpumask);
 	smp_mb__after_clear_bit();
-out:
-	put_cpu_no_resched();
 	inc_irq_stat(irq_tlb_count);
 }
 
@@ -215,7 +216,6 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 		flush_tlb_others(&mm->cpu_vm_mask, mm, va);
 	preempt_enable();
 }
-EXPORT_SYMBOL(flush_tlb_page);
 
 static void do_flush_tlb_all(void *info)
 {
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index b8bed841ad67..19ac661422f7 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -1,20 +1,14 @@
 #include <linux/init.h>
 
 #include <linux/mm.h>
-#include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <linux/smp.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 
-#include <asm/mtrr.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
-#include <asm/proto.h>
-#include <asm/apicdef.h>
-#include <asm/idle.h>
+#include <asm/apic.h>
 #include <asm/uv/uv.h>
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
@@ -121,8 +115,8 @@ EXPORT_SYMBOL_GPL(leave_mm);
 
 asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 {
-	int cpu;
-	int sender;
+	unsigned int cpu;
+	unsigned int sender;
 	union smp_flush_state *f;
 
 	cpu = smp_processor_id();
@@ -155,14 +149,16 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 	}
 out:
 	ack_APIC_irq();
+	smp_mb__before_clear_bit();
 	cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
+	smp_mb__after_clear_bit();
 	inc_irq_stat(irq_tlb_count);
 }
 
 static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 				 struct mm_struct *mm, unsigned long va)
 {
-	int sender;
+	unsigned int sender;
 	union smp_flush_state *f;
 
 	/* Caller has disabled preemption */
-- 
cgit v1.2.3


From 02cf94c370e0dc9bf408fe45eb86fe9ad58eaf7f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 21 Jan 2009 17:26:06 +0900
Subject: x86: make x86_32 use tlb_64.c

Impact: less contention when issuing invalidate IPI, cleanup

Make x86_32 use the same tlb code as 64bit.  The 64bit code uses
multiple IPI vectors for tlb shootdown to reduce contention.  This
patch makes x86_32 allocate the same 8 IPIs as x86_64 and share the
code paths.

Note that the usage of asmlinkage is inconsistent for x86_32 and 64
and calls for further cleanup.  This has been noted with a FIXME
comment in tlb_64.c.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/irq_vectors.h             |   7 +-
 arch/x86/include/asm/mach-default/entry_arch.h |  18 +-
 arch/x86/kernel/Makefile                       |   2 +-
 arch/x86/kernel/entry_32.S                     |   6 +-
 arch/x86/kernel/irqinit_32.c                   |  11 +-
 arch/x86/kernel/tlb_32.c                       | 239 -------------------------
 arch/x86/kernel/tlb_64.c                       |  12 +-
 7 files changed, 47 insertions(+), 248 deletions(-)
 delete mode 100644 arch/x86/kernel/tlb_32.c

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 4ee8f800504b..9a83a10a5d51 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -58,8 +58,11 @@
 # define CALL_FUNCTION_VECTOR		0xfc
 # define CALL_FUNCTION_SINGLE_VECTOR	0xfb
 # define THERMAL_APIC_VECTOR		0xfa
-/* 0xf1 - 0xf9 : free */
-# define INVALIDATE_TLB_VECTOR		0xf0
+/* 0xf8 - 0xf9 : free */
+# define INVALIDATE_TLB_VECTOR_END	0xf7
+# define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
+
+# define NUM_INVALIDATE_TLB_VECTORS	8
 
 #else
 
diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h
index 6b1add8e31dd..6fa399ad1de2 100644
--- a/arch/x86/include/asm/mach-default/entry_arch.h
+++ b/arch/x86/include/asm/mach-default/entry_arch.h
@@ -11,10 +11,26 @@
  */
 #ifdef CONFIG_X86_SMP
 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
-BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
+
+BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
+		 smp_invalidate_interrupt)
 #endif
 
 /*
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index eb074530c7d3..a62a15c22227 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -58,7 +58,7 @@ obj-$(CONFIG_PCI)		+= early-quirks.o
 apm-y				:= apm_32.o
 obj-$(CONFIG_APM)		+= apm.o
 obj-$(CONFIG_X86_SMP)		+= smp.o
-obj-$(CONFIG_X86_SMP)		+= smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
+obj-$(CONFIG_X86_SMP)		+= smpboot.o tsc_sync.o ipi.o tlb_64.o
 obj-$(CONFIG_X86_32_SMP)	+= smpcommon.o
 obj-$(CONFIG_X86_64_SMP)	+= tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 46469029e9d3..a0b91aac72a1 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -672,7 +672,7 @@ common_interrupt:
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
-#define BUILD_INTERRUPT(name, nr)	\
+#define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
 	pushl $~(nr);			\
@@ -680,11 +680,13 @@ ENTRY(name)				\
 	SAVE_ALL;			\
 	TRACE_IRQS_OFF			\
 	movl %esp,%eax;			\
-	call smp_##name;		\
+	call fn;			\
 	jmp ret_from_intr;		\
 	CFI_ENDPROC;			\
 ENDPROC(name)
 
+#define BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(name, nr, smp_##name)
+
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 1507ad4e674d..bf629cadec1a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -149,8 +149,15 @@ void __init native_init_IRQ(void)
 	 */
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
-	/* IPI for invalidation */
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+	/* IPIs for invalidation */
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
 
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
deleted file mode 100644
index 93fcb05c7d43..000000000000
--- a/arch/x86/kernel/tlb_32.c
+++ /dev/null
@@ -1,239 +0,0 @@
-#include <linux/spinlock.h>
-#include <linux/cpu.h>
-#include <linux/interrupt.h>
-
-#include <asm/tlbflush.h>
-
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
-			= { &init_mm, 0, };
-
-/* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
-
-/*
- *	Smarter SMP flushing macros.
- *		c/o Linus Torvalds.
- *
- *	These mean you can really definitely utterly forget about
- *	writing to user space from interrupts. (Its not allowed anyway).
- *
- *	Optimizations Manfred Spraul <manfred@colorfullife.com>
- */
-
-static cpumask_var_t flush_cpumask;
-static struct mm_struct *flush_mm;
-static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
-void leave_mm(int cpu)
-{
-	BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
-	cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
-	load_cr3(swapper_pg_dir);
-}
-EXPORT_SYMBOL_GPL(leave_mm);
-
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- * 	Stop ipi delivery for the old mm. This is not synchronized with
- * 	the other cpus, but smp_invalidate_interrupt ignore flush ipis
- * 	for the wrong mm, and in the worst case we perform a superfluous
- * 	tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- * 	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- *	was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- * 	Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- * 	Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- *	cpu_tlbstate[].active_mm is correct, cpu0 already handles
- *	flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * 	Atomically set the bit [other cpus will start sending flush ipis],
- * 	and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- *   runs in kernel space, the cpu could load tlb entries for user space
- *   pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- *
- * Interrupts are disabled.
- */
-
-void smp_invalidate_interrupt(struct pt_regs *regs)
-{
-	unsigned int cpu;
-
-	cpu = smp_processor_id();
-
-	if (!cpumask_test_cpu(cpu, flush_cpumask))
-		goto out;
-		/*
-		 * This was a BUG() but until someone can quote me the
-		 * line from the intel manual that guarantees an IPI to
-		 * multiple CPUs is retried _only_ on the erroring CPUs
-		 * its staying as a return
-		 *
-		 * BUG();
-		 */
-
-	if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
-		if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
-			if (flush_va == TLB_FLUSH_ALL)
-				local_flush_tlb();
-			else
-				__flush_tlb_one(flush_va);
-		} else
-			leave_mm(cpu);
-	}
-out:
-	ack_APIC_irq();
-	smp_mb__before_clear_bit();
-	cpumask_clear_cpu(cpu, flush_cpumask);
-	smp_mb__after_clear_bit();
-	inc_irq_stat(irq_tlb_count);
-}
-
-void native_flush_tlb_others(const struct cpumask *cpumask,
-			     struct mm_struct *mm, unsigned long va)
-{
-	/*
-	 * - mask must exist :)
-	 */
-	BUG_ON(cpumask_empty(cpumask));
-	BUG_ON(!mm);
-
-	/*
-	 * i'm not happy about this global shared spinlock in the
-	 * MM hot path, but we'll see how contended it is.
-	 * AK: x86-64 has a faster method that could be ported.
-	 */
-	spin_lock(&tlbstate_lock);
-
-	cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
-#ifdef CONFIG_HOTPLUG_CPU
-	/* If a CPU which we ran on has gone down, OK. */
-	cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
-	if (unlikely(cpumask_empty(flush_cpumask))) {
-		spin_unlock(&tlbstate_lock);
-		return;
-	}
-#endif
-	flush_mm = mm;
-	flush_va = va;
-
-	/*
-	 * Make the above memory operations globally visible before
-	 * sending the IPI.
-	 */
-	smp_mb();
-	/*
-	 * We have to send the IPI only to
-	 * CPUs affected.
-	 */
-	send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
-
-	while (!cpumask_empty(flush_cpumask))
-		/* nothing. lockup detection does not belong here */
-		cpu_relax();
-
-	flush_mm = NULL;
-	flush_va = 0;
-	spin_unlock(&tlbstate_lock);
-}
-
-void flush_tlb_current_task(void)
-{
-	struct mm_struct *mm = current->mm;
-
-	preempt_disable();
-
-	local_flush_tlb();
-	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
-	preempt_enable();
-}
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
-
-	preempt_disable();
-
-	if (current->active_mm == mm) {
-		if (current->mm)
-			local_flush_tlb();
-		else
-			leave_mm(smp_processor_id());
-	}
-	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
-
-	preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
-{
-	struct mm_struct *mm = vma->vm_mm;
-
-	preempt_disable();
-
-	if (current->active_mm == mm) {
-		if (current->mm)
-			__flush_tlb_one(va);
-		 else
-			leave_mm(smp_processor_id());
-	}
-
-	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(&mm->cpu_vm_mask, mm, va);
-	preempt_enable();
-}
-
-static void do_flush_tlb_all(void *info)
-{
-	unsigned long cpu = smp_processor_id();
-
-	__flush_tlb_all();
-	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
-		leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
-	on_each_cpu(do_flush_tlb_all, NULL, 1);
-}
-
-static int init_flush_cpumask(void)
-{
-	alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
-	return 0;
-}
-early_initcall(init_flush_cpumask);
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 19ac661422f7..b3ca1b940654 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -113,7 +113,17 @@ EXPORT_SYMBOL_GPL(leave_mm);
  * Interrupts are disabled.
  */
 
-asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
+/*
+ * FIXME: use of asmlinkage is not consistent.  On x86_64 it's noop
+ * but still used for documentation purpose but the usage is slightly
+ * inconsistent.  On x86_32, asmlinkage is regparm(0) but interrupt
+ * entry calls in with the first parameter in %eax.  Maybe define
+ * intrlinkage?
+ */
+#ifdef CONFIG_X86_64
+asmlinkage
+#endif
+void smp_invalidate_interrupt(struct pt_regs *regs)
 {
 	unsigned int cpu;
 	unsigned int sender;
-- 
cgit v1.2.3


From 03b486322e994dde49e67aedb391867b7cf28822 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 20 Jan 2009 04:36:04 +0100
Subject: x86: make UV support configurable

Make X86 SGI Ultraviolet support configurable. Saves about 13K of text size
on my modest config.

   text    data     bss     dec     hex filename
6770537 1158680  694356 8623573  8395d5 vmlinux
6757492 1157664  694228 8609384  835e68 vmlinux.nouv

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig             | 7 +++++++
 arch/x86/include/asm/uv/uv.h | 6 +++---
 arch/x86/kernel/Makefile     | 5 +++--
 arch/x86/kernel/efi.c        | 2 ++
 arch/x86/kernel/entry_64.S   | 2 ++
 arch/x86/kernel/genapic_64.c | 2 ++
 arch/x86/kernel/io_apic.c    | 2 +-
 drivers/misc/Kconfig         | 4 ++--
 8 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ef27aed6ff74..5a29b792cb84 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -391,6 +391,13 @@ config X86_RDC321X
 	  as R-8610-(G).
 	  If you don't have one of these chips, you should say N here.
 
+config X86_UV
+	bool "SGI Ultraviolet"
+	depends on X86_64
+	help
+	  This option is needed in order to support SGI Ultraviolet systems.
+	  If you don't have one of these, you should say N here.
+
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 	prompt "Single-depth WCHAN output"
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index dce5fe350134..8ac1d7e312f3 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -3,7 +3,7 @@
 
 enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_UV
 
 extern enum uv_system_type get_uv_system_type(void);
 extern int is_uv_system(void);
@@ -15,7 +15,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 						 unsigned long va,
 						 unsigned int cpu);
 
-#else	/* X86_64 */
+#else	/* X86_UV */
 
 static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
 static inline int is_uv_system(void)	{ return 0; }
@@ -28,6 +28,6 @@ uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
 		    unsigned long va, unsigned int cpu)
 { return cpumask; }
 
-#endif	/* X86_64 */
+#endif	/* X86_UV */
 
 #endif	/* _ASM_X86_UV_UV_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0b3272f58bd9..a99437c965cc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -115,10 +115,11 @@ obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb_64.o # NB rename without _64
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-        obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
-	obj-y				+= bios_uv.o uv_irq.o uv_sysfs.o
+        obj-y				+= genapic_64.o genapic_flat_64.o
         obj-y				+= genx2apic_cluster.o
         obj-y				+= genx2apic_phys.o
+	obj-$(CONFIG_X86_UV)		+= genx2apic_uv_x.o tlb_uv.o
+	obj-$(CONFIG_X86_UV)		+= bios_uv.o uv_irq.o uv_sysfs.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
         obj-$(CONFIG_AUDIT)		+= audit_64.o
 
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1119d247fe11..b205272ad394 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -366,10 +366,12 @@ void __init efi_init(void)
 					SMBIOS_TABLE_GUID)) {
 			efi.smbios = config_tables[i].table;
 			printk(" SMBIOS=0x%lx ", config_tables[i].table);
+#ifdef CONFIG_X86_UV
 		} else if (!efi_guidcmp(config_tables[i].guid,
 					UV_SYSTEM_TABLE_GUID)) {
 			efi.uv_systab = config_tables[i].table;
 			printk(" UVsystab=0x%lx ", config_tables[i].table);
+#endif
 		} else if (!efi_guidcmp(config_tables[i].guid,
 					HCDP_TABLE_GUID)) {
 			efi.hcdp = config_tables[i].table;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c52b60919163..a52703864a16 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -982,8 +982,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
 	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
 #endif
 
+#ifdef CONFIG_X86_UV
 apicinterrupt UV_BAU_MESSAGE \
 	uv_bau_message_intr1 uv_bau_message_interrupt
+#endif
 apicinterrupt LOCAL_TIMER_VECTOR \
 	apic_timer_interrupt smp_apic_timer_interrupt
 
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 2bced78b0b8e..e656c2721154 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -32,7 +32,9 @@ extern struct genapic apic_x2apic_cluster;
 struct genapic __read_mostly *genapic = &apic_flat;
 
 static struct genapic *apic_probe[] __initdata = {
+#ifdef CONFIG_X86_UV
 	&apic_x2apic_uv_x,
+#endif
 	&apic_x2apic_phys,
 	&apic_x2apic_cluster,
 	&apic_physflat,
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index f79660390724..e4d36bd56b62 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3765,7 +3765,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_UV
 /*
  * Re-target the irq to the specified CPU and enable the specified MMR located
  * on the specified blade to allow the sending of MSIs to the specified CPU.
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 419c378bd24b..abcb8459254e 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -170,7 +170,7 @@ config ENCLOSURE_SERVICES
 config SGI_XP
 	tristate "Support communication between SGI SSIs"
 	depends on NET
-	depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP
+	depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP
 	select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
 	select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
 	select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP
@@ -197,7 +197,7 @@ config HP_ILO
 
 config SGI_GRU
 	tristate "SGI GRU driver"
-	depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP
+	depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP
 	default n
 	select MMU_NOTIFIER
 	---help---
-- 
cgit v1.2.3


From ab897d2013128f470240a541b31cf5e636984e71 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 22 Jan 2009 14:24:16 -0800
Subject: x86/pvops: remove pte_flags pvop

pte_flags() was introduced as a new pvop in order to extract just the
flags portion of a pte, which is a potentially cheaper operation than
extracting the page number as well.  It turns out this operation is
not needed, because simply using a mask to extract the flags from a
pte is sufficient for all current users.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/page.h     |  3 +--
 arch/x86/include/asm/paravirt.h | 18 ------------------
 arch/x86/kernel/paravirt.c      |  1 -
 arch/x86/xen/enlighten.c        |  1 -
 4 files changed, 1 insertion(+), 22 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index e9873a2e8695..6b9810859daf 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -147,7 +147,7 @@ static inline pteval_t native_pte_val(pte_t pte)
 	return pte.pte;
 }
 
-static inline pteval_t native_pte_flags(pte_t pte)
+static inline pteval_t pte_flags(pte_t pte)
 {
 	return native_pte_val(pte) & PTE_FLAGS_MASK;
 }
@@ -173,7 +173,6 @@ static inline pteval_t native_pte_flags(pte_t pte)
 #endif
 
 #define pte_val(x)	native_pte_val(x)
-#define pte_flags(x)	native_pte_flags(x)
 #define __pte(x)	native_make_pte(x)
 
 #endif	/* CONFIG_PARAVIRT */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ba3e2ff6aedc..e25c410f3d8c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -279,7 +279,6 @@ struct pv_mmu_ops {
 					pte_t *ptep, pte_t pte);
 
 	pteval_t (*pte_val)(pte_t);
-	pteval_t (*pte_flags)(pte_t);
 	pte_t (*make_pte)(pteval_t pte);
 
 	pgdval_t (*pgd_val)(pgd_t);
@@ -1084,23 +1083,6 @@ static inline pteval_t pte_val(pte_t pte)
 	return ret;
 }
 
-static inline pteval_t pte_flags(pte_t pte)
-{
-	pteval_t ret;
-
-	if (sizeof(pteval_t) > sizeof(long))
-		ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
-				 pte.pte, (u64)pte.pte >> 32);
-	else
-		ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
-				 pte.pte);
-
-#ifdef CONFIG_PARAVIRT_DEBUG
-	BUG_ON(ret & PTE_PFN_MASK);
-#endif
-	return ret;
-}
-
 static inline pgd_t __pgd(pgdval_t val)
 {
 	pgdval_t ret;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4c8fb608873..202514be5923 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -435,7 +435,6 @@ struct pv_mmu_ops pv_mmu_ops = {
 #endif /* PAGETABLE_LEVELS >= 3 */
 
 	.pte_val = native_pte_val,
-	.pte_flags = native_pte_flags,
 	.pgd_val = native_pgd_val,
 
 	.make_pte = native_make_pte,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bea215230b20..6f1bb71aa13a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1314,7 +1314,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
 
 	.pte_val = xen_pte_val,
-	.pte_flags = native_pte_flags,
 	.pgd_val = xen_pgd_val,
 
 	.make_pte = xen_make_pte,
-- 
cgit v1.2.3


From 6522869c34664dd5f05a0a327e93915b1281c90d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 22 Jan 2009 14:24:22 -0800
Subject: x86: add pte_set_flags/clear_flags for pte flag manipulation

It's not necessary to deconstruct and reconstruct a pte every time its
flags are being updated.  Introduce pte_set_flags and pte_clear_flags
to set and clear flags in a pte.  This allows the flag manipulation
code to be inlined, and avoids calls via paravirt-ops.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/pgtable.h | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 06bbcbd66e9c..6ceaef08486f 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -240,64 +240,78 @@ static inline int pmd_large(pmd_t pte)
 		(_PAGE_PSE | _PAGE_PRESENT);
 }
 
+static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
+{
+	pteval_t v = native_pte_val(pte);
+
+	return native_make_pte(v | set);
+}
+
+static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
+{
+	pteval_t v = native_pte_val(pte);
+
+	return native_make_pte(v & ~clear);
+}
+
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+	return pte_clear_flags(pte, _PAGE_DIRTY);
 }
 
 static inline pte_t pte_mkold(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+	return pte_clear_flags(pte, _PAGE_ACCESSED);
 }
 
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~_PAGE_RW);
+	return pte_clear_flags(pte, _PAGE_RW);
 }
 
 static inline pte_t pte_mkexec(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~_PAGE_NX);
+	return pte_clear_flags(pte, _PAGE_NX);
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	return __pte(pte_val(pte) | _PAGE_DIRTY);
+	return pte_set_flags(pte, _PAGE_DIRTY);
 }
 
 static inline pte_t pte_mkyoung(pte_t pte)
 {
-	return __pte(pte_val(pte) | _PAGE_ACCESSED);
+	return pte_set_flags(pte, _PAGE_ACCESSED);
 }
 
 static inline pte_t pte_mkwrite(pte_t pte)
 {
-	return __pte(pte_val(pte) | _PAGE_RW);
+	return pte_set_flags(pte, _PAGE_RW);
 }
 
 static inline pte_t pte_mkhuge(pte_t pte)
 {
-	return __pte(pte_val(pte) | _PAGE_PSE);
+	return pte_set_flags(pte, _PAGE_PSE);
 }
 
 static inline pte_t pte_clrhuge(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~_PAGE_PSE);
+	return pte_clear_flags(pte, _PAGE_PSE);
 }
 
 static inline pte_t pte_mkglobal(pte_t pte)
 {
-	return __pte(pte_val(pte) | _PAGE_GLOBAL);
+	return pte_set_flags(pte, _PAGE_GLOBAL);
 }
 
 static inline pte_t pte_clrglobal(pte_t pte)
 {
-	return __pte(pte_val(pte) & ~_PAGE_GLOBAL);
+	return pte_clear_flags(pte, _PAGE_GLOBAL);
 }
 
 static inline pte_t pte_mkspecial(pte_t pte)
 {
-	return __pte(pte_val(pte) | _PAGE_SPECIAL);
+	return pte_set_flags(pte, _PAGE_SPECIAL);
 }
 
 extern pteval_t __supported_pte_mask;
-- 
cgit v1.2.3


From 03d2989df9c1c7df5b33c7a87e0790465461836a Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Fri, 23 Jan 2009 11:03:28 +0900
Subject: x86: remove idle_timestamp from 32bit irq_cpustat_t

Impact: bogus irq_cpustat field removed

idle_timestamp is left over from the removed irqbalance code.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/hardirq_32.h | 1 -
 arch/x86/kernel/process_32.c      | 1 -
 2 files changed, 2 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
index d4b5d731073f..a70ed050fdef 100644
--- a/arch/x86/include/asm/hardirq_32.h
+++ b/arch/x86/include/asm/hardirq_32.h
@@ -6,7 +6,6 @@
 
 typedef struct {
 	unsigned int __softirq_pending;
-	unsigned long idle_timestamp;
 	unsigned int __nmi_count;	/* arch dependent */
 	unsigned int apic_timer_irqs;	/* arch dependent */
 	unsigned int irq0_irqs;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2c00a57ccb90..1a1ae8edc40c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -108,7 +108,6 @@ void cpu_idle(void)
 				play_dead();
 
 			local_irq_disable();
-			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
 			pm_idle();
-- 
cgit v1.2.3


From 3819cd489ec5d18a4cbd2f05acdc516473caa105 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Fri, 23 Jan 2009 11:03:29 +0900
Subject: x86: remove include of apic.h from hardirq_64.h

Impact: cleanup

APIC definitions aren't needed here.  Remove the include and fix
up the fallout.

tj: added include to mce_intel_64.c.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/hardirq_64.h         | 1 -
 arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 1 +
 arch/x86/kernel/efi_64.c                  | 1 +
 arch/x86/kernel/irq_64.c                  | 1 +
 4 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
index a65bab20f6ce..873c3c7bffcc 100644
--- a/arch/x86/include/asm/hardirq_64.h
+++ b/arch/x86/include/asm/hardirq_64.h
@@ -3,7 +3,6 @@
 
 #include <linux/threads.h>
 #include <linux/irq.h>
-#include <asm/apic.h>
 
 typedef struct {
 	unsigned int __softirq_pending;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 4b48f251fd39..5e8c79e748a6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -7,6 +7,7 @@
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
 #include <asm/processor.h>
+#include <asm/apic.h>
 #include <asm/msr.h>
 #include <asm/mce.h>
 #include <asm/hw_irq.h>
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 652c5287215f..a4ee29127fdf 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -36,6 +36,7 @@
 #include <asm/proto.h>
 #include <asm/efi.h>
 #include <asm/cacheflush.h>
+#include <asm/fixmap.h>
 
 static pgd_t save_pgd __initdata;
 static unsigned long efi_flags __initdata;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 0b254de84083..018963aa6ee3 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,6 +18,7 @@
 #include <linux/smp.h>
 #include <asm/io_apic.h>
 #include <asm/idle.h>
+#include <asm/apic.h>
 
 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 EXPORT_PER_CPU_SYMBOL(irq_stat);
-- 
cgit v1.2.3


From 658a9a2c34914e8114eea9c4d85d4ecd3ee33b98 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Fri, 23 Jan 2009 11:03:31 +0900
Subject: x86: sync hardirq_{32,64}.h

Impact: better code generation and removal of unused field for 32bit

In general, use the 64-bit version.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/hardirq_32.h | 14 ++++++++++----
 arch/x86/include/asm/hardirq_64.h | 10 +++++-----
 2 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
index a70ed050fdef..e5a332c28c98 100644
--- a/arch/x86/include/asm/hardirq_32.h
+++ b/arch/x86/include/asm/hardirq_32.h
@@ -14,6 +14,7 @@ typedef struct {
 	unsigned int irq_tlb_count;
 	unsigned int irq_thermal_count;
 	unsigned int irq_spurious_count;
+	unsigned int irq_threshold_count;
 } ____cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
@@ -22,11 +23,16 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
 #define MAX_HARDIRQS_PER_CPU NR_VECTORS
 
 #define __ARCH_IRQ_STAT
-#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
 
-#define inc_irq_stat(member)	(__get_cpu_var(irq_stat).member++)
+#define inc_irq_stat(member)	percpu_add(irq_stat.member, 1)
 
-void ack_bad_irq(unsigned int irq);
-#include <linux/irq_cpustat.h>
+#define local_softirq_pending()	percpu_read(irq_stat.__softirq_pending)
+
+#define __ARCH_SET_SOFTIRQ_PENDING
+
+#define set_softirq_pending(x)	percpu_write(irq_stat.__softirq_pending, (x))
+#define or_softirq_pending(x)	percpu_or(irq_stat.__softirq_pending, (x))
+
+extern void ack_bad_irq(unsigned int irq);
 
 #endif /* _ASM_X86_HARDIRQ_32_H */
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
index 873c3c7bffcc..392e7d614579 100644
--- a/arch/x86/include/asm/hardirq_64.h
+++ b/arch/x86/include/asm/hardirq_64.h
@@ -22,16 +22,16 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
 /* We can have at most NR_VECTORS irqs routed to a cpu at a time */
 #define MAX_HARDIRQS_PER_CPU NR_VECTORS
 
-#define __ARCH_IRQ_STAT 1
+#define __ARCH_IRQ_STAT
 
 #define inc_irq_stat(member)	percpu_add(irq_stat.member, 1)
 
-#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
+#define local_softirq_pending()	percpu_read(irq_stat.__softirq_pending)
 
-#define __ARCH_SET_SOFTIRQ_PENDING 1
+#define __ARCH_SET_SOFTIRQ_PENDING
 
-#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x)  percpu_or(irq_stat.__softirq_pending, (x))
+#define set_softirq_pending(x)	percpu_write(irq_stat.__softirq_pending, (x))
+#define or_softirq_pending(x)	percpu_or(irq_stat.__softirq_pending, (x))
 
 extern void ack_bad_irq(unsigned int irq);
 
-- 
cgit v1.2.3


From 22da7b3df3a2e26a87a8581575dbf26e465a6ac7 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Fri, 23 Jan 2009 11:03:31 +0900
Subject: x86: merge hardirq_{32,64}.h into hardirq.h

Impact: cleanup

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/hardirq.h    | 43 ++++++++++++++++++++++++++++++++++-----
 arch/x86/include/asm/hardirq_32.h | 38 ----------------------------------
 arch/x86/include/asm/hardirq_64.h | 38 ----------------------------------
 3 files changed, 38 insertions(+), 81 deletions(-)
 delete mode 100644 arch/x86/include/asm/hardirq_32.h
 delete mode 100644 arch/x86/include/asm/hardirq_64.h

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 000787df66e6..f4a95f20f8ec 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -1,11 +1,44 @@
-#ifdef CONFIG_X86_32
-# include "hardirq_32.h"
-#else
-# include "hardirq_64.h"
-#endif
+#ifndef _ASM_X86_HARDIRQ_H
+#define _ASM_X86_HARDIRQ_H
+
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+typedef struct {
+	unsigned int __softirq_pending;
+	unsigned int __nmi_count;	/* arch dependent */
+	unsigned int apic_timer_irqs;	/* arch dependent */
+	unsigned int irq0_irqs;
+	unsigned int irq_resched_count;
+	unsigned int irq_call_count;
+	unsigned int irq_tlb_count;
+	unsigned int irq_thermal_count;
+	unsigned int irq_spurious_count;
+	unsigned int irq_threshold_count;
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
+
+/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
+#define MAX_HARDIRQS_PER_CPU NR_VECTORS
+
+#define __ARCH_IRQ_STAT
+
+#define inc_irq_stat(member)	percpu_add(irq_stat.member, 1)
+
+#define local_softirq_pending()	percpu_read(irq_stat.__softirq_pending)
+
+#define __ARCH_SET_SOFTIRQ_PENDING
+
+#define set_softirq_pending(x)	percpu_write(irq_stat.__softirq_pending, (x))
+#define or_softirq_pending(x)	percpu_or(irq_stat.__softirq_pending, (x))
+
+extern void ack_bad_irq(unsigned int irq);
 
 extern u64 arch_irq_stat_cpu(unsigned int cpu);
 #define arch_irq_stat_cpu	arch_irq_stat_cpu
 
 extern u64 arch_irq_stat(void);
 #define arch_irq_stat		arch_irq_stat
+
+#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
deleted file mode 100644
index e5a332c28c98..000000000000
--- a/arch/x86/include/asm/hardirq_32.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef _ASM_X86_HARDIRQ_32_H
-#define _ASM_X86_HARDIRQ_32_H
-
-#include <linux/threads.h>
-#include <linux/irq.h>
-
-typedef struct {
-	unsigned int __softirq_pending;
-	unsigned int __nmi_count;	/* arch dependent */
-	unsigned int apic_timer_irqs;	/* arch dependent */
-	unsigned int irq0_irqs;
-	unsigned int irq_resched_count;
-	unsigned int irq_call_count;
-	unsigned int irq_tlb_count;
-	unsigned int irq_thermal_count;
-	unsigned int irq_spurious_count;
-	unsigned int irq_threshold_count;
-} ____cacheline_aligned irq_cpustat_t;
-
-DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
-
-/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
-#define MAX_HARDIRQS_PER_CPU NR_VECTORS
-
-#define __ARCH_IRQ_STAT
-
-#define inc_irq_stat(member)	percpu_add(irq_stat.member, 1)
-
-#define local_softirq_pending()	percpu_read(irq_stat.__softirq_pending)
-
-#define __ARCH_SET_SOFTIRQ_PENDING
-
-#define set_softirq_pending(x)	percpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x)	percpu_or(irq_stat.__softirq_pending, (x))
-
-extern void ack_bad_irq(unsigned int irq);
-
-#endif /* _ASM_X86_HARDIRQ_32_H */
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
deleted file mode 100644
index 392e7d614579..000000000000
--- a/arch/x86/include/asm/hardirq_64.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef _ASM_X86_HARDIRQ_64_H
-#define _ASM_X86_HARDIRQ_64_H
-
-#include <linux/threads.h>
-#include <linux/irq.h>
-
-typedef struct {
-	unsigned int __softirq_pending;
-	unsigned int __nmi_count;	/* arch dependent */
-	unsigned int apic_timer_irqs;	/* arch dependent */
-	unsigned int irq0_irqs;
-	unsigned int irq_resched_count;
-	unsigned int irq_call_count;
-	unsigned int irq_tlb_count;
-	unsigned int irq_thermal_count;
-	unsigned int irq_spurious_count;
-	unsigned int irq_threshold_count;
-} ____cacheline_aligned irq_cpustat_t;
-
-DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
-
-/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
-#define MAX_HARDIRQS_PER_CPU NR_VECTORS
-
-#define __ARCH_IRQ_STAT
-
-#define inc_irq_stat(member)	percpu_add(irq_stat.member, 1)
-
-#define local_softirq_pending()	percpu_read(irq_stat.__softirq_pending)
-
-#define __ARCH_SET_SOFTIRQ_PENDING
-
-#define set_softirq_pending(x)	percpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x)	percpu_or(irq_stat.__softirq_pending, (x))
-
-extern void ack_bad_irq(unsigned int irq);
-
-#endif /* _ASM_X86_HARDIRQ_64_H */
-- 
cgit v1.2.3


From 2de3a5f7956eb81447feea3aec68193ddd8534bb Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Fri, 23 Jan 2009 11:03:32 +0900
Subject: x86: make irq_cpustat_t fields conditional

Impact: shrink size of irq_cpustat_t when possible

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/hardirq.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index f4a95f20f8ec..176f058e7159 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -7,14 +7,22 @@
 typedef struct {
 	unsigned int __softirq_pending;
 	unsigned int __nmi_count;	/* arch dependent */
-	unsigned int apic_timer_irqs;	/* arch dependent */
 	unsigned int irq0_irqs;
+#ifdef CONFIG_X86_LOCAL_APIC
+	unsigned int apic_timer_irqs;	/* arch dependent */
+	unsigned int irq_spurious_count;
+#endif
+#ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
 	unsigned int irq_tlb_count;
+#endif
+#ifdef CONFIG_X86_MCE
 	unsigned int irq_thermal_count;
-	unsigned int irq_spurious_count;
+# ifdef CONFIG_X86_64
 	unsigned int irq_threshold_count;
+# endif
+#endif
 } ____cacheline_aligned irq_cpustat_t;
 
 DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
-- 
cgit v1.2.3


From 6470aff619fbb9dff8dfe8afa5033084cd55ca20 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Tue, 27 Jan 2009 12:56:47 +0900
Subject: x86: move 64-bit NUMA code

Impact: Code movement, no functional change.

Move the 64-bit NUMA code from setup_percpu.c to numa_64.c

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/topology.h |   6 +
 arch/x86/kernel/setup_percpu.c  | 237 +---------------------------------------
 arch/x86/mm/numa_64.c           | 217 ++++++++++++++++++++++++++++++++++++
 3 files changed, 228 insertions(+), 232 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 10022ed3a4b6..77cfb2cfb386 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -74,6 +74,8 @@ static inline const struct cpumask *cpumask_of_node(int node)
 	return &node_to_cpumask_map[node];
 }
 
+static inline void setup_node_to_cpumask_map(void) { }
+
 #else /* CONFIG_X86_64 */
 
 /* Mappings between node number and cpus on that node. */
@@ -120,6 +122,8 @@ static inline cpumask_t node_to_cpumask(int node)
 
 #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
 
+extern void setup_node_to_cpumask_map(void);
+
 /*
  * Replace default node_to_cpumask_ptr with optimized version
  * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
@@ -218,6 +222,8 @@ static inline int node_to_first_cpu(int node)
 	return first_cpu(cpu_online_map);
 }
 
+static inline void setup_node_to_cpumask_map(void) { }
+
 /*
  * Replace default node_to_cpumask_ptr with optimized version
  * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index d0b1476490a7..cb6d622520be 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -51,32 +51,6 @@ DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
-#define	X86_64_NUMA	1	/* (used later) */
-DEFINE_PER_CPU(int, node_number) = 0;
-EXPORT_PER_CPU_SYMBOL(node_number);
-
-/*
- * Map cpu index to node index
- */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-
-/*
- * Which logical CPUs are on which nodes
- */
-cpumask_t *node_to_cpumask_map;
-EXPORT_SYMBOL(node_to_cpumask_map);
-
-/*
- * Setup node_to_cpumask_map
- */
-static void __init setup_node_to_cpumask_map(void);
-
-#else
-static inline void setup_node_to_cpumask_map(void) { }
-#endif
-
 #ifdef CONFIG_X86_64
 
 /* correctly size the local cpu masks */
@@ -163,13 +137,13 @@ void __init setup_per_cpu_areas(void)
 				early_per_cpu_map(x86_cpu_to_apicid, cpu);
 		per_cpu(x86_bios_cpu_apicid, cpu) =
 				early_per_cpu_map(x86_bios_cpu_apicid, cpu);
-#ifdef X86_64_NUMA
-		per_cpu(x86_cpu_to_node_map, cpu) =
-				early_per_cpu_map(x86_cpu_to_node_map, cpu);
-#endif
 #ifdef CONFIG_X86_64
 		per_cpu(irq_stack_ptr, cpu) =
 			per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
+#ifdef CONFIG_NUMA
+		per_cpu(x86_cpu_to_node_map, cpu) =
+				early_per_cpu_map(x86_cpu_to_node_map, cpu);
+#endif
 		/*
 		 * Up to this point, CPU0 has been using .data.init
 		 * area.  Reload %gs offset for CPU0.
@@ -184,7 +158,7 @@ void __init setup_per_cpu_areas(void)
 	/* indicate the early static arrays will soon be gone */
 	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
 	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
-#ifdef X86_64_NUMA
+#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
 	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 
@@ -197,204 +171,3 @@ void __init setup_per_cpu_areas(void)
 
 #endif
 
-#ifdef X86_64_NUMA
-
-/*
- * Allocate node_to_cpumask_map based on number of available nodes
- * Requires node_possible_map to be valid.
- *
- * Note: node_to_cpumask() is not valid until after this is done.
- * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
- */
-static void __init setup_node_to_cpumask_map(void)
-{
-	unsigned int node, num = 0;
-	cpumask_t *map;
-
-	/* setup nr_node_ids if not done yet */
-	if (nr_node_ids == MAX_NUMNODES) {
-		for_each_node_mask(node, node_possible_map)
-			num = node;
-		nr_node_ids = num + 1;
-	}
-
-	/* allocate the map */
-	map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
-	DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
-
-	pr_debug("Node to cpumask map at %p for %d nodes\n",
-		 map, nr_node_ids);
-
-	/* node_to_cpumask() will now work */
-	node_to_cpumask_map = map;
-}
-
-void __cpuinit numa_set_node(int cpu, int node)
-{
-	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
-
-	/* early setting, no percpu area yet */
-	if (cpu_to_node_map) {
-		cpu_to_node_map[cpu] = node;
-		return;
-	}
-
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-	if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
-		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
-		dump_stack();
-		return;
-	}
-#endif
-	per_cpu(x86_cpu_to_node_map, cpu) = node;
-
-	if (node != NUMA_NO_NODE)
-		per_cpu(node_number, cpu) = node;
-}
-
-void __cpuinit numa_clear_node(int cpu)
-{
-	numa_set_node(cpu, NUMA_NO_NODE);
-}
-
-#ifndef CONFIG_DEBUG_PER_CPU_MAPS
-
-void __cpuinit numa_add_cpu(int cpu)
-{
-	cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-void __cpuinit numa_remove_cpu(int cpu)
-{
-	cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-#else /* CONFIG_DEBUG_PER_CPU_MAPS */
-
-/*
- * --------- debug versions of the numa functions ---------
- */
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
-{
-	int node = early_cpu_to_node(cpu);
-	cpumask_t *mask;
-	char buf[64];
-
-	if (node_to_cpumask_map == NULL) {
-		printk(KERN_ERR "node_to_cpumask_map NULL\n");
-		dump_stack();
-		return;
-	}
-
-	mask = &node_to_cpumask_map[node];
-	if (enable)
-		cpu_set(cpu, *mask);
-	else
-		cpu_clear(cpu, *mask);
-
-	cpulist_scnprintf(buf, sizeof(buf), mask);
-	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
-		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
-}
-
-void __cpuinit numa_add_cpu(int cpu)
-{
-	numa_set_cpumask(cpu, 1);
-}
-
-void __cpuinit numa_remove_cpu(int cpu)
-{
-	numa_set_cpumask(cpu, 0);
-}
-
-int cpu_to_node(int cpu)
-{
-	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
-		printk(KERN_WARNING
-			"cpu_to_node(%d): usage too early!\n", cpu);
-		dump_stack();
-		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-	}
-	return per_cpu(x86_cpu_to_node_map, cpu);
-}
-EXPORT_SYMBOL(cpu_to_node);
-
-/*
- * Same function as cpu_to_node() but used if called before the
- * per_cpu areas are setup.
- */
-int early_cpu_to_node(int cpu)
-{
-	if (early_per_cpu_ptr(x86_cpu_to_node_map))
-		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-
-	if (!per_cpu_offset(cpu)) {
-		printk(KERN_WARNING
-			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
-		dump_stack();
-		return NUMA_NO_NODE;
-	}
-	return per_cpu(x86_cpu_to_node_map, cpu);
-}
-
-
-/* empty cpumask */
-static const cpumask_t cpu_mask_none;
-
-/*
- * Returns a pointer to the bitmask of CPUs on Node 'node'.
- */
-const cpumask_t *cpumask_of_node(int node)
-{
-	if (node_to_cpumask_map == NULL) {
-		printk(KERN_WARNING
-			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
-			node);
-		dump_stack();
-		return (const cpumask_t *)&cpu_online_map;
-	}
-	if (node >= nr_node_ids) {
-		printk(KERN_WARNING
-			"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
-			node, nr_node_ids);
-		dump_stack();
-		return &cpu_mask_none;
-	}
-	return &node_to_cpumask_map[node];
-}
-EXPORT_SYMBOL(cpumask_of_node);
-
-/*
- * Returns a bitmask of CPUs on Node 'node'.
- *
- * Side note: this function creates the returned cpumask on the stack
- * so with a high NR_CPUS count, excessive stack space is used.  The
- * node_to_cpumask_ptr function should be used whenever possible.
- */
-cpumask_t node_to_cpumask(int node)
-{
-	if (node_to_cpumask_map == NULL) {
-		printk(KERN_WARNING
-			"node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
-		dump_stack();
-		return cpu_online_map;
-	}
-	if (node >= nr_node_ids) {
-		printk(KERN_WARNING
-			"node_to_cpumask(%d): node > nr_node_ids(%d)\n",
-			node, nr_node_ids);
-		dump_stack();
-		return cpu_mask_none;
-	}
-	return node_to_cpumask_map[node];
-}
-EXPORT_SYMBOL(node_to_cpumask);
-
-/*
- * --------- end of debug versions of the numa functions ---------
- */
-
-#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
-
-#endif /* X86_64_NUMA */
-
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 71a14f89f89e..08d140fbc31b 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -20,6 +20,12 @@
 #include <asm/acpi.h>
 #include <asm/k8.h>
 
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+# define DBG(x...) printk(KERN_DEBUG x)
+#else
+# define DBG(x...)
+#endif
+
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
@@ -33,6 +39,21 @@ int numa_off __initdata;
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
 
+DEFINE_PER_CPU(int, node_number) = 0;
+EXPORT_PER_CPU_SYMBOL(node_number);
+
+/*
+ * Map cpu index to node index
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+/*
+ * Which logical CPUs are on which nodes
+ */
+cpumask_t *node_to_cpumask_map;
+EXPORT_SYMBOL(node_to_cpumask_map);
+
 /*
  * Given a shift value, try to populate memnodemap[]
  * Returns :
@@ -640,3 +661,199 @@ void __init init_cpu_to_node(void)
 #endif
 
 
+/*
+ * Allocate node_to_cpumask_map based on number of available nodes
+ * Requires node_possible_map to be valid.
+ *
+ * Note: node_to_cpumask() is not valid until after this is done.
+ * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
+ */
+void __init setup_node_to_cpumask_map(void)
+{
+	unsigned int node, num = 0;
+	cpumask_t *map;
+
+	/* setup nr_node_ids if not done yet */
+	if (nr_node_ids == MAX_NUMNODES) {
+		for_each_node_mask(node, node_possible_map)
+			num = node;
+		nr_node_ids = num + 1;
+	}
+
+	/* allocate the map */
+	map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
+	DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
+
+	pr_debug("Node to cpumask map at %p for %d nodes\n",
+		 map, nr_node_ids);
+
+	/* node_to_cpumask() will now work */
+	node_to_cpumask_map = map;
+}
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+	/* early setting, no percpu area yet */
+	if (cpu_to_node_map) {
+		cpu_to_node_map[cpu] = node;
+		return;
+	}
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+	if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
+		printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
+		dump_stack();
+		return;
+	}
+#endif
+	per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+	if (node != NUMA_NO_NODE)
+		per_cpu(node_number, cpu) = node;
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+	numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+	cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+#else /* CONFIG_DEBUG_PER_CPU_MAPS */
+
+/*
+ * --------- debug versions of the numa functions ---------
+ */
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+	int node = early_cpu_to_node(cpu);
+	cpumask_t *mask;
+	char buf[64];
+
+	if (node_to_cpumask_map == NULL) {
+		printk(KERN_ERR "node_to_cpumask_map NULL\n");
+		dump_stack();
+		return;
+	}
+
+	mask = &node_to_cpumask_map[node];
+	if (enable)
+		cpu_set(cpu, *mask);
+	else
+		cpu_clear(cpu, *mask);
+
+	cpulist_scnprintf(buf, sizeof(buf), mask);
+	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+		enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 0);
+}
+
+int cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+		printk(KERN_WARNING
+			"cpu_to_node(%d): usage too early!\n", cpu);
+		dump_stack();
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map))
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+	if (!per_cpu_offset(cpu)) {
+		printk(KERN_WARNING
+			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+		dump_stack();
+		return NUMA_NO_NODE;
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
+
+/* empty cpumask */
+static const cpumask_t cpu_mask_none;
+
+/*
+ * Returns a pointer to the bitmask of CPUs on Node 'node'.
+ */
+const cpumask_t *cpumask_of_node(int node)
+{
+	if (node_to_cpumask_map == NULL) {
+		printk(KERN_WARNING
+			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
+			node);
+		dump_stack();
+		return (const cpumask_t *)&cpu_online_map;
+	}
+	if (node >= nr_node_ids) {
+		printk(KERN_WARNING
+			"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
+			node, nr_node_ids);
+		dump_stack();
+		return &cpu_mask_none;
+	}
+	return &node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(cpumask_of_node);
+
+/*
+ * Returns a bitmask of CPUs on Node 'node'.
+ *
+ * Side note: this function creates the returned cpumask on the stack
+ * so with a high NR_CPUS count, excessive stack space is used.  The
+ * node_to_cpumask_ptr function should be used whenever possible.
+ */
+cpumask_t node_to_cpumask(int node)
+{
+	if (node_to_cpumask_map == NULL) {
+		printk(KERN_WARNING
+			"node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
+		dump_stack();
+		return cpu_online_map;
+	}
+	if (node >= nr_node_ids) {
+		printk(KERN_WARNING
+			"node_to_cpumask(%d): node > nr_node_ids(%d)\n",
+			node, nr_node_ids);
+		dump_stack();
+		return cpu_mask_none;
+	}
+	return node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(node_to_cpumask);
+
+/*
+ * --------- end of debug versions of the numa functions ---------
+ */
+
+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
-- 
cgit v1.2.3


From 2f2f52bad72f5e1ca5d1b9ad00a7b57a8cbd9159 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Tue, 27 Jan 2009 12:56:47 +0900
Subject: x86: move setup_cpu_local_masks()

Impact: Code movement, no functional change.

Move setup_cpu_local_masks() to kernel/cpu/common.c, where the
masks are defined.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/cpumask.h |  4 ++++
 arch/x86/kernel/cpu/common.c   |  9 +++++++++
 arch/x86/kernel/setup_percpu.c | 19 -------------------
 3 files changed, 13 insertions(+), 19 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 26c6dad90479..a7f3c75f8ad7 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -10,6 +10,8 @@ extern cpumask_var_t cpu_callout_mask;
 extern cpumask_var_t cpu_initialized_mask;
 extern cpumask_var_t cpu_sibling_setup_mask;
 
+extern void setup_cpu_local_masks(void);
+
 #else /* CONFIG_X86_32 */
 
 extern cpumask_t cpu_callin_map;
@@ -22,6 +24,8 @@ extern cpumask_t cpu_sibling_setup_map;
 #define cpu_initialized_mask	((struct cpumask *)&cpu_initialized)
 #define cpu_sibling_setup_mask	((struct cpumask *)&cpu_sibling_setup_map)
 
+static inline void setup_cpu_local_masks(void) { }
+
 #endif /* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 99904f288d6a..67e30c8a282c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -52,6 +52,15 @@ cpumask_var_t cpu_initialized_mask;
 /* representing cpus for which sibling maps can be computed */
 cpumask_var_t cpu_sibling_setup_mask;
 
+/* correctly size the local cpu masks */
+void setup_cpu_local_masks(void)
+{
+	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
+	alloc_bootmem_cpumask_var(&cpu_callin_mask);
+	alloc_bootmem_cpumask_var(&cpu_callout_mask);
+	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
 #else /* CONFIG_X86_32 */
 
 cpumask_t cpu_callin_map;
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index cb6d622520be..7bebdba8eb89 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -51,25 +51,6 @@ DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 
-#ifdef CONFIG_X86_64
-
-/* correctly size the local cpu masks */
-static void setup_cpu_local_masks(void)
-{
-	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
-	alloc_bootmem_cpumask_var(&cpu_callin_mask);
-	alloc_bootmem_cpumask_var(&cpu_callout_mask);
-	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
-}
-
-#else /* CONFIG_X86_32 */
-
-static inline void setup_cpu_local_masks(void)
-{
-}
-
-#endif /* CONFIG_X86_32 */
-
 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
 
 #ifdef CONFIG_X86_64
-- 
cgit v1.2.3


From b2d2f4312b117a6cc647c8521e2643a88771f757 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Tue, 27 Jan 2009 12:56:48 +0900
Subject: x86: initialize per-cpu GDT segment in per-cpu setup

Impact: cleanup

Rename init_gdt() to setup_percpu_segment(), and move it to
setup_percpu.c.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/processor.h    |  1 -
 arch/x86/kernel/Makefile            |  3 +--
 arch/x86/kernel/setup_percpu.c      | 14 ++++++++++++++
 arch/x86/kernel/smpboot.c           |  4 ----
 arch/x86/kernel/smpcommon.c         | 25 -------------------------
 arch/x86/mach-voyager/voyager_smp.c |  2 --
 arch/x86/xen/smp.c                  |  1 -
 7 files changed, 15 insertions(+), 35 deletions(-)
 delete mode 100644 arch/x86/kernel/smpcommon.c

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 48676b943b92..32c30b02b51f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -778,7 +778,6 @@ extern struct desc_ptr		early_gdt_descr;
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(void);
 extern void cpu_init(void);
-extern void init_gdt(int cpu);
 
 static inline unsigned long get_debugctlmsr(void)
 {
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 73de055c29c2..37fa30bada17 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -60,8 +60,7 @@ obj-$(CONFIG_APM)		+= apm.o
 obj-$(CONFIG_X86_SMP)		+= smp.o
 obj-$(CONFIG_X86_SMP)		+= smpboot.o tsc_sync.o ipi.o
 obj-$(CONFIG_SMP)		+= setup_percpu.o
-obj-$(CONFIG_X86_32_SMP)	+= smpcommon.o
-obj-$(CONFIG_X86_64_SMP)	+= tsc_sync.o smpcommon.o
+obj-$(CONFIG_X86_64_SMP)	+= tsc_sync.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 599dc1cc1da8..bcca3a7b3748 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -40,6 +40,19 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
 };
 EXPORT_SYMBOL(__per_cpu_offset);
 
+static inline void setup_percpu_segment(int cpu)
+{
+#ifdef CONFIG_X86_32
+	struct desc_struct gdt;
+
+	pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
+			0x2 | DESCTYPE_S, 0x8);
+	gdt.s = 1;
+	write_gdt_entry(get_cpu_gdt_table(cpu),
+			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
+#endif
+}
+
 /*
  * Great future plan:
  * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
@@ -81,6 +94,7 @@ void __init setup_per_cpu_areas(void)
 		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
 		per_cpu(cpu_number, cpu) = cpu;
+		setup_percpu_segment(cpu);
 		/*
 		 * Copy data used in early init routines from the initial arrays to the
 		 * per cpu data areas.  These arrays then become expendable and the
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index def770b57b5a..f9dbcff43546 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -793,7 +793,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 do_rest:
 	per_cpu(current_task, cpu) = c_idle.idle;
 #ifdef CONFIG_X86_32
-	init_gdt(cpu);
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
 #else
@@ -1186,9 +1185,6 @@ out:
 void __init native_smp_prepare_boot_cpu(void)
 {
 	int me = smp_processor_id();
-#ifdef CONFIG_X86_32
-	init_gdt(me);
-#endif
 	switch_to_new_gdt();
 	/* already set me in cpu_online_mask in boot_cpu_init() */
 	cpumask_set_cpu(me, cpu_callout_mask);
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
deleted file mode 100644
index 5ec29a1a8465..000000000000
--- a/arch/x86/kernel/smpcommon.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * SMP stuff which is common to all sub-architectures.
- */
-#include <linux/module.h>
-#include <asm/smp.h>
-#include <asm/sections.h>
-
-#ifdef CONFIG_X86_32
-/*
- * Initialize the CPU's GDT.  This is either the boot CPU doing itself
- * (still using the master per-cpu area), or a CPU doing it for a
- * secondary which will soon come up.
- */
-__cpuinit void init_gdt(int cpu)
-{
-	struct desc_struct gdt;
-
-	pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
-			0x2 | DESCTYPE_S, 0x8);
-	gdt.s = 1;
-
-	write_gdt_entry(get_cpu_gdt_table(cpu),
-			GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
-}
-#endif
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index dd82f2052f34..331cd6d56483 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -530,7 +530,6 @@ static void __init do_boot_cpu(__u8 cpu)
 	/* init_tasks (in sched.c) is indexed logically */
 	stack_start.sp = (void *)idle->thread.sp;
 
-	init_gdt(cpu);
 	per_cpu(current_task, cpu) = idle;
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	irq_ctx_init(cpu);
@@ -1747,7 +1746,6 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
 
 static void __cpuinit voyager_smp_prepare_boot_cpu(void)
 {
-	init_gdt(smp_processor_id());
 	switch_to_new_gdt();
 
 	cpu_set(smp_processor_id(), cpu_online_map);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 72c2eb9b64cd..7735e3dd359c 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -281,7 +281,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 
 	per_cpu(current_task, cpu) = idle;
 #ifdef CONFIG_X86_32
-	init_gdt(cpu);
 	irq_ctx_init(cpu);
 #else
 	clear_tsk_thread_flag(idle, TIF_FORK);
-- 
cgit v1.2.3


From 1825b8edc2034c012ae48f797d74efd1bd9d4f72 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Tue, 27 Jan 2009 12:56:48 +0900
Subject: x86: remove extra barriers from load_gs_base()

Impact: optimization

mb() generates an mfence instruction, which is not needed here.  Only
a compiler barrier is needed, and that is handled by the memory clobber
in the wrmsrl function.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/processor.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 32c30b02b51f..794234eba317 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -397,10 +397,7 @@ DECLARE_PER_CPU(char *, irq_stack_ptr);
 
 static inline void load_gs_base(int cpu)
 {
-	/* Memory clobbers used to order pda/percpu accesses */
-	mb();
 	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
-	mb();
 }
 #endif
 
-- 
cgit v1.2.3


From 2697fbd5faf19c84c17441b1752bdcbdcfd1248c Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Tue, 27 Jan 2009 12:56:48 +0900
Subject: x86: load new GDT after setting up boot cpu per-cpu area

Impact: sync 32 and 64-bit code

Merge load_gs_base() into switch_to_new_gdt().  Load the GDT and
per-cpu state for the boot cpu when its new area is set up.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/processor.h |  5 -----
 arch/x86/kernel/cpu/common.c     | 15 +++++++++------
 arch/x86/kernel/setup_percpu.c   |  6 +++---
 3 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 794234eba317..befa20b4a68c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -394,11 +394,6 @@ union irq_stack_union {
 
 DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
 DECLARE_PER_CPU(char *, irq_stack_ptr);
-
-static inline void load_gs_base(int cpu)
-{
-	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
-}
 #endif
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 67e30c8a282c..0c766b80d915 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -258,12 +258,17 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
 void switch_to_new_gdt(void)
 {
 	struct desc_ptr gdt_descr;
+	int cpu = smp_processor_id();
 
-	gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+	gdt_descr.address = (long)get_cpu_gdt_table(cpu);
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
+	/* Reload the per-cpu base */
 #ifdef CONFIG_X86_32
-	asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
+	loadsegment(fs, __KERNEL_PERCPU);
+#else
+	loadsegment(gs, 0);
+	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
 #endif
 }
 
@@ -968,10 +973,6 @@ void __cpuinit cpu_init(void)
 	struct task_struct *me;
 	int i;
 
-	loadsegment(fs, 0);
-	loadsegment(gs, 0);
-	load_gs_base(cpu);
-
 #ifdef CONFIG_NUMA
 	if (cpu != 0 && percpu_read(node_number) == 0 &&
 	    cpu_to_node(cpu) != NUMA_NO_NODE)
@@ -993,6 +994,8 @@ void __cpuinit cpu_init(void)
 	 */
 
 	switch_to_new_gdt();
+	loadsegment(fs, 0);
+
 	load_idt((const struct desc_ptr *)&idt_descr);
 
 	memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index bcca3a7b3748..4caa78d7cb15 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -112,14 +112,14 @@ void __init setup_per_cpu_areas(void)
 #ifdef CONFIG_NUMA
 		per_cpu(x86_cpu_to_node_map, cpu) =
 				early_per_cpu_map(x86_cpu_to_node_map, cpu);
+#endif
 #endif
 		/*
 		 * Up to this point, the boot CPU has been using .data.init
-		 * area.  Reload %gs offset for the boot CPU.
+		 * area.  Reload any changed state for the boot CPU.
 		 */
 		if (cpu == boot_cpu_id)
-			load_gs_base(cpu);
-#endif
+			switch_to_new_gdt();
 
 		DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
 	}
-- 
cgit v1.2.3


From 41edafdb78feac1d1f8823846209975fde990633 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 28 Jan 2009 14:35:02 -0800
Subject: x86/pvops: add a paravirt_ident functions to allow special patching

Impact: Optimization

Several paravirt ops implementations simply return their arguments,
the most obvious being the make_pte/pte_val class of operations on
native.

On 32-bit, the identity function is literally a no-op, as the calling
convention uses the same registers for the first argument and return.
On 64-bit, it can be implemented with a single "mov".

This patch adds special identity functions for 32 and 64 bit argument,
and machinery to recognize them and replace them with either nops or a
mov as appropriate.

At the moment, the only users for the identity functions are the
pagetable entry conversion functions.

The result is a measureable improvement on pagetable-heavy benchmarks
(2-3%, reducing the pvops overhead from 5 to 2%).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/paravirt.h     |  5 +++
 arch/x86/kernel/paravirt.c          | 75 ++++++++++++++++++++++++++++++++-----
 arch/x86/kernel/paravirt_patch_32.c | 12 ++++++
 arch/x86/kernel/paravirt_patch_64.c | 15 ++++++++
 4 files changed, 98 insertions(+), 9 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 175778887090..961d10c12f16 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -388,6 +388,8 @@ extern struct pv_lock_ops pv_lock_ops;
 	asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
 
 unsigned paravirt_patch_nop(void);
+unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
+unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
 unsigned paravirt_patch_ignore(unsigned len);
 unsigned paravirt_patch_call(void *insnbuf,
 			     const void *target, u16 tgt_clobbers,
@@ -1371,6 +1373,9 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 }
 
 void _paravirt_nop(void);
+u32 _paravirt_ident_32(u32);
+u64 _paravirt_ident_64(u64);
+
 #define paravirt_nop	((void *)_paravirt_nop)
 
 void paravirt_use_bytelocks(void);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 202514be5923..dd25e2b1593b 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -44,6 +44,17 @@ void _paravirt_nop(void)
 {
 }
 
+/* identity function, which can be inlined */
+u32 _paravirt_ident_32(u32 x)
+{
+	return x;
+}
+
+u64 _paravirt_ident_64(u64 x)
+{
+	return x;
+}
+
 static void __init default_banner(void)
 {
 	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
@@ -138,9 +149,16 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 	if (opfunc == NULL)
 		/* If there's no function, patch it with a ud2a (BUG) */
 		ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
-	else if (opfunc == paravirt_nop)
+	else if (opfunc == _paravirt_nop)
 		/* If the operation is a nop, then nop the callsite */
 		ret = paravirt_patch_nop();
+
+	/* identity functions just return their single argument */
+	else if (opfunc == _paravirt_ident_32)
+		ret = paravirt_patch_ident_32(insnbuf, len);
+	else if (opfunc == _paravirt_ident_64)
+		ret = paravirt_patch_ident_64(insnbuf, len);
+
 	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
 		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
 		 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
@@ -373,6 +391,45 @@ struct pv_apic_ops pv_apic_ops = {
 #endif
 };
 
+typedef pte_t make_pte_t(pteval_t);
+typedef pmd_t make_pmd_t(pmdval_t);
+typedef pud_t make_pud_t(pudval_t);
+typedef pgd_t make_pgd_t(pgdval_t);
+
+typedef pteval_t pte_val_t(pte_t);
+typedef pmdval_t pmd_val_t(pmd_t);
+typedef pudval_t pud_val_t(pud_t);
+typedef pgdval_t pgd_val_t(pgd_t);
+
+
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
+/* 32-bit pagetable entries */
+#define paravirt_native_make_pte	(make_pte_t *)_paravirt_ident_32
+#define paravirt_native_pte_val		(pte_val_t *)_paravirt_ident_32
+
+#define paravirt_native_make_pmd	(make_pmd_t *)_paravirt_ident_32
+#define paravirt_native_pmd_val		(pmd_val_t *)_paravirt_ident_32
+
+#define paravirt_native_make_pud	(make_pud_t *)_paravirt_ident_32
+#define paravirt_native_pud_val		(pud_val_t *)_paravirt_ident_32
+
+#define paravirt_native_make_pgd	(make_pgd_t *)_paravirt_ident_32
+#define paravirt_native_pgd_val		(pgd_val_t *)_paravirt_ident_32
+#else
+/* 64-bit pagetable entries */
+#define paravirt_native_make_pte	(make_pte_t *)_paravirt_ident_64
+#define paravirt_native_pte_val		(pte_val_t *)_paravirt_ident_64
+
+#define paravirt_native_make_pmd	(make_pmd_t *)_paravirt_ident_64
+#define paravirt_native_pmd_val		(pmd_val_t *)_paravirt_ident_64
+
+#define paravirt_native_make_pud	(make_pud_t *)_paravirt_ident_64
+#define paravirt_native_pud_val		(pud_val_t *)_paravirt_ident_64
+
+#define paravirt_native_make_pgd	(make_pgd_t *)_paravirt_ident_64
+#define paravirt_native_pgd_val		(pgd_val_t *)_paravirt_ident_64
+#endif
+
 struct pv_mmu_ops pv_mmu_ops = {
 #ifndef CONFIG_X86_64
 	.pagetable_setup_start = native_pagetable_setup_start,
@@ -424,21 +481,21 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.pmd_clear = native_pmd_clear,
 #endif
 	.set_pud = native_set_pud,
-	.pmd_val = native_pmd_val,
-	.make_pmd = native_make_pmd,
+	.pmd_val = paravirt_native_pmd_val,
+	.make_pmd = paravirt_native_make_pmd,
 
 #if PAGETABLE_LEVELS == 4
-	.pud_val = native_pud_val,
-	.make_pud = native_make_pud,
+	.pud_val = paravirt_native_pud_val,
+	.make_pud = paravirt_native_make_pud,
 	.set_pgd = native_set_pgd,
 #endif
 #endif /* PAGETABLE_LEVELS >= 3 */
 
-	.pte_val = native_pte_val,
-	.pgd_val = native_pgd_val,
+	.pte_val = paravirt_native_pte_val,
+	.pgd_val = paravirt_native_pgd_val,
 
-	.make_pte = native_make_pte,
-	.make_pgd = native_make_pgd,
+	.make_pte = paravirt_native_make_pte,
+	.make_pgd = paravirt_native_make_pgd,
 
 	.dup_mmap = paravirt_nop,
 	.exit_mmap = paravirt_nop,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 9fe644f4861d..d9f32e6d6ab6 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
 DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
 
+unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
+{
+	/* arg in %eax, return in %eax */
+	return 0;
+}
+
+unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
+{
+	/* arg in %edx:%eax, return in %edx:%eax */
+	return 0;
+}
+
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		      unsigned long addr, unsigned len)
 {
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 061d01df9ae6..3f08f34f93eb 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
 DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
 DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 
+DEF_NATIVE(, mov32, "mov %edi, %eax");
+DEF_NATIVE(, mov64, "mov %rdi, %rax");
+
+unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
+{
+	return paravirt_patch_insns(insnbuf, len,
+				    start__mov32, end__mov32);
+}
+
+unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
+{
+	return paravirt_patch_insns(insnbuf, len,
+				    start__mov64, end__mov64);
+}
+
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		      unsigned long addr, unsigned len)
 {
-- 
cgit v1.2.3


From 9104a18dcdd8dfefdddca8ce44988563f13ed3c4 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 28 Jan 2009 14:35:04 -0800
Subject: x86/paravirt: selectively save/restore regs around pvops calls

Impact: Optimization

Each asm paravirt-ops call says what registers are available for
clobbering.  This patch makes use of this to selectively save/restore
registers around each pvops call.  In many cases this significantly
shrinks code size.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/paravirt.h | 100 ++++++++++++++++++++++++++--------------
 1 file changed, 65 insertions(+), 35 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 961d10c12f16..dcce961262bf 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -12,19 +12,29 @@
 #define CLBR_EAX  (1 << 0)
 #define CLBR_ECX  (1 << 1)
 #define CLBR_EDX  (1 << 2)
+#define CLBR_EDI  (1 << 3)
 
-#ifdef CONFIG_X86_64
-#define CLBR_RSI  (1 << 3)
-#define CLBR_RDI  (1 << 4)
+#ifdef CONFIG_X86_32
+/* CLBR_ANY should match all regs platform has. For i386, that's just it */
+#define CLBR_ANY  ((1 << 4) - 1)
+#else
+#define CLBR_RAX  CLBR_EAX
+#define CLBR_RCX  CLBR_ECX
+#define CLBR_RDX  CLBR_EDX
+#define CLBR_RDI  CLBR_EDI
+#define CLBR_RSI  (1 << 4)
 #define CLBR_R8   (1 << 5)
 #define CLBR_R9   (1 << 6)
 #define CLBR_R10  (1 << 7)
 #define CLBR_R11  (1 << 8)
 #define CLBR_ANY  ((1 << 9) - 1)
+
+#define CLBR_ARG_REGS	(CLBR_RDI | CLBR_RSI | CLBR_RDX | \
+			 CLBR_RCX | CLBR_R8 | CLBR_R9)
+#define CLBR_RET_REG	(CLBR_RAX | CLBR_RDX)
+#define CLBR_SCRATCH	(CLBR_R10 | CLBR_R11)
+
 #include <asm/desc_defs.h>
-#else
-/* CLBR_ANY should match all regs platform has. For i386, that's just it */
-#define CLBR_ANY  ((1 << 3) - 1)
 #endif /* X86_64 */
 
 #ifndef __ASSEMBLY__
@@ -1530,33 +1540,49 @@ static inline unsigned long __raw_local_irq_save(void)
 	.popsection
 
 
+#define COND_PUSH(set, mask, reg)			\
+	.if ((~set) & mask); push %reg; .endif
+#define COND_POP(set, mask, reg)			\
+	.if ((~set) & mask); pop %reg; .endif
+
 #ifdef CONFIG_X86_64
-#define PV_SAVE_REGS				\
-	push %rax;				\
-	push %rcx;				\
-	push %rdx;				\
-	push %rsi;				\
-	push %rdi;				\
-	push %r8;				\
-	push %r9;				\
-	push %r10;				\
-	push %r11
-#define PV_RESTORE_REGS				\
-	pop %r11;				\
-	pop %r10;				\
-	pop %r9;				\
-	pop %r8;				\
-	pop %rdi;				\
-	pop %rsi;				\
-	pop %rdx;				\
-	pop %rcx;				\
-	pop %rax
+
+#define PV_SAVE_REGS(set)			\
+	COND_PUSH(set, CLBR_RAX, rax);		\
+	COND_PUSH(set, CLBR_RCX, rcx);		\
+	COND_PUSH(set, CLBR_RDX, rdx);		\
+	COND_PUSH(set, CLBR_RSI, rsi);		\
+	COND_PUSH(set, CLBR_RDI, rdi);		\
+	COND_PUSH(set, CLBR_R8, r8);		\
+	COND_PUSH(set, CLBR_R9, r9);		\
+	COND_PUSH(set, CLBR_R10, r10);		\
+	COND_PUSH(set, CLBR_R11, r11)
+#define PV_RESTORE_REGS(set)			\
+	COND_POP(set, CLBR_R11, r11);		\
+	COND_POP(set, CLBR_R10, r10);		\
+	COND_POP(set, CLBR_R9, r9);		\
+	COND_POP(set, CLBR_R8, r8);		\
+	COND_POP(set, CLBR_RDI, rdi);		\
+	COND_POP(set, CLBR_RSI, rsi);		\
+	COND_POP(set, CLBR_RDX, rdx);		\
+	COND_POP(set, CLBR_RCX, rcx);		\
+	COND_POP(set, CLBR_RAX, rax)
+
 #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
 #define PARA_INDIRECT(addr)	*addr(%rip)
 #else
-#define PV_SAVE_REGS   pushl %eax; pushl %edi; pushl %ecx; pushl %edx
-#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
+#define PV_SAVE_REGS(set)			\
+	COND_PUSH(set, CLBR_EAX, eax);		\
+	COND_PUSH(set, CLBR_EDI, edi);		\
+	COND_PUSH(set, CLBR_ECX, ecx);		\
+	COND_PUSH(set, CLBR_EDX, edx)
+#define PV_RESTORE_REGS(set)			\
+	COND_POP(set, CLBR_EDX, edx);		\
+	COND_POP(set, CLBR_ECX, ecx);		\
+	COND_POP(set, CLBR_EDI, edi);		\
+	COND_POP(set, CLBR_EAX, eax)
+
 #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
 #define PARA_INDIRECT(addr)	*%cs:addr
@@ -1568,15 +1594,15 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #define DISABLE_INTERRUPTS(clobbers)					\
 	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
-		  PV_SAVE_REGS;						\
+		  PV_SAVE_REGS(clobbers);				\
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
-		  PV_RESTORE_REGS;)			\
+		  PV_RESTORE_REGS(clobbers);)
 
 #define ENABLE_INTERRUPTS(clobbers)					\
 	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
-		  PV_SAVE_REGS;						\
+		  PV_SAVE_REGS(clobbers);				\
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
-		  PV_RESTORE_REGS;)
+		  PV_RESTORE_REGS(clobbers);)
 
 #define USERGS_SYSRET32							\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),	\
@@ -1606,11 +1632,15 @@ static inline unsigned long __raw_local_irq_save(void)
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
 		  swapgs)
 
+/*
+ * Note: swapgs is very special, and in practise is either going to be
+ * implemented with a single "swapgs" instruction or something very
+ * special.  Either way, we don't need to save any registers for
+ * it.
+ */
 #define SWAPGS								\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
-		  PV_SAVE_REGS;						\
-		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);		\
-		  PV_RESTORE_REGS					\
+		  call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)		\
 		 )
 
 #define GET_CR2_INTO_RCX				\
-- 
cgit v1.2.3


From ecb93d1ccd0aac63f03be2db3cac3fa974716f4c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 28 Jan 2009 14:35:05 -0800
Subject: x86/paravirt: add register-saving thunks to reduce caller register
 pressure

Impact: Optimization

One of the problems with inserting a pile of C calls where previously
there were none is that the register pressure is greatly increased.
The C calling convention says that the caller must expect a certain
set of registers may be trashed by the callee, and that the callee can
use those registers without restriction.  This includes the function
argument registers, and several others.

This patch seeks to alleviate this pressure by introducing wrapper
thunks that will do the register saving/restoring, so that the
callsite doesn't need to worry about it, but the callee function can
be conventional compiler-generated code.  In many cases (particularly
performance-sensitive cases) the callee will be in assembler anyway,
and need not use the compiler's calling convention.

Standard calling convention is:
	 arguments	    return	scratch
x86-32	 eax edx ecx	    eax		?
x86-64	 rdi rsi rdx rcx    rax		r8 r9 r10 r11

The thunk preserves all argument and scratch registers.  The return
register is not preserved, and is available as a scratch register for
unwrapped callee code (and of course the return value).

Wrapped function pointers are themselves wrapped in a struct
paravirt_callee_save structure, in order to get some warning from the
compiler when functions with mismatched calling conventions are used.

The most common paravirt ops, both statically and dynamically, are
interrupt enable/disable/save/restore, so handle them first.  This is
particularly easy since their calls are handled specially anyway.

XXX Deal with VMI.  What's their calling convention?

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/paravirt.h | 126 +++++++++++++++++++++++++++++++---------
 arch/x86/kernel/paravirt.c      |   8 +--
 arch/x86/kernel/vsmp_64.c       |  12 ++--
 arch/x86/lguest/boot.c          |  13 +++--
 arch/x86/xen/enlighten.c        |   8 +--
 arch/x86/xen/irq.c              |  14 +++--
 6 files changed, 132 insertions(+), 49 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index dcce961262bf..f9107b88631b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -17,6 +17,10 @@
 #ifdef CONFIG_X86_32
 /* CLBR_ANY should match all regs platform has. For i386, that's just it */
 #define CLBR_ANY  ((1 << 4) - 1)
+
+#define CLBR_ARG_REGS	(CLBR_EAX | CLBR_EDX | CLBR_ECX)
+#define CLBR_RET_REG	(CLBR_EAX)
+#define CLBR_SCRATCH	(0)
 #else
 #define CLBR_RAX  CLBR_EAX
 #define CLBR_RCX  CLBR_ECX
@@ -27,16 +31,19 @@
 #define CLBR_R9   (1 << 6)
 #define CLBR_R10  (1 << 7)
 #define CLBR_R11  (1 << 8)
+
 #define CLBR_ANY  ((1 << 9) - 1)
 
 #define CLBR_ARG_REGS	(CLBR_RDI | CLBR_RSI | CLBR_RDX | \
 			 CLBR_RCX | CLBR_R8 | CLBR_R9)
-#define CLBR_RET_REG	(CLBR_RAX | CLBR_RDX)
+#define CLBR_RET_REG	(CLBR_RAX)
 #define CLBR_SCRATCH	(CLBR_R10 | CLBR_R11)
 
 #include <asm/desc_defs.h>
 #endif /* X86_64 */
 
+#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
+
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 #include <linux/cpumask.h>
@@ -50,6 +57,14 @@ struct tss_struct;
 struct mm_struct;
 struct desc_struct;
 
+/*
+ * Wrapper type for pointers to code which uses the non-standard
+ * calling convention.  See PV_CALL_SAVE_REGS_THUNK below.
+ */
+struct paravirt_callee_save {
+	void *func;
+};
+
 /* general info */
 struct pv_info {
 	unsigned int kernel_rpl;
@@ -199,11 +214,15 @@ struct pv_irq_ops {
 	 * expected to use X86_EFLAGS_IF; all other bits
 	 * returned from save_fl are undefined, and may be ignored by
 	 * restore_fl.
+	 *
+	 * NOTE: These functions callers expect the callee to preserve
+	 * more registers than the standard C calling convention.
 	 */
-	unsigned long (*save_fl)(void);
-	void (*restore_fl)(unsigned long);
-	void (*irq_disable)(void);
-	void (*irq_enable)(void);
+	struct paravirt_callee_save save_fl;
+	struct paravirt_callee_save restore_fl;
+	struct paravirt_callee_save irq_disable;
+	struct paravirt_callee_save irq_enable;
+
 	void (*safe_halt)(void);
 	void (*halt)(void);
 
@@ -1437,12 +1456,37 @@ extern struct paravirt_patch_site __parainstructions[],
 	__parainstructions_end[];
 
 #ifdef CONFIG_X86_32
-#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;"
-#define PV_RESTORE_REGS "popl %%edx; popl %%ecx"
+#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
+#define PV_RESTORE_REGS "popl %edx; popl %ecx;"
+
+/* save and restore all caller-save registers, except return value */
+#define PV_SAVE_ALL_CALLER_REGS PV_SAVE_REGS
+#define PV_RESTORE_ALL_CALLER_REGS PV_RESTORE_REGS
+
 #define PV_FLAGS_ARG "0"
 #define PV_EXTRA_CLOBBERS
 #define PV_VEXTRA_CLOBBERS
 #else
+/* save and restore all caller-save registers, except return value */
+#define PV_SAVE_ALL_CALLER_REGS						\
+	"push %rcx;"							\
+	"push %rdx;"							\
+	"push %rsi;"							\
+	"push %rdi;"							\
+	"push %r8;"							\
+	"push %r9;"							\
+	"push %r10;"							\
+	"push %r11;"
+#define PV_RESTORE_ALL_CALLER_REGS					\
+	"pop %r11;"							\
+	"pop %r10;"							\
+	"pop %r9;"							\
+	"pop %r8;"							\
+	"pop %rdi;"							\
+	"pop %rsi;"							\
+	"pop %rdx;"							\
+	"pop %rcx;"
+
 /* We save some registers, but all of them, that's too much. We clobber all
  * caller saved registers but the argument parameter */
 #define PV_SAVE_REGS "pushq %%rdi;"
@@ -1452,52 +1496,76 @@ extern struct paravirt_patch_site __parainstructions[],
 #define PV_FLAGS_ARG "D"
 #endif
 
+/*
+ * Generate a thunk around a function which saves all caller-save
+ * registers except for the return value.  This allows C functions to
+ * be called from assembler code where fewer than normal registers are
+ * available.  It may also help code generation around calls from C
+ * code if the common case doesn't use many registers.
+ *
+ * When a callee is wrapped in a thunk, the caller can assume that all
+ * arg regs and all scratch registers are preserved across the
+ * call. The return value in rax/eax will not be saved, even for void
+ * functions.
+ */
+#define PV_CALLEE_SAVE_REGS_THUNK(func)					\
+	extern typeof(func) __raw_callee_save_##func;			\
+	static void *__##func##__ __used = func;			\
+									\
+	asm(".pushsection .text;"					\
+	    "__raw_callee_save_" #func ": "				\
+	    PV_SAVE_ALL_CALLER_REGS					\
+	    "call " #func ";"						\
+	    PV_RESTORE_ALL_CALLER_REGS					\
+	    "ret;"							\
+	    ".popsection")
+
+/* Get a reference to a callee-save function */
+#define PV_CALLEE_SAVE(func)						\
+	((struct paravirt_callee_save) { __raw_callee_save_##func })
+
+/* Promise that "func" already uses the right calling convention */
+#define __PV_IS_CALLEE_SAVE(func)			\
+	((struct paravirt_callee_save) { func })
+
 static inline unsigned long __raw_local_save_flags(void)
 {
 	unsigned long f;
 
-	asm volatile(paravirt_alt(PV_SAVE_REGS
-				  PARAVIRT_CALL
-				  PV_RESTORE_REGS)
+	asm volatile(paravirt_alt(PARAVIRT_CALL)
 		     : "=a"(f)
 		     : paravirt_type(pv_irq_ops.save_fl),
 		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "cc" PV_VEXTRA_CLOBBERS);
+		     : "memory", "cc");
 	return f;
 }
 
 static inline void raw_local_irq_restore(unsigned long f)
 {
-	asm volatile(paravirt_alt(PV_SAVE_REGS
-				  PARAVIRT_CALL
-				  PV_RESTORE_REGS)
+	asm volatile(paravirt_alt(PARAVIRT_CALL)
 		     : "=a"(f)
 		     : PV_FLAGS_ARG(f),
 		       paravirt_type(pv_irq_ops.restore_fl),
 		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "cc" PV_EXTRA_CLOBBERS);
+		     : "memory", "cc");
 }
 
 static inline void raw_local_irq_disable(void)
 {
-	asm volatile(paravirt_alt(PV_SAVE_REGS
-				  PARAVIRT_CALL
-				  PV_RESTORE_REGS)
+	asm volatile(paravirt_alt(PARAVIRT_CALL)
 		     :
 		     : paravirt_type(pv_irq_ops.irq_disable),
 		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
+		     : "memory", "eax", "cc");
 }
 
 static inline void raw_local_irq_enable(void)
 {
-	asm volatile(paravirt_alt(PV_SAVE_REGS
-				  PARAVIRT_CALL
-				  PV_RESTORE_REGS)
+	asm volatile(paravirt_alt(PARAVIRT_CALL)
 		     :
 		     : paravirt_type(pv_irq_ops.irq_enable),
 		       paravirt_clobber(CLBR_EAX)
-		     : "memory", "eax", "cc" PV_EXTRA_CLOBBERS);
+		     : "memory", "eax", "cc");
 }
 
 static inline unsigned long __raw_local_irq_save(void)
@@ -1541,9 +1609,9 @@ static inline unsigned long __raw_local_irq_save(void)
 
 
 #define COND_PUSH(set, mask, reg)			\
-	.if ((~set) & mask); push %reg; .endif
+	.if ((~(set)) & mask); push %reg; .endif
 #define COND_POP(set, mask, reg)			\
-	.if ((~set) & mask); pop %reg; .endif
+	.if ((~(set)) & mask); pop %reg; .endif
 
 #ifdef CONFIG_X86_64
 
@@ -1594,15 +1662,15 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #define DISABLE_INTERRUPTS(clobbers)					\
 	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
-		  PV_SAVE_REGS(clobbers);				\
+		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);	\
-		  PV_RESTORE_REGS(clobbers);)
+		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 
 #define ENABLE_INTERRUPTS(clobbers)					\
 	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
-		  PV_SAVE_REGS(clobbers);				\
+		  PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);		\
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
-		  PV_RESTORE_REGS(clobbers);)
+		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 
 #define USERGS_SYSRET32							\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),	\
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index dd25e2b1593b..8adb6b5aa421 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -310,10 +310,10 @@ struct pv_time_ops pv_time_ops = {
 
 struct pv_irq_ops pv_irq_ops = {
 	.init_IRQ = native_init_IRQ,
-	.save_fl = native_save_fl,
-	.restore_fl = native_restore_fl,
-	.irq_disable = native_irq_disable,
-	.irq_enable = native_irq_enable,
+	.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
+	.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
+	.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
+	.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
 	.safe_halt = native_safe_halt,
 	.halt = native_halt,
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index a688f3bfaec2..c609205df594 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void)
 		flags &= ~X86_EFLAGS_IF;
 	return flags;
 }
+PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl);
 
 static void vsmp_restore_fl(unsigned long flags)
 {
@@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned long flags)
 		flags |= X86_EFLAGS_AC;
 	native_restore_fl(flags);
 }
+PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
 
 static void vsmp_irq_disable(void)
 {
@@ -53,6 +55,7 @@ static void vsmp_irq_disable(void)
 
 	native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
 }
+PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
 
 static void vsmp_irq_enable(void)
 {
@@ -60,6 +63,7 @@ static void vsmp_irq_enable(void)
 
 	native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
 }
+PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
 
 static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
 				  unsigned long addr, unsigned len)
@@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void)
 	       cap, ctl);
 	if (cap & ctl & (1 << 4)) {
 		/* Setup irq ops and turn on vSMP  IRQ fastpath handling */
-		pv_irq_ops.irq_disable = vsmp_irq_disable;
-		pv_irq_ops.irq_enable  = vsmp_irq_enable;
-		pv_irq_ops.save_fl  = vsmp_save_fl;
-		pv_irq_ops.restore_fl  = vsmp_restore_fl;
+		pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
+		pv_irq_ops.irq_enable  = PV_CALLEE_SAVE(vsmp_irq_enable);
+		pv_irq_ops.save_fl  = PV_CALLEE_SAVE(vsmp_save_fl);
+		pv_irq_ops.restore_fl  = PV_CALLEE_SAVE(vsmp_restore_fl);
 		pv_init_ops.patch = vsmp_patch;
 
 		ctl &= ~(1 << 4);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 92f1c6f3e19d..19e33b6cd593 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -173,24 +173,29 @@ static unsigned long save_fl(void)
 {
 	return lguest_data.irq_enabled;
 }
+PV_CALLEE_SAVE_REGS_THUNK(save_fl);
 
 /* restore_flags() just sets the flags back to the value given. */
 static void restore_fl(unsigned long flags)
 {
 	lguest_data.irq_enabled = flags;
 }
+PV_CALLEE_SAVE_REGS_THUNK(restore_fl);
 
 /* Interrupts go off... */
 static void irq_disable(void)
 {
 	lguest_data.irq_enabled = 0;
 }
+PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
 
 /* Interrupts go on... */
 static void irq_enable(void)
 {
 	lguest_data.irq_enabled = X86_EFLAGS_IF;
 }
+PV_CALLEE_SAVE_REGS_THUNK(irq_enable);
+
 /*:*/
 /*M:003 Note that we don't check for outstanding interrupts when we re-enable
  * them (or when we unmask an interrupt).  This seems to work for the moment,
@@ -984,10 +989,10 @@ __init void lguest_init(void)
 
 	/* interrupt-related operations */
 	pv_irq_ops.init_IRQ = lguest_init_IRQ;
-	pv_irq_ops.save_fl = save_fl;
-	pv_irq_ops.restore_fl = restore_fl;
-	pv_irq_ops.irq_disable = irq_disable;
-	pv_irq_ops.irq_enable = irq_enable;
+	pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl);
+	pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl);
+	pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable);
+	pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable);
 	pv_irq_ops.safe_halt = lguest_safe_halt;
 
 	/* init-time operations */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0cd2a165f179..ff6d530ccc77 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -676,10 +676,10 @@ void xen_setup_vcpu_info_placement(void)
 	if (have_vcpu_info_placement) {
 		printk(KERN_INFO "Xen: using vcpu_info placement\n");
 
-		pv_irq_ops.save_fl = xen_save_fl_direct;
-		pv_irq_ops.restore_fl = xen_restore_fl_direct;
-		pv_irq_ops.irq_disable = xen_irq_disable_direct;
-		pv_irq_ops.irq_enable = xen_irq_enable_direct;
+		pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
+		pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
+		pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
+		pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
 		pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
 	}
 }
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 2e8271431e1a..5a070900ad35 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -50,6 +50,7 @@ static unsigned long xen_save_fl(void)
 	*/
 	return (-flags) & X86_EFLAGS_IF;
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl);
 
 static void xen_restore_fl(unsigned long flags)
 {
@@ -76,6 +77,7 @@ static void xen_restore_fl(unsigned long flags)
 			xen_force_evtchn_callback();
 	}
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
 
 static void xen_irq_disable(void)
 {
@@ -86,6 +88,7 @@ static void xen_irq_disable(void)
 	percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
 	preempt_enable_no_resched();
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
 
 static void xen_irq_enable(void)
 {
@@ -106,6 +109,7 @@ static void xen_irq_enable(void)
 	if (unlikely(vcpu->evtchn_upcall_pending))
 		xen_force_evtchn_callback();
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
 
 static void xen_safe_halt(void)
 {
@@ -124,10 +128,12 @@ static void xen_halt(void)
 
 static const struct pv_irq_ops xen_irq_ops __initdata = {
 	.init_IRQ = __xen_init_IRQ,
-	.save_fl = xen_save_fl,
-	.restore_fl = xen_restore_fl,
-	.irq_disable = xen_irq_disable,
-	.irq_enable = xen_irq_enable,
+
+	.save_fl = PV_CALLEE_SAVE(xen_save_fl),
+	.restore_fl = PV_CALLEE_SAVE(xen_restore_fl),
+	.irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
+	.irq_enable = PV_CALLEE_SAVE(xen_irq_enable),
+
 	.safe_halt = xen_safe_halt,
 	.halt = xen_halt,
 #ifdef CONFIG_X86_64
-- 
cgit v1.2.3


From 791bad9d28d405d9397ea0c370ffb7c7bdd2aa6e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 28 Jan 2009 14:35:06 -0800
Subject: x86/paravirt: implement PVOP_CALL macros for callee-save functions

Impact: Optimization

Functions with the callee save calling convention clobber many fewer
registers than the normal C calling convention.  Implement variants of
PVOP_V?CALL* accordingly.  This only bothers with functions up to 3
args, since functions with more args may as well use the normal
calling convention.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/paravirt.h | 134 +++++++++++++++++++++++++++++-----------
 1 file changed, 99 insertions(+), 35 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index f9107b88631b..beb10ecdbe67 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -510,25 +510,45 @@ int paravirt_disable_iospace(void);
  * makes sure the incoming and outgoing types are always correct.
  */
 #ifdef CONFIG_X86_32
-#define PVOP_VCALL_ARGS			unsigned long __eax, __edx, __ecx
+#define PVOP_VCALL_ARGS				\
+	unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx
 #define PVOP_CALL_ARGS			PVOP_VCALL_ARGS
+
+#define PVOP_CALL_ARG1(x)		"a" ((unsigned long)(x))
+#define PVOP_CALL_ARG2(x)		"d" ((unsigned long)(x))
+#define PVOP_CALL_ARG3(x)		"c" ((unsigned long)(x))
+
 #define PVOP_VCALL_CLOBBERS		"=a" (__eax), "=d" (__edx),	\
 					"=c" (__ecx)
 #define PVOP_CALL_CLOBBERS		PVOP_VCALL_CLOBBERS
+
+#define PVOP_VCALLEE_CLOBBERS		"=a" (__eax)
+#define PVOP_CALLEE_CLOBBERS		PVOP_VCALLEE_CLOBBERS
+
 #define EXTRA_CLOBBERS
 #define VEXTRA_CLOBBERS
-#else
-#define PVOP_VCALL_ARGS		unsigned long __edi, __esi, __edx, __ecx
+#else  /* CONFIG_X86_64 */
+#define PVOP_VCALL_ARGS					\
+	unsigned long __edi = __edi, __esi = __esi,	\
+		__edx = __edx, __ecx = __ecx
 #define PVOP_CALL_ARGS		PVOP_VCALL_ARGS, __eax
+
+#define PVOP_CALL_ARG1(x)		"D" ((unsigned long)(x))
+#define PVOP_CALL_ARG2(x)		"S" ((unsigned long)(x))
+#define PVOP_CALL_ARG3(x)		"d" ((unsigned long)(x))
+#define PVOP_CALL_ARG4(x)		"c" ((unsigned long)(x))
+
 #define PVOP_VCALL_CLOBBERS	"=D" (__edi),				\
 				"=S" (__esi), "=d" (__edx),		\
 				"=c" (__ecx)
-
 #define PVOP_CALL_CLOBBERS	PVOP_VCALL_CLOBBERS, "=a" (__eax)
 
+#define PVOP_VCALLEE_CLOBBERS	"=a" (__eax)
+#define PVOP_CALLEE_CLOBBERS	PVOP_VCALLEE_CLOBBERS
+
 #define EXTRA_CLOBBERS	 , "r8", "r9", "r10", "r11"
 #define VEXTRA_CLOBBERS	 , "rax", "r8", "r9", "r10", "r11"
-#endif
+#endif	/* CONFIG_X86_32 */
 
 #ifdef CONFIG_PARAVIRT_DEBUG
 #define PVOP_TEST_NULL(op)	BUG_ON(op == NULL)
@@ -536,10 +556,11 @@ int paravirt_disable_iospace(void);
 #define PVOP_TEST_NULL(op)	((void)op)
 #endif
 
-#define __PVOP_CALL(rettype, op, pre, post, ...)			\
+#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr,		\
+		      pre, post, ...)					\
 	({								\
 		rettype __ret;						\
-		PVOP_CALL_ARGS;					\
+		PVOP_CALL_ARGS;						\
 		PVOP_TEST_NULL(op);					\
 		/* This is 32-bit specific, but is okay in 64-bit */	\
 		/* since this condition will never hold */		\
@@ -547,70 +568,113 @@ int paravirt_disable_iospace(void);
 			asm volatile(pre				\
 				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
-				     : PVOP_CALL_CLOBBERS		\
+				     : call_clbr			\
 				     : paravirt_type(op),		\
-				       paravirt_clobber(CLBR_ANY),	\
+				       paravirt_clobber(clbr),		\
 				       ##__VA_ARGS__			\
-				     : "memory", "cc" EXTRA_CLOBBERS);	\
+				     : "memory", "cc" extra_clbr);	\
 			__ret = (rettype)((((u64)__edx) << 32) | __eax); \
 		} else {						\
 			asm volatile(pre				\
 				     paravirt_alt(PARAVIRT_CALL)	\
 				     post				\
-				     : PVOP_CALL_CLOBBERS		\
+				     : call_clbr			\
 				     : paravirt_type(op),		\
-				       paravirt_clobber(CLBR_ANY),	\
+				       paravirt_clobber(clbr),		\
 				       ##__VA_ARGS__			\
-				     : "memory", "cc" EXTRA_CLOBBERS);	\
+				     : "memory", "cc" extra_clbr);	\
 			__ret = (rettype)__eax;				\
 		}							\
 		__ret;							\
 	})
-#define __PVOP_VCALL(op, pre, post, ...)				\
+
+#define __PVOP_CALL(rettype, op, pre, post, ...)			\
+	____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS,	\
+		      EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
+
+#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...)			\
+	____PVOP_CALL(rettype, op.func, CLBR_RET_REG,			\
+		      PVOP_CALLEE_CLOBBERS, ,				\
+		      pre, post, ##__VA_ARGS__)
+
+
+#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...)	\
 	({								\
 		PVOP_VCALL_ARGS;					\
 		PVOP_TEST_NULL(op);					\
 		asm volatile(pre					\
 			     paravirt_alt(PARAVIRT_CALL)		\
 			     post					\
-			     : PVOP_VCALL_CLOBBERS			\
+			     : call_clbr				\
 			     : paravirt_type(op),			\
-			       paravirt_clobber(CLBR_ANY),		\
+			       paravirt_clobber(clbr),			\
 			       ##__VA_ARGS__				\
-			     : "memory", "cc" VEXTRA_CLOBBERS);		\
+			     : "memory", "cc" extra_clbr);		\
 	})
 
+#define __PVOP_VCALL(op, pre, post, ...)				\
+	____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS,		\
+		       VEXTRA_CLOBBERS,					\
+		       pre, post, ##__VA_ARGS__)
+
+#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...)			\
+	____PVOP_CALL(rettype, op.func, CLBR_RET_REG,			\
+		      PVOP_VCALLEE_CLOBBERS, ,				\
+		      pre, post, ##__VA_ARGS__)
+
+
+
 #define PVOP_CALL0(rettype, op)						\
 	__PVOP_CALL(rettype, op, "", "")
 #define PVOP_VCALL0(op)							\
 	__PVOP_VCALL(op, "", "")
 
+#define PVOP_CALLEE0(rettype, op)					\
+	__PVOP_CALLEESAVE(rettype, op, "", "")
+#define PVOP_VCALLEE0(op)						\
+	__PVOP_VCALLEESAVE(op, "", "")
+
+
 #define PVOP_CALL1(rettype, op, arg1)					\
-	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)))
+	__PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
 #define PVOP_VCALL1(op, arg1)						\
-	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)))
+	__PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
+
+#define PVOP_CALLEE1(rettype, op, arg1)					\
+	__PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+#define PVOP_VCALLEE1(op, arg1)						\
+	__PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
+
 
 #define PVOP_CALL2(rettype, op, arg1, arg2)				\
-	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), 	\
-	"1" ((unsigned long)(arg2)))
+	__PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1),		\
+		    PVOP_CALL_ARG2(arg2))
 #define PVOP_VCALL2(op, arg1, arg2)					\
-	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), 		\
-	"1" ((unsigned long)(arg2)))
+	__PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1),			\
+		     PVOP_CALL_ARG2(arg2))
+
+#define PVOP_CALLEE2(rettype, op, arg1, arg2)				\
+	__PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1),	\
+			  PVOP_CALL_ARG2(arg2))
+#define PVOP_VCALLEE2(op, arg1, arg2)					\
+	__PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1),		\
+			   PVOP_CALL_ARG2(arg2))
+
 
 #define PVOP_CALL3(rettype, op, arg1, arg2, arg3)			\
-	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),	\
-	"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
+	__PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1),		\
+		    PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
 #define PVOP_VCALL3(op, arg1, arg2, arg3)				\
-	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),		\
-	"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)))
+	__PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1),			\
+		     PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
 
 /* This is the only difference in x86_64. We can make it much simpler */
 #ifdef CONFIG_X86_32
 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
 	__PVOP_CALL(rettype, op,					\
 		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
-		    "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
-		    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
+		    PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),		\
+		    PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
 #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)				\
 	__PVOP_VCALL(op,						\
 		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
@@ -618,13 +682,13 @@ int paravirt_disable_iospace(void);
 		    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
 #else
 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
-	__PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)),	\
-	"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)),		\
-	"3"((unsigned long)(arg4)))
+	__PVOP_CALL(rettype, op, "", "",				\
+		    PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),		\
+		    PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
 #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)				\
-	__PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)),		\
-	"1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)),		\
-	"3"((unsigned long)(arg4)))
+	__PVOP_VCALL(op, "", "",					\
+		     PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),	\
+		     PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
 #endif
 
 static inline int paravirt_enabled(void)
-- 
cgit v1.2.3


From da5de7c22eb705be709a57e486e7475a6969b994 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 28 Jan 2009 14:35:07 -0800
Subject: x86/paravirt: use callee-saved convention for pte_val/make_pte/etc

Impact: Optimization

In the native case, pte_val, make_pte, etc are all just identity
functions, so there's no need to clobber a lot of registers over them.

(This changes the 32-bit callee-save calling convention to return both
EAX and EDX so functions can return 64-bit values.)

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/paravirt.h | 78 ++++++++++++++++++++---------------------
 arch/x86/kernel/paravirt.c      | 53 +++++++---------------------
 arch/x86/xen/mmu.c              | 24 ++++++++-----
 3 files changed, 67 insertions(+), 88 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index beb10ecdbe67..2d098b78bc10 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -19,7 +19,7 @@
 #define CLBR_ANY  ((1 << 4) - 1)
 
 #define CLBR_ARG_REGS	(CLBR_EAX | CLBR_EDX | CLBR_ECX)
-#define CLBR_RET_REG	(CLBR_EAX)
+#define CLBR_RET_REG	(CLBR_EAX | CLBR_EDX)
 #define CLBR_SCRATCH	(0)
 #else
 #define CLBR_RAX  CLBR_EAX
@@ -308,11 +308,11 @@ struct pv_mmu_ops {
 	void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
 					pte_t *ptep, pte_t pte);
 
-	pteval_t (*pte_val)(pte_t);
-	pte_t (*make_pte)(pteval_t pte);
+	struct paravirt_callee_save pte_val;
+	struct paravirt_callee_save make_pte;
 
-	pgdval_t (*pgd_val)(pgd_t);
-	pgd_t (*make_pgd)(pgdval_t pgd);
+	struct paravirt_callee_save pgd_val;
+	struct paravirt_callee_save make_pgd;
 
 #if PAGETABLE_LEVELS >= 3
 #ifdef CONFIG_X86_PAE
@@ -327,12 +327,12 @@ struct pv_mmu_ops {
 
 	void (*set_pud)(pud_t *pudp, pud_t pudval);
 
-	pmdval_t (*pmd_val)(pmd_t);
-	pmd_t (*make_pmd)(pmdval_t pmd);
+	struct paravirt_callee_save pmd_val;
+	struct paravirt_callee_save make_pmd;
 
 #if PAGETABLE_LEVELS == 4
-	pudval_t (*pud_val)(pud_t);
-	pud_t (*make_pud)(pudval_t pud);
+	struct paravirt_callee_save pud_val;
+	struct paravirt_callee_save make_pud;
 
 	void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
 #endif	/* PAGETABLE_LEVELS == 4 */
@@ -1155,13 +1155,13 @@ static inline pte_t __pte(pteval_t val)
 	pteval_t ret;
 
 	if (sizeof(pteval_t) > sizeof(long))
-		ret = PVOP_CALL2(pteval_t,
-				 pv_mmu_ops.make_pte,
-				 val, (u64)val >> 32);
+		ret = PVOP_CALLEE2(pteval_t,
+				   pv_mmu_ops.make_pte,
+				   val, (u64)val >> 32);
 	else
-		ret = PVOP_CALL1(pteval_t,
-				 pv_mmu_ops.make_pte,
-				 val);
+		ret = PVOP_CALLEE1(pteval_t,
+				   pv_mmu_ops.make_pte,
+				   val);
 
 	return (pte_t) { .pte = ret };
 }
@@ -1171,11 +1171,11 @@ static inline pteval_t pte_val(pte_t pte)
 	pteval_t ret;
 
 	if (sizeof(pteval_t) > sizeof(long))
-		ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val,
-				 pte.pte, (u64)pte.pte >> 32);
+		ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val,
+				   pte.pte, (u64)pte.pte >> 32);
 	else
-		ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val,
-				 pte.pte);
+		ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val,
+				   pte.pte);
 
 	return ret;
 }
@@ -1185,11 +1185,11 @@ static inline pgd_t __pgd(pgdval_t val)
 	pgdval_t ret;
 
 	if (sizeof(pgdval_t) > sizeof(long))
-		ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd,
-				 val, (u64)val >> 32);
+		ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd,
+				   val, (u64)val >> 32);
 	else
-		ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd,
-				 val);
+		ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd,
+				   val);
 
 	return (pgd_t) { ret };
 }
@@ -1199,11 +1199,11 @@ static inline pgdval_t pgd_val(pgd_t pgd)
 	pgdval_t ret;
 
 	if (sizeof(pgdval_t) > sizeof(long))
-		ret =  PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val,
-				  pgd.pgd, (u64)pgd.pgd >> 32);
+		ret =  PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val,
+				    pgd.pgd, (u64)pgd.pgd >> 32);
 	else
-		ret =  PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val,
-				  pgd.pgd);
+		ret =  PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val,
+				    pgd.pgd);
 
 	return ret;
 }
@@ -1267,11 +1267,11 @@ static inline pmd_t __pmd(pmdval_t val)
 	pmdval_t ret;
 
 	if (sizeof(pmdval_t) > sizeof(long))
-		ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd,
-				 val, (u64)val >> 32);
+		ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd,
+				   val, (u64)val >> 32);
 	else
-		ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd,
-				 val);
+		ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd,
+				   val);
 
 	return (pmd_t) { ret };
 }
@@ -1281,11 +1281,11 @@ static inline pmdval_t pmd_val(pmd_t pmd)
 	pmdval_t ret;
 
 	if (sizeof(pmdval_t) > sizeof(long))
-		ret =  PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val,
-				  pmd.pmd, (u64)pmd.pmd >> 32);
+		ret =  PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val,
+				    pmd.pmd, (u64)pmd.pmd >> 32);
 	else
-		ret =  PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val,
-				  pmd.pmd);
+		ret =  PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val,
+				    pmd.pmd);
 
 	return ret;
 }
@@ -1307,11 +1307,11 @@ static inline pud_t __pud(pudval_t val)
 	pudval_t ret;
 
 	if (sizeof(pudval_t) > sizeof(long))
-		ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud,
-				 val, (u64)val >> 32);
+		ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud,
+				   val, (u64)val >> 32);
 	else
-		ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud,
-				 val);
+		ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud,
+				   val);
 
 	return (pud_t) { ret };
 }
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 8adb6b5aa421..cea11c8e3049 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -391,43 +391,12 @@ struct pv_apic_ops pv_apic_ops = {
 #endif
 };
 
-typedef pte_t make_pte_t(pteval_t);
-typedef pmd_t make_pmd_t(pmdval_t);
-typedef pud_t make_pud_t(pudval_t);
-typedef pgd_t make_pgd_t(pgdval_t);
-
-typedef pteval_t pte_val_t(pte_t);
-typedef pmdval_t pmd_val_t(pmd_t);
-typedef pudval_t pud_val_t(pud_t);
-typedef pgdval_t pgd_val_t(pgd_t);
-
-
 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
 /* 32-bit pagetable entries */
-#define paravirt_native_make_pte	(make_pte_t *)_paravirt_ident_32
-#define paravirt_native_pte_val		(pte_val_t *)_paravirt_ident_32
-
-#define paravirt_native_make_pmd	(make_pmd_t *)_paravirt_ident_32
-#define paravirt_native_pmd_val		(pmd_val_t *)_paravirt_ident_32
-
-#define paravirt_native_make_pud	(make_pud_t *)_paravirt_ident_32
-#define paravirt_native_pud_val		(pud_val_t *)_paravirt_ident_32
-
-#define paravirt_native_make_pgd	(make_pgd_t *)_paravirt_ident_32
-#define paravirt_native_pgd_val		(pgd_val_t *)_paravirt_ident_32
+#define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_32)
 #else
 /* 64-bit pagetable entries */
-#define paravirt_native_make_pte	(make_pte_t *)_paravirt_ident_64
-#define paravirt_native_pte_val		(pte_val_t *)_paravirt_ident_64
-
-#define paravirt_native_make_pmd	(make_pmd_t *)_paravirt_ident_64
-#define paravirt_native_pmd_val		(pmd_val_t *)_paravirt_ident_64
-
-#define paravirt_native_make_pud	(make_pud_t *)_paravirt_ident_64
-#define paravirt_native_pud_val		(pud_val_t *)_paravirt_ident_64
-
-#define paravirt_native_make_pgd	(make_pgd_t *)_paravirt_ident_64
-#define paravirt_native_pgd_val		(pgd_val_t *)_paravirt_ident_64
+#define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_64)
 #endif
 
 struct pv_mmu_ops pv_mmu_ops = {
@@ -481,21 +450,23 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.pmd_clear = native_pmd_clear,
 #endif
 	.set_pud = native_set_pud,
-	.pmd_val = paravirt_native_pmd_val,
-	.make_pmd = paravirt_native_make_pmd,
+
+	.pmd_val = PTE_IDENT,
+	.make_pmd = PTE_IDENT,
 
 #if PAGETABLE_LEVELS == 4
-	.pud_val = paravirt_native_pud_val,
-	.make_pud = paravirt_native_make_pud,
+	.pud_val = PTE_IDENT,
+	.make_pud = PTE_IDENT,
+
 	.set_pgd = native_set_pgd,
 #endif
 #endif /* PAGETABLE_LEVELS >= 3 */
 
-	.pte_val = paravirt_native_pte_val,
-	.pgd_val = paravirt_native_pgd_val,
+	.pte_val = PTE_IDENT,
+	.pgd_val = PTE_IDENT,
 
-	.make_pte = paravirt_native_make_pte,
-	.make_pgd = paravirt_native_make_pgd,
+	.make_pte = PTE_IDENT,
+	.make_pgd = PTE_IDENT,
 
 	.dup_mmap = paravirt_nop,
 	.exit_mmap = paravirt_nop,
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 94e452c0b00c..5e41f7fc6cf1 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -492,28 +492,33 @@ pteval_t xen_pte_val(pte_t pte)
 {
 	return pte_mfn_to_pfn(pte.pte);
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
 
 pgdval_t xen_pgd_val(pgd_t pgd)
 {
 	return pte_mfn_to_pfn(pgd.pgd);
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
 
 pte_t xen_make_pte(pteval_t pte)
 {
 	pte = pte_pfn_to_mfn(pte);
 	return native_make_pte(pte);
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
 
 pgd_t xen_make_pgd(pgdval_t pgd)
 {
 	pgd = pte_pfn_to_mfn(pgd);
 	return native_make_pgd(pgd);
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
 
 pmdval_t xen_pmd_val(pmd_t pmd)
 {
 	return pte_mfn_to_pfn(pmd.pmd);
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
 
 void xen_set_pud_hyper(pud_t *ptr, pud_t val)
 {
@@ -590,12 +595,14 @@ pmd_t xen_make_pmd(pmdval_t pmd)
 	pmd = pte_pfn_to_mfn(pmd);
 	return native_make_pmd(pmd);
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
 
 #if PAGETABLE_LEVELS == 4
 pudval_t xen_pud_val(pud_t pud)
 {
 	return pte_mfn_to_pfn(pud.pud);
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
 
 pud_t xen_make_pud(pudval_t pud)
 {
@@ -603,6 +610,7 @@ pud_t xen_make_pud(pudval_t pud)
 
 	return native_make_pud(pud);
 }
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
 
 pgd_t *xen_get_user_pgd(pgd_t *pgd)
 {
@@ -1813,11 +1821,11 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.ptep_modify_prot_start = __ptep_modify_prot_start,
 	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
 
-	.pte_val = xen_pte_val,
-	.pgd_val = xen_pgd_val,
+	.pte_val = PV_CALLEE_SAVE(xen_pte_val),
+	.pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
 
-	.make_pte = xen_make_pte,
-	.make_pgd = xen_make_pgd,
+	.make_pte = PV_CALLEE_SAVE(xen_make_pte),
+	.make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
 
 #ifdef CONFIG_X86_PAE
 	.set_pte_atomic = xen_set_pte_atomic,
@@ -1827,12 +1835,12 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 #endif	/* CONFIG_X86_PAE */
 	.set_pud = xen_set_pud_hyper,
 
-	.make_pmd = xen_make_pmd,
-	.pmd_val = xen_pmd_val,
+	.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
+	.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
 
 #if PAGETABLE_LEVELS == 4
-	.pud_val = xen_pud_val,
-	.make_pud = xen_make_pud,
+	.pud_val = PV_CALLEE_SAVE(xen_pud_val),
+	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
 	.set_pgd = xen_set_pgd_hyper,
 
 	.alloc_pud = xen_alloc_pte_init,
-- 
cgit v1.2.3


From 4767afbf1f60f73997a7eb69a86d380f1fb27a92 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 29 Jan 2009 01:51:34 -0800
Subject: x86/paravirt: fix missing callee-save call on pud_val

Impact: Fix build when CONFIG_PARAVIRT_DEBUG is enabled

Fix missed convertion to using callee-saved calls for pud_val, which
causes a compile error when CONFIG_PARAVIRT_DEBUG is enabled.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/paravirt.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 2d098b78bc10..b17365c39749 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1321,11 +1321,11 @@ static inline pudval_t pud_val(pud_t pud)
 	pudval_t ret;
 
 	if (sizeof(pudval_t) > sizeof(long))
-		ret =  PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val,
-				  pud.pud, (u64)pud.pud >> 32);
+		ret =  PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val,
+				    pud.pud, (u64)pud.pud >> 32);
 	else
-		ret =  PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val,
-				  pud.pud);
+		ret =  PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val,
+				    pud.pud);
 
 	return ret;
 }
-- 
cgit v1.2.3


From 552be871e67ff577ed36beb2f53d078b42304739 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Fri, 30 Jan 2009 17:47:53 +0900
Subject: x86: pass in cpu number to switch_to_new_gdt()

Impact: cleanup, prepare for xen boot fix.

Xen needs to call this function very early to setup the GDT and
per-cpu segments.  Remove the call to smp_processor_id() and just
pass in the cpu number.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/processor.h    |  2 +-
 arch/x86/kernel/cpu/common.c        |  7 +++----
 arch/x86/kernel/setup_percpu.c      |  2 +-
 arch/x86/kernel/smpboot.c           |  2 +-
 arch/x86/mach-voyager/voyager_smp.c | 11 ++++++-----
 5 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index befa20b4a68c..1c25eb69ea86 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -768,7 +768,7 @@ extern int sysenter_setup(void);
 extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
-extern void switch_to_new_gdt(void);
+extern void switch_to_new_gdt(int);
 extern void cpu_init(void);
 
 static inline unsigned long get_debugctlmsr(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 652fdc9a757a..6eacd64b602e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -255,10 +255,9 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
 
 /* Current gdt points %fs at the "master" per-cpu area: after this,
  * it's on the real one. */
-void switch_to_new_gdt(void)
+void switch_to_new_gdt(int cpu)
 {
 	struct desc_ptr gdt_descr;
-	int cpu = smp_processor_id();
 
 	gdt_descr.address = (long)get_cpu_gdt_table(cpu);
 	gdt_descr.size = GDT_SIZE - 1;
@@ -993,7 +992,7 @@ void __cpuinit cpu_init(void)
 	 * and set up the GDT descriptor:
 	 */
 
-	switch_to_new_gdt();
+	switch_to_new_gdt(cpu);
 	loadsegment(fs, 0);
 
 	load_idt((const struct desc_ptr *)&idt_descr);
@@ -1098,7 +1097,7 @@ void __cpuinit cpu_init(void)
 		clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
 	load_idt(&idt_descr);
-	switch_to_new_gdt();
+	switch_to_new_gdt(cpu);
 
 	/*
 	 * Set up and load the per-CPU TSS and LDT
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 0d1e7ac439f4..ef91747bbed5 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -122,7 +122,7 @@ void __init setup_per_cpu_areas(void)
 		 * area.  Reload any changed state for the boot CPU.
 		 */
 		if (cpu == boot_cpu_id)
-			switch_to_new_gdt();
+			switch_to_new_gdt(cpu);
 
 		DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
 	}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f9dbcff43546..612d3c74f6a3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1185,7 +1185,7 @@ out:
 void __init native_smp_prepare_boot_cpu(void)
 {
 	int me = smp_processor_id();
-	switch_to_new_gdt();
+	switch_to_new_gdt(me);
 	/* already set me in cpu_online_mask in boot_cpu_init() */
 	cpumask_set_cpu(me, cpu_callout_mask);
 	per_cpu(cpu_state, me) = CPU_ONLINE;
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 331cd6d56483..58c7cac3440d 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -1746,12 +1746,13 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
 
 static void __cpuinit voyager_smp_prepare_boot_cpu(void)
 {
-	switch_to_new_gdt();
+	int cpu = smp_processor_id();
+	switch_to_new_gdt(cpu);
 
-	cpu_set(smp_processor_id(), cpu_online_map);
-	cpu_set(smp_processor_id(), cpu_callout_map);
-	cpu_set(smp_processor_id(), cpu_possible_map);
-	cpu_set(smp_processor_id(), cpu_present_map);
+	cpu_set(cpu, cpu_online_map);
+	cpu_set(cpu, cpu_callout_map);
+	cpu_set(cpu, cpu_possible_map);
+	cpu_set(cpu, cpu_present_map);
 }
 
 static int __cpuinit voyager_cpu_up(unsigned int cpu)
-- 
cgit v1.2.3


From 11e3a840cd5b731cdd8f6f956dfae78a8046d09c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Fri, 30 Jan 2009 17:47:54 +0900
Subject: x86: split loading percpu segments from loading gdt

Impact: split out a function, no functional change

Xen needs to be able to access percpu data from very early on.  For
various reasons, it cannot also load the gdt at that time.   It does,
however, have a pefectly functional gdt at that point, so there's no
pressing need to reload the gdt.

Split the function to load the segment registers off, so Xen can call
it directly.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/cpu/common.c     | 18 ++++++++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1c25eb69ea86..656d02ea509b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -769,6 +769,7 @@ extern struct desc_ptr		early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_percpu_segment(int);
 extern void cpu_init(void);
 
 static inline unsigned long get_debugctlmsr(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6eacd64b602e..0f73ea423089 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -253,6 +253,16 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
 
 __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
 
+void load_percpu_segment(int cpu)
+{
+#ifdef CONFIG_X86_32
+	loadsegment(fs, __KERNEL_PERCPU);
+#else
+	loadsegment(gs, 0);
+	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
+#endif
+}
+
 /* Current gdt points %fs at the "master" per-cpu area: after this,
  * it's on the real one. */
 void switch_to_new_gdt(int cpu)
@@ -263,12 +273,8 @@ void switch_to_new_gdt(int cpu)
 	gdt_descr.size = GDT_SIZE - 1;
 	load_gdt(&gdt_descr);
 	/* Reload the per-cpu base */
-#ifdef CONFIG_X86_32
-	loadsegment(fs, __KERNEL_PERCPU);
-#else
-	loadsegment(gs, 0);
-	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
-#endif
+
+	load_percpu_segment(cpu);
 }
 
 static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
-- 
cgit v1.2.3


From e584f559c7b8711cccdf319400acd6294b2c074e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Fri, 30 Jan 2009 23:17:23 -0800
Subject: x86/paravirt: don't restore second return reg

Impact: bugfix

In the 32-bit calling convention, %eax:%edx is used to return 64-bit
values.  Don't save and restore %edx around wrapped functions, or they
can't return a full 64-bit result.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/paravirt.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index b17365c39749..016dce311305 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1524,8 +1524,8 @@ extern struct paravirt_patch_site __parainstructions[],
 #define PV_RESTORE_REGS "popl %edx; popl %ecx;"
 
 /* save and restore all caller-save registers, except return value */
-#define PV_SAVE_ALL_CALLER_REGS PV_SAVE_REGS
-#define PV_RESTORE_ALL_CALLER_REGS PV_RESTORE_REGS
+#define PV_SAVE_ALL_CALLER_REGS		"pushl %ecx;"
+#define PV_RESTORE_ALL_CALLER_REGS	"popl  %ecx;"
 
 #define PV_FLAGS_ARG "0"
 #define PV_EXTRA_CLOBBERS
-- 
cgit v1.2.3


From 0eb592dbba40baebec9cdde3ff4574185de6cbcc Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 3 Feb 2009 16:00:38 -0800
Subject: x86/paravirt: return full 64-bit result

Impact: Bug fix

A hunk went missing in the original patch, and callee-save callsites were
not marked as returning the upper 32-bit of result, causing Badness.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/paravirt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/include')

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 016dce311305..c85e7475e171 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -522,7 +522,7 @@ int paravirt_disable_iospace(void);
 					"=c" (__ecx)
 #define PVOP_CALL_CLOBBERS		PVOP_VCALL_CLOBBERS
 
-#define PVOP_VCALLEE_CLOBBERS		"=a" (__eax)
+#define PVOP_VCALLEE_CLOBBERS		"=a" (__eax), "=d" (__edx)
 #define PVOP_CALLEE_CLOBBERS		PVOP_VCALLEE_CLOBBERS
 
 #define EXTRA_CLOBBERS
-- 
cgit v1.2.3