From b01cc1b0eae0dea19257b29347116505fbedf679 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Mon, 26 Apr 2010 19:03:05 -0700
Subject: x86: Convert remaining x86 clocksources to
 clocksource_register_hz/khz

This converts the remaining x86 clocksources to use
clocksource_register_hz/khz.

CC: jacob.jun.pan@intel.com
CC: Glauber Costa <glommer@redhat.com>
CC: Dimitri Sivanich <sivanich@sgi.com>
CC: Rusty Russell <rusty@rustcorp.com.au>
CC: Jeremy Fitzhardinge <jeremy@xensource.com>
CC: Chris McDermott <lcm@us.ibm.com>
CC: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> [xen]
Signed-off-by: John Stultz <johnstul@us.ibm.com>
---
 arch/x86/kernel/apb_timer.c    | 10 +---------
 arch/x86/kernel/i8253.c        |  6 +-----
 arch/x86/kernel/kvmclock.c     |  6 +-----
 arch/x86/lguest/boot.c         |  4 +---
 arch/x86/platform/uv/uv_time.c |  6 +-----
 arch/x86/xen/time.c            |  6 +-----
 6 files changed, 6 insertions(+), 32 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 51ef31a89be9..29ebf5a3b192 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -177,7 +177,6 @@ static struct clocksource clocksource_apbt = {
 	.rating		= APBT_CLOCKSOURCE_RATING,
 	.read		= apbt_read_clocksource,
 	.mask		= APBT_MASK,
-	.shift		= APBT_SHIFT,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 	.resume		= apbt_restart_clocksource,
 };
@@ -595,14 +594,7 @@ static int apbt_clocksource_register(void)
 	if (t1 == apbt_read_clocksource(&clocksource_apbt))
 		panic("APBT counter not counting. APBT disabled\n");
 
-	/*
-	 * initialize and register APBT clocksource
-	 * convert that to ns/clock cycle
-	 * mult = (ns/c) * 2^APBT_SHIFT
-	 */
-	clocksource_apbt.mult = div_sc(MSEC_PER_SEC,
-				       (unsigned long) apbt_freq, APBT_SHIFT);
-	clocksource_register(&clocksource_apbt);
+	clocksource_register_khz(&clocksource_apbt, (u32)apbt_freq*1000);
 
 	return 0;
 }
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 2dfd31597443..212fe6590aab 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -188,8 +188,6 @@ static struct clocksource pit_cs = {
 	.rating		= 110,
 	.read		= pit_read,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.mult		= 0,
-	.shift		= 20,
 };
 
 static int __init init_pit_clocksource(void)
@@ -205,9 +203,7 @@ static int __init init_pit_clocksource(void)
 	    pit_ce.mode != CLOCK_EVT_MODE_PERIODIC)
 		return 0;
 
-	pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift);
-
-	return clocksource_register(&pit_cs);
+	return clocksource_register_hz(&pit_cs, CLOCK_TICK_RATE);
 }
 arch_initcall(init_pit_clocksource);
 
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f98d3eafe07a..6389a6bca11b 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -26,8 +26,6 @@
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
 
-#define KVM_SCALE 22
-
 static int kvmclock = 1;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
 static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
@@ -120,8 +118,6 @@ static struct clocksource kvm_clock = {
 	.read = kvm_clock_get_cycles,
 	.rating = 400,
 	.mask = CLOCKSOURCE_MASK(64),
-	.mult = 1 << KVM_SCALE,
-	.shift = KVM_SCALE,
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -203,7 +199,7 @@ void __init kvmclock_init(void)
 	machine_ops.crash_shutdown  = kvm_crash_shutdown;
 #endif
 	kvm_get_preset_lpj();
-	clocksource_register(&kvm_clock);
+	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
 	pv_info.paravirt_enabled = 1;
 	pv_info.name = "KVM";
 
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index eba687f0cc0c..5b96fd95bdab 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -913,8 +913,6 @@ static struct clocksource lguest_clock = {
 	.rating		= 200,
 	.read		= lguest_clock_read,
 	.mask		= CLOCKSOURCE_MASK(64),
-	.mult		= 1 << 22,
-	.shift		= 22,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -997,7 +995,7 @@ static void lguest_time_init(void)
 	/* Set up the timer interrupt (0) to go to our simple timer routine */
 	set_irq_handler(0, lguest_time_irq);
 
-	clocksource_register(&lguest_clock);
+	clocksource_register_hz(&lguest_clock, NSEC_PER_SEC);
 
 	/* We can't set cpumask in the initializer: damn C limitations!  Set it
 	 * here and register our timer device. */
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 9daf5d1af9f1..0eb90184515f 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -40,7 +40,6 @@ static struct clocksource clocksource_uv = {
 	.rating		= 400,
 	.read		= uv_read_rtc,
 	.mask		= (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
-	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -372,14 +371,11 @@ static __init int uv_rtc_setup_clock(void)
 	if (!is_uv_system())
 		return -ENODEV;
 
-	clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
-				clocksource_uv.shift);
-
 	/* If single blade, prefer tsc */
 	if (uv_num_possible_blades() == 1)
 		clocksource_uv.rating = 250;
 
-	rc = clocksource_register(&clocksource_uv);
+	rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second);
 	if (rc)
 		printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
 	else
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 067759e3d6a5..04e11597a8c5 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -26,8 +26,6 @@
 
 #include "xen-ops.h"
 
-#define XEN_SHIFT 22
-
 /* Xen may fire a timer up to this many ns early */
 #define TIMER_SLOP	100000
 #define NS_PER_TICK	(1000000000LL / HZ)
@@ -211,8 +209,6 @@ static struct clocksource xen_clocksource __read_mostly = {
 	.rating = 400,
 	.read = xen_clocksource_get_cycles,
 	.mask = ~0,
-	.mult = 1<<XEN_SHIFT,		/* time directly in nanoseconds */
-	.shift = XEN_SHIFT,
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -446,7 +442,7 @@ static __init void xen_time_init(void)
 	int cpu = smp_processor_id();
 	struct timespec tp;
 
-	clocksource_register(&xen_clocksource);
+	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
 
 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
 		/* Successfully turned off 100Hz tick, so we have the
-- 
cgit v1.2.3


From d60c3041778c11f564969fb62b337df68232ee80 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Mon, 26 Apr 2010 20:20:47 -0700
Subject: ia64: convert to clocksource_register_hz/khz

This converts the ia64 clocksources to use clocksource_register_hz/khz

CC: Tony Luck <tony.luck@intel.com>
CC: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Tony Luck <tony.luck@intel.com> [clocksource_itc path]
Signed-off-by: John Stultz <johnstul@us.ibm.com>
---
 arch/ia64/kernel/cyclone.c      | 6 +-----
 arch/ia64/kernel/time.c         | 9 ++-------
 arch/ia64/sn/kernel/sn2/timer.c | 6 +-----
 drivers/char/hpet.c             | 6 +-----
 4 files changed, 5 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
index d52f1f78eff2..f64097b5118a 100644
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@@ -31,8 +31,6 @@ static struct clocksource clocksource_cyclone = {
         .rating         = 300,
         .read           = read_cyclone,
         .mask           = (1LL << 40) - 1,
-        .mult           = 0, /*to be caluclated*/
-        .shift          = 16,
         .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -118,9 +116,7 @@ int __init init_cyclone_clock(void)
 	/* initialize last tick */
 	cyclone_mc = cyclone_timer;
 	clocksource_cyclone.fsys_mmio = cyclone_timer;
-	clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ,
-						clocksource_cyclone.shift);
-	clocksource_register(&clocksource_cyclone);
+	clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ);
 
 	return 0;
 }
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 9702fa92489e..41c40f0e4796 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -73,8 +73,6 @@ static struct clocksource clocksource_itc = {
 	.rating         = 350,
 	.read           = itc_get_cycles,
 	.mask           = CLOCKSOURCE_MASK(64),
-	.mult           = 0, /*to be calculated*/
-	.shift          = 16,
 	.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 #ifdef CONFIG_PARAVIRT
 	.resume		= paravirt_clocksource_resume,
@@ -374,11 +372,8 @@ ia64_init_itm (void)
 	ia64_cpu_local_tick();
 
 	if (!itc_clocksource) {
-		/* Sort out mult/shift values: */
-		clocksource_itc.mult =
-			clocksource_hz2mult(local_cpu_data->itc_freq,
-						clocksource_itc.shift);
-		clocksource_register(&clocksource_itc);
+		clocksource_register_hz(&clocksource_itc,
+						local_cpu_data->itc_freq);
 		itc_clocksource = &clocksource_itc;
 	}
 }
diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c
index 21d6f09e3447..c34efda122e1 100644
--- a/arch/ia64/sn/kernel/sn2/timer.c
+++ b/arch/ia64/sn/kernel/sn2/timer.c
@@ -33,8 +33,6 @@ static struct clocksource clocksource_sn2 = {
         .rating         = 450,
         .read           = read_sn2,
         .mask           = (1LL << 55) - 1,
-        .mult           = 0,
-        .shift          = 10,
         .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -57,9 +55,7 @@ ia64_sn_udelay (unsigned long usecs)
 void __init sn_timer_init(void)
 {
 	clocksource_sn2.fsys_mmio = RTC_COUNTER_ADDR;
-	clocksource_sn2.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
-							clocksource_sn2.shift);
-	clocksource_register(&clocksource_sn2);
+	clocksource_register_hz(&clocksource_sn2, sn_rtc_cycles_per_second);
 
 	ia64_udelay = &ia64_sn_udelay;
 }
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 7066e801b9d3..051474c65b78 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -84,8 +84,6 @@ static struct clocksource clocksource_hpet = {
 	.rating		= 250,
 	.read		= read_hpet,
 	.mask		= CLOCKSOURCE_MASK(64),
-	.mult		= 0,		/* to be calculated */
-	.shift		= 10,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 static struct clocksource *hpet_clocksource;
@@ -934,9 +932,7 @@ int hpet_alloc(struct hpet_data *hdp)
 	if (!hpet_clocksource) {
 		hpet_mctr = (void __iomem *)&hpetp->hp_hpet->hpet_mc;
 		CLKSRC_FSYS_MMIO_SET(clocksource_hpet.fsys_mmio, hpet_mctr);
-		clocksource_hpet.mult = clocksource_hz2mult(hpetp->hp_tick_freq,
-						clocksource_hpet.shift);
-		clocksource_register(&clocksource_hpet);
+		clocksource_register_hz(&clocksource_hpet, hpetp->hp_tick_freq);
 		hpetp->hp_clocksource = &clocksource_hpet;
 		hpet_clocksource = &clocksource_hpet;
 	}
-- 
cgit v1.2.3


From b8f39f7dfe12d4c8402c493a24fbf1e21d086771 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Mon, 26 Apr 2010 20:22:23 -0700
Subject: microblaze: convert to clocksource_register_hz/khz

This converts the microblaze clocksources to use clocksource_register_hz/khz

CC: Michal Simek <monstr@monstr.eu>
CC: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michal Simek <monstr@monstr.eu>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
---
 arch/microblaze/kernel/timer.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index a5aa33db1df3..68ec7d1e8439 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -217,16 +217,12 @@ static struct clocksource clocksource_microblaze = {
 	.rating		= 300,
 	.read		= microblaze_read,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= 8, /* I can shift it */
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static int __init microblaze_clocksource_init(void)
 {
-	clocksource_microblaze.mult =
-			clocksource_hz2mult(timer_clock_freq,
-						clocksource_microblaze.shift);
-	if (clocksource_register(&clocksource_microblaze))
+	if (clocksource_register_hz(&clocksource_microblaze, timer_clock_freq))
 		panic("failed to register clocksource");
 
 	/* stop timer1 */
-- 
cgit v1.2.3


From 7861434fe9b5c9dad1bbb1f674cded95950e778e Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Tue, 19 Oct 2010 17:49:07 -0700
Subject: alpha: convert to clocksource_register_hz

Converts alpha to use clocksource_register_hz.

CC: Richard Henderson <rth@twiddle.net>
CC: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
CC: Matt Turner <mattst88@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
---
 arch/alpha/kernel/time.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index c1f3e7cb82a4..33b81108dc83 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -378,8 +378,7 @@ static struct clocksource clocksource_rpcc = {
 
 static inline void register_rpcc_clocksource(long cycle_freq)
 {
-	clocksource_calc_mult_shift(&clocksource_rpcc, cycle_freq, 4);
-	clocksource_register(&clocksource_rpcc);
+	clocksource_register_hz(&clocksource_rpcc, cycle_freq);
 }
 #else /* !CONFIG_SMP */
 static inline void register_rpcc_clocksource(long cycle_freq)
-- 
cgit v1.2.3


From 39280742efb00ab61ad62486c737fdd3e980c30f Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Mon, 26 Apr 2010 20:24:37 -0700
Subject: sparc: convert to clocksource_register_hz/khz

This converts the sparc clocksources to use clocksource_register_hz/khz

CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
---
 arch/sparc/kernel/time_64.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 3bc9c9979b92..58aa27b29984 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -816,14 +816,12 @@ void __init time_init(void)
 		clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT);
 
 	clocksource_tick.name = tick_ops->name;
-	clocksource_calc_mult_shift(&clocksource_tick, freq, 4);
 	clocksource_tick.read = clocksource_tick_read;
 
+	clocksource_register_hz(&clocksource_tick, freq);
 	printk("clocksource: mult[%x] shift[%d]\n",
 	       clocksource_tick.mult, clocksource_tick.shift);
 
-	clocksource_register(&clocksource_tick);
-
 	sparc64_clockevent.name = tick_ops->name;
 	clockevents_calc_mult_shift(&sparc64_clockevent, freq, 4);
 
-- 
cgit v1.2.3


From 75c4fd8c7862f37eeae5c80f33bbe4dce97571d4 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Mon, 26 Apr 2010 20:23:11 -0700
Subject: mips: convert to clocksource_register_hz/khz

This converts the mips clocksources to use clocksource_register_hz/khz

CC: Ralf Baechle <ralf@linux-mips.org>
CC: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
---
 arch/mips/alchemy/common/time.c                 |  3 +--
 arch/mips/cavium-octeon/csrc-octeon.c           |  3 +--
 arch/mips/include/asm/time.h                    |  6 -----
 arch/mips/jz4740/time.c                         |  3 +--
 arch/mips/kernel/cevt-txx9.c                    |  3 +--
 arch/mips/kernel/csrc-bcm1480.c                 |  3 +--
 arch/mips/kernel/csrc-ioasic.c                  |  4 +--
 arch/mips/kernel/csrc-powertv.c                 | 35 +++----------------------
 arch/mips/kernel/csrc-r4k.c                     |  4 +--
 arch/mips/kernel/csrc-sb1250.c                  |  3 +--
 arch/mips/kernel/i8253.c                        |  5 +---
 arch/mips/loongson/common/cs5536/cs5536_mfgpt.c |  5 +---
 arch/mips/sgi-ip27/ip27-timer.c                 |  3 +--
 13 files changed, 14 insertions(+), 66 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/common/time.c b/arch/mips/alchemy/common/time.c
index 2aecb2fdf982..d5da6adbf634 100644
--- a/arch/mips/alchemy/common/time.c
+++ b/arch/mips/alchemy/common/time.c
@@ -141,8 +141,7 @@ static int __init alchemy_time_init(unsigned int m2int)
 		goto cntr_err;
 
 	/* register counter1 clocksource and event device */
-	clocksource_set_clock(&au1x_counter1_clocksource, 32768);
-	clocksource_register(&au1x_counter1_clocksource);
+	clocksource_register_hz(&au1x_counter1_clocksource, 32768);
 
 	cd->shift = 32;
 	cd->mult = div_sc(32768, NSEC_PER_SEC, cd->shift);
diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c
index 26bf71130bf8..29d56afbb02d 100644
--- a/arch/mips/cavium-octeon/csrc-octeon.c
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@@ -105,8 +105,7 @@ unsigned long long notrace sched_clock(void)
 void __init plat_time_init(void)
 {
 	clocksource_mips.rating = 300;
-	clocksource_set_clock(&clocksource_mips, octeon_get_clock_rate());
-	clocksource_register(&clocksource_mips);
+	clocksource_register_hz(&clocksource_mips, octeon_get_clock_rate());
 }
 
 static u64 octeon_udelay_factor;
diff --git a/arch/mips/include/asm/time.h b/arch/mips/include/asm/time.h
index c7f1bfef1574..bc14447e69b5 100644
--- a/arch/mips/include/asm/time.h
+++ b/arch/mips/include/asm/time.h
@@ -84,12 +84,6 @@ static inline int init_mips_clocksource(void)
 #endif
 }
 
-static inline void clocksource_set_clock(struct clocksource *cs,
-					 unsigned int clock)
-{
-	clocksource_calc_mult_shift(cs, clock, 4);
-}
-
 static inline void clockevent_set_clock(struct clock_event_device *cd,
 					unsigned int clock)
 {
diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c
index fe01678d94fd..03dfd4e0da4e 100644
--- a/arch/mips/jz4740/time.c
+++ b/arch/mips/jz4740/time.c
@@ -121,8 +121,7 @@ void __init plat_time_init(void)
 
 	clockevents_register_device(&jz4740_clockevent);
 
-	clocksource_set_clock(&jz4740_clocksource, clk_rate);
-	ret = clocksource_register(&jz4740_clocksource);
+	ret = clocksource_register_hz(&jz4740_clocksource, clk_rate);
 
 	if (ret)
 		printk(KERN_ERR "Failed to register clocksource: %d\n", ret);
diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c
index 0b7377361e22..f0ab92a1b057 100644
--- a/arch/mips/kernel/cevt-txx9.c
+++ b/arch/mips/kernel/cevt-txx9.c
@@ -51,8 +51,7 @@ void __init txx9_clocksource_init(unsigned long baseaddr,
 {
 	struct txx9_tmr_reg __iomem *tmrptr;
 
-	clocksource_set_clock(&txx9_clocksource.cs, TIMER_CLK(imbusclk));
-	clocksource_register(&txx9_clocksource.cs);
+	clocksource_register_hz(&txx9_clocksource.cs, TIMER_CLK(imbusclk));
 
 	tmrptr = ioremap(baseaddr, sizeof(struct txx9_tmr_reg));
 	__raw_writel(TCR_BASE, &tmrptr->tcr);
diff --git a/arch/mips/kernel/csrc-bcm1480.c b/arch/mips/kernel/csrc-bcm1480.c
index 51489f8a825e..f96f99c794a3 100644
--- a/arch/mips/kernel/csrc-bcm1480.c
+++ b/arch/mips/kernel/csrc-bcm1480.c
@@ -49,6 +49,5 @@ void __init sb1480_clocksource_init(void)
 
 	plldiv = G_BCM1480_SYS_PLL_DIV(__raw_readq(IOADDR(A_SCD_SYSTEM_CFG)));
 	zbbus = ((plldiv >> 1) * 50000000) + ((plldiv & 1) * 25000000);
-	clocksource_set_clock(cs, zbbus);
-	clocksource_register(cs);
+	clocksource_register_hz(cs, zbbus);
 }
diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c
index 23da108506b0..46bd7fa98d6c 100644
--- a/arch/mips/kernel/csrc-ioasic.c
+++ b/arch/mips/kernel/csrc-ioasic.c
@@ -59,7 +59,5 @@ void __init dec_ioasic_clocksource_init(void)
 	printk(KERN_INFO "I/O ASIC clock frequency %dHz\n", freq);
 
 	clocksource_dec.rating = 200 + freq / 10000000;
-	clocksource_set_clock(&clocksource_dec, freq);
-
-	clocksource_register(&clocksource_dec);
+	clocksource_register_hz(&clocksource_dec, freq);
 }
diff --git a/arch/mips/kernel/csrc-powertv.c b/arch/mips/kernel/csrc-powertv.c
index a27c16c8690e..2e7c5232da8d 100644
--- a/arch/mips/kernel/csrc-powertv.c
+++ b/arch/mips/kernel/csrc-powertv.c
@@ -78,9 +78,7 @@ static void __init powertv_c0_hpt_clocksource_init(void)
 
 	clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000;
 
-	clocksource_set_clock(&clocksource_mips, mips_hpt_frequency);
-
-	clocksource_register(&clocksource_mips);
+	clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);
 }
 
 /**
@@ -130,43 +128,16 @@ static struct clocksource clocksource_tim_c = {
 /**
  * powertv_tim_c_clocksource_init - set up a clock source for the TIM_C clock
  *
- * The hard part here is coming up with a constant k and shift s such that
- * the 48-bit TIM_C value multiplied by k doesn't overflow and that value,
- * when shifted right by s, yields the corresponding number of nanoseconds.
  * We know that TIM_C counts at 27 MHz/8, so each cycle corresponds to
- * 1 / (27,000,000/8) seconds. Multiply that by a billion and you get the
- * number of nanoseconds. Since the TIM_C value has 48 bits and the math is
- * done in 64 bits, avoiding an overflow means that k must be less than
- * 64 - 48 = 16 bits.
+ * 1 / (27,000,000/8) seconds.
  */
 static void __init powertv_tim_c_clocksource_init(void)
 {
-	int			prescale;
-	unsigned long		dividend;
-	unsigned long		k;
-	int			s;
-	const int		max_k_bits = (64 - 48) - 1;
-	const unsigned long	billion = 1000000000;
 	const unsigned long	counts_per_second = 27000000 / 8;
 
-	prescale = BITS_PER_LONG - ilog2(billion) - 1;
-	dividend = billion << prescale;
-	k = dividend / counts_per_second;
-	s = ilog2(k) - max_k_bits;
-
-	if (s < 0)
-		s = prescale;
-
-	else {
-		k >>= s;
-		s += prescale;
-	}
-
-	clocksource_tim_c.mult = k;
-	clocksource_tim_c.shift = s;
 	clocksource_tim_c.rating = 200;
 
-	clocksource_register(&clocksource_tim_c);
+	clocksource_register_hz(&clocksource_tim_c, counts_per_second);
 	tim_c = (struct tim_c *) asic_reg_addr(tim_ch);
 }
 
diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c
index e95a3cd48eea..decd1fa38d55 100644
--- a/arch/mips/kernel/csrc-r4k.c
+++ b/arch/mips/kernel/csrc-r4k.c
@@ -30,9 +30,7 @@ int __init init_r4k_clocksource(void)
 	/* Calculate a somewhat reasonable rating value */
 	clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000;
 
-	clocksource_set_clock(&clocksource_mips, mips_hpt_frequency);
-
-	clocksource_register(&clocksource_mips);
+	clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);
 
 	return 0;
 }
diff --git a/arch/mips/kernel/csrc-sb1250.c b/arch/mips/kernel/csrc-sb1250.c
index d14d3d1907fa..e9606d907685 100644
--- a/arch/mips/kernel/csrc-sb1250.c
+++ b/arch/mips/kernel/csrc-sb1250.c
@@ -65,6 +65,5 @@ void __init sb1250_clocksource_init(void)
 		     IOADDR(A_SCD_TIMER_REGISTER(SB1250_HPT_NUM,
 						 R_SCD_TIMER_CFG)));
 
-	clocksource_set_clock(cs, V_SCD_TIMER_FREQ);
-	clocksource_register(cs);
+	clocksource_register_hz(cs, V_SCD_TIMER_FREQ);
 }
diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c
index 2392a7a296d4..9fadd17888d9 100644
--- a/arch/mips/kernel/i8253.c
+++ b/arch/mips/kernel/i8253.c
@@ -196,8 +196,6 @@ static struct clocksource clocksource_pit = {
 	.rating = 110,
 	.read	= pit_read,
 	.mask	= CLOCKSOURCE_MASK(32),
-	.mult	= 0,
-	.shift	= 20,
 };
 
 static int __init init_pit_clocksource(void)
@@ -205,7 +203,6 @@ static int __init init_pit_clocksource(void)
 	if (num_possible_cpus() > 1) /* PIT does not scale! */
 		return 0;
 
-	clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
-	return clocksource_register(&clocksource_pit);
+	return clocksource_register_hz(&clocksource_pit, CLOCK_TICK_RATE);
 }
 arch_initcall(init_pit_clocksource);
diff --git a/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c
index 8c807c965199..0cb1b9760e34 100644
--- a/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c
+++ b/arch/mips/loongson/common/cs5536/cs5536_mfgpt.c
@@ -201,8 +201,6 @@ static struct clocksource clocksource_mfgpt = {
 	.rating = 120, /* Functional for real use, but not desired */
 	.read = mfgpt_read,
 	.mask = CLOCKSOURCE_MASK(32),
-	.mult = 0,
-	.shift = 22,
 };
 
 int __init init_mfgpt_clocksource(void)
@@ -210,8 +208,7 @@ int __init init_mfgpt_clocksource(void)
 	if (num_possible_cpus() > 1)	/* MFGPT does not scale! */
 		return 0;
 
-	clocksource_mfgpt.mult = clocksource_hz2mult(MFGPT_TICK_RATE, 22);
-	return clocksource_register(&clocksource_mfgpt);
+	return clocksource_register_hz(&clocksource_mfgpt, MFGPT_TICK_RATE);
 }
 
 arch_initcall(init_mfgpt_clocksource);
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index d6802d6d1f82..3cac88382d4c 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -177,8 +177,7 @@ static void __init hub_rt_clocksource_init(void)
 {
 	struct clocksource *cs = &hub_rt_clocksource;
 
-	clocksource_set_clock(cs, CYCLES_PER_SEC);
-	clocksource_register(cs);
+	clocksource_register_hz(cs, CYCLES_PER_SEC);
 }
 
 void __init plat_time_init(void)
-- 
cgit v1.2.3


From a1c57e0fec53defe745e64417eacdbd3618c3e66 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Mon, 26 Apr 2010 20:20:07 -0700
Subject: blackfin: convert to clocksource_register_hz

This converts the blackfin clocksource to use clocksource_register_hz.

CC: Mike Frysinger <vapier@gentoo.org>
CC: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <johnstul@us.ibm.com>
---
 arch/blackfin/kernel/time-ts.c | 35 ++---------------------------------
 1 file changed, 2 insertions(+), 33 deletions(-)

(limited to 'arch')

diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index 8c9a43daf80f..4a013714500b 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -23,29 +23,6 @@
 #include <asm/gptimers.h>
 #include <asm/nmi.h>
 
-/* Accelerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- *  basic equation:
- *		ns = cycles / (freq / ns_per_sec)
- *		ns = cycles * (ns_per_sec / freq)
- *		ns = cycles * (10^9 / (cpu_khz * 10^3))
- *		ns = cycles * (10^6 / cpu_khz)
- *
- *	Then we use scaling math (suggested by george@mvista.com) to get:
- *		ns = cycles * (10^6 * SC / cpu_khz) / SC
- *		ns = cycles * cyc2ns_scale / SC
- *
- *	And since SC is a constant power of two, we can convert the div
- *  into a shift.
- *
- *  We can use khz divisor instead of mhz to keep a better precision, since
- *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- *  (mathieu.desnoyers@polymtl.ca)
- *
- *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
 
 #if defined(CONFIG_CYCLES_CLOCKSOURCE)
 
@@ -63,7 +40,6 @@ static struct clocksource bfin_cs_cycles = {
 	.rating		= 400,
 	.read		= bfin_read_cycles,
 	.mask		= CLOCKSOURCE_MASK(64),
-	.shift		= CYC2NS_SCALE_FACTOR,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -75,10 +51,7 @@ static inline unsigned long long bfin_cs_cycles_sched_clock(void)
 
 static int __init bfin_cs_cycles_init(void)
 {
-	bfin_cs_cycles.mult = \
-		clocksource_hz2mult(get_cclk(), bfin_cs_cycles.shift);
-
-	if (clocksource_register(&bfin_cs_cycles))
+	if (clocksource_register_hz(&bfin_cs_cycles, get_cclk()))
 		panic("failed to register clocksource");
 
 	return 0;
@@ -111,7 +84,6 @@ static struct clocksource bfin_cs_gptimer0 = {
 	.rating		= 350,
 	.read		= bfin_read_gptimer0,
 	.mask		= CLOCKSOURCE_MASK(32),
-	.shift		= CYC2NS_SCALE_FACTOR,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -125,10 +97,7 @@ static int __init bfin_cs_gptimer0_init(void)
 {
 	setup_gptimer0();
 
-	bfin_cs_gptimer0.mult = \
-		clocksource_hz2mult(get_sclk(), bfin_cs_gptimer0.shift);
-
-	if (clocksource_register(&bfin_cs_gptimer0))
+	if (clocksource_register_hz(&bfin_cs_gptimer0, get_sclk()))
 		panic("failed to register clocksource");
 
 	return 0;
-- 
cgit v1.2.3


From 45bb1674b976ef81429c1e42de05844b49d45dea Mon Sep 17 00:00:00 2001
From: Daniel Drake <dsd@laptop.org>
Date: Sun, 13 Mar 2011 15:10:17 +0000
Subject: x86, olpc: Use device tree for platform identification

Make OLPC fully depend on device tree, and use it to identify the OLPC
platform details. Some nodes are exposed as platform devices where we
plan to use device tree for device probing.

Signed-off-by: Daniel Drake <dsd@laptop.org>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
LKML-Reference: <20110313151017.C255F9D401E@zog.reactivated.net>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/Kconfig                 |  2 +-
 arch/x86/include/asm/olpc_ofw.h  |  9 +++----
 arch/x86/platform/olpc/Makefile  |  4 +---
 arch/x86/platform/olpc/olpc.c    | 51 +++++++++++++++++++++-------------------
 arch/x86/platform/olpc/olpc_dt.c | 19 +++++++++++++++
 5 files changed, 51 insertions(+), 34 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b4c2e9c67623..471221bcc4fa 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2071,7 +2071,7 @@ config OLPC
 	depends on !X86_PAE
 	select GPIOLIB
 	select OF
-	select OF_PROMTREE if PROC_DEVICETREE
+	select OF_PROMTREE
 	---help---
 	  Add support for detecting the unique features of the OLPC
 	  XO hardware.
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h
index c5d3a5abbb9f..24487712e0b1 100644
--- a/arch/x86/include/asm/olpc_ofw.h
+++ b/arch/x86/include/asm/olpc_ofw.h
@@ -26,15 +26,12 @@ extern void setup_olpc_ofw_pgd(void);
 /* check if OFW was detected during boot */
 extern bool olpc_ofw_present(void);
 
+extern void olpc_dt_build_devicetree(void);
+
 #else /* !CONFIG_OLPC */
 static inline void olpc_ofw_detect(void) { }
 static inline void setup_olpc_ofw_pgd(void) { }
-#endif /* !CONFIG_OLPC */
-
-#ifdef CONFIG_OF_PROMTREE
-extern void olpc_dt_build_devicetree(void);
-#else
 static inline void olpc_dt_build_devicetree(void) { }
-#endif
+#endif /* !CONFIG_OLPC */
 
 #endif /* _ASM_X86_OLPC_OFW_H */
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile
index c2a8cab65e5d..81c5e2165c24 100644
--- a/arch/x86/platform/olpc/Makefile
+++ b/arch/x86/platform/olpc/Makefile
@@ -1,4 +1,2 @@
-obj-$(CONFIG_OLPC)		+= olpc.o
+obj-$(CONFIG_OLPC)		+= olpc.o olpc_ofw.o olpc_dt.o
 obj-$(CONFIG_OLPC_XO1)		+= olpc-xo1.o
-obj-$(CONFIG_OLPC)		+= olpc_ofw.o
-obj-$(CONFIG_OF_PROMTREE)	+= olpc_dt.o
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index edaf3fe8dc5e..0060fd59ea00 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -18,6 +18,7 @@
 #include <linux/io.h>
 #include <linux/string.h>
 #include <linux/platform_device.h>
+#include <linux/of.h>
 
 #include <asm/geode.h>
 #include <asm/setup.h>
@@ -187,41 +188,43 @@ err:
 }
 EXPORT_SYMBOL_GPL(olpc_ec_cmd);
 
-static bool __init check_ofw_architecture(void)
+static bool __init check_ofw_architecture(struct device_node *root)
 {
-	size_t propsize;
-	char olpc_arch[5];
-	const void *args[] = { NULL, "architecture", olpc_arch, (void *)5 };
-	void *res[] = { &propsize };
+	const char *olpc_arch;
+	int propsize;
 
-	if (olpc_ofw("getprop", args, res)) {
-		printk(KERN_ERR "ofw: getprop call failed!\n");
-		return false;
-	}
+	olpc_arch = of_get_property(root, "architecture", &propsize);
 	return propsize == 5 && strncmp("OLPC", olpc_arch, 5) == 0;
 }
 
-static u32 __init get_board_revision(void)
+static u32 __init get_board_revision(struct device_node *root)
 {
-	size_t propsize;
-	__be32 rev;
-	const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 };
-	void *res[] = { &propsize };
-
-	if (olpc_ofw("getprop", args, res) || propsize != 4) {
-		printk(KERN_ERR "ofw: getprop call failed!\n");
-		return cpu_to_be32(0);
-	}
-	return be32_to_cpu(rev);
+	int propsize;
+	const __be32 *rev;
+
+	rev = of_get_property(root, "board-revision-int", &propsize);
+	if (propsize != 4)
+		return 0;
+
+	return be32_to_cpu(*rev);
 }
 
 static bool __init platform_detect(void)
 {
-	if (!check_ofw_architecture())
+	struct device_node *root = of_find_node_by_path("/");
+	bool success;
+
+	if (!root)
 		return false;
-	olpc_platform_info.flags |= OLPC_F_PRESENT;
-	olpc_platform_info.boardrev = get_board_revision();
-	return true;
+
+	success = check_ofw_architecture(root);
+	if (success) {
+		olpc_platform_info.boardrev = get_board_revision(root);
+		olpc_platform_info.flags |= OLPC_F_PRESENT;
+	}
+
+	of_node_put(root);
+	return success;
 }
 
 static int __init add_xo1_platform_devices(void)
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index dab874647530..4ce208f885ef 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -19,7 +19,9 @@
 #include <linux/kernel.h>
 #include <linux/bootmem.h>
 #include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/of_pdt.h>
+#include <asm/olpc.h>
 #include <asm/olpc_ofw.h>
 
 static phandle __init olpc_dt_getsibling(phandle node)
@@ -181,3 +183,20 @@ void __init olpc_dt_build_devicetree(void)
 	pr_info("PROM DT: Built device tree with %u bytes of memory.\n",
 			prom_early_allocated);
 }
+
+/* A list of DT node/bus matches that we want to expose as platform devices */
+static struct of_device_id __initdata of_ids[] = {
+	{ .compatible = "olpc,xo1-battery" },
+	{ .compatible = "olpc,xo1-dcon" },
+	{ .compatible = "olpc,xo1-rtc" },
+	{},
+};
+
+static int __init olpc_create_platform_devices(void)
+{
+	if (machine_is_olpc())
+		return of_platform_bus_probe(NULL, of_ids, NULL);
+	else
+		return 0;
+}
+device_initcall(olpc_create_platform_devices);
-- 
cgit v1.2.3


From 5d94e81f69d4b1d1102d3ab557ce0a817c11fbbb Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 8 Mar 2011 10:36:19 -0800
Subject: x86: Introduce pci_map_biosrom()

The isci driver needs to retrieve its preboot OROM image which contains
necessary runtime parameters like platform specific sas addresses and
phy configuration.  There is no ROM BAR associated with this area,
instead we will need to scan legacy expansion ROM space.

1/ Promote the probe_roms_32 implementation to x86-64
2/ Add a facility to find and map an adapter rom by pci device (according to
   PCI Firmware Specification Revision 3.0)

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
LKML-Reference: <20110308183226.6246.90354.stgit@localhost6.localdomain6>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/probe_roms.h |   8 ++
 arch/x86/include/asm/setup.h      |   2 +-
 arch/x86/kernel/Makefile          |   2 +-
 arch/x86/kernel/head32.c          |   1 -
 arch/x86/kernel/probe_roms.c      | 267 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/probe_roms_32.c   | 166 ------------------------
 arch/x86/kernel/x86_init.c        |   2 +-
 7 files changed, 278 insertions(+), 170 deletions(-)
 create mode 100644 arch/x86/include/asm/probe_roms.h
 create mode 100644 arch/x86/kernel/probe_roms.c
 delete mode 100644 arch/x86/kernel/probe_roms_32.c

(limited to 'arch')

diff --git a/arch/x86/include/asm/probe_roms.h b/arch/x86/include/asm/probe_roms.h
new file mode 100644
index 000000000000..4950a0b1d09c
--- /dev/null
+++ b/arch/x86/include/asm/probe_roms.h
@@ -0,0 +1,8 @@
+#ifndef _PROBE_ROMS_H_
+#define _PROBE_ROMS_H_
+struct pci_dev;
+
+extern void __iomem *pci_map_biosrom(struct pci_dev *pdev);
+extern void pci_unmap_biosrom(void __iomem *rom);
+extern size_t pci_biosrom_size(struct pci_dev *pdev);
+#endif
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index db8aa19a08a2..03d3a32ace20 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -104,10 +104,10 @@ void *extend_brk(size_t size, size_t align);
 	type *name;					\
 	RESERVE_BRK(name, sizeof(type) * entries)
 
+extern void probe_roms(void);
 #ifdef __i386__
 
 void __init i386_start_kernel(void);
-extern void probe_roms(void);
 
 #else
 void __init x86_64_start_kernel(char *real_mode);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 62445ba2f8a8..f33d738c8da9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,7 +36,7 @@ obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o ldt.o dumpstack.o
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK)  += irq_work.o
-obj-$(CONFIG_X86_32)	+= probe_roms_32.o
+obj-y			+= probe_roms.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 7f138b3c3c52..eab4940c7306 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -23,7 +23,6 @@
 static void __init i386_default_early_setup(void)
 {
 	/* Initialize 32bit specific setup functions */
-	x86_init.resources.probe_roms = probe_roms;
 	x86_init.resources.reserve_resources = i386_reserve_resources;
 	x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
 
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
new file mode 100644
index 000000000000..ba0a4cce53be
--- /dev/null
+++ b/arch/x86/kernel/probe_roms.c
@@ -0,0 +1,267 @@
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/mmzone.h>
+#include <linux/ioport.h>
+#include <linux/seq_file.h>
+#include <linux/console.h>
+#include <linux/init.h>
+#include <linux/edd.h>
+#include <linux/dmi.h>
+#include <linux/pfn.h>
+#include <linux/pci.h>
+#include <asm/pci-direct.h>
+
+
+#include <asm/e820.h>
+#include <asm/mmzone.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/setup_arch.h>
+
+static struct resource system_rom_resource = {
+	.name	= "System ROM",
+	.start	= 0xf0000,
+	.end	= 0xfffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource extension_rom_resource = {
+	.name	= "Extension ROM",
+	.start	= 0xe0000,
+	.end	= 0xeffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource adapter_rom_resources[] = { {
+	.name 	= "Adapter ROM",
+	.start	= 0xc8000,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+} };
+
+static struct resource video_rom_resource = {
+	.name 	= "Video ROM",
+	.start	= 0xc0000,
+	.end	= 0xc7fff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+/* does this oprom support the given pci device, or any of the devices
+ * that the driver supports?
+ */
+static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short device)
+{
+	struct pci_driver *drv = pdev->driver;
+	const struct pci_device_id *id;
+
+	if (pdev->vendor == vendor && pdev->device == device)
+		return true;
+
+	for (id = drv ? drv->id_table : NULL; id && id->vendor; id++)
+		if (id->vendor == vendor && id->device == device)
+			break;
+
+	return id && id->vendor;
+}
+
+static bool probe_list(struct pci_dev *pdev, unsigned short vendor,
+		       const unsigned char *rom_list)
+{
+	unsigned short device;
+
+	do {
+		if (probe_kernel_address(rom_list, device) != 0)
+			device = 0;
+
+		if (device && match_id(pdev, vendor, device))
+			break;
+
+		rom_list += 2;
+	} while (device);
+
+	return !!device;
+}
+
+static struct resource *find_oprom(struct pci_dev *pdev)
+{
+	struct resource *oprom = NULL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) {
+		struct resource *res = &adapter_rom_resources[i];
+		unsigned short offset, vendor, device, list, rev;
+		const unsigned char *rom;
+
+		if (res->end == 0)
+			break;
+
+		rom = isa_bus_to_virt(res->start);
+		if (probe_kernel_address(rom + 0x18, offset) != 0)
+			continue;
+
+		if (probe_kernel_address(rom + offset + 0x4, vendor) != 0)
+			continue;
+
+		if (probe_kernel_address(rom + offset + 0x6, device) != 0)
+			continue;
+
+		if (match_id(pdev, vendor, device)) {
+			oprom = res;
+			break;
+		}
+
+		if (probe_kernel_address(rom + offset + 0x8, list) == 0 &&
+		    probe_kernel_address(rom + offset + 0xc, rev) == 0 &&
+		    rev >= 3 && list &&
+		    probe_list(pdev, vendor, rom + offset + list)) {
+			oprom = res;
+			break;
+		}
+	}
+
+	return oprom;
+}
+
+void *pci_map_biosrom(struct pci_dev *pdev)
+{
+	struct resource *oprom = find_oprom(pdev);
+
+	if (!oprom)
+		return NULL;
+
+	return ioremap(oprom->start, resource_size(oprom));
+}
+EXPORT_SYMBOL(pci_map_biosrom);
+
+void pci_unmap_biosrom(void __iomem *image)
+{
+	iounmap(image);
+}
+EXPORT_SYMBOL(pci_unmap_biosrom);
+
+size_t pci_biosrom_size(struct pci_dev *pdev)
+{
+	struct resource *oprom = find_oprom(pdev);
+
+	return oprom ? resource_size(oprom) : 0;
+}
+EXPORT_SYMBOL(pci_biosrom_size);
+
+#define ROMSIGNATURE 0xaa55
+
+static int __init romsignature(const unsigned char *rom)
+{
+	const unsigned short * const ptr = (const unsigned short *)rom;
+	unsigned short sig;
+
+	return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
+}
+
+static int __init romchecksum(const unsigned char *rom, unsigned long length)
+{
+	unsigned char sum, c;
+
+	for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
+		sum += c;
+	return !length && !sum;
+}
+
+void __init probe_roms(void)
+{
+	const unsigned char *rom;
+	unsigned long start, length, upper;
+	unsigned char c;
+	int i;
+
+	/* video rom */
+	upper = adapter_rom_resources[0].start;
+	for (start = video_rom_resource.start; start < upper; start += 2048) {
+		rom = isa_bus_to_virt(start);
+		if (!romsignature(rom))
+			continue;
+
+		video_rom_resource.start = start;
+
+		if (probe_kernel_address(rom + 2, c) != 0)
+			continue;
+
+		/* 0 < length <= 0x7f * 512, historically */
+		length = c * 512;
+
+		/* if checksum okay, trust length byte */
+		if (length && romchecksum(rom, length))
+			video_rom_resource.end = start + length - 1;
+
+		request_resource(&iomem_resource, &video_rom_resource);
+		break;
+	}
+
+	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
+	if (start < upper)
+		start = upper;
+
+	/* system rom */
+	request_resource(&iomem_resource, &system_rom_resource);
+	upper = system_rom_resource.start;
+
+	/* check for extension rom (ignore length byte!) */
+	rom = isa_bus_to_virt(extension_rom_resource.start);
+	if (romsignature(rom)) {
+		length = extension_rom_resource.end - extension_rom_resource.start + 1;
+		if (romchecksum(rom, length)) {
+			request_resource(&iomem_resource, &extension_rom_resource);
+			upper = extension_rom_resource.start;
+		}
+	}
+
+	/* check for adapter roms on 2k boundaries */
+	for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
+		rom = isa_bus_to_virt(start);
+		if (!romsignature(rom))
+			continue;
+
+		if (probe_kernel_address(rom + 2, c) != 0)
+			continue;
+
+		/* 0 < length <= 0x7f * 512, historically */
+		length = c * 512;
+
+		/* but accept any length that fits if checksum okay */
+		if (!length || start + length > upper || !romchecksum(rom, length))
+			continue;
+
+		adapter_rom_resources[i].start = start;
+		adapter_rom_resources[i].end = start + length - 1;
+		request_resource(&iomem_resource, &adapter_rom_resources[i]);
+
+		start = adapter_rom_resources[i++].end & ~2047UL;
+	}
+}
+
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c
deleted file mode 100644
index 071e7fea42e5..000000000000
--- a/arch/x86/kernel/probe_roms_32.c
+++ /dev/null
@@ -1,166 +0,0 @@
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/uaccess.h>
-#include <linux/mmzone.h>
-#include <linux/ioport.h>
-#include <linux/seq_file.h>
-#include <linux/console.h>
-#include <linux/init.h>
-#include <linux/edd.h>
-#include <linux/dmi.h>
-#include <linux/pfn.h>
-#include <linux/pci.h>
-#include <asm/pci-direct.h>
-
-
-#include <asm/e820.h>
-#include <asm/mmzone.h>
-#include <asm/setup.h>
-#include <asm/sections.h>
-#include <asm/io.h>
-#include <asm/setup_arch.h>
-
-static struct resource system_rom_resource = {
-	.name	= "System ROM",
-	.start	= 0xf0000,
-	.end	= 0xfffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-static struct resource extension_rom_resource = {
-	.name	= "Extension ROM",
-	.start	= 0xe0000,
-	.end	= 0xeffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-static struct resource adapter_rom_resources[] = { {
-	.name 	= "Adapter ROM",
-	.start	= 0xc8000,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-} };
-
-static struct resource video_rom_resource = {
-	.name 	= "Video ROM",
-	.start	= 0xc0000,
-	.end	= 0xc7fff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-#define ROMSIGNATURE 0xaa55
-
-static int __init romsignature(const unsigned char *rom)
-{
-	const unsigned short * const ptr = (const unsigned short *)rom;
-	unsigned short sig;
-
-	return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
-}
-
-static int __init romchecksum(const unsigned char *rom, unsigned long length)
-{
-	unsigned char sum, c;
-
-	for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
-		sum += c;
-	return !length && !sum;
-}
-
-void __init probe_roms(void)
-{
-	const unsigned char *rom;
-	unsigned long start, length, upper;
-	unsigned char c;
-	int i;
-
-	/* video rom */
-	upper = adapter_rom_resources[0].start;
-	for (start = video_rom_resource.start; start < upper; start += 2048) {
-		rom = isa_bus_to_virt(start);
-		if (!romsignature(rom))
-			continue;
-
-		video_rom_resource.start = start;
-
-		if (probe_kernel_address(rom + 2, c) != 0)
-			continue;
-
-		/* 0 < length <= 0x7f * 512, historically */
-		length = c * 512;
-
-		/* if checksum okay, trust length byte */
-		if (length && romchecksum(rom, length))
-			video_rom_resource.end = start + length - 1;
-
-		request_resource(&iomem_resource, &video_rom_resource);
-		break;
-	}
-
-	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
-	if (start < upper)
-		start = upper;
-
-	/* system rom */
-	request_resource(&iomem_resource, &system_rom_resource);
-	upper = system_rom_resource.start;
-
-	/* check for extension rom (ignore length byte!) */
-	rom = isa_bus_to_virt(extension_rom_resource.start);
-	if (romsignature(rom)) {
-		length = extension_rom_resource.end - extension_rom_resource.start + 1;
-		if (romchecksum(rom, length)) {
-			request_resource(&iomem_resource, &extension_rom_resource);
-			upper = extension_rom_resource.start;
-		}
-	}
-
-	/* check for adapter roms on 2k boundaries */
-	for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
-		rom = isa_bus_to_virt(start);
-		if (!romsignature(rom))
-			continue;
-
-		if (probe_kernel_address(rom + 2, c) != 0)
-			continue;
-
-		/* 0 < length <= 0x7f * 512, historically */
-		length = c * 512;
-
-		/* but accept any length that fits if checksum okay */
-		if (!length || start + length > upper || !romchecksum(rom, length))
-			continue;
-
-		adapter_rom_resources[i].start = start;
-		adapter_rom_resources[i].end = start + length - 1;
-		request_resource(&iomem_resource, &adapter_rom_resources[i]);
-
-		start = adapter_rom_resources[i++].end & ~2047UL;
-	}
-}
-
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c11514e9128b..6eee0828e327 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -35,7 +35,7 @@ void iommu_shutdown_noop(void) { }
 struct x86_init_ops x86_init __initdata = {
 
 	.resources = {
-		.probe_roms		= x86_init_noop,
+		.probe_roms		= probe_roms,
 		.reserve_resources	= reserve_standard_io_resources,
 		.memory_setup		= default_machine_specific_memory_setup,
 	},
-- 
cgit v1.2.3


From 9f1f1bfd8d7e579f07dbe56d6f93bd594da43b3d Mon Sep 17 00:00:00 2001
From: Rakib Mullick <rakib.mullick@gmail.com>
Date: Wed, 23 Mar 2011 21:31:40 +0600
Subject: x86, mpparse: Remove unnecessary variable

'ret' isn't used by check_slot(), gets initialized but has no real use,
so remove it.

Signed-off-by: Rakib Mullick <rakib.mullick@gmail.com>
LKML-Reference: <AANLkTikh3y+is3xixKBdyHhr_cHxzPFJF729Fcvt8+Ns@mail.gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mpparse.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6f789a887c06..ef32d4c09c64 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -722,14 +722,12 @@ inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
 static int
 check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
 {
-	int ret = 0;
-
 	if (!mpc_new_phys || count <= mpc_new_length) {
 		WARN(1, "update_mptable: No spare slots (length: %x)\n", count);
 		return -1;
 	}
 
-	return ret;
+	return 0;
 }
 
 static int  __init replace_intsrc_all(struct mpc_table *mpc,
-- 
cgit v1.2.3


From 8e8ec596e6c0144e2dd500a57ee23dde9684df46 Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@freescale.com>
Date: Sun, 13 Mar 2011 16:54:26 +0800
Subject: crypto: caam - Add support for the Freescale SEC4/CAAM

The SEC4 supercedes the SEC2.x/3.x as Freescale's
Integrated Security Engine.  Its programming model is
incompatible with all prior versions of the SEC (talitos).

The SEC4 is also known as the Cryptographic Accelerator
and Assurance Module (CAAM); this driver is named caam.

This initial submission does not include support for Data Path
mode operation - AEAD descriptors are submitted via the job
ring interface, while the Queue Interface (QI) is enabled
for use by others.  Only AEAD algorithms are implemented
at this time, for use with IPsec.

Many thanks to the Freescale STC team for their contributions
to this driver.

Signed-off-by: Steve Cornelius <sec@pobox.com>
Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../devicetree/bindings/crypto/fsl-sec4.txt        |  409 +++++
 arch/powerpc/boot/dts/p4080ds.dts                  |   95 +-
 drivers/crypto/Kconfig                             |    2 +
 drivers/crypto/Makefile                            |    1 +
 drivers/crypto/caam/Kconfig                        |   72 +
 drivers/crypto/caam/Makefile                       |    8 +
 drivers/crypto/caam/caamalg.c                      | 1163 ++++++++++++++
 drivers/crypto/caam/compat.h                       |   35 +
 drivers/crypto/caam/ctrl.c                         |  270 ++++
 drivers/crypto/caam/desc.h                         | 1605 ++++++++++++++++++++
 drivers/crypto/caam/desc_constr.h                  |  204 +++
 drivers/crypto/caam/error.c                        |  248 +++
 drivers/crypto/caam/error.h                        |   10 +
 drivers/crypto/caam/intern.h                       |  113 ++
 drivers/crypto/caam/jr.c                           |  523 +++++++
 drivers/crypto/caam/jr.h                           |   21 +
 drivers/crypto/caam/regs.h                         |  663 ++++++++
 17 files changed, 5441 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/crypto/fsl-sec4.txt
 create mode 100644 drivers/crypto/caam/Kconfig
 create mode 100644 drivers/crypto/caam/Makefile
 create mode 100644 drivers/crypto/caam/caamalg.c
 create mode 100644 drivers/crypto/caam/compat.h
 create mode 100644 drivers/crypto/caam/ctrl.c
 create mode 100644 drivers/crypto/caam/desc.h
 create mode 100644 drivers/crypto/caam/desc_constr.h
 create mode 100644 drivers/crypto/caam/error.c
 create mode 100644 drivers/crypto/caam/error.h
 create mode 100644 drivers/crypto/caam/intern.h
 create mode 100644 drivers/crypto/caam/jr.c
 create mode 100644 drivers/crypto/caam/jr.h
 create mode 100644 drivers/crypto/caam/regs.h

(limited to 'arch')

diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
new file mode 100644
index 000000000000..fce16a85e2c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
@@ -0,0 +1,409 @@
+=====================================================================
+SEC 4 Device Tree Binding
+Copyright (C) 2008-2011 Freescale Semiconductor Inc.
+
+ CONTENTS
+   -Overview
+   -SEC 4 Node
+   -Job Ring Node
+   -Run Time Integrity Check (RTIC) Node
+   -Run Time Integrity Check (RTIC) Memory Node
+   -Secure Non-Volatile Storage (SNVS) Node
+   -Full Example
+
+NOTE: the SEC 4 is also known as Freescale's Cryptographic Accelerator
+Accelerator and Assurance Module (CAAM).
+
+=====================================================================
+Overview
+
+DESCRIPTION
+
+SEC 4 h/w can process requests from 2 types of sources.
+1. DPAA Queue Interface (HW interface between Queue Manager & SEC 4).
+2. Job Rings (HW interface between cores & SEC 4 registers).
+
+High Speed Data Path Configuration:
+
+HW interface between QM & SEC 4 and also BM & SEC 4, on DPAA-enabled parts
+such as the P4080.  The number of simultaneous dequeues the QI can make is
+equal to the number of Descriptor Controller (DECO) engines in a particular
+SEC version.  E.g., the SEC 4.0 in the P4080 has 5 DECOs and can thus
+dequeue from 5 subportals simultaneously.
+
+Job Ring Data Path Configuration:
+
+Each JR is located on a separate 4k page, they may (or may not) be made visible
+in the memory partition devoted to a particular core.  The P4080 has 4 JRs, so
+up to 4 JRs can be configured; and all 4 JRs process requests in parallel.
+
+=====================================================================
+P4080 SEC 4 Node
+
+Description
+
+    Node defines the base address of the SEC 4 block.
+    This block specifies the address range of all global
+    configuration registers for the SEC 4 block.  It
+    also receives interrupts from the Run Time Integrity Check
+    (RTIC) function within the SEC 4 block.
+
+PROPERTIES
+
+   - compatible
+      Usage: required
+      Value type: <string>
+      Definition: Must include "fsl,p4080-sec4.0","fsl,sec-4.0"
+
+   - #address-cells
+       Usage: required
+       Value type: <u32>
+       Definition: A standard property.  Defines the number of cells
+           for representing physical addresses in child nodes.
+
+   - #size-cells
+       Usage: required
+       Value type: <u32>
+       Definition: A standard property.  Defines the number of cells
+           for representing the size of physical addresses in
+           child nodes.
+
+   - reg
+      Usage: required
+      Value type: <prop-encoded-array>
+      Definition: A standard property.  Specifies the physical
+          address and length of the SEC4.0 configuration registers.
+          registers
+
+   - ranges
+       Usage: required
+       Value type: <prop-encoded-array>
+       Definition: A standard property.  Specifies the physical address
+           range of the SEC 4.0 register space (-SNVS not included).  A
+           triplet that includes the child address, parent address, &
+           length.
+
+   - interrupts
+      Usage: required
+      Value type: <prop_encoded-array>
+      Definition:  Specifies the interrupts generated by this
+           device.  The value of the interrupts property
+           consists of one interrupt specifier. The format
+           of the specifier is defined by the binding document
+           describing the node's interrupt parent.
+
+   - interrupt-parent
+      Usage: (required if interrupt property is defined)
+      Value type: <phandle>
+      Definition: A single <phandle> value that points
+          to the interrupt parent to which the child domain
+          is being mapped.
+
+   Note: All other standard properties (see the ePAPR) are allowed
+   but are optional.
+
+
+EXAMPLE
+	crypto@300000 {
+		compatible = "fsl,p4080-sec4.0", "fsl,sec4.0";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x300000 0x10000>;
+		ranges = <0 0x300000 0x10000>;
+		interrupt-parent = <&mpic>;
+		interrupts = <92 2>;
+	};
+
+=====================================================================
+P4080 Job Ring (JR) Node
+
+    Child of the crypto node defines data processing interface to SEC 4
+    across the peripheral bus for purposes of processing
+    cryptographic descriptors. The specified address
+    range can be made visible to one (or more) cores.
+    The interrupt defined for this node is controlled within
+    the address range of this node.
+
+  - compatible
+      Usage: required
+      Value type: <string>
+      Definition: Must include "fsl,p4080-sec4.0-job-ring","fsl,sec4.0-job-ring"
+
+  - reg
+      Usage: required
+      Value type: <prop-encoded-array>
+      Definition: Specifies a two JR parameters:  an offset from
+          the parent physical address and the length the JR registers.
+
+   - fsl,liodn
+       Usage: optional-but-recommended
+       Value type: <prop-encoded-array>
+       Definition:
+           Specifies the LIODN to be used in conjunction with
+           the ppid-to-liodn table that specifies the PPID to LIODN mapping.
+           Needed if the PAMU is used.  Value is a 12 bit value
+           where value is a LIODN ID for this JR. This property is
+           normally set by boot firmware.
+
+   - interrupts
+      Usage: required
+      Value type: <prop_encoded-array>
+      Definition:  Specifies the interrupts generated by this
+           device.  The value of the interrupts property
+           consists of one interrupt specifier. The format
+           of the specifier is defined by the binding document
+           describing the node's interrupt parent.
+
+   - interrupt-parent
+      Usage: (required if interrupt property is defined)
+      Value type: <phandle>
+      Definition: A single <phandle> value that points
+          to the interrupt parent to which the child domain
+          is being mapped.
+
+EXAMPLE
+	jr@1000 {
+		compatible = "fsl,p4080-sec4.0-job-ring",
+			     "fsl,sec4.0-job-ring";
+		reg = <0x1000 0x1000>;
+		fsl,liodn = <0x081>;
+		interrupt-parent = <&mpic>;
+		interrupts = <88 2>;
+	};
+
+
+=====================================================================
+P4080 Run Time Integrity Check (RTIC) Node
+
+  Child node of the crypto node.  Defines a register space that
+  contains up to 5 sets of addresses and their lengths (sizes) that
+  will be checked at run time.  After an initial hash result is
+  calculated, these addresses are checked by HW to monitor any
+  change.  If any memory is modified, a Security Violation is
+  triggered (see SNVS definition).
+
+
+  - compatible
+      Usage: required
+      Value type: <string>
+      Definition: Must include "fsl,p4080-sec4.0-rtic","fsl,sec4.0-rtic".
+
+   - #address-cells
+       Usage: required
+       Value type: <u32>
+       Definition: A standard property.  Defines the number of cells
+           for representing physical addresses in child nodes.  Must
+           have a value of 1.
+
+   - #size-cells
+       Usage: required
+       Value type: <u32>
+       Definition: A standard property.  Defines the number of cells
+           for representing the size of physical addresses in
+           child nodes.  Must have a value of 1.
+
+  - reg
+      Usage: required
+      Value type: <prop-encoded-array>
+      Definition: A standard property.  Specifies a two parameters:
+          an offset from the parent physical address and the length
+          the SEC4 registers.
+
+   - ranges
+       Usage: required
+       Value type: <prop-encoded-array>
+       Definition: A standard property.  Specifies the physical address
+           range of the SEC 4 register space (-SNVS not included).  A
+           triplet that includes the child address, parent address, &
+           length.
+
+EXAMPLE
+	rtic@6000 {
+		compatible = "fsl,p4080-sec4.0-rtic",
+			     "fsl,sec4.0-rtic";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x6000 0x100>;
+		ranges = <0x0 0x6100 0xe00>;
+	};
+
+=====================================================================
+P4080 Run Time Integrity Check (RTIC) Memory Node
+  A child node that defines individual RTIC memory regions that are used to
+  perform run-time integrity check of memory areas that should not modified.
+  The node defines a register that contains the memory address &
+  length (combined) and a second register that contains the hash result
+  in big endian format.
+
+  - compatible
+      Usage: required
+      Value type: <string>
+      Definition: Must include "fsl,p4080-sec4.0-rtic-memory","fsl,sec4.0-rtic-memory".
+
+  - reg
+      Usage: required
+      Value type: <prop-encoded-array>
+      Definition: A standard property.  Specifies two parameters:
+          an offset from the parent physical address and the length:
+
+          1. The location of the RTIC memory address & length registers.
+          2. The location RTIC hash result.
+
+  - fsl,rtic-region
+       Usage: optional-but-recommended
+       Value type: <prop-encoded-array>
+       Definition:
+           Specifies the HW address (36 bit address) for this region
+           followed by the length of the HW partition to be checked;
+           the address is represented as a 64 bit quantity followed
+           by a 32 bit length.
+
+   - fsl,liodn
+       Usage: optional-but-recommended
+       Value type: <prop-encoded-array>
+       Definition:
+           Specifies the LIODN to be used in conjunction with
+           the ppid-to-liodn table that specifies the PPID to LIODN
+           mapping.  Needed if the PAMU is used.  Value is a 12 bit value
+           where value is a LIODN ID for this RTIC memory region. This
+           property is normally set by boot firmware.
+
+EXAMPLE
+	rtic-a@0 {
+		compatible = "fsl,p4080-sec4.0-rtic-memory",
+			     "fsl,sec4.0-rtic-memory";
+		reg = <0x00 0x20 0x100 0x80>;
+		fsl,liodn   = <0x03c>;
+		fsl,rtic-region  = <0x12345678 0x12345678 0x12345678>;
+	};
+
+=====================================================================
+P4080 Secure Non-Volatile Storage (SNVS) Node
+
+    Node defines address range and the associated
+    interrupt for the SNVS function.  This function
+    monitors security state information & reports
+    security violations.
+
+  - compatible
+      Usage: required
+      Value type: <string>
+      Definition: Must include "fsl,p4080-sec4.0-mon", "fsl,sec4.0-mon".
+
+  - reg
+      Usage: required
+      Value type: <prop-encoded-array>
+      Definition: A standard property.  Specifies the physical
+          address and length of the SEC4 configuration
+          registers.
+
+   - interrupts
+      Usage: required
+      Value type: <prop_encoded-array>
+      Definition:  Specifies the interrupts generated by this
+           device.  The value of the interrupts property
+           consists of one interrupt specifier. The format
+           of the specifier is defined by the binding document
+           describing the node's interrupt parent.
+
+   - interrupt-parent
+      Usage: (required if interrupt property is defined)
+      Value type: <phandle>
+      Definition: A single <phandle> value that points
+          to the interrupt parent to which the child domain
+          is being mapped.
+
+EXAMPLE
+	sec_mon@314000 {
+		compatible = "fsl,p4080-sec4.0-mon", "fsl,sec4.0-mon";
+		reg = <0x314000 0x1000>;
+		interrupt-parent = <&mpic>;
+		interrupts = <93 2>;
+	};
+
+=====================================================================
+FULL EXAMPLE
+
+	crypto: crypto@300000 {
+		compatible = "fsl,p4080-sec4.0", "fsl,sec4.0";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0x300000 0x10000>;
+		ranges = <0 0x300000 0x10000>;
+		interrupt-parent = <&mpic>;
+		interrupts = <92 2>;
+
+		sec_jr0: jr@1000 {
+			compatible = "fsl,p4080-sec4.0-job-ring",
+				     "fsl,sec4.0-job-ring";
+			reg = <0x1000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <88 2>;
+		};
+
+		sec_jr1: jr@2000 {
+			compatible = "fsl,p4080-sec4.0-job-ring",
+				     "fsl,sec4.0-job-ring";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <89 2>;
+		};
+
+		sec_jr2: jr@3000 {
+			compatible = "fsl,p4080-sec4.0-job-ring",
+				     "fsl,sec4.0-job-ring";
+			reg = <0x3000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <90 2>;
+		};
+
+		sec_jr3: jr@4000 {
+			compatible = "fsl,p4080-sec4.0-job-ring",
+				     "fsl,sec4.0-job-ring";
+			reg = <0x4000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <91 2>;
+		};
+
+		rtic@6000 {
+			compatible = "fsl,p4080-sec4.0-rtic",
+				     "fsl,sec4.0-rtic";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0x6000 0x100>;
+			ranges = <0x0 0x6100 0xe00>;
+
+			rtic_a: rtic-a@0 {
+				compatible = "fsl,p4080-sec4.0-rtic-memory",
+					     "fsl,sec4.0-rtic-memory";
+				reg = <0x00 0x20 0x100 0x80>;
+			};
+
+			rtic_b: rtic-b@20 {
+				compatible = "fsl,p4080-sec4.0-rtic-memory",
+					     "fsl,sec4.0-rtic-memory";
+				reg = <0x20 0x20 0x200 0x80>;
+			};
+
+			rtic_c: rtic-c@40 {
+				compatible = "fsl,p4080-sec4.0-rtic-memory",
+					     "fsl,sec4.0-rtic-memory";
+				reg = <0x40 0x20 0x300 0x80>;
+			};
+
+			rtic_d: rtic-d@60 {
+				compatible = "fsl,p4080-sec4.0-rtic-memory",
+					     "fsl,sec4.0-rtic-memory";
+				reg = <0x60 0x20 0x500 0x80>;
+			};
+		};
+	};
+
+	sec_mon: sec_mon@314000 {
+		compatible = "fsl,p4080-sec4.0-mon", "fsl,sec4.0-mon";
+		reg = <0x314000 0x1000>;
+		interrupt-parent = <&mpic>;
+		interrupts = <93 2>;
+	};
+
+=====================================================================
diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts
index 5b7fc29dd6cf..c78e80155019 100644
--- a/arch/powerpc/boot/dts/p4080ds.dts
+++ b/arch/powerpc/boot/dts/p4080ds.dts
@@ -1,7 +1,7 @@
 /*
  * P4080DS Device Tree Source
  *
- * Copyright 2009 Freescale Semiconductor Inc.
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
  *
  * This program is free software; you can redistribute	it and/or modify it
  * under  the terms of	the GNU General	 Public License as published by the
@@ -33,6 +33,17 @@
 		dma1 = &dma1;
 		sdhc = &sdhc;
 
+		crypto = &crypto;
+		sec_jr0 = &sec_jr0;
+		sec_jr1 = &sec_jr1;
+		sec_jr2 = &sec_jr2;
+		sec_jr3 = &sec_jr3;
+		rtic_a = &rtic_a;
+		rtic_b = &rtic_b;
+		rtic_c = &rtic_c;
+		rtic_d = &rtic_d;
+		sec_mon = &sec_mon;
+
 		rio0 = &rapidio0;
 	};
 
@@ -410,6 +421,88 @@
 			dr_mode = "host";
 			phy_type = "ulpi";
 		};
+
+		crypto: crypto@300000 {
+			compatible = "fsl,p4080-sec4.0", "fsl,sec4.0";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0x300000 0x10000>;
+			ranges = <0 0x300000 0x10000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <92 2>;
+
+			sec_jr0: jr@1000 {
+				compatible = "fsl,p4080-sec4.0-job-ring",
+					     "fsl,sec4.0-job-ring";
+				reg = <0x1000 0x1000>;
+				interrupt-parent = <&mpic>;
+				interrupts = <88 2>;
+			};
+
+			sec_jr1: jr@2000 {
+				compatible = "fsl,p4080-sec4.0-job-ring",
+					     "fsl,sec4.0-job-ring";
+				reg = <0x2000 0x1000>;
+				interrupt-parent = <&mpic>;
+				interrupts = <89 2>;
+			};
+
+			sec_jr2: jr@3000 {
+				compatible = "fsl,p4080-sec4.0-job-ring",
+					     "fsl,sec4.0-job-ring";
+				reg = <0x3000 0x1000>;
+				interrupt-parent = <&mpic>;
+				interrupts = <90 2>;
+			};
+
+			sec_jr3: jr@4000 {
+				compatible = "fsl,p4080-sec4.0-job-ring",
+					     "fsl,sec4.0-job-ring";
+				reg = <0x4000 0x1000>;
+				interrupt-parent = <&mpic>;
+				interrupts = <91 2>;
+			};
+
+			rtic@6000 {
+				compatible = "fsl,p4080-sec4.0-rtic",
+					     "fsl,sec4.0-rtic";
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0x6000 0x100>;
+				ranges = <0x0 0x6100 0xe00>;
+
+				rtic_a: rtic-a@0 {
+					compatible = "fsl,p4080-sec4.0-rtic-memory",
+						     "fsl,sec4.0-rtic-memory";
+					reg = <0x00 0x20 0x100 0x80>;
+				};
+
+				rtic_b: rtic-b@20 {
+					compatible = "fsl,p4080-sec4.0-rtic-memory",
+						     "fsl,sec4.0-rtic-memory";
+					reg = <0x20 0x20 0x200 0x80>;
+				};
+
+				rtic_c: rtic-c@40 {
+					compatible = "fsl,p4080-sec4.0-rtic-memory",
+						     "fsl,sec4.0-rtic-memory";
+					reg = <0x40 0x20 0x300 0x80>;
+				};
+
+				rtic_d: rtic-d@60 {
+					compatible = "fsl,p4080-sec4.0-rtic-memory",
+						     "fsl,sec4.0-rtic-memory";
+					reg = <0x60 0x20 0x500 0x80>;
+				};
+			};
+		};
+
+		sec_mon: sec_mon@314000 {
+			compatible = "fsl,p4080-sec4.0-mon", "fsl,sec4.0-mon";
+			reg = <0x314000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <93 2>;
+		};
 	};
 
 	rapidio0: rapidio@ffe0c0000 {
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index e54185223c8c..a27224aa883e 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -200,6 +200,8 @@ config CRYPTO_DEV_HIFN_795X_RNG
 	  Select this option if you want to enable the random number generator
 	  on the HIFN 795x crypto adapters.
 
+source drivers/crypto/caam/Kconfig
+
 config CRYPTO_DEV_TALITOS
 	tristate "Talitos Freescale Security Engine (SEC)"
 	select CRYPTO_ALGAPI
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 5203e34248d7..663c5efec13b 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -6,6 +6,7 @@ n2_crypto-y := n2_core.o n2_asm.o
 obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
 obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
+obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
 obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
 obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/
 obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
new file mode 100644
index 000000000000..2d876bb98ff4
--- /dev/null
+++ b/drivers/crypto/caam/Kconfig
@@ -0,0 +1,72 @@
+config CRYPTO_DEV_FSL_CAAM
+	tristate "Freescale CAAM-Multicore driver backend"
+	depends on FSL_SOC
+	help
+	  Enables the driver module for Freescale's Cryptographic Accelerator
+	  and Assurance Module (CAAM), also known as the SEC version 4 (SEC4).
+	  This module adds a job ring operation interface, and configures h/w
+	  to operate as a DPAA component automatically, depending
+	  on h/w feature availability.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called caam.
+
+config CRYPTO_DEV_FSL_CAAM_RINGSIZE
+	int "Job Ring size"
+	depends on CRYPTO_DEV_FSL_CAAM
+	range 2 9
+	default "9"
+	help
+	  Select size of Job Rings as a power of 2, within the
+	  range 2-9 (ring size 4-512).
+	  Examples:
+		2 => 4
+		3 => 8
+		4 => 16
+		5 => 32
+		6 => 64
+		7 => 128
+		8 => 256
+		9 => 512
+
+config CRYPTO_DEV_FSL_CAAM_INTC
+	bool "Job Ring interrupt coalescing"
+	depends on CRYPTO_DEV_FSL_CAAM
+	default y
+	help
+	  Enable the Job Ring's interrupt coalescing feature.
+
+config CRYPTO_DEV_FSL_CAAM_INTC_COUNT_THLD
+	int "Job Ring interrupt coalescing count threshold"
+	depends on CRYPTO_DEV_FSL_CAAM_INTC
+	range 1 255
+	default 255
+	help
+	  Select number of descriptor completions to queue before
+	  raising an interrupt, in the range 1-255. Note that a selection
+	  of 1 functionally defeats the coalescing feature, and a selection
+	  equal or greater than the job ring size will force timeouts.
+
+config CRYPTO_DEV_FSL_CAAM_INTC_TIME_THLD
+	int "Job Ring interrupt coalescing timer threshold"
+	depends on CRYPTO_DEV_FSL_CAAM_INTC
+	range 1 65535
+	default 2048
+	help
+	  Select number of bus clocks/64 to timeout in the case that one or
+	  more descriptor completions are queued without reaching the count
+	  threshold. Range is 1-65535.
+
+config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
+	tristate "Register algorithm implementations with the Crypto API"
+	depends on CRYPTO_DEV_FSL_CAAM
+	default y
+	select CRYPTO_ALGAPI
+	select CRYPTO_AUTHENC
+	help
+	  Selecting this will offload crypto for users of the
+	  scatterlist crypto API (such as the linux native IPSec
+	  stack) to the SEC4 via job ring.
+
+	  To compile this as a module, choose M here: the module
+	  will be called caamalg.
diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile
new file mode 100644
index 000000000000..ef39011b4505
--- /dev/null
+++ b/drivers/crypto/caam/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the CAAM backend and dependent components
+#
+
+obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o
+obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o
+
+caam-objs := ctrl.o jr.o error.o
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
new file mode 100644
index 000000000000..d7fe3d3d7db9
--- /dev/null
+++ b/drivers/crypto/caam/caamalg.c
@@ -0,0 +1,1163 @@
+/*
+ * caam - Freescale FSL CAAM support for crypto API
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ *
+ * Based on talitos crypto API driver.
+ *
+ * relationship of job descriptors to shared descriptors (SteveC Dec 10 2008):
+ *
+ * ---------------                     ---------------
+ * | JobDesc #1  |-------------------->|  ShareDesc  |
+ * | *(packet 1) |                     |   (PDB)     |
+ * ---------------      |------------->|  (hashKey)  |
+ *       .              |              | (cipherKey) |
+ *       .              |    |-------->| (operation) |
+ * ---------------      |    |         ---------------
+ * | JobDesc #2  |------|    |
+ * | *(packet 2) |           |
+ * ---------------           |
+ *       .                   |
+ *       .                   |
+ * ---------------           |
+ * | JobDesc #3  |------------
+ * | *(packet 3) |
+ * ---------------
+ *
+ * The SharedDesc never changes for a connection unless rekeyed, but
+ * each packet will likely be in a different place. So all we need
+ * to know to process the packet is where the input is, where the
+ * output goes, and what context we want to process with. Context is
+ * in the SharedDesc, packet references in the JobDesc.
+ *
+ * So, a job desc looks like:
+ *
+ * ---------------------
+ * | Header            |
+ * | ShareDesc Pointer |
+ * | SEQ_OUT_PTR       |
+ * | (output buffer)   |
+ * | SEQ_IN_PTR        |
+ * | (input buffer)    |
+ * | LOAD (to DECO)    |
+ * ---------------------
+ */
+
+#include "compat.h"
+
+#include "regs.h"
+#include "intern.h"
+#include "desc_constr.h"
+#include "jr.h"
+#include "error.h"
+
+/*
+ * crypto alg
+ */
+#define CAAM_CRA_PRIORITY		3000
+/* max key is sum of AES_MAX_KEY_SIZE, max split key size */
+#define CAAM_MAX_KEY_SIZE		(AES_MAX_KEY_SIZE + \
+					 SHA512_DIGEST_SIZE * 2)
+/* max IV is max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */
+#define CAAM_MAX_IV_LENGTH		16
+
+#ifdef DEBUG
+/* for print_hex_dumps with line references */
+#define xstr(s) str(s)
+#define str(s) #s
+#define debug(format, arg...) printk(format, arg)
+#else
+#define debug(format, arg...)
+#endif
+
+/*
+ * per-session context
+ */
+struct caam_ctx {
+	struct device *jrdev;
+	u32 *sh_desc;
+	dma_addr_t shared_desc_phys;
+	u32 class1_alg_type;
+	u32 class2_alg_type;
+	u32 alg_op;
+	u8 *key;
+	dma_addr_t key_phys;
+	unsigned int keylen;
+	unsigned int enckeylen;
+	unsigned int authkeylen;
+	unsigned int split_key_len;
+	unsigned int split_key_pad_len;
+	unsigned int authsize;
+};
+
+static int aead_authenc_setauthsize(struct crypto_aead *authenc,
+				    unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+
+	return 0;
+}
+
+struct split_key_result {
+	struct completion completion;
+	int err;
+};
+
+static void split_key_done(struct device *dev, u32 *desc, u32 err,
+			   void *context)
+{
+	struct split_key_result *res = context;
+
+#ifdef DEBUG
+	dev_err(dev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+	if (err) {
+		char tmp[256];
+
+		dev_err(dev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	res->err = err;
+
+	complete(&res->completion);
+}
+
+/*
+get a split ipad/opad key
+
+Split key generation-----------------------------------------------
+
+[00] 0xb0810008    jobdesc: stidx=1 share=never len=8
+[01] 0x04000014        key: class2->keyreg len=20
+			@0xffe01000
+[03] 0x84410014  operation: cls2-op sha1 hmac init dec
+[04] 0x24940000     fifold: class2 msgdata-last2 len=0 imm
+[05] 0xa4000001       jump: class2 local all ->1 [06]
+[06] 0x64260028    fifostr: class2 mdsplit-jdk len=40
+			@0xffe04000
+*/
+static u32 gen_split_key(struct caam_ctx *ctx, const u8 *key_in, u32 authkeylen)
+{
+	struct device *jrdev = ctx->jrdev;
+	u32 *desc;
+	struct split_key_result result;
+	dma_addr_t dma_addr_in, dma_addr_out;
+	int ret = 0;
+
+	desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
+
+	init_job_desc(desc, 0);
+
+	dma_addr_in = dma_map_single(jrdev, (void *)key_in, authkeylen,
+				     DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, dma_addr_in)) {
+		dev_err(jrdev, "unable to map key input memory\n");
+		kfree(desc);
+		return -ENOMEM;
+	}
+	append_key(desc, dma_addr_in, authkeylen, CLASS_2 |
+		       KEY_DEST_CLASS_REG);
+
+	/* Sets MDHA up into an HMAC-INIT */
+	append_operation(desc, ctx->alg_op | OP_ALG_DECRYPT |
+			     OP_ALG_AS_INIT);
+
+	/*
+	 * do a FIFO_LOAD of zero, this will trigger the internal key expansion
+	   into both pads inside MDHA
+	 */
+	append_fifo_load_as_imm(desc, NULL, 0, LDST_CLASS_2_CCB |
+				FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST2);
+
+	/*
+	 * FIFO_STORE with the explicit split-key content store
+	 * (0x26 output type)
+	 */
+	dma_addr_out = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len,
+				      DMA_FROM_DEVICE);
+	if (dma_mapping_error(jrdev, dma_addr_out)) {
+		dev_err(jrdev, "unable to map key output memory\n");
+		kfree(desc);
+		return -ENOMEM;
+	}
+	append_fifo_store(desc, dma_addr_out, ctx->split_key_len,
+			  LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key_in, authkeylen, 1);
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+
+	result.err = 0;
+	init_completion(&result.completion);
+
+	ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result);
+	if (!ret) {
+		/* in progress */
+		wait_for_completion_interruptible(&result.completion);
+		ret = result.err;
+#ifdef DEBUG
+		print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
+			       ctx->split_key_pad_len, 1);
+#endif
+	}
+
+	dma_unmap_single(jrdev, dma_addr_out, ctx->split_key_pad_len,
+			 DMA_FROM_DEVICE);
+	dma_unmap_single(jrdev, dma_addr_in, authkeylen, DMA_TO_DEVICE);
+
+	kfree(desc);
+
+	return ret;
+}
+
+static int build_sh_desc_ipsec(struct caam_ctx *ctx)
+{
+	struct device *jrdev = ctx->jrdev;
+	u32 *sh_desc;
+	u32 *jump_cmd;
+
+	/* build shared descriptor for this session */
+	sh_desc = kmalloc(CAAM_CMD_SZ * 4 + ctx->split_key_pad_len +
+			  ctx->enckeylen, GFP_DMA | GFP_KERNEL);
+	if (!sh_desc) {
+		dev_err(jrdev, "could not allocate shared descriptor\n");
+		return -ENOMEM;
+	}
+
+	init_sh_desc(sh_desc, HDR_SAVECTX | HDR_SHARE_SERIAL);
+
+	jump_cmd = append_jump(sh_desc, CLASS_BOTH | JUMP_TEST_ALL |
+			       JUMP_COND_SHRD | JUMP_COND_SELF);
+
+	/* process keys, starting with class 2/authentication */
+	append_key_as_imm(sh_desc, ctx->key, ctx->split_key_pad_len,
+			  ctx->split_key_len,
+			  CLASS_2 | KEY_DEST_MDHA_SPLIT | KEY_ENC);
+
+	append_key_as_imm(sh_desc, (void *)ctx->key + ctx->split_key_pad_len,
+			  ctx->enckeylen, ctx->enckeylen,
+			  CLASS_1 | KEY_DEST_CLASS_REG);
+
+	/* update jump cmd now that we are at the jump target */
+	set_jump_tgt_here(sh_desc, jump_cmd);
+
+	ctx->shared_desc_phys = dma_map_single(jrdev, sh_desc,
+					       desc_bytes(sh_desc),
+					       DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->shared_desc_phys)) {
+		dev_err(jrdev, "unable to map shared descriptor\n");
+		kfree(sh_desc);
+		return -ENOMEM;
+	}
+
+	ctx->sh_desc = sh_desc;
+
+	return 0;
+}
+
+static int aead_authenc_setkey(struct crypto_aead *aead,
+			       const u8 *key, unsigned int keylen)
+{
+	/* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */
+	static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 };
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	struct rtattr *rta = (void *)key;
+	struct crypto_authenc_key_param *param;
+	unsigned int authkeylen;
+	unsigned int enckeylen;
+	int ret = 0;
+
+	param = RTA_DATA(rta);
+	enckeylen = be32_to_cpu(param->enckeylen);
+
+	key += RTA_ALIGN(rta->rta_len);
+	keylen -= RTA_ALIGN(rta->rta_len);
+
+	if (keylen < enckeylen)
+		goto badkey;
+
+	authkeylen = keylen - enckeylen;
+
+	if (keylen > CAAM_MAX_KEY_SIZE)
+		goto badkey;
+
+	/* Pick class 2 key length from algorithm submask */
+	ctx->split_key_len = mdpadlen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >>
+				      OP_ALG_ALGSEL_SHIFT] * 2;
+	ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16);
+
+#ifdef DEBUG
+	printk(KERN_ERR "keylen %d enckeylen %d authkeylen %d\n",
+	       keylen, enckeylen, authkeylen);
+	printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n",
+	       ctx->split_key_len, ctx->split_key_pad_len);
+	print_hex_dump(KERN_ERR, "key in @"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+#endif
+	ctx->key = kmalloc(ctx->split_key_pad_len + enckeylen,
+			   GFP_KERNEL | GFP_DMA);
+	if (!ctx->key) {
+		dev_err(jrdev, "could not allocate key output memory\n");
+		return -ENOMEM;
+	}
+
+	ret = gen_split_key(ctx, key, authkeylen);
+	if (ret) {
+		kfree(ctx->key);
+		goto badkey;
+	}
+
+	/* postpend encryption key to auth split key */
+	memcpy(ctx->key + ctx->split_key_pad_len, key + authkeylen, enckeylen);
+
+	ctx->key_phys = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len +
+				       enckeylen, DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->key_phys)) {
+		dev_err(jrdev, "unable to map key i/o memory\n");
+		kfree(ctx->key);
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "ctx.key@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
+		       ctx->split_key_pad_len + enckeylen, 1);
+#endif
+
+	ctx->keylen = keylen;
+	ctx->enckeylen = enckeylen;
+	ctx->authkeylen = authkeylen;
+
+	ret = build_sh_desc_ipsec(ctx);
+	if (ret) {
+		dma_unmap_single(jrdev, ctx->key_phys, ctx->split_key_pad_len +
+				 enckeylen, DMA_TO_DEVICE);
+		kfree(ctx->key);
+	}
+
+	return ret;
+badkey:
+	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	return -EINVAL;
+}
+
+struct link_tbl_entry {
+	u64 ptr;
+	u32 len;
+	u8 reserved;
+	u8 buf_pool_id;
+	u16 offset;
+};
+
+/*
+ * ipsec_esp_edesc - s/w-extended ipsec_esp descriptor
+ * @src_nents: number of segments in input scatterlist
+ * @dst_nents: number of segments in output scatterlist
+ * @assoc_nents: number of segments in associated data (SPI+Seq) scatterlist
+ * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE)
+ * @link_tbl_bytes: length of dma mapped link_tbl space
+ * @link_tbl_dma: bus physical mapped address of h/w link table
+ * @hw_desc: the h/w job descriptor followed by any referenced link tables
+ */
+struct ipsec_esp_edesc {
+	int assoc_nents;
+	int src_nents;
+	int dst_nents;
+	int link_tbl_bytes;
+	dma_addr_t link_tbl_dma;
+	struct link_tbl_entry *link_tbl;
+	u32 hw_desc[0];
+};
+
+static void ipsec_esp_unmap(struct device *dev,
+			    struct ipsec_esp_edesc *edesc,
+			    struct aead_request *areq)
+{
+	dma_unmap_sg(dev, areq->assoc, edesc->assoc_nents, DMA_TO_DEVICE);
+
+	if (unlikely(areq->dst != areq->src)) {
+		dma_unmap_sg(dev, areq->src, edesc->src_nents,
+			     DMA_TO_DEVICE);
+		dma_unmap_sg(dev, areq->dst, edesc->dst_nents,
+			     DMA_FROM_DEVICE);
+	} else {
+		dma_unmap_sg(dev, areq->src, edesc->src_nents,
+			     DMA_BIDIRECTIONAL);
+	}
+
+	if (edesc->link_tbl_bytes)
+		dma_unmap_single(dev, edesc->link_tbl_dma,
+				 edesc->link_tbl_bytes,
+				 DMA_TO_DEVICE);
+}
+
+/*
+ * ipsec_esp descriptor callbacks
+ */
+static void ipsec_esp_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
+				   void *context)
+{
+	struct aead_request *areq = context;
+	struct ipsec_esp_edesc *edesc;
+#ifdef DEBUG
+	struct crypto_aead *aead = crypto_aead_reqtfm(areq);
+	int ivsize = crypto_aead_ivsize(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+
+	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+	edesc = (struct ipsec_esp_edesc *)((char *)desc -
+		 offsetof(struct ipsec_esp_edesc, hw_desc));
+
+	if (err) {
+		char tmp[256];
+
+		dev_err(jrdev, "%s\n", caam_jr_strstatus(tmp, err));
+		dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	ipsec_esp_unmap(jrdev, edesc, areq);
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "assoc  @"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(areq->assoc),
+		       areq->assoclen , 1);
+	print_hex_dump(KERN_ERR, "dstiv  @"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(areq->src) - ivsize,
+		       edesc->src_nents ? 100 : ivsize, 1);
+	print_hex_dump(KERN_ERR, "dst    @"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(areq->src),
+		       edesc->src_nents ? 100 : areq->cryptlen +
+		       ctx->authsize + 4, 1);
+#endif
+
+	kfree(edesc);
+
+	aead_request_complete(areq, err);
+}
+
+static void ipsec_esp_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
+				   void *context)
+{
+	struct aead_request *areq = context;
+	struct ipsec_esp_edesc *edesc;
+#ifdef DEBUG
+	struct crypto_aead *aead = crypto_aead_reqtfm(areq);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+
+	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+	edesc = (struct ipsec_esp_edesc *)((char *)desc -
+		 offsetof(struct ipsec_esp_edesc, hw_desc));
+
+	if (err) {
+		char tmp[256];
+
+		dev_err(jrdev, "%08x: %s\n", err, caam_jr_strstatus(tmp, err));
+	}
+
+	ipsec_esp_unmap(jrdev, edesc, areq);
+
+	/*
+	 * verify hw auth check passed else return -EBADMSG
+	 */
+	if ((err & JRSTA_CCBERR_ERRID_MASK) == JRSTA_CCBERR_ERRID_ICVCHK)
+		err = -EBADMSG;
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "iphdrout@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4,
+		       ((char *)sg_virt(areq->assoc) - sizeof(struct iphdr)),
+		       sizeof(struct iphdr) + areq->assoclen +
+		       ((areq->cryptlen > 1500) ? 1500 : areq->cryptlen) +
+		       ctx->authsize + 36, 1);
+	if (!err && edesc->link_tbl_bytes) {
+		struct scatterlist *sg = sg_last(areq->src, edesc->src_nents);
+		print_hex_dump(KERN_ERR, "sglastout@"xstr(__LINE__)": ",
+			       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(sg),
+			sg->length + ctx->authsize + 16, 1);
+	}
+#endif
+	kfree(edesc);
+
+	aead_request_complete(areq, err);
+}
+
+/*
+ * convert scatterlist to h/w link table format
+ * scatterlist must have been previously dma mapped
+ */
+static void sg_to_link_tbl(struct scatterlist *sg, int sg_count,
+			   struct link_tbl_entry *link_tbl_ptr, u32 offset)
+{
+	while (sg_count) {
+		link_tbl_ptr->ptr = sg_dma_address(sg);
+		link_tbl_ptr->len = sg_dma_len(sg);
+		link_tbl_ptr->reserved = 0;
+		link_tbl_ptr->buf_pool_id = 0;
+		link_tbl_ptr->offset = offset;
+		link_tbl_ptr++;
+		sg = sg_next(sg);
+		sg_count--;
+	}
+
+	/* set Final bit (marks end of link table) */
+	link_tbl_ptr--;
+	link_tbl_ptr->len |= 0x40000000;
+}
+
+/*
+ * fill in and submit ipsec_esp job descriptor
+ */
+static int ipsec_esp(struct ipsec_esp_edesc *edesc, struct aead_request *areq,
+		     u32 encrypt,
+		     void (*callback) (struct device *dev, u32 *desc,
+				       u32 err, void *context))
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(areq);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	u32 *desc = edesc->hw_desc, options;
+	int ret, sg_count, assoc_sg_count;
+	int ivsize = crypto_aead_ivsize(aead);
+	int authsize = ctx->authsize;
+	dma_addr_t ptr, dst_dma, src_dma;
+#ifdef DEBUG
+	u32 *sh_desc = ctx->sh_desc;
+
+	debug("assoclen %d cryptlen %d authsize %d\n",
+	      areq->assoclen, areq->cryptlen, authsize);
+	print_hex_dump(KERN_ERR, "assoc  @"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(areq->assoc),
+		       areq->assoclen , 1);
+	print_hex_dump(KERN_ERR, "presciv@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(areq->src) - ivsize,
+		       edesc->src_nents ? 100 : ivsize, 1);
+	print_hex_dump(KERN_ERR, "src    @"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(areq->src),
+			edesc->src_nents ? 100 : areq->cryptlen + authsize, 1);
+	print_hex_dump(KERN_ERR, "shrdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, sh_desc,
+		       desc_bytes(sh_desc), 1);
+#endif
+	assoc_sg_count = dma_map_sg(jrdev, areq->assoc, edesc->assoc_nents ?: 1,
+				    DMA_TO_DEVICE);
+	if (areq->src == areq->dst)
+		sg_count = dma_map_sg(jrdev, areq->src, edesc->src_nents ? : 1,
+				      DMA_BIDIRECTIONAL);
+	else
+		sg_count = dma_map_sg(jrdev, areq->src, edesc->src_nents ? : 1,
+				      DMA_TO_DEVICE);
+
+	/* start auth operation */
+	append_operation(desc, ctx->class2_alg_type | OP_ALG_AS_INITFINAL |
+			 (encrypt ? : OP_ALG_ICV_ON));
+
+	/* Load FIFO with data for Class 2 CHA */
+	options = FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG;
+	if (!edesc->assoc_nents) {
+		ptr = sg_dma_address(areq->assoc);
+	} else {
+		sg_to_link_tbl(areq->assoc, edesc->assoc_nents,
+			       edesc->link_tbl, 0);
+		ptr = edesc->link_tbl_dma;
+		options |= LDST_SGF;
+	}
+	append_fifo_load(desc, ptr, areq->assoclen, options);
+
+	/* copy iv from cipher/class1 input context to class2 infifo */
+	append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO | ivsize);
+
+	/* start class 1 (cipher) operation */
+	append_operation(desc, ctx->class1_alg_type | OP_ALG_AS_INITFINAL |
+			 encrypt);
+
+	/* load payload & instruct to class2 to snoop class 1 if encrypting */
+	options = 0;
+	if (!edesc->src_nents) {
+		src_dma = sg_dma_address(areq->src);
+	} else {
+		sg_to_link_tbl(areq->src, edesc->src_nents, edesc->link_tbl +
+			       edesc->assoc_nents, 0);
+		src_dma = edesc->link_tbl_dma + edesc->assoc_nents *
+			  sizeof(struct link_tbl_entry);
+		options |= LDST_SGF;
+	}
+	append_seq_in_ptr(desc, src_dma, areq->cryptlen + authsize, options);
+	append_seq_fifo_load(desc, areq->cryptlen, FIFOLD_CLASS_BOTH |
+			     FIFOLD_TYPE_LASTBOTH |
+			     (encrypt ? FIFOLD_TYPE_MSG1OUT2
+				      : FIFOLD_TYPE_MSG));
+
+	/* specify destination */
+	if (areq->src == areq->dst) {
+		dst_dma = src_dma;
+	} else {
+		sg_count = dma_map_sg(jrdev, areq->dst, edesc->dst_nents ? : 1,
+				      DMA_FROM_DEVICE);
+		if (!edesc->dst_nents) {
+			dst_dma = sg_dma_address(areq->dst);
+			options = 0;
+		} else {
+			sg_to_link_tbl(areq->dst, edesc->dst_nents,
+				       edesc->link_tbl + edesc->assoc_nents +
+				       edesc->src_nents, 0);
+			dst_dma = edesc->link_tbl_dma + (edesc->assoc_nents +
+				  edesc->src_nents) *
+				  sizeof(struct link_tbl_entry);
+			options = LDST_SGF;
+		}
+	}
+	append_seq_out_ptr(desc, dst_dma, areq->cryptlen + authsize, options);
+	append_seq_fifo_store(desc, areq->cryptlen, FIFOST_TYPE_MESSAGE_DATA);
+
+	/* ICV */
+	if (encrypt)
+		append_seq_store(desc, authsize, LDST_CLASS_2_CCB |
+				 LDST_SRCDST_BYTE_CONTEXT);
+	else
+		append_seq_fifo_load(desc, authsize, FIFOLD_CLASS_CLASS2 |
+				     FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV);
+
+#ifdef DEBUG
+	debug("job_desc_len %d\n", desc_len(desc));
+	print_hex_dump(KERN_ERR, "jobdesc@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc) , 1);
+	print_hex_dump(KERN_ERR, "jdlinkt@"xstr(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->link_tbl,
+			edesc->link_tbl_bytes, 1);
+#endif
+
+	ret = caam_jr_enqueue(jrdev, desc, callback, areq);
+	if (!ret)
+		ret = -EINPROGRESS;
+	else {
+		ipsec_esp_unmap(jrdev, edesc, areq);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+/*
+ * derive number of elements in scatterlist
+ */
+static int sg_count(struct scatterlist *sg_list, int nbytes, int *chained)
+{
+	struct scatterlist *sg = sg_list;
+	int sg_nents = 0;
+
+	*chained = 0;
+	while (nbytes > 0) {
+		sg_nents++;
+		nbytes -= sg->length;
+		if (!sg_is_last(sg) && (sg + 1)->length == 0)
+			*chained = 1;
+		sg = scatterwalk_sg_next(sg);
+	}
+
+	return sg_nents;
+}
+
+/*
+ * allocate and map the ipsec_esp extended descriptor
+ */
+static struct ipsec_esp_edesc *ipsec_esp_edesc_alloc(struct aead_request *areq,
+						     int desc_bytes)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(areq);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = areq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+		      GFP_ATOMIC;
+	int assoc_nents, src_nents, dst_nents = 0, chained, link_tbl_bytes;
+	struct ipsec_esp_edesc *edesc;
+
+	assoc_nents = sg_count(areq->assoc, areq->assoclen, &chained);
+	BUG_ON(chained);
+	if (likely(assoc_nents == 1))
+		assoc_nents = 0;
+
+	src_nents = sg_count(areq->src, areq->cryptlen + ctx->authsize,
+			     &chained);
+	BUG_ON(chained);
+	if (src_nents == 1)
+		src_nents = 0;
+
+	if (unlikely(areq->dst != areq->src)) {
+		dst_nents = sg_count(areq->dst, areq->cryptlen + ctx->authsize,
+				     &chained);
+		BUG_ON(chained);
+		if (dst_nents == 1)
+			dst_nents = 0;
+	}
+
+	link_tbl_bytes = (assoc_nents + src_nents + dst_nents) *
+			 sizeof(struct link_tbl_entry);
+	debug("link_tbl_bytes %d\n", link_tbl_bytes);
+
+	/* allocate space for base edesc and hw desc commands, link tables */
+	edesc = kmalloc(sizeof(struct ipsec_esp_edesc) + desc_bytes +
+			link_tbl_bytes, GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(jrdev, "could not allocate extended descriptor\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	edesc->assoc_nents = assoc_nents;
+	edesc->src_nents = src_nents;
+	edesc->dst_nents = dst_nents;
+	edesc->link_tbl = (void *)edesc + sizeof(struct ipsec_esp_edesc) +
+			  desc_bytes;
+	edesc->link_tbl_dma = dma_map_single(jrdev, edesc->link_tbl,
+					     link_tbl_bytes, DMA_TO_DEVICE);
+	edesc->link_tbl_bytes = link_tbl_bytes;
+
+	return edesc;
+}
+
+static int aead_authenc_encrypt(struct aead_request *areq)
+{
+	struct ipsec_esp_edesc *edesc;
+	struct crypto_aead *aead = crypto_aead_reqtfm(areq);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	int ivsize = crypto_aead_ivsize(aead);
+	u32 *desc;
+	dma_addr_t iv_dma;
+
+	/* allocate extended descriptor */
+	edesc = ipsec_esp_edesc_alloc(areq, 21 * sizeof(u32));
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	desc = edesc->hw_desc;
+
+	/* insert shared descriptor pointer */
+	init_job_desc_shared(desc, ctx->shared_desc_phys,
+			     desc_len(ctx->sh_desc), HDR_SHARE_DEFER);
+
+	iv_dma = dma_map_single(jrdev, areq->iv, ivsize, DMA_TO_DEVICE);
+	/* check dma error */
+
+	append_load(desc, iv_dma, ivsize,
+		    LDST_CLASS_1_CCB | LDST_SRCDST_BYTE_CONTEXT);
+
+	return ipsec_esp(edesc, areq, OP_ALG_ENCRYPT, ipsec_esp_encrypt_done);
+}
+
+static int aead_authenc_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	int ivsize = crypto_aead_ivsize(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	struct ipsec_esp_edesc *edesc;
+	u32 *desc;
+	dma_addr_t iv_dma;
+
+	req->cryptlen -= ctx->authsize;
+
+	/* allocate extended descriptor */
+	edesc = ipsec_esp_edesc_alloc(req, 21 * sizeof(u32));
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	desc = edesc->hw_desc;
+
+	/* insert shared descriptor pointer */
+	init_job_desc_shared(desc, ctx->shared_desc_phys,
+			     desc_len(ctx->sh_desc), HDR_SHARE_DEFER);
+
+	iv_dma = dma_map_single(jrdev, req->iv, ivsize, DMA_TO_DEVICE);
+	/* check dma error */
+
+	append_load(desc, iv_dma, ivsize,
+		    LDST_CLASS_1_CCB | LDST_SRCDST_BYTE_CONTEXT);
+
+	return ipsec_esp(edesc, req, !OP_ALG_ENCRYPT, ipsec_esp_decrypt_done);
+}
+
+static int aead_authenc_givencrypt(struct aead_givcrypt_request *req)
+{
+	struct aead_request *areq = &req->areq;
+	struct ipsec_esp_edesc *edesc;
+	struct crypto_aead *aead = crypto_aead_reqtfm(areq);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	int ivsize = crypto_aead_ivsize(aead);
+	dma_addr_t iv_dma;
+	u32 *desc;
+
+	iv_dma = dma_map_single(jrdev, req->giv, ivsize, DMA_FROM_DEVICE);
+
+	debug("%s: giv %p\n", __func__, req->giv);
+
+	/* allocate extended descriptor */
+	edesc = ipsec_esp_edesc_alloc(areq, 27 * sizeof(u32));
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	desc = edesc->hw_desc;
+
+	/* insert shared descriptor pointer */
+	init_job_desc_shared(desc, ctx->shared_desc_phys,
+			     desc_len(ctx->sh_desc), HDR_SHARE_DEFER);
+
+	/*
+	 * LOAD IMM Info FIFO
+	 * to DECO, Last, Padding, Random, Message, 16 bytes
+	 */
+	append_load_imm_u32(desc, NFIFOENTRY_DEST_DECO | NFIFOENTRY_LC1 |
+			    NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DTYPE_MSG |
+			    NFIFOENTRY_PTYPE_RND | ivsize,
+			    LDST_SRCDST_WORD_INFO_FIFO);
+
+	/*
+	 * disable info fifo entries since the above serves as the entry
+	 * this way, the MOVE command won't generate an entry.
+	 * Note that this isn't required in more recent versions of
+	 * SEC as a MOVE that doesn't do info FIFO entries is available.
+	 */
+	append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
+
+	/* MOVE DECO Alignment -> C1 Context 16 bytes */
+	append_move(desc, MOVE_WAITCOMP | MOVE_SRC_INFIFO |
+		    MOVE_DEST_CLASS1CTX | ivsize);
+
+	/* re-enable info fifo entries */
+	append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
+
+	/* MOVE C1 Context -> OFIFO 16 bytes */
+	append_move(desc, MOVE_WAITCOMP | MOVE_SRC_CLASS1CTX |
+		    MOVE_DEST_OUTFIFO | ivsize);
+
+	append_fifo_store(desc, iv_dma, ivsize, FIFOST_TYPE_MESSAGE_DATA);
+
+	return ipsec_esp(edesc, areq, OP_ALG_ENCRYPT, ipsec_esp_encrypt_done);
+}
+
+struct caam_alg_template {
+	char name[CRYPTO_MAX_ALG_NAME];
+	char driver_name[CRYPTO_MAX_ALG_NAME];
+	unsigned int blocksize;
+	struct aead_alg aead;
+	u32 class1_alg_type;
+	u32 class2_alg_type;
+	u32 alg_op;
+};
+
+static struct caam_alg_template driver_algs[] = {
+	/* single-pass ipsec_esp descriptor */
+	{
+		.name = "authenc(hmac(sha1),cbc(aes))",
+		.driver_name = "authenc-hmac-sha1-cbc-aes-caam",
+		.blocksize = AES_BLOCK_SIZE,
+		.aead = {
+			.setkey = aead_authenc_setkey,
+			.setauthsize = aead_authenc_setauthsize,
+			.encrypt = aead_authenc_encrypt,
+			.decrypt = aead_authenc_decrypt,
+			.givencrypt = aead_authenc_givencrypt,
+			.geniv = "<built-in>",
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+			},
+		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+		.class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP,
+		.alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
+	},
+	{
+		.name = "authenc(hmac(sha256),cbc(aes))",
+		.driver_name = "authenc-hmac-sha256-cbc-aes-caam",
+		.blocksize = AES_BLOCK_SIZE,
+		.aead = {
+			.setkey = aead_authenc_setkey,
+			.setauthsize = aead_authenc_setauthsize,
+			.encrypt = aead_authenc_encrypt,
+			.decrypt = aead_authenc_decrypt,
+			.givencrypt = aead_authenc_givencrypt,
+			.geniv = "<built-in>",
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+			},
+		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
+		.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
+				   OP_ALG_AAI_HMAC_PRECOMP,
+		.alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
+	},
+	{
+		.name = "authenc(hmac(sha1),cbc(des3_ede))",
+		.driver_name = "authenc-hmac-sha1-cbc-des3_ede-caam",
+		.blocksize = DES3_EDE_BLOCK_SIZE,
+		.aead = {
+			.setkey = aead_authenc_setkey,
+			.setauthsize = aead_authenc_setauthsize,
+			.encrypt = aead_authenc_encrypt,
+			.decrypt = aead_authenc_decrypt,
+			.givencrypt = aead_authenc_givencrypt,
+			.geniv = "<built-in>",
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+			},
+		.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+		.class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP,
+		.alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
+	},
+	{
+		.name = "authenc(hmac(sha256),cbc(des3_ede))",
+		.driver_name = "authenc-hmac-sha256-cbc-des3_ede-caam",
+		.blocksize = DES3_EDE_BLOCK_SIZE,
+		.aead = {
+			.setkey = aead_authenc_setkey,
+			.setauthsize = aead_authenc_setauthsize,
+			.encrypt = aead_authenc_encrypt,
+			.decrypt = aead_authenc_decrypt,
+			.givencrypt = aead_authenc_givencrypt,
+			.geniv = "<built-in>",
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+			},
+		.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
+		.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
+				   OP_ALG_AAI_HMAC_PRECOMP,
+		.alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
+	},
+	{
+		.name = "authenc(hmac(sha1),cbc(des))",
+		.driver_name = "authenc-hmac-sha1-cbc-des-caam",
+		.blocksize = DES_BLOCK_SIZE,
+		.aead = {
+			.setkey = aead_authenc_setkey,
+			.setauthsize = aead_authenc_setauthsize,
+			.encrypt = aead_authenc_encrypt,
+			.decrypt = aead_authenc_decrypt,
+			.givencrypt = aead_authenc_givencrypt,
+			.geniv = "<built-in>",
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+			},
+		.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+		.class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP,
+		.alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
+	},
+	{
+		.name = "authenc(hmac(sha256),cbc(des))",
+		.driver_name = "authenc-hmac-sha256-cbc-des-caam",
+		.blocksize = DES_BLOCK_SIZE,
+		.aead = {
+			.setkey = aead_authenc_setkey,
+			.setauthsize = aead_authenc_setauthsize,
+			.encrypt = aead_authenc_encrypt,
+			.decrypt = aead_authenc_decrypt,
+			.givencrypt = aead_authenc_givencrypt,
+			.geniv = "<built-in>",
+			.ivsize = DES_BLOCK_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+			},
+		.class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
+		.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
+				   OP_ALG_AAI_HMAC_PRECOMP,
+		.alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
+	},
+};
+
+struct caam_crypto_alg {
+	struct list_head entry;
+	struct device *ctrldev;
+	int class1_alg_type;
+	int class2_alg_type;
+	int alg_op;
+	struct crypto_alg crypto_alg;
+};
+
+static int caam_cra_init(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct caam_crypto_alg *caam_alg =
+		 container_of(alg, struct caam_crypto_alg, crypto_alg);
+	struct caam_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct caam_drv_private *priv = dev_get_drvdata(caam_alg->ctrldev);
+	int tgt_jr = atomic_inc_return(&priv->tfm_count);
+
+	/*
+	 * distribute tfms across job rings to ensure in-order
+	 * crypto request processing per tfm
+	 */
+	ctx->jrdev = priv->algapi_jr[(tgt_jr / 2) % priv->num_jrs_for_algapi];
+
+	/* copy descriptor header template value */
+	ctx->class1_alg_type = OP_TYPE_CLASS1_ALG | caam_alg->class1_alg_type;
+	ctx->class2_alg_type = OP_TYPE_CLASS2_ALG | caam_alg->class2_alg_type;
+	ctx->alg_op = OP_TYPE_CLASS2_ALG | caam_alg->alg_op;
+
+	return 0;
+}
+
+static void caam_cra_exit(struct crypto_tfm *tfm)
+{
+	struct caam_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (!dma_mapping_error(ctx->jrdev, ctx->shared_desc_phys))
+		dma_unmap_single(ctx->jrdev, ctx->shared_desc_phys,
+				 desc_bytes(ctx->sh_desc), DMA_TO_DEVICE);
+	kfree(ctx->sh_desc);
+}
+
+static void __exit caam_algapi_exit(void)
+{
+
+	struct device_node *dev_node;
+	struct platform_device *pdev;
+	struct device *ctrldev;
+	struct caam_drv_private *priv;
+	struct caam_crypto_alg *t_alg, *n;
+	int i, err;
+
+	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
+	if (!dev_node)
+		return;
+
+	pdev = of_find_device_by_node(dev_node);
+	if (!pdev)
+		return;
+
+	ctrldev = &pdev->dev;
+	of_node_put(dev_node);
+	priv = dev_get_drvdata(ctrldev);
+
+	if (!priv->alg_list.next)
+		return;
+
+	list_for_each_entry_safe(t_alg, n, &priv->alg_list, entry) {
+		crypto_unregister_alg(&t_alg->crypto_alg);
+		list_del(&t_alg->entry);
+		kfree(t_alg);
+	}
+
+	for (i = 0; i < priv->total_jobrs; i++) {
+		err = caam_jr_deregister(priv->algapi_jr[i]);
+		if (err < 0)
+			break;
+	}
+	kfree(priv->algapi_jr);
+}
+
+static struct caam_crypto_alg *caam_alg_alloc(struct device *ctrldev,
+					      struct caam_alg_template
+					      *template)
+{
+	struct caam_crypto_alg *t_alg;
+	struct crypto_alg *alg;
+
+	t_alg = kzalloc(sizeof(struct caam_crypto_alg), GFP_KERNEL);
+	if (!t_alg) {
+		dev_err(ctrldev, "failed to allocate t_alg\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	alg = &t_alg->crypto_alg;
+
+	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name);
+	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 template->driver_name);
+	alg->cra_module = THIS_MODULE;
+	alg->cra_init = caam_cra_init;
+	alg->cra_exit = caam_cra_exit;
+	alg->cra_priority = CAAM_CRA_PRIORITY;
+	alg->cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
+	alg->cra_blocksize = template->blocksize;
+	alg->cra_alignmask = 0;
+	alg->cra_type = &crypto_aead_type;
+	alg->cra_ctxsize = sizeof(struct caam_ctx);
+	alg->cra_u.aead = template->aead;
+
+	t_alg->class1_alg_type = template->class1_alg_type;
+	t_alg->class2_alg_type = template->class2_alg_type;
+	t_alg->alg_op = template->alg_op;
+	t_alg->ctrldev = ctrldev;
+
+	return t_alg;
+}
+
+static int __init caam_algapi_init(void)
+{
+	struct device_node *dev_node;
+	struct platform_device *pdev;
+	struct device *ctrldev, **jrdev;
+	struct caam_drv_private *priv;
+	int i = 0, err = 0;
+
+	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
+	if (!dev_node)
+		return -ENODEV;
+
+	pdev = of_find_device_by_node(dev_node);
+	if (!pdev)
+		return -ENODEV;
+
+	ctrldev = &pdev->dev;
+	priv = dev_get_drvdata(ctrldev);
+	of_node_put(dev_node);
+
+	INIT_LIST_HEAD(&priv->alg_list);
+
+	jrdev = kmalloc(sizeof(*jrdev) * priv->total_jobrs, GFP_KERNEL);
+	if (!jrdev)
+		return -ENOMEM;
+
+	for (i = 0; i < priv->total_jobrs; i++) {
+		err = caam_jr_register(ctrldev, &jrdev[i]);
+		if (err < 0)
+			break;
+	}
+	if (err < 0 && i == 0) {
+		dev_err(ctrldev, "algapi error in job ring registration: %d\n",
+			err);
+		return err;
+	}
+
+	priv->num_jrs_for_algapi = i;
+	priv->algapi_jr = jrdev;
+	atomic_set(&priv->tfm_count, -1);
+
+	/* register crypto algorithms the device supports */
+	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
+		/* TODO: check if h/w supports alg */
+		struct caam_crypto_alg *t_alg;
+
+		t_alg = caam_alg_alloc(ctrldev, &driver_algs[i]);
+		if (IS_ERR(t_alg)) {
+			err = PTR_ERR(t_alg);
+			dev_warn(ctrldev, "%s alg allocation failed\n",
+				 t_alg->crypto_alg.cra_driver_name);
+			continue;
+		}
+
+		err = crypto_register_alg(&t_alg->crypto_alg);
+		if (err) {
+			dev_warn(ctrldev, "%s alg registration failed\n",
+				t_alg->crypto_alg.cra_driver_name);
+			kfree(t_alg);
+		} else {
+			list_add_tail(&t_alg->entry, &priv->alg_list);
+			dev_info(ctrldev, "%s\n",
+				 t_alg->crypto_alg.cra_driver_name);
+		}
+	}
+
+	return err;
+}
+
+module_init(caam_algapi_init);
+module_exit(caam_algapi_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FSL CAAM support for crypto API");
+MODULE_AUTHOR("Freescale Semiconductor - NMG/STC");
diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h
new file mode 100644
index 000000000000..950450346f70
--- /dev/null
+++ b/drivers/crypto/caam/compat.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ */
+
+#ifndef CAAM_COMPAT_H
+#define CAAM_COMPAT_H
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/crypto.h>
+#include <linux/hw_random.h>
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/rtnetlink.h>
+#include <linux/in.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/debugfs.h>
+#include <linux/circ_buf.h>
+#include <net/xfrm.h>
+
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/des.h>
+#include <crypto/sha.h>
+#include <crypto/aead.h>
+#include <crypto/authenc.h>
+#include <crypto/scatterwalk.h>
+
+#endif /* !defined(CAAM_COMPAT_H) */
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
new file mode 100644
index 000000000000..aa2216160103
--- /dev/null
+++ b/drivers/crypto/caam/ctrl.c
@@ -0,0 +1,270 @@
+/*
+ * CAAM control-plane driver backend
+ * Controller-level driver, kernel property detection, initialization
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ */
+
+#include "compat.h"
+#include "regs.h"
+#include "intern.h"
+#include "jr.h"
+
+static int caam_remove(struct platform_device *pdev)
+{
+	struct device *ctrldev;
+	struct caam_drv_private *ctrlpriv;
+	struct caam_drv_private_jr *jrpriv;
+	struct caam_full __iomem *topregs;
+	int ring, ret = 0;
+
+	ctrldev = &pdev->dev;
+	ctrlpriv = dev_get_drvdata(ctrldev);
+	topregs = (struct caam_full __iomem *)ctrlpriv->ctrl;
+
+	/* shut down JobRs */
+	for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) {
+		ret |= caam_jr_shutdown(ctrlpriv->jrdev[ring]);
+		jrpriv = dev_get_drvdata(ctrlpriv->jrdev[ring]);
+		irq_dispose_mapping(jrpriv->irq);
+	}
+
+	/* Shut down debug views */
+#ifdef CONFIG_DEBUG_FS
+	debugfs_remove_recursive(ctrlpriv->dfs_root);
+#endif
+
+	/* Unmap controller region */
+	iounmap(&topregs->ctrl);
+
+	kfree(ctrlpriv->jrdev);
+	kfree(ctrlpriv);
+
+	return ret;
+}
+
+/* Probe routine for CAAM top (controller) level */
+static int caam_probe(struct platform_device *pdev,
+		      const struct of_device_id *devmatch)
+{
+	int d, ring, rspec;
+	struct device *dev;
+	struct device_node *nprop, *np;
+	struct caam_ctrl __iomem *ctrl;
+	struct caam_full __iomem *topregs;
+	struct caam_drv_private *ctrlpriv;
+	struct caam_perfmon *perfmon;
+	struct caam_deco **deco;
+	u32 deconum;
+
+	ctrlpriv = kzalloc(sizeof(struct caam_drv_private), GFP_KERNEL);
+	if (!ctrlpriv)
+		return -ENOMEM;
+
+	dev = &pdev->dev;
+	dev_set_drvdata(dev, ctrlpriv);
+	ctrlpriv->pdev = pdev;
+	nprop = pdev->dev.of_node;
+
+	/* Get configuration properties from device tree */
+	/* First, get register page */
+	ctrl = of_iomap(nprop, 0);
+	if (ctrl == NULL) {
+		dev_err(dev, "caam: of_iomap() failed\n");
+		return -ENOMEM;
+	}
+	ctrlpriv->ctrl = (struct caam_ctrl __force *)ctrl;
+
+	/* topregs used to derive pointers to CAAM sub-blocks only */
+	topregs = (struct caam_full __iomem *)ctrl;
+
+	/* Get the IRQ of the controller (for security violations only) */
+	ctrlpriv->secvio_irq = of_irq_to_resource(nprop, 0, NULL);
+
+	/*
+	 * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
+	 * 36-bit pointers in master configuration register
+	 */
+	setbits32(&topregs->ctrl.mcr, MCFGR_WDENABLE |
+		  (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0));
+
+	if (sizeof(dma_addr_t) == sizeof(u64))
+		dma_set_mask(dev, DMA_BIT_MASK(36));
+
+	/* Find out how many DECOs are present */
+	deconum = (rd_reg64(&topregs->ctrl.perfmon.cha_num) &
+		   CHA_NUM_DECONUM_MASK) >> CHA_NUM_DECONUM_SHIFT;
+
+	ctrlpriv->deco = kmalloc(deconum * sizeof(struct caam_deco *),
+				 GFP_KERNEL);
+
+	deco = (struct caam_deco __force **)&topregs->deco;
+	for (d = 0; d < deconum; d++)
+		ctrlpriv->deco[d] = deco[d];
+
+	/*
+	 * Detect and enable JobRs
+	 * First, find out how many ring spec'ed, allocate references
+	 * for all, then go probe each one.
+	 */
+	rspec = 0;
+	for_each_compatible_node(np, NULL, "fsl,sec4.0-job-ring")
+		rspec++;
+	ctrlpriv->jrdev = kzalloc(sizeof(struct device *) * rspec, GFP_KERNEL);
+	if (ctrlpriv->jrdev == NULL) {
+		iounmap(&topregs->ctrl);
+		return -ENOMEM;
+	}
+
+	ring = 0;
+	ctrlpriv->total_jobrs = 0;
+	for_each_compatible_node(np, NULL, "fsl,sec4.0-job-ring") {
+		caam_jr_probe(pdev, np, ring);
+		ctrlpriv->total_jobrs++;
+		ring++;
+	}
+
+	/* Check to see if QI present. If so, enable */
+	ctrlpriv->qi_present = !!(rd_reg64(&topregs->ctrl.perfmon.comp_parms) &
+				  CTPR_QI_MASK);
+	if (ctrlpriv->qi_present) {
+		ctrlpriv->qi = (struct caam_queue_if __force *)&topregs->qi;
+		/* This is all that's required to physically enable QI */
+		wr_reg32(&topregs->qi.qi_control_lo, QICTL_DQEN);
+	}
+
+	/* If no QI and no rings specified, quit and go home */
+	if ((!ctrlpriv->qi_present) && (!ctrlpriv->total_jobrs)) {
+		dev_err(dev, "no queues configured, terminating\n");
+		caam_remove(pdev);
+		return -ENOMEM;
+	}
+
+	/* NOTE: RTIC detection ought to go here, around Si time */
+
+	/* Initialize queue allocator lock */
+	spin_lock_init(&ctrlpriv->jr_alloc_lock);
+
+	/* Report "alive" for developer to see */
+	dev_info(dev, "device ID = 0x%016llx\n",
+		 rd_reg64(&topregs->ctrl.perfmon.caam_id));
+	dev_info(dev, "job rings = %d, qi = %d\n",
+		 ctrlpriv->total_jobrs, ctrlpriv->qi_present);
+
+#ifdef CONFIG_DEBUG_FS
+	/*
+	 * FIXME: needs better naming distinction, as some amalgamation of
+	 * "caam" and nprop->full_name. The OF name isn't distinctive,
+	 * but does separate instances
+	 */
+	perfmon = (struct caam_perfmon __force *)&ctrl->perfmon;
+
+	ctrlpriv->dfs_root = debugfs_create_dir("caam", NULL);
+	ctrlpriv->ctl = debugfs_create_dir("ctl", ctrlpriv->dfs_root);
+
+	/* Controller-level - performance monitor counters */
+	ctrlpriv->ctl_rq_dequeued =
+		debugfs_create_u64("rq_dequeued",
+				   S_IFCHR | S_IRUSR | S_IRGRP | S_IROTH,
+				   ctrlpriv->ctl, &perfmon->req_dequeued);
+	ctrlpriv->ctl_ob_enc_req =
+		debugfs_create_u64("ob_rq_encrypted",
+				   S_IFCHR | S_IRUSR | S_IRGRP | S_IROTH,
+				   ctrlpriv->ctl, &perfmon->ob_enc_req);
+	ctrlpriv->ctl_ib_dec_req =
+		debugfs_create_u64("ib_rq_decrypted",
+				   S_IFCHR | S_IRUSR | S_IRGRP | S_IROTH,
+				   ctrlpriv->ctl, &perfmon->ib_dec_req);
+	ctrlpriv->ctl_ob_enc_bytes =
+		debugfs_create_u64("ob_bytes_encrypted",
+				   S_IFCHR | S_IRUSR | S_IRGRP | S_IROTH,
+				   ctrlpriv->ctl, &perfmon->ob_enc_bytes);
+	ctrlpriv->ctl_ob_prot_bytes =
+		debugfs_create_u64("ob_bytes_protected",
+				   S_IFCHR | S_IRUSR | S_IRGRP | S_IROTH,
+				   ctrlpriv->ctl, &perfmon->ob_prot_bytes);
+	ctrlpriv->ctl_ib_dec_bytes =
+		debugfs_create_u64("ib_bytes_decrypted",
+				   S_IFCHR | S_IRUSR | S_IRGRP | S_IROTH,
+				   ctrlpriv->ctl, &perfmon->ib_dec_bytes);
+	ctrlpriv->ctl_ib_valid_bytes =
+		debugfs_create_u64("ib_bytes_validated",
+				   S_IFCHR | S_IRUSR | S_IRGRP | S_IROTH,
+				   ctrlpriv->ctl, &perfmon->ib_valid_bytes);
+
+	/* Controller level - global status values */
+	ctrlpriv->ctl_faultaddr =
+		debugfs_create_u64("fault_addr",
+				   S_IFCHR | S_IRUSR | S_IRGRP | S_IROTH,
+				   ctrlpriv->ctl, &perfmon->faultaddr);
+	ctrlpriv->ctl_faultdetail =
+		debugfs_create_u32("fault_detail",
+				   S_IFCHR | S_IRUSR | S_IRGRP | S_IROTH,
+				   ctrlpriv->ctl, &perfmon->faultdetail);
+	ctrlpriv->ctl_faultstatus =
+		debugfs_create_u32("fault_status",
+				   S_IFCHR | S_IRUSR | S_IRGRP | S_IROTH,
+				   ctrlpriv->ctl, &perfmon->status);
+
+	/* Internal covering keys (useful in non-secure mode only) */
+	ctrlpriv->ctl_kek_wrap.data = &ctrlpriv->ctrl->kek[0];
+	ctrlpriv->ctl_kek_wrap.size = KEK_KEY_SIZE * sizeof(u32);
+	ctrlpriv->ctl_kek = debugfs_create_blob("kek",
+						S_IFCHR | S_IRUSR |
+						S_IRGRP | S_IROTH,
+						ctrlpriv->ctl,
+						&ctrlpriv->ctl_kek_wrap);
+
+	ctrlpriv->ctl_tkek_wrap.data = &ctrlpriv->ctrl->tkek[0];
+	ctrlpriv->ctl_tkek_wrap.size = KEK_KEY_SIZE * sizeof(u32);
+	ctrlpriv->ctl_tkek = debugfs_create_blob("tkek",
+						 S_IFCHR | S_IRUSR |
+						 S_IRGRP | S_IROTH,
+						 ctrlpriv->ctl,
+						 &ctrlpriv->ctl_tkek_wrap);
+
+	ctrlpriv->ctl_tdsk_wrap.data = &ctrlpriv->ctrl->tdsk[0];
+	ctrlpriv->ctl_tdsk_wrap.size = KEK_KEY_SIZE * sizeof(u32);
+	ctrlpriv->ctl_tdsk = debugfs_create_blob("tdsk",
+						 S_IFCHR | S_IRUSR |
+						 S_IRGRP | S_IROTH,
+						 ctrlpriv->ctl,
+						 &ctrlpriv->ctl_tdsk_wrap);
+#endif
+	return 0;
+}
+
+static struct of_device_id caam_match[] = {
+	{
+		.compatible = "fsl,sec4.0",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, caam_match);
+
+static struct of_platform_driver caam_driver = {
+	.driver = {
+		.name = "caam",
+		.owner = THIS_MODULE,
+		.of_match_table = caam_match,
+	},
+	.probe       = caam_probe,
+	.remove      = __devexit_p(caam_remove),
+};
+
+static int __init caam_base_init(void)
+{
+	return of_register_platform_driver(&caam_driver);
+}
+
+static void __exit caam_base_exit(void)
+{
+	return of_unregister_platform_driver(&caam_driver);
+}
+
+module_init(caam_base_init);
+module_exit(caam_base_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FSL CAAM request backend");
+MODULE_AUTHOR("Freescale Semiconductor - NMG/STC");
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
new file mode 100644
index 000000000000..974a75842da9
--- /dev/null
+++ b/drivers/crypto/caam/desc.h
@@ -0,0 +1,1605 @@
+/*
+ * CAAM descriptor composition header
+ * Definitions to support CAAM descriptor instruction generation
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ */
+
+#ifndef DESC_H
+#define DESC_H
+
+/* Max size of any CAAM descriptor in 32-bit words, inclusive of header */
+#define MAX_CAAM_DESCSIZE       64
+
+/* Block size of any entity covered/uncovered with a KEK/TKEK */
+#define KEK_BLOCKSIZE		16
+
+/*
+ * Supported descriptor command types as they show up
+ * inside a descriptor command word.
+ */
+#define CMD_SHIFT               27
+#define CMD_MASK                0xf8000000
+
+#define CMD_KEY                 (0x00 << CMD_SHIFT)
+#define CMD_SEQ_KEY             (0x01 << CMD_SHIFT)
+#define CMD_LOAD                (0x02 << CMD_SHIFT)
+#define CMD_SEQ_LOAD            (0x03 << CMD_SHIFT)
+#define CMD_FIFO_LOAD           (0x04 << CMD_SHIFT)
+#define CMD_SEQ_FIFO_LOAD       (0x05 << CMD_SHIFT)
+#define CMD_STORE               (0x0a << CMD_SHIFT)
+#define CMD_SEQ_STORE           (0x0b << CMD_SHIFT)
+#define CMD_FIFO_STORE          (0x0c << CMD_SHIFT)
+#define CMD_SEQ_FIFO_STORE      (0x0d << CMD_SHIFT)
+#define CMD_MOVE_LEN            (0x0e << CMD_SHIFT)
+#define CMD_MOVE                (0x0f << CMD_SHIFT)
+#define CMD_OPERATION           (0x10 << CMD_SHIFT)
+#define CMD_SIGNATURE           (0x12 << CMD_SHIFT)
+#define CMD_JUMP                (0x14 << CMD_SHIFT)
+#define CMD_MATH                (0x15 << CMD_SHIFT)
+#define CMD_DESC_HDR            (0x16 << CMD_SHIFT)
+#define CMD_SHARED_DESC_HDR     (0x17 << CMD_SHIFT)
+#define CMD_SEQ_IN_PTR          (0x1e << CMD_SHIFT)
+#define CMD_SEQ_OUT_PTR         (0x1f << CMD_SHIFT)
+
+/* General-purpose class selector for all commands */
+#define CLASS_SHIFT             25
+#define CLASS_MASK              (0x03 << CLASS_SHIFT)
+
+#define CLASS_NONE              (0x00 << CLASS_SHIFT)
+#define CLASS_1                 (0x01 << CLASS_SHIFT)
+#define CLASS_2                 (0x02 << CLASS_SHIFT)
+#define CLASS_BOTH              (0x03 << CLASS_SHIFT)
+
+/*
+ * Descriptor header command constructs
+ * Covers shared, job, and trusted descriptor headers
+ */
+
+/*
+ * Do Not Run - marks a descriptor inexecutable if there was
+ * a preceding error somewhere
+ */
+#define HDR_DNR                 0x01000000
+
+/*
+ * ONE - should always be set. Combination of ONE (always
+ * set) and ZRO (always clear) forms an endianness sanity check
+ */
+#define HDR_ONE                 0x00800000
+#define HDR_ZRO                 0x00008000
+
+/* Start Index or SharedDesc Length */
+#define HDR_START_IDX_MASK      0x3f
+#define HDR_START_IDX_SHIFT     16
+
+/* If shared descriptor header, 6-bit length */
+#define HDR_DESCLEN_SHR_MASK  0x3f
+
+/* If non-shared header, 7-bit length */
+#define HDR_DESCLEN_MASK      0x7f
+
+/* This is a TrustedDesc (if not SharedDesc) */
+#define HDR_TRUSTED             0x00004000
+
+/* Make into TrustedDesc (if not SharedDesc) */
+#define HDR_MAKE_TRUSTED        0x00002000
+
+/* Save context if self-shared (if SharedDesc) */
+#define HDR_SAVECTX             0x00001000
+
+/* Next item points to SharedDesc */
+#define HDR_SHARED              0x00001000
+
+/*
+ * Reverse Execution Order - execute JobDesc first, then
+ * execute SharedDesc (normally SharedDesc goes first).
+ */
+#define HDR_REVERSE             0x00000800
+
+/* Propogate DNR property to SharedDesc */
+#define HDR_PROP_DNR            0x00000800
+
+/* JobDesc/SharedDesc share property */
+#define HDR_SD_SHARE_MASK       0x03
+#define HDR_SD_SHARE_SHIFT      8
+#define HDR_JD_SHARE_MASK       0x07
+#define HDR_JD_SHARE_SHIFT      8
+
+#define HDR_SHARE_NEVER         (0x00 << HDR_SD_SHARE_SHIFT)
+#define HDR_SHARE_WAIT          (0x01 << HDR_SD_SHARE_SHIFT)
+#define HDR_SHARE_SERIAL        (0x02 << HDR_SD_SHARE_SHIFT)
+#define HDR_SHARE_ALWAYS        (0x03 << HDR_SD_SHARE_SHIFT)
+#define HDR_SHARE_DEFER         (0x04 << HDR_SD_SHARE_SHIFT)
+
+/* JobDesc/SharedDesc descriptor length */
+#define HDR_JD_LENGTH_MASK      0x7f
+#define HDR_SD_LENGTH_MASK      0x3f
+
+/*
+ * KEY/SEQ_KEY Command Constructs
+ */
+
+/* Key Destination Class: 01 = Class 1, 02 - Class 2  */
+#define KEY_DEST_CLASS_SHIFT    25  /* use CLASS_1 or CLASS_2 */
+#define KEY_DEST_CLASS_MASK     (0x03 << KEY_DEST_CLASS_SHIFT)
+
+/* Scatter-Gather Table/Variable Length Field */
+#define KEY_SGF                 0x01000000
+#define KEY_VLF                 0x01000000
+
+/* Immediate - Key follows command in the descriptor */
+#define KEY_IMM                 0x00800000
+
+/*
+ * Encrypted - Key is encrypted either with the KEK, or
+ * with the TDKEK if TK is set
+ */
+#define KEY_ENC                 0x00400000
+
+/*
+ * No Write Back - Do not allow key to be FIFO STOREd
+ */
+#define KEY_NWB			0x00200000
+
+/*
+ * Enhanced Encryption of Key
+ */
+#define KEY_EKT			0x00100000
+
+/*
+ * Encrypted with Trusted Key
+ */
+#define KEY_TK			0x00008000
+
+/*
+ * KDEST - Key Destination: 0 - class key register,
+ * 1 - PKHA 'e', 2 - AFHA Sbox, 3 - MDHA split-key
+ */
+#define KEY_DEST_SHIFT          16
+#define KEY_DEST_MASK           (0x03 << KEY_DEST_SHIFT)
+
+#define KEY_DEST_CLASS_REG      (0x00 << KEY_DEST_SHIFT)
+#define KEY_DEST_PKHA_E         (0x01 << KEY_DEST_SHIFT)
+#define KEY_DEST_AFHA_SBOX      (0x02 << KEY_DEST_SHIFT)
+#define KEY_DEST_MDHA_SPLIT     (0x03 << KEY_DEST_SHIFT)
+
+/* Length in bytes */
+#define KEY_LENGTH_MASK         0x000003ff
+
+/*
+ * LOAD/SEQ_LOAD/STORE/SEQ_STORE Command Constructs
+ */
+
+/*
+ * Load/Store Destination: 0 = class independent CCB,
+ * 1 = class 1 CCB, 2 = class 2 CCB, 3 = DECO
+ */
+#define LDST_CLASS_SHIFT        25
+#define LDST_CLASS_MASK         (0x03 << LDST_CLASS_SHIFT)
+#define LDST_CLASS_IND_CCB      (0x00 << LDST_CLASS_SHIFT)
+#define LDST_CLASS_1_CCB        (0x01 << LDST_CLASS_SHIFT)
+#define LDST_CLASS_2_CCB        (0x02 << LDST_CLASS_SHIFT)
+#define LDST_CLASS_DECO         (0x03 << LDST_CLASS_SHIFT)
+
+/* Scatter-Gather Table/Variable Length Field */
+#define LDST_SGF                0x01000000
+#define LDST_VLF		LDST_SGF
+
+/* Immediate - Key follows this command in descriptor    */
+#define LDST_IMM_MASK           1
+#define LDST_IMM_SHIFT          23
+#define LDST_IMM                (LDST_IMM_MASK << LDST_IMM_SHIFT)
+
+/* SRC/DST - Destination for LOAD, Source for STORE   */
+#define LDST_SRCDST_SHIFT       16
+#define LDST_SRCDST_MASK        (0x7f << LDST_SRCDST_SHIFT)
+
+#define LDST_SRCDST_BYTE_CONTEXT	(0x20 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_BYTE_KEY		(0x40 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_BYTE_INFIFO		(0x7c << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_BYTE_OUTFIFO	(0x7e << LDST_SRCDST_SHIFT)
+
+#define LDST_SRCDST_WORD_MODE_REG	(0x00 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_KEYSZ_REG	(0x01 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_DATASZ_REG	(0x02 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_ICVSZ_REG	(0x03 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_CHACTRL	(0x06 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_DECOCTRL       (0x06 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_IRQCTRL	(0x07 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_DECO_PCLOVRD   (0x07 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_CLRW		(0x08 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_DECO_MATH0     (0x08 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_STAT		(0x09 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_DECO_MATH1     (0x09 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_DECO_MATH2     (0x0a << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_DECO_AAD_SZ    (0x0b << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_DECO_MATH3     (0x0b << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_CLASS1_ICV_SZ  (0x0c << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_ALTDS_CLASS1   (0x0f << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_PKHA_A_SZ      (0x10 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_PKHA_B_SZ      (0x11 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_PKHA_N_SZ      (0x12 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_PKHA_E_SZ      (0x13 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_DESCBUF        (0x40 << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_INFO_FIFO      (0x7a << LDST_SRCDST_SHIFT)
+
+/* Offset in source/destination                        */
+#define LDST_OFFSET_SHIFT       8
+#define LDST_OFFSET_MASK        (0xff << LDST_OFFSET_SHIFT)
+
+/* LDOFF definitions used when DST = LDST_SRCDST_WORD_DECOCTRL */
+/* These could also be shifted by LDST_OFFSET_SHIFT - this reads better */
+#define LDOFF_CHG_SHARE_SHIFT        0
+#define LDOFF_CHG_SHARE_MASK         (0x3 << LDOFF_CHG_SHARE_SHIFT)
+#define LDOFF_CHG_SHARE_NEVER        (0x1 << LDOFF_CHG_SHARE_SHIFT)
+#define LDOFF_CHG_SHARE_OK_NO_PROP   (0x2 << LDOFF_CHG_SHARE_SHIFT)
+#define LDOFF_CHG_SHARE_OK_PROP      (0x3 << LDOFF_CHG_SHARE_SHIFT)
+
+#define LDOFF_ENABLE_AUTO_NFIFO         (1 << 2)
+#define LDOFF_DISABLE_AUTO_NFIFO        (1 << 3)
+
+#define LDOFF_CHG_NONSEQLIODN_SHIFT     4
+#define LDOFF_CHG_NONSEQLIODN_MASK      (0x3 << LDOFF_CHG_NONSEQLIODN_SHIFT)
+#define LDOFF_CHG_NONSEQLIODN_SEQ       (0x1 << LDOFF_CHG_NONSEQLIODN_SHIFT)
+#define LDOFF_CHG_NONSEQLIODN_NON_SEQ   (0x2 << LDOFF_CHG_NONSEQLIODN_SHIFT)
+#define LDOFF_CHG_NONSEQLIODN_TRUSTED   (0x3 << LDOFF_CHG_NONSEQLIODN_SHIFT)
+
+#define LDOFF_CHG_SEQLIODN_SHIFT     6
+#define LDOFF_CHG_SEQLIODN_MASK      (0x3 << LDOFF_CHG_SEQLIODN_SHIFT)
+#define LDOFF_CHG_SEQLIODN_SEQ       (0x1 << LDOFF_CHG_SEQLIODN_SHIFT)
+#define LDOFF_CHG_SEQLIODN_NON_SEQ   (0x2 << LDOFF_CHG_SEQLIODN_SHIFT)
+#define LDOFF_CHG_SEQLIODN_TRUSTED   (0x3 << LDOFF_CHG_SEQLIODN_SHIFT)
+
+/* Data length in bytes                                 */
+#define LDST_LEN_SHIFT          0
+#define LDST_LEN_MASK           (0xff << LDST_LEN_SHIFT)
+
+/* Special Length definitions when dst=deco-ctrl */
+#define LDLEN_ENABLE_OSL_COUNT      (1 << 7)
+#define LDLEN_RST_CHA_OFIFO_PTR     (1 << 6)
+#define LDLEN_RST_OFIFO             (1 << 5)
+#define LDLEN_SET_OFIFO_OFF_VALID   (1 << 4)
+#define LDLEN_SET_OFIFO_OFF_RSVD    (1 << 3)
+#define LDLEN_SET_OFIFO_OFFSET_SHIFT 0
+#define LDLEN_SET_OFIFO_OFFSET_MASK (3 << LDLEN_SET_OFIFO_OFFSET_SHIFT)
+
+/*
+ * FIFO_LOAD/FIFO_STORE/SEQ_FIFO_LOAD/SEQ_FIFO_STORE
+ * Command Constructs
+ */
+
+/*
+ * Load Destination: 0 = skip (SEQ_FIFO_LOAD only),
+ * 1 = Load for Class1, 2 = Load for Class2, 3 = Load both
+ * Store Source: 0 = normal, 1 = Class1key, 2 = Class2key
+ */
+#define FIFOLD_CLASS_SHIFT      25
+#define FIFOLD_CLASS_MASK       (0x03 << FIFOLD_CLASS_SHIFT)
+#define FIFOLD_CLASS_SKIP       (0x00 << FIFOLD_CLASS_SHIFT)
+#define FIFOLD_CLASS_CLASS1     (0x01 << FIFOLD_CLASS_SHIFT)
+#define FIFOLD_CLASS_CLASS2     (0x02 << FIFOLD_CLASS_SHIFT)
+#define FIFOLD_CLASS_BOTH       (0x03 << FIFOLD_CLASS_SHIFT)
+
+#define FIFOST_CLASS_SHIFT      25
+#define FIFOST_CLASS_MASK       (0x03 << FIFOST_CLASS_SHIFT)
+#define FIFOST_CLASS_NORMAL     (0x00 << FIFOST_CLASS_SHIFT)
+#define FIFOST_CLASS_CLASS1KEY  (0x01 << FIFOST_CLASS_SHIFT)
+#define FIFOST_CLASS_CLASS2KEY  (0x02 << FIFOST_CLASS_SHIFT)
+
+/*
+ * Scatter-Gather Table/Variable Length Field
+ * If set for FIFO_LOAD, refers to a SG table. Within
+ * SEQ_FIFO_LOAD, is variable input sequence
+ */
+#define FIFOLDST_SGF_SHIFT      24
+#define FIFOLDST_SGF_MASK       (1 << FIFOLDST_SGF_SHIFT)
+#define FIFOLDST_VLF_MASK       (1 << FIFOLDST_SGF_SHIFT)
+#define FIFOLDST_SGF            (1 << FIFOLDST_SGF_SHIFT)
+#define FIFOLDST_VLF            (1 << FIFOLDST_SGF_SHIFT)
+
+/* Immediate - Data follows command in descriptor */
+#define FIFOLD_IMM_SHIFT      23
+#define FIFOLD_IMM_MASK       (1 << FIFOLD_IMM_SHIFT)
+#define FIFOLD_IMM            (1 << FIFOLD_IMM_SHIFT)
+
+/* Continue - Not the last FIFO store to come */
+#define FIFOST_CONT_SHIFT     23
+#define FIFOST_CONT_MASK      (1 << FIFOST_CONT_SHIFT)
+#define FIFOST_CONT_MASK      (1 << FIFOST_CONT_SHIFT)
+
+/*
+ * Extended Length - use 32-bit extended length that
+ * follows the pointer field. Illegal with IMM set
+ */
+#define FIFOLDST_EXT_SHIFT      22
+#define FIFOLDST_EXT_MASK       (1 << FIFOLDST_EXT_SHIFT)
+#define FIFOLDST_EXT            (1 << FIFOLDST_EXT_SHIFT)
+
+/* Input data type.*/
+#define FIFOLD_TYPE_SHIFT       16
+#define FIFOLD_CONT_TYPE_SHIFT  19 /* shift past last-flush bits */
+#define FIFOLD_TYPE_MASK        (0x3f << FIFOLD_TYPE_SHIFT)
+
+/* PK types */
+#define FIFOLD_TYPE_PK          (0x00 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_MASK     (0x30 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_TYPEMASK (0x0f << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_A0       (0x00 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_A1       (0x01 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_A2       (0x02 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_A3       (0x03 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_B0       (0x04 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_B1       (0x05 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_B2       (0x06 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_B3       (0x07 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_N        (0x08 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_A        (0x0c << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_PK_B        (0x0d << FIFOLD_TYPE_SHIFT)
+
+/* Other types. Need to OR in last/flush bits as desired */
+#define FIFOLD_TYPE_MSG_MASK    (0x38 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_MSG         (0x10 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_MSG1OUT2    (0x18 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_IV          (0x20 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_BITDATA     (0x28 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_AAD         (0x30 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_ICV         (0x38 << FIFOLD_TYPE_SHIFT)
+
+/* Last/Flush bits for use with "other" types above */
+#define FIFOLD_TYPE_ACT_MASK    (0x07 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_NOACTION    (0x00 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_FLUSH1      (0x01 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_LAST1       (0x02 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_LAST2FLUSH  (0x03 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_LAST2       (0x04 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_LAST2FLUSH1 (0x05 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_LASTBOTH    (0x06 << FIFOLD_TYPE_SHIFT)
+#define FIFOLD_TYPE_LASTBOTHFL  (0x07 << FIFOLD_TYPE_SHIFT)
+
+#define FIFOLDST_LEN_MASK       0xffff
+#define FIFOLDST_EXT_LEN_MASK   0xffffffff
+
+/* Output data types */
+#define FIFOST_TYPE_SHIFT       16
+#define FIFOST_TYPE_MASK        (0x3f << FIFOST_TYPE_SHIFT)
+
+#define FIFOST_TYPE_PKHA_A0      (0x00 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_PKHA_A1      (0x01 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_PKHA_A2      (0x02 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_PKHA_A3      (0x03 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_PKHA_B0      (0x04 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_PKHA_B1      (0x05 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_PKHA_B2      (0x06 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_PKHA_B3      (0x07 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_PKHA_N       (0x08 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_PKHA_A       (0x0c << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_PKHA_B       (0x0d << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_AF_SBOX_JKEK (0x10 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_AF_SBOX_TKEK (0x21 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_PKHA_E_JKEK  (0x22 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_PKHA_E_TKEK  (0x23 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_KEY_KEK      (0x24 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_KEY_TKEK     (0x25 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_SPLIT_KEK    (0x26 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_SPLIT_TKEK   (0x27 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_OUTFIFO_KEK  (0x28 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_OUTFIFO_TKEK (0x29 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_MESSAGE_DATA (0x30 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_RNGSTORE     (0x34 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_RNGFIFO      (0x35 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_SKIP         (0x3f << FIFOST_TYPE_SHIFT)
+
+/*
+ * OPERATION Command Constructs
+ */
+
+/* Operation type selectors - OP TYPE */
+#define OP_TYPE_SHIFT           24
+#define OP_TYPE_MASK            (0x07 << OP_TYPE_SHIFT)
+
+#define OP_TYPE_UNI_PROTOCOL    (0x00 << OP_TYPE_SHIFT)
+#define OP_TYPE_PK              (0x01 << OP_TYPE_SHIFT)
+#define OP_TYPE_CLASS1_ALG      (0x02 << OP_TYPE_SHIFT)
+#define OP_TYPE_CLASS2_ALG      (0x04 << OP_TYPE_SHIFT)
+#define OP_TYPE_DECAP_PROTOCOL  (0x06 << OP_TYPE_SHIFT)
+#define OP_TYPE_ENCAP_PROTOCOL  (0x07 << OP_TYPE_SHIFT)
+
+/* ProtocolID selectors - PROTID */
+#define OP_PCLID_SHIFT          16
+#define OP_PCLID_MASK           (0xff << 16)
+
+/* Assuming OP_TYPE = OP_TYPE_UNI_PROTOCOL */
+#define OP_PCLID_IKEV1_PRF      (0x01 << OP_PCLID_SHIFT)
+#define OP_PCLID_IKEV2_PRF      (0x02 << OP_PCLID_SHIFT)
+#define OP_PCLID_SSL30_PRF      (0x08 << OP_PCLID_SHIFT)
+#define OP_PCLID_TLS10_PRF      (0x09 << OP_PCLID_SHIFT)
+#define OP_PCLID_TLS11_PRF      (0x0a << OP_PCLID_SHIFT)
+#define OP_PCLID_DTLS10_PRF     (0x0c << OP_PCLID_SHIFT)
+#define OP_PCLID_PRF            (0x06 << OP_PCLID_SHIFT)
+#define OP_PCLID_BLOB           (0x0d << OP_PCLID_SHIFT)
+#define OP_PCLID_SECRETKEY      (0x11 << OP_PCLID_SHIFT)
+#define OP_PCLID_PUBLICKEYPAIR  (0x14 << OP_PCLID_SHIFT)
+#define OP_PCLID_DSASIGN        (0x15 << OP_PCLID_SHIFT)
+#define OP_PCLID_DSAVERIFY      (0x16 << OP_PCLID_SHIFT)
+
+/* Assuming OP_TYPE = OP_TYPE_DECAP_PROTOCOL/ENCAP_PROTOCOL */
+#define OP_PCLID_IPSEC          (0x01 << OP_PCLID_SHIFT)
+#define OP_PCLID_SRTP           (0x02 << OP_PCLID_SHIFT)
+#define OP_PCLID_MACSEC         (0x03 << OP_PCLID_SHIFT)
+#define OP_PCLID_WIFI           (0x04 << OP_PCLID_SHIFT)
+#define OP_PCLID_WIMAX          (0x05 << OP_PCLID_SHIFT)
+#define OP_PCLID_SSL30          (0x08 << OP_PCLID_SHIFT)
+#define OP_PCLID_TLS10          (0x09 << OP_PCLID_SHIFT)
+#define OP_PCLID_TLS11          (0x0a << OP_PCLID_SHIFT)
+#define OP_PCLID_TLS12          (0x0b << OP_PCLID_SHIFT)
+#define OP_PCLID_DTLS           (0x0c << OP_PCLID_SHIFT)
+
+/*
+ * ProtocolInfo selectors
+ */
+#define OP_PCLINFO_MASK                          0xffff
+
+/* for OP_PCLID_IPSEC */
+#define OP_PCL_IPSEC_CIPHER_MASK                 0xff00
+#define OP_PCL_IPSEC_AUTH_MASK                   0x00ff
+
+#define OP_PCL_IPSEC_DES_IV64                    0x0100
+#define OP_PCL_IPSEC_DES                         0x0200
+#define OP_PCL_IPSEC_3DES                        0x0300
+#define OP_PCL_IPSEC_AES_CBC                     0x0c00
+#define OP_PCL_IPSEC_AES_CTR                     0x0d00
+#define OP_PCL_IPSEC_AES_XTS                     0x1600
+#define OP_PCL_IPSEC_AES_CCM8                    0x0e00
+#define OP_PCL_IPSEC_AES_CCM12                   0x0f00
+#define OP_PCL_IPSEC_AES_CCM16                   0x1000
+#define OP_PCL_IPSEC_AES_GCM8                    0x1200
+#define OP_PCL_IPSEC_AES_GCM12                   0x1300
+#define OP_PCL_IPSEC_AES_GCM16                   0x1400
+
+#define OP_PCL_IPSEC_HMAC_NULL                   0x0000
+#define OP_PCL_IPSEC_HMAC_MD5_96                 0x0001
+#define OP_PCL_IPSEC_HMAC_SHA1_96                0x0002
+#define OP_PCL_IPSEC_AES_XCBC_MAC_96             0x0005
+#define OP_PCL_IPSEC_HMAC_MD5_128                0x0006
+#define OP_PCL_IPSEC_HMAC_SHA1_160               0x0007
+#define OP_PCL_IPSEC_HMAC_SHA2_256_128           0x000c
+#define OP_PCL_IPSEC_HMAC_SHA2_384_192           0x000d
+#define OP_PCL_IPSEC_HMAC_SHA2_512_256           0x000e
+
+/* For SRTP - OP_PCLID_SRTP */
+#define OP_PCL_SRTP_CIPHER_MASK                  0xff00
+#define OP_PCL_SRTP_AUTH_MASK                    0x00ff
+
+#define OP_PCL_SRTP_AES_CTR                      0x0d00
+
+#define OP_PCL_SRTP_HMAC_SHA1_160                0x0007
+
+/* For SSL 3.0 - OP_PCLID_SSL30 */
+#define OP_PCL_SSL30_AES_128_CBC_SHA             0x002f
+#define OP_PCL_SSL30_AES_128_CBC_SHA_2           0x0030
+#define OP_PCL_SSL30_AES_128_CBC_SHA_3           0x0031
+#define OP_PCL_SSL30_AES_128_CBC_SHA_4           0x0032
+#define OP_PCL_SSL30_AES_128_CBC_SHA_5           0x0033
+#define OP_PCL_SSL30_AES_128_CBC_SHA_6           0x0034
+#define OP_PCL_SSL30_AES_128_CBC_SHA_7           0x008c
+#define OP_PCL_SSL30_AES_128_CBC_SHA_8           0x0090
+#define OP_PCL_SSL30_AES_128_CBC_SHA_9           0x0094
+#define OP_PCL_SSL30_AES_128_CBC_SHA_10          0xc004
+#define OP_PCL_SSL30_AES_128_CBC_SHA_11          0xc009
+#define OP_PCL_SSL30_AES_128_CBC_SHA_12          0xc00e
+#define OP_PCL_SSL30_AES_128_CBC_SHA_13          0xc013
+#define OP_PCL_SSL30_AES_128_CBC_SHA_14          0xc018
+#define OP_PCL_SSL30_AES_128_CBC_SHA_15          0xc01d
+#define OP_PCL_SSL30_AES_128_CBC_SHA_16          0xc01e
+#define OP_PCL_SSL30_AES_128_CBC_SHA_17          0xc01f
+
+#define OP_PCL_SSL30_AES_256_CBC_SHA             0x0035
+#define OP_PCL_SSL30_AES_256_CBC_SHA_2           0x0036
+#define OP_PCL_SSL30_AES_256_CBC_SHA_3           0x0037
+#define OP_PCL_SSL30_AES_256_CBC_SHA_4           0x0038
+#define OP_PCL_SSL30_AES_256_CBC_SHA_5           0x0039
+#define OP_PCL_SSL30_AES_256_CBC_SHA_6           0x003a
+#define OP_PCL_SSL30_AES_256_CBC_SHA_7           0x008d
+#define OP_PCL_SSL30_AES_256_CBC_SHA_8           0x0091
+#define OP_PCL_SSL30_AES_256_CBC_SHA_9           0x0095
+#define OP_PCL_SSL30_AES_256_CBC_SHA_10          0xc005
+#define OP_PCL_SSL30_AES_256_CBC_SHA_11          0xc00a
+#define OP_PCL_SSL30_AES_256_CBC_SHA_12          0xc00f
+#define OP_PCL_SSL30_AES_256_CBC_SHA_13          0xc014
+#define OP_PCL_SSL30_AES_256_CBC_SHA_14          0xc019
+#define OP_PCL_SSL30_AES_256_CBC_SHA_15          0xc020
+#define OP_PCL_SSL30_AES_256_CBC_SHA_16          0xc021
+#define OP_PCL_SSL30_AES_256_CBC_SHA_17          0xc022
+
+#define OP_PCL_SSL30_3DES_EDE_CBC_MD5            0x0023
+
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA            0x001f
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_2          0x008b
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_3          0x008f
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_4          0x0093
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_5          0x000a
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_6          0x000d
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_7          0x0010
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_8          0x0013
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_9          0x0016
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_10         0x001b
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_11         0xc003
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_12         0xc008
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_13         0xc00d
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_14         0xc012
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_15         0xc017
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_16         0xc01a
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_17         0xc01b
+#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_18         0xc01c
+
+#define OP_PCL_SSL30_DES40_CBC_MD5               0x0029
+
+#define OP_PCL_SSL30_DES_CBC_MD5                 0x0022
+
+#define OP_PCL_SSL30_DES40_CBC_SHA               0x0008
+#define OP_PCL_SSL30_DES40_CBC_SHA_2             0x000b
+#define OP_PCL_SSL30_DES40_CBC_SHA_3             0x000e
+#define OP_PCL_SSL30_DES40_CBC_SHA_4             0x0011
+#define OP_PCL_SSL30_DES40_CBC_SHA_5             0x0014
+#define OP_PCL_SSL30_DES40_CBC_SHA_6             0x0019
+#define OP_PCL_SSL30_DES40_CBC_SHA_7             0x0026
+
+#define OP_PCL_SSL30_DES_CBC_SHA                 0x001e
+#define OP_PCL_SSL30_DES_CBC_SHA_2               0x0009
+#define OP_PCL_SSL30_DES_CBC_SHA_3               0x000c
+#define OP_PCL_SSL30_DES_CBC_SHA_4               0x000f
+#define OP_PCL_SSL30_DES_CBC_SHA_5               0x0012
+#define OP_PCL_SSL30_DES_CBC_SHA_6               0x0015
+#define OP_PCL_SSL30_DES_CBC_SHA_7               0x001a
+
+#define OP_PCL_SSL30_RC4_128_MD5                 0x0024
+#define OP_PCL_SSL30_RC4_128_MD5_2               0x0004
+#define OP_PCL_SSL30_RC4_128_MD5_3               0x0018
+
+#define OP_PCL_SSL30_RC4_40_MD5                  0x002b
+#define OP_PCL_SSL30_RC4_40_MD5_2                0x0003
+#define OP_PCL_SSL30_RC4_40_MD5_3                0x0017
+
+#define OP_PCL_SSL30_RC4_128_SHA                 0x0020
+#define OP_PCL_SSL30_RC4_128_SHA_2               0x008a
+#define OP_PCL_SSL30_RC4_128_SHA_3               0x008e
+#define OP_PCL_SSL30_RC4_128_SHA_4               0x0092
+#define OP_PCL_SSL30_RC4_128_SHA_5               0x0005
+#define OP_PCL_SSL30_RC4_128_SHA_6               0xc002
+#define OP_PCL_SSL30_RC4_128_SHA_7               0xc007
+#define OP_PCL_SSL30_RC4_128_SHA_8               0xc00c
+#define OP_PCL_SSL30_RC4_128_SHA_9               0xc011
+#define OP_PCL_SSL30_RC4_128_SHA_10              0xc016
+
+#define OP_PCL_SSL30_RC4_40_SHA                  0x0028
+
+
+/* For TLS 1.0 - OP_PCLID_TLS10 */
+#define OP_PCL_TLS10_AES_128_CBC_SHA             0x002f
+#define OP_PCL_TLS10_AES_128_CBC_SHA_2           0x0030
+#define OP_PCL_TLS10_AES_128_CBC_SHA_3           0x0031
+#define OP_PCL_TLS10_AES_128_CBC_SHA_4           0x0032
+#define OP_PCL_TLS10_AES_128_CBC_SHA_5           0x0033
+#define OP_PCL_TLS10_AES_128_CBC_SHA_6           0x0034
+#define OP_PCL_TLS10_AES_128_CBC_SHA_7           0x008c
+#define OP_PCL_TLS10_AES_128_CBC_SHA_8           0x0090
+#define OP_PCL_TLS10_AES_128_CBC_SHA_9           0x0094
+#define OP_PCL_TLS10_AES_128_CBC_SHA_10          0xc004
+#define OP_PCL_TLS10_AES_128_CBC_SHA_11          0xc009
+#define OP_PCL_TLS10_AES_128_CBC_SHA_12          0xc00e
+#define OP_PCL_TLS10_AES_128_CBC_SHA_13          0xc013
+#define OP_PCL_TLS10_AES_128_CBC_SHA_14          0xc018
+#define OP_PCL_TLS10_AES_128_CBC_SHA_15          0xc01d
+#define OP_PCL_TLS10_AES_128_CBC_SHA_16          0xc01e
+#define OP_PCL_TLS10_AES_128_CBC_SHA_17          0xc01f
+
+#define OP_PCL_TLS10_AES_256_CBC_SHA             0x0035
+#define OP_PCL_TLS10_AES_256_CBC_SHA_2           0x0036
+#define OP_PCL_TLS10_AES_256_CBC_SHA_3           0x0037
+#define OP_PCL_TLS10_AES_256_CBC_SHA_4           0x0038
+#define OP_PCL_TLS10_AES_256_CBC_SHA_5           0x0039
+#define OP_PCL_TLS10_AES_256_CBC_SHA_6           0x003a
+#define OP_PCL_TLS10_AES_256_CBC_SHA_7           0x008d
+#define OP_PCL_TLS10_AES_256_CBC_SHA_8           0x0091
+#define OP_PCL_TLS10_AES_256_CBC_SHA_9           0x0095
+#define OP_PCL_TLS10_AES_256_CBC_SHA_10          0xc005
+#define OP_PCL_TLS10_AES_256_CBC_SHA_11          0xc00a
+#define OP_PCL_TLS10_AES_256_CBC_SHA_12          0xc00f
+#define OP_PCL_TLS10_AES_256_CBC_SHA_13          0xc014
+#define OP_PCL_TLS10_AES_256_CBC_SHA_14          0xc019
+#define OP_PCL_TLS10_AES_256_CBC_SHA_15          0xc020
+#define OP_PCL_TLS10_AES_256_CBC_SHA_16          0xc021
+#define OP_PCL_TLS10_AES_256_CBC_SHA_17          0xc022
+
+/* #define OP_PCL_TLS10_3DES_EDE_CBC_MD5            0x0023 */
+
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA            0x001f
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_2          0x008b
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_3          0x008f
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_4          0x0093
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_5          0x000a
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_6          0x000d
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_7          0x0010
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_8          0x0013
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_9          0x0016
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_10         0x001b
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_11         0xc003
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_12         0xc008
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_13         0xc00d
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_14         0xc012
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_15         0xc017
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_16         0xc01a
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_17         0xc01b
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_18         0xc01c
+
+#define OP_PCL_TLS10_DES40_CBC_MD5               0x0029
+
+#define OP_PCL_TLS10_DES_CBC_MD5                 0x0022
+
+#define OP_PCL_TLS10_DES40_CBC_SHA               0x0008
+#define OP_PCL_TLS10_DES40_CBC_SHA_2             0x000b
+#define OP_PCL_TLS10_DES40_CBC_SHA_3             0x000e
+#define OP_PCL_TLS10_DES40_CBC_SHA_4             0x0011
+#define OP_PCL_TLS10_DES40_CBC_SHA_5             0x0014
+#define OP_PCL_TLS10_DES40_CBC_SHA_6             0x0019
+#define OP_PCL_TLS10_DES40_CBC_SHA_7             0x0026
+
+
+#define OP_PCL_TLS10_DES_CBC_SHA                 0x001e
+#define OP_PCL_TLS10_DES_CBC_SHA_2               0x0009
+#define OP_PCL_TLS10_DES_CBC_SHA_3               0x000c
+#define OP_PCL_TLS10_DES_CBC_SHA_4               0x000f
+#define OP_PCL_TLS10_DES_CBC_SHA_5               0x0012
+#define OP_PCL_TLS10_DES_CBC_SHA_6               0x0015
+#define OP_PCL_TLS10_DES_CBC_SHA_7               0x001a
+
+#define OP_PCL_TLS10_RC4_128_MD5                 0x0024
+#define OP_PCL_TLS10_RC4_128_MD5_2               0x0004
+#define OP_PCL_TLS10_RC4_128_MD5_3               0x0018
+
+#define OP_PCL_TLS10_RC4_40_MD5                  0x002b
+#define OP_PCL_TLS10_RC4_40_MD5_2                0x0003
+#define OP_PCL_TLS10_RC4_40_MD5_3                0x0017
+
+#define OP_PCL_TLS10_RC4_128_SHA                 0x0020
+#define OP_PCL_TLS10_RC4_128_SHA_2               0x008a
+#define OP_PCL_TLS10_RC4_128_SHA_3               0x008e
+#define OP_PCL_TLS10_RC4_128_SHA_4               0x0092
+#define OP_PCL_TLS10_RC4_128_SHA_5               0x0005
+#define OP_PCL_TLS10_RC4_128_SHA_6               0xc002
+#define OP_PCL_TLS10_RC4_128_SHA_7               0xc007
+#define OP_PCL_TLS10_RC4_128_SHA_8               0xc00c
+#define OP_PCL_TLS10_RC4_128_SHA_9               0xc011
+#define OP_PCL_TLS10_RC4_128_SHA_10              0xc016
+
+#define OP_PCL_TLS10_RC4_40_SHA                  0x0028
+
+#define OP_PCL_TLS10_3DES_EDE_CBC_MD5            0xff23
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA160         0xff30
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA224         0xff34
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA256         0xff36
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA384         0xff33
+#define OP_PCL_TLS10_3DES_EDE_CBC_SHA512         0xff35
+#define OP_PCL_TLS10_AES_128_CBC_SHA160          0xff80
+#define OP_PCL_TLS10_AES_128_CBC_SHA224          0xff84
+#define OP_PCL_TLS10_AES_128_CBC_SHA256          0xff86
+#define OP_PCL_TLS10_AES_128_CBC_SHA384          0xff83
+#define OP_PCL_TLS10_AES_128_CBC_SHA512          0xff85
+#define OP_PCL_TLS10_AES_192_CBC_SHA160          0xff20
+#define OP_PCL_TLS10_AES_192_CBC_SHA224          0xff24
+#define OP_PCL_TLS10_AES_192_CBC_SHA256          0xff26
+#define OP_PCL_TLS10_AES_192_CBC_SHA384          0xff23
+#define OP_PCL_TLS10_AES_192_CBC_SHA512          0xff25
+#define OP_PCL_TLS10_AES_256_CBC_SHA160          0xff60
+#define OP_PCL_TLS10_AES_256_CBC_SHA224          0xff64
+#define OP_PCL_TLS10_AES_256_CBC_SHA256          0xff66
+#define OP_PCL_TLS10_AES_256_CBC_SHA384          0xff63
+#define OP_PCL_TLS10_AES_256_CBC_SHA512          0xff65
+
+
+
+/* For TLS 1.1 - OP_PCLID_TLS11 */
+#define OP_PCL_TLS11_AES_128_CBC_SHA             0x002f
+#define OP_PCL_TLS11_AES_128_CBC_SHA_2           0x0030
+#define OP_PCL_TLS11_AES_128_CBC_SHA_3           0x0031
+#define OP_PCL_TLS11_AES_128_CBC_SHA_4           0x0032
+#define OP_PCL_TLS11_AES_128_CBC_SHA_5           0x0033
+#define OP_PCL_TLS11_AES_128_CBC_SHA_6           0x0034
+#define OP_PCL_TLS11_AES_128_CBC_SHA_7           0x008c
+#define OP_PCL_TLS11_AES_128_CBC_SHA_8           0x0090
+#define OP_PCL_TLS11_AES_128_CBC_SHA_9           0x0094
+#define OP_PCL_TLS11_AES_128_CBC_SHA_10          0xc004
+#define OP_PCL_TLS11_AES_128_CBC_SHA_11          0xc009
+#define OP_PCL_TLS11_AES_128_CBC_SHA_12          0xc00e
+#define OP_PCL_TLS11_AES_128_CBC_SHA_13          0xc013
+#define OP_PCL_TLS11_AES_128_CBC_SHA_14          0xc018
+#define OP_PCL_TLS11_AES_128_CBC_SHA_15          0xc01d
+#define OP_PCL_TLS11_AES_128_CBC_SHA_16          0xc01e
+#define OP_PCL_TLS11_AES_128_CBC_SHA_17          0xc01f
+
+#define OP_PCL_TLS11_AES_256_CBC_SHA             0x0035
+#define OP_PCL_TLS11_AES_256_CBC_SHA_2           0x0036
+#define OP_PCL_TLS11_AES_256_CBC_SHA_3           0x0037
+#define OP_PCL_TLS11_AES_256_CBC_SHA_4           0x0038
+#define OP_PCL_TLS11_AES_256_CBC_SHA_5           0x0039
+#define OP_PCL_TLS11_AES_256_CBC_SHA_6           0x003a
+#define OP_PCL_TLS11_AES_256_CBC_SHA_7           0x008d
+#define OP_PCL_TLS11_AES_256_CBC_SHA_8           0x0091
+#define OP_PCL_TLS11_AES_256_CBC_SHA_9           0x0095
+#define OP_PCL_TLS11_AES_256_CBC_SHA_10          0xc005
+#define OP_PCL_TLS11_AES_256_CBC_SHA_11          0xc00a
+#define OP_PCL_TLS11_AES_256_CBC_SHA_12          0xc00f
+#define OP_PCL_TLS11_AES_256_CBC_SHA_13          0xc014
+#define OP_PCL_TLS11_AES_256_CBC_SHA_14          0xc019
+#define OP_PCL_TLS11_AES_256_CBC_SHA_15          0xc020
+#define OP_PCL_TLS11_AES_256_CBC_SHA_16          0xc021
+#define OP_PCL_TLS11_AES_256_CBC_SHA_17          0xc022
+
+/* #define OP_PCL_TLS11_3DES_EDE_CBC_MD5            0x0023 */
+
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA            0x001f
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_2          0x008b
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_3          0x008f
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_4          0x0093
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_5          0x000a
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_6          0x000d
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_7          0x0010
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_8          0x0013
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_9          0x0016
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_10         0x001b
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_11         0xc003
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_12         0xc008
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_13         0xc00d
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_14         0xc012
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_15         0xc017
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_16         0xc01a
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_17         0xc01b
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_18         0xc01c
+
+#define OP_PCL_TLS11_DES40_CBC_MD5               0x0029
+
+#define OP_PCL_TLS11_DES_CBC_MD5                 0x0022
+
+#define OP_PCL_TLS11_DES40_CBC_SHA               0x0008
+#define OP_PCL_TLS11_DES40_CBC_SHA_2             0x000b
+#define OP_PCL_TLS11_DES40_CBC_SHA_3             0x000e
+#define OP_PCL_TLS11_DES40_CBC_SHA_4             0x0011
+#define OP_PCL_TLS11_DES40_CBC_SHA_5             0x0014
+#define OP_PCL_TLS11_DES40_CBC_SHA_6             0x0019
+#define OP_PCL_TLS11_DES40_CBC_SHA_7             0x0026
+
+#define OP_PCL_TLS11_DES_CBC_SHA                 0x001e
+#define OP_PCL_TLS11_DES_CBC_SHA_2               0x0009
+#define OP_PCL_TLS11_DES_CBC_SHA_3               0x000c
+#define OP_PCL_TLS11_DES_CBC_SHA_4               0x000f
+#define OP_PCL_TLS11_DES_CBC_SHA_5               0x0012
+#define OP_PCL_TLS11_DES_CBC_SHA_6               0x0015
+#define OP_PCL_TLS11_DES_CBC_SHA_7               0x001a
+
+#define OP_PCL_TLS11_RC4_128_MD5                 0x0024
+#define OP_PCL_TLS11_RC4_128_MD5_2               0x0004
+#define OP_PCL_TLS11_RC4_128_MD5_3               0x0018
+
+#define OP_PCL_TLS11_RC4_40_MD5                  0x002b
+#define OP_PCL_TLS11_RC4_40_MD5_2                0x0003
+#define OP_PCL_TLS11_RC4_40_MD5_3                0x0017
+
+#define OP_PCL_TLS11_RC4_128_SHA                 0x0020
+#define OP_PCL_TLS11_RC4_128_SHA_2               0x008a
+#define OP_PCL_TLS11_RC4_128_SHA_3               0x008e
+#define OP_PCL_TLS11_RC4_128_SHA_4               0x0092
+#define OP_PCL_TLS11_RC4_128_SHA_5               0x0005
+#define OP_PCL_TLS11_RC4_128_SHA_6               0xc002
+#define OP_PCL_TLS11_RC4_128_SHA_7               0xc007
+#define OP_PCL_TLS11_RC4_128_SHA_8               0xc00c
+#define OP_PCL_TLS11_RC4_128_SHA_9               0xc011
+#define OP_PCL_TLS11_RC4_128_SHA_10              0xc016
+
+#define OP_PCL_TLS11_RC4_40_SHA                  0x0028
+
+#define OP_PCL_TLS11_3DES_EDE_CBC_MD5            0xff23
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA160         0xff30
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA224         0xff34
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA256         0xff36
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA384         0xff33
+#define OP_PCL_TLS11_3DES_EDE_CBC_SHA512         0xff35
+#define OP_PCL_TLS11_AES_128_CBC_SHA160          0xff80
+#define OP_PCL_TLS11_AES_128_CBC_SHA224          0xff84
+#define OP_PCL_TLS11_AES_128_CBC_SHA256          0xff86
+#define OP_PCL_TLS11_AES_128_CBC_SHA384          0xff83
+#define OP_PCL_TLS11_AES_128_CBC_SHA512          0xff85
+#define OP_PCL_TLS11_AES_192_CBC_SHA160          0xff20
+#define OP_PCL_TLS11_AES_192_CBC_SHA224          0xff24
+#define OP_PCL_TLS11_AES_192_CBC_SHA256          0xff26
+#define OP_PCL_TLS11_AES_192_CBC_SHA384          0xff23
+#define OP_PCL_TLS11_AES_192_CBC_SHA512          0xff25
+#define OP_PCL_TLS11_AES_256_CBC_SHA160          0xff60
+#define OP_PCL_TLS11_AES_256_CBC_SHA224          0xff64
+#define OP_PCL_TLS11_AES_256_CBC_SHA256          0xff66
+#define OP_PCL_TLS11_AES_256_CBC_SHA384          0xff63
+#define OP_PCL_TLS11_AES_256_CBC_SHA512          0xff65
+
+
+/* For TLS 1.2 - OP_PCLID_TLS12 */
+#define OP_PCL_TLS12_AES_128_CBC_SHA             0x002f
+#define OP_PCL_TLS12_AES_128_CBC_SHA_2           0x0030
+#define OP_PCL_TLS12_AES_128_CBC_SHA_3           0x0031
+#define OP_PCL_TLS12_AES_128_CBC_SHA_4           0x0032
+#define OP_PCL_TLS12_AES_128_CBC_SHA_5           0x0033
+#define OP_PCL_TLS12_AES_128_CBC_SHA_6           0x0034
+#define OP_PCL_TLS12_AES_128_CBC_SHA_7           0x008c
+#define OP_PCL_TLS12_AES_128_CBC_SHA_8           0x0090
+#define OP_PCL_TLS12_AES_128_CBC_SHA_9           0x0094
+#define OP_PCL_TLS12_AES_128_CBC_SHA_10          0xc004
+#define OP_PCL_TLS12_AES_128_CBC_SHA_11          0xc009
+#define OP_PCL_TLS12_AES_128_CBC_SHA_12          0xc00e
+#define OP_PCL_TLS12_AES_128_CBC_SHA_13          0xc013
+#define OP_PCL_TLS12_AES_128_CBC_SHA_14          0xc018
+#define OP_PCL_TLS12_AES_128_CBC_SHA_15          0xc01d
+#define OP_PCL_TLS12_AES_128_CBC_SHA_16          0xc01e
+#define OP_PCL_TLS12_AES_128_CBC_SHA_17          0xc01f
+
+#define OP_PCL_TLS12_AES_256_CBC_SHA             0x0035
+#define OP_PCL_TLS12_AES_256_CBC_SHA_2           0x0036
+#define OP_PCL_TLS12_AES_256_CBC_SHA_3           0x0037
+#define OP_PCL_TLS12_AES_256_CBC_SHA_4           0x0038
+#define OP_PCL_TLS12_AES_256_CBC_SHA_5           0x0039
+#define OP_PCL_TLS12_AES_256_CBC_SHA_6           0x003a
+#define OP_PCL_TLS12_AES_256_CBC_SHA_7           0x008d
+#define OP_PCL_TLS12_AES_256_CBC_SHA_8           0x0091
+#define OP_PCL_TLS12_AES_256_CBC_SHA_9           0x0095
+#define OP_PCL_TLS12_AES_256_CBC_SHA_10          0xc005
+#define OP_PCL_TLS12_AES_256_CBC_SHA_11          0xc00a
+#define OP_PCL_TLS12_AES_256_CBC_SHA_12          0xc00f
+#define OP_PCL_TLS12_AES_256_CBC_SHA_13          0xc014
+#define OP_PCL_TLS12_AES_256_CBC_SHA_14          0xc019
+#define OP_PCL_TLS12_AES_256_CBC_SHA_15          0xc020
+#define OP_PCL_TLS12_AES_256_CBC_SHA_16          0xc021
+#define OP_PCL_TLS12_AES_256_CBC_SHA_17          0xc022
+
+/* #define OP_PCL_TLS12_3DES_EDE_CBC_MD5            0x0023 */
+
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA            0x001f
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_2          0x008b
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_3          0x008f
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_4          0x0093
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_5          0x000a
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_6          0x000d
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_7          0x0010
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_8          0x0013
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_9          0x0016
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_10         0x001b
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_11         0xc003
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_12         0xc008
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_13         0xc00d
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_14         0xc012
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_15         0xc017
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_16         0xc01a
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_17         0xc01b
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_18         0xc01c
+
+#define OP_PCL_TLS12_DES40_CBC_MD5               0x0029
+
+#define OP_PCL_TLS12_DES_CBC_MD5                 0x0022
+
+#define OP_PCL_TLS12_DES40_CBC_SHA               0x0008
+#define OP_PCL_TLS12_DES40_CBC_SHA_2             0x000b
+#define OP_PCL_TLS12_DES40_CBC_SHA_3             0x000e
+#define OP_PCL_TLS12_DES40_CBC_SHA_4             0x0011
+#define OP_PCL_TLS12_DES40_CBC_SHA_5             0x0014
+#define OP_PCL_TLS12_DES40_CBC_SHA_6             0x0019
+#define OP_PCL_TLS12_DES40_CBC_SHA_7             0x0026
+
+#define OP_PCL_TLS12_DES_CBC_SHA                 0x001e
+#define OP_PCL_TLS12_DES_CBC_SHA_2               0x0009
+#define OP_PCL_TLS12_DES_CBC_SHA_3               0x000c
+#define OP_PCL_TLS12_DES_CBC_SHA_4               0x000f
+#define OP_PCL_TLS12_DES_CBC_SHA_5               0x0012
+#define OP_PCL_TLS12_DES_CBC_SHA_6               0x0015
+#define OP_PCL_TLS12_DES_CBC_SHA_7               0x001a
+
+#define OP_PCL_TLS12_RC4_128_MD5                 0x0024
+#define OP_PCL_TLS12_RC4_128_MD5_2               0x0004
+#define OP_PCL_TLS12_RC4_128_MD5_3               0x0018
+
+#define OP_PCL_TLS12_RC4_40_MD5                  0x002b
+#define OP_PCL_TLS12_RC4_40_MD5_2                0x0003
+#define OP_PCL_TLS12_RC4_40_MD5_3                0x0017
+
+#define OP_PCL_TLS12_RC4_128_SHA                 0x0020
+#define OP_PCL_TLS12_RC4_128_SHA_2               0x008a
+#define OP_PCL_TLS12_RC4_128_SHA_3               0x008e
+#define OP_PCL_TLS12_RC4_128_SHA_4               0x0092
+#define OP_PCL_TLS12_RC4_128_SHA_5               0x0005
+#define OP_PCL_TLS12_RC4_128_SHA_6               0xc002
+#define OP_PCL_TLS12_RC4_128_SHA_7               0xc007
+#define OP_PCL_TLS12_RC4_128_SHA_8               0xc00c
+#define OP_PCL_TLS12_RC4_128_SHA_9               0xc011
+#define OP_PCL_TLS12_RC4_128_SHA_10              0xc016
+
+#define OP_PCL_TLS12_RC4_40_SHA                  0x0028
+
+/* #define OP_PCL_TLS12_AES_128_CBC_SHA256          0x003c */
+#define OP_PCL_TLS12_AES_128_CBC_SHA256_2        0x003e
+#define OP_PCL_TLS12_AES_128_CBC_SHA256_3        0x003f
+#define OP_PCL_TLS12_AES_128_CBC_SHA256_4        0x0040
+#define OP_PCL_TLS12_AES_128_CBC_SHA256_5        0x0067
+#define OP_PCL_TLS12_AES_128_CBC_SHA256_6        0x006c
+
+/* #define OP_PCL_TLS12_AES_256_CBC_SHA256          0x003d */
+#define OP_PCL_TLS12_AES_256_CBC_SHA256_2        0x0068
+#define OP_PCL_TLS12_AES_256_CBC_SHA256_3        0x0069
+#define OP_PCL_TLS12_AES_256_CBC_SHA256_4        0x006a
+#define OP_PCL_TLS12_AES_256_CBC_SHA256_5        0x006b
+#define OP_PCL_TLS12_AES_256_CBC_SHA256_6        0x006d
+
+/* AEAD_AES_xxx_CCM/GCM remain to be defined... */
+
+#define OP_PCL_TLS12_3DES_EDE_CBC_MD5            0xff23
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA160         0xff30
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA224         0xff34
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA256         0xff36
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA384         0xff33
+#define OP_PCL_TLS12_3DES_EDE_CBC_SHA512         0xff35
+#define OP_PCL_TLS12_AES_128_CBC_SHA160          0xff80
+#define OP_PCL_TLS12_AES_128_CBC_SHA224          0xff84
+#define OP_PCL_TLS12_AES_128_CBC_SHA256          0xff86
+#define OP_PCL_TLS12_AES_128_CBC_SHA384          0xff83
+#define OP_PCL_TLS12_AES_128_CBC_SHA512          0xff85
+#define OP_PCL_TLS12_AES_192_CBC_SHA160          0xff20
+#define OP_PCL_TLS12_AES_192_CBC_SHA224          0xff24
+#define OP_PCL_TLS12_AES_192_CBC_SHA256          0xff26
+#define OP_PCL_TLS12_AES_192_CBC_SHA384          0xff23
+#define OP_PCL_TLS12_AES_192_CBC_SHA512          0xff25
+#define OP_PCL_TLS12_AES_256_CBC_SHA160          0xff60
+#define OP_PCL_TLS12_AES_256_CBC_SHA224          0xff64
+#define OP_PCL_TLS12_AES_256_CBC_SHA256          0xff66
+#define OP_PCL_TLS12_AES_256_CBC_SHA384          0xff63
+#define OP_PCL_TLS12_AES_256_CBC_SHA512          0xff65
+
+/* For DTLS - OP_PCLID_DTLS */
+
+#define OP_PCL_DTLS_AES_128_CBC_SHA              0x002f
+#define OP_PCL_DTLS_AES_128_CBC_SHA_2            0x0030
+#define OP_PCL_DTLS_AES_128_CBC_SHA_3            0x0031
+#define OP_PCL_DTLS_AES_128_CBC_SHA_4            0x0032
+#define OP_PCL_DTLS_AES_128_CBC_SHA_5            0x0033
+#define OP_PCL_DTLS_AES_128_CBC_SHA_6            0x0034
+#define OP_PCL_DTLS_AES_128_CBC_SHA_7            0x008c
+#define OP_PCL_DTLS_AES_128_CBC_SHA_8            0x0090
+#define OP_PCL_DTLS_AES_128_CBC_SHA_9            0x0094
+#define OP_PCL_DTLS_AES_128_CBC_SHA_10           0xc004
+#define OP_PCL_DTLS_AES_128_CBC_SHA_11           0xc009
+#define OP_PCL_DTLS_AES_128_CBC_SHA_12           0xc00e
+#define OP_PCL_DTLS_AES_128_CBC_SHA_13           0xc013
+#define OP_PCL_DTLS_AES_128_CBC_SHA_14           0xc018
+#define OP_PCL_DTLS_AES_128_CBC_SHA_15           0xc01d
+#define OP_PCL_DTLS_AES_128_CBC_SHA_16           0xc01e
+#define OP_PCL_DTLS_AES_128_CBC_SHA_17           0xc01f
+
+#define OP_PCL_DTLS_AES_256_CBC_SHA              0x0035
+#define OP_PCL_DTLS_AES_256_CBC_SHA_2            0x0036
+#define OP_PCL_DTLS_AES_256_CBC_SHA_3            0x0037
+#define OP_PCL_DTLS_AES_256_CBC_SHA_4            0x0038
+#define OP_PCL_DTLS_AES_256_CBC_SHA_5            0x0039
+#define OP_PCL_DTLS_AES_256_CBC_SHA_6            0x003a
+#define OP_PCL_DTLS_AES_256_CBC_SHA_7            0x008d
+#define OP_PCL_DTLS_AES_256_CBC_SHA_8            0x0091
+#define OP_PCL_DTLS_AES_256_CBC_SHA_9            0x0095
+#define OP_PCL_DTLS_AES_256_CBC_SHA_10           0xc005
+#define OP_PCL_DTLS_AES_256_CBC_SHA_11           0xc00a
+#define OP_PCL_DTLS_AES_256_CBC_SHA_12           0xc00f
+#define OP_PCL_DTLS_AES_256_CBC_SHA_13           0xc014
+#define OP_PCL_DTLS_AES_256_CBC_SHA_14           0xc019
+#define OP_PCL_DTLS_AES_256_CBC_SHA_15           0xc020
+#define OP_PCL_DTLS_AES_256_CBC_SHA_16           0xc021
+#define OP_PCL_DTLS_AES_256_CBC_SHA_17           0xc022
+
+/* #define OP_PCL_DTLS_3DES_EDE_CBC_MD5             0x0023 */
+
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA             0x001f
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_2           0x008b
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_3           0x008f
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_4           0x0093
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_5           0x000a
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_6           0x000d
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_7           0x0010
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_8           0x0013
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_9           0x0016
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_10          0x001b
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_11          0xc003
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_12          0xc008
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_13          0xc00d
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_14          0xc012
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_15          0xc017
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_16          0xc01a
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_17          0xc01b
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_18          0xc01c
+
+#define OP_PCL_DTLS_DES40_CBC_MD5                0x0029
+
+#define OP_PCL_DTLS_DES_CBC_MD5                  0x0022
+
+#define OP_PCL_DTLS_DES40_CBC_SHA                0x0008
+#define OP_PCL_DTLS_DES40_CBC_SHA_2              0x000b
+#define OP_PCL_DTLS_DES40_CBC_SHA_3              0x000e
+#define OP_PCL_DTLS_DES40_CBC_SHA_4              0x0011
+#define OP_PCL_DTLS_DES40_CBC_SHA_5              0x0014
+#define OP_PCL_DTLS_DES40_CBC_SHA_6              0x0019
+#define OP_PCL_DTLS_DES40_CBC_SHA_7              0x0026
+
+
+#define OP_PCL_DTLS_DES_CBC_SHA                  0x001e
+#define OP_PCL_DTLS_DES_CBC_SHA_2                0x0009
+#define OP_PCL_DTLS_DES_CBC_SHA_3                0x000c
+#define OP_PCL_DTLS_DES_CBC_SHA_4                0x000f
+#define OP_PCL_DTLS_DES_CBC_SHA_5                0x0012
+#define OP_PCL_DTLS_DES_CBC_SHA_6                0x0015
+#define OP_PCL_DTLS_DES_CBC_SHA_7                0x001a
+
+
+#define OP_PCL_DTLS_3DES_EDE_CBC_MD5             0xff23
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA160          0xff30
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA224          0xff34
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA256          0xff36
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA384          0xff33
+#define OP_PCL_DTLS_3DES_EDE_CBC_SHA512          0xff35
+#define OP_PCL_DTLS_AES_128_CBC_SHA160           0xff80
+#define OP_PCL_DTLS_AES_128_CBC_SHA224           0xff84
+#define OP_PCL_DTLS_AES_128_CBC_SHA256           0xff86
+#define OP_PCL_DTLS_AES_128_CBC_SHA384           0xff83
+#define OP_PCL_DTLS_AES_128_CBC_SHA512           0xff85
+#define OP_PCL_DTLS_AES_192_CBC_SHA160           0xff20
+#define OP_PCL_DTLS_AES_192_CBC_SHA224           0xff24
+#define OP_PCL_DTLS_AES_192_CBC_SHA256           0xff26
+#define OP_PCL_DTLS_AES_192_CBC_SHA384           0xff23
+#define OP_PCL_DTLS_AES_192_CBC_SHA512           0xff25
+#define OP_PCL_DTLS_AES_256_CBC_SHA160           0xff60
+#define OP_PCL_DTLS_AES_256_CBC_SHA224           0xff64
+#define OP_PCL_DTLS_AES_256_CBC_SHA256           0xff66
+#define OP_PCL_DTLS_AES_256_CBC_SHA384           0xff63
+#define OP_PCL_DTLS_AES_256_CBC_SHA512           0xff65
+
+/* 802.16 WiMAX protinfos */
+#define OP_PCL_WIMAX_OFDM                        0x0201
+#define OP_PCL_WIMAX_OFDMA                       0x0231
+
+/* 802.11 WiFi protinfos */
+#define OP_PCL_WIFI                              0xac04
+
+/* MacSec protinfos */
+#define OP_PCL_MACSEC                            0x0001
+
+/* PKI unidirectional protocol protinfo bits */
+#define OP_PCL_PKPROT_TEST                       0x0008
+#define OP_PCL_PKPROT_DECRYPT                    0x0004
+#define OP_PCL_PKPROT_ECC                        0x0002
+#define OP_PCL_PKPROT_F2M                        0x0001
+
+/* For non-protocol/alg-only op commands */
+#define OP_ALG_TYPE_SHIFT	24
+#define OP_ALG_TYPE_MASK	(0x7 << OP_ALG_TYPE_SHIFT)
+#define OP_ALG_TYPE_CLASS1	2
+#define OP_ALG_TYPE_CLASS2	4
+
+#define OP_ALG_ALGSEL_SHIFT	16
+#define OP_ALG_ALGSEL_MASK	(0xff << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_SUBMASK	(0x0f << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_AES	(0x10 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_DES	(0x20 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_3DES	(0x21 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_ARC4	(0x30 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_MD5	(0x40 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_SHA1	(0x41 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_SHA224	(0x42 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_SHA256	(0x43 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_SHA384	(0x44 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_SHA512	(0x45 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_RNG	(0x50 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_SNOW	(0x60 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_SNOW_F8	(0x60 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_KASUMI	(0x70 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_CRC	(0x90 << OP_ALG_ALGSEL_SHIFT)
+#define OP_ALG_ALGSEL_SNOW_F9	(0xA0 << OP_ALG_ALGSEL_SHIFT)
+
+#define OP_ALG_AAI_SHIFT	4
+#define OP_ALG_AAI_MASK		(0x1ff << OP_ALG_AAI_SHIFT)
+
+/* blockcipher AAI set */
+#define OP_ALG_AAI_CTR_MOD128	(0x00 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD8	(0x01 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD16	(0x02 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD24	(0x03 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD32	(0x04 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD40	(0x05 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD48	(0x06 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD56	(0x07 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD64	(0x08 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD72	(0x09 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD80	(0x0a << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD88	(0x0b << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD96	(0x0c << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD104	(0x0d << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD112	(0x0e << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_MOD120	(0x0f << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CBC		(0x10 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_ECB		(0x20 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CFB		(0x30 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_OFB		(0x40 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_XTS		(0x50 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CMAC		(0x60 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_XCBC_MAC	(0x70 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CCM		(0x80 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_GCM		(0x90 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CBC_XCBCMAC	(0xa0 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CTR_XCBCMAC	(0xb0 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CHECKODD	(0x80 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_DK		(0x100 << OP_ALG_AAI_SHIFT)
+
+/* randomizer AAI set */
+#define OP_ALG_AAI_RNG		(0x00 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_RNG_NOZERO	(0x10 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_RNG_ODD	(0x20 << OP_ALG_AAI_SHIFT)
+
+/* hmac/smac AAI set */
+#define OP_ALG_AAI_HASH		(0x00 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_HMAC		(0x01 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_SMAC		(0x02 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_HMAC_PRECOMP	(0x04 << OP_ALG_AAI_SHIFT)
+
+/* CRC AAI set*/
+#define OP_ALG_AAI_802		(0x01 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_3385		(0x02 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_CUST_POLY	(0x04 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_DIS		(0x10 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_DOS		(0x20 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_DOC		(0x40 << OP_ALG_AAI_SHIFT)
+
+/* Kasumi/SNOW AAI set */
+#define OP_ALG_AAI_F8		(0xc0 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_F9		(0xc8 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_GSM		(0x10 << OP_ALG_AAI_SHIFT)
+#define OP_ALG_AAI_EDGE		(0x20 << OP_ALG_AAI_SHIFT)
+
+
+#define OP_ALG_AS_SHIFT		2
+#define OP_ALG_AS_MASK		(0x3 << OP_ALG_AS_SHIFT)
+#define OP_ALG_AS_UPDATE	(0 << OP_ALG_AS_SHIFT)
+#define OP_ALG_AS_INIT		(1 << OP_ALG_AS_SHIFT)
+#define OP_ALG_AS_FINALIZE	(2 << OP_ALG_AS_SHIFT)
+#define OP_ALG_AS_INITFINAL	(3 << OP_ALG_AS_SHIFT)
+
+#define OP_ALG_ICV_SHIFT	1
+#define OP_ALG_ICV_MASK		(1 << OP_ALG_ICV_SHIFT)
+#define OP_ALG_ICV_OFF		(0 << OP_ALG_ICV_SHIFT)
+#define OP_ALG_ICV_ON		(1 << OP_ALG_ICV_SHIFT)
+
+#define OP_ALG_DIR_SHIFT	0
+#define OP_ALG_DIR_MASK		1
+#define OP_ALG_DECRYPT		0
+#define OP_ALG_ENCRYPT		1
+
+/* PKHA algorithm type set */
+#define OP_ALG_PK                    0x00800000
+#define OP_ALG_PK_FUN_MASK           0x3f /* clrmem, modmath, or cpymem */
+
+/* PKHA mode clear memory functions */
+#define OP_ALG_PKMODE_A_RAM          0x80000
+#define OP_ALG_PKMODE_B_RAM          0x40000
+#define OP_ALG_PKMODE_E_RAM          0x20000
+#define OP_ALG_PKMODE_N_RAM          0x10000
+#define OP_ALG_PKMODE_CLEARMEM       0x00001
+
+/* PKHA mode modular-arithmetic functions */
+#define OP_ALG_PKMODE_MOD_IN_MONTY   0x80000
+#define OP_ALG_PKMODE_MOD_OUT_MONTY  0x40000
+#define OP_ALG_PKMODE_MOD_F2M        0x20000
+#define OP_ALG_PKMODE_MOD_R2_IN      0x10000
+#define OP_ALG_PKMODE_PRJECTV        0x00800
+#define OP_ALG_PKMODE_TIME_EQ        0x400
+#define OP_ALG_PKMODE_OUT_B          0x000
+#define OP_ALG_PKMODE_OUT_A          0x100
+#define OP_ALG_PKMODE_MOD_ADD        0x002
+#define OP_ALG_PKMODE_MOD_SUB_AB     0x003
+#define OP_ALG_PKMODE_MOD_SUB_BA     0x004
+#define OP_ALG_PKMODE_MOD_MULT       0x005
+#define OP_ALG_PKMODE_MOD_EXPO       0x006
+#define OP_ALG_PKMODE_MOD_REDUCT     0x007
+#define OP_ALG_PKMODE_MOD_INV        0x008
+#define OP_ALG_PKMODE_MOD_ECC_ADD    0x009
+#define OP_ALG_PKMODE_MOD_ECC_DBL    0x00a
+#define OP_ALG_PKMODE_MOD_ECC_MULT   0x00b
+#define OP_ALG_PKMODE_MOD_MONT_CNST  0x00c
+#define OP_ALG_PKMODE_MOD_CRT_CNST   0x00d
+#define OP_ALG_PKMODE_MOD_GCD        0x00e
+#define OP_ALG_PKMODE_MOD_PRIMALITY  0x00f
+
+/* PKHA mode copy-memory functions */
+#define OP_ALG_PKMODE_SRC_REG_SHIFT  13
+#define OP_ALG_PKMODE_SRC_REG_MASK   (7 << OP_ALG_PKMODE_SRC_REG_SHIFT)
+#define OP_ALG_PKMODE_DST_REG_SHIFT  10
+#define OP_ALG_PKMODE_DST_REG_MASK   (7 << OP_ALG_PKMODE_DST_REG_SHIFT)
+#define OP_ALG_PKMODE_SRC_SEG_SHIFT  8
+#define OP_ALG_PKMODE_SRC_SEG_MASK   (3 << OP_ALG_PKMODE_SRC_SEG_SHIFT)
+#define OP_ALG_PKMODE_DST_SEG_SHIFT  6
+#define OP_ALG_PKMODE_DST_SEG_MASK   (3 << OP_ALG_PKMODE_DST_SEG_SHIFT)
+
+#define OP_ALG_PKMODE_SRC_REG_A      (0 << OP_ALG_PKMODE_SRC_REG_SHIFT)
+#define OP_ALG_PKMODE_SRC_REG_B      (1 << OP_ALG_PKMODE_SRC_REG_SHIFT)
+#define OP_ALG_PKMODE_SRC_REG_N      (3 << OP_ALG_PKMODE_SRC_REG_SHIFT)
+#define OP_ALG_PKMODE_DST_REG_A      (0 << OP_ALG_PKMODE_DST_REG_SHIFT)
+#define OP_ALG_PKMODE_DST_REG_B      (1 << OP_ALG_PKMODE_DST_REG_SHIFT)
+#define OP_ALG_PKMODE_DST_REG_E      (2 << OP_ALG_PKMODE_DST_REG_SHIFT)
+#define OP_ALG_PKMODE_DST_REG_N      (3 << OP_ALG_PKMODE_DST_REG_SHIFT)
+#define OP_ALG_PKMODE_SRC_SEG_0      (0 << OP_ALG_PKMODE_SRC_SEG_SHIFT)
+#define OP_ALG_PKMODE_SRC_SEG_1      (1 << OP_ALG_PKMODE_SRC_SEG_SHIFT)
+#define OP_ALG_PKMODE_SRC_SEG_2      (2 << OP_ALG_PKMODE_SRC_SEG_SHIFT)
+#define OP_ALG_PKMODE_SRC_SEG_3      (3 << OP_ALG_PKMODE_SRC_SEG_SHIFT)
+#define OP_ALG_PKMODE_DST_SEG_0      (0 << OP_ALG_PKMODE_DST_SEG_SHIFT)
+#define OP_ALG_PKMODE_DST_SEG_1      (1 << OP_ALG_PKMODE_DST_SEG_SHIFT)
+#define OP_ALG_PKMODE_DST_SEG_2      (2 << OP_ALG_PKMODE_DST_SEG_SHIFT)
+#define OP_ALG_PKMODE_DST_SEG_3      (3 << OP_ALG_PKMODE_DST_SEG_SHIFT)
+#define OP_ALG_PKMODE_CPYMEM_N_SZ    0x80
+#define OP_ALG_PKMODE_CPYMEM_SRC_SZ  0x81
+
+/*
+ * SEQ_IN_PTR Command Constructs
+ */
+
+/* Release Buffers */
+#define SQIN_RBS               0x04000000
+
+/* Sequence pointer is really a descriptor */
+#define SQIN_INL               0x02000000
+
+/* Sequence pointer is a scatter-gather table */
+#define SQIN_SGF               0x01000000
+
+/* Appends to a previous pointer */
+#define SQIN_PRE               0x00800000
+
+/* Use extended length following pointer */
+#define SQIN_EXT               0x00400000
+
+/* Restore sequence with pointer/length */
+#define SQIN_RTO               0x00200000
+
+/* Replace job descriptor */
+#define SQIN_RJD               0x00100000
+
+#define SQIN_LEN_SHIFT           0
+#define SQIN_LEN_MASK           (0xffff << SQIN_LEN_SHIFT)
+
+/*
+ * SEQ_OUT_PTR Command Constructs
+ */
+
+/* Sequence pointer is a scatter-gather table */
+#define SQOUT_SGF              0x01000000
+
+/* Appends to a previous pointer */
+#define SQOUT_PRE              0x00800000
+
+/* Restore sequence with pointer/length */
+#define SQOUT_RTO              0x00200000
+
+/* Use extended length following pointer */
+#define SQOUT_EXT              0x00400000
+
+#define SQOUT_LEN_SHIFT           0
+#define SQOUT_LEN_MASK           (0xffff << SQOUT_LEN_SHIFT)
+
+
+/*
+ * SIGNATURE Command Constructs
+ */
+
+/* TYPE field is all that's relevant */
+#define SIGN_TYPE_SHIFT         16
+#define SIGN_TYPE_MASK          (0x0f << SIGN_TYPE_SHIFT)
+
+#define SIGN_TYPE_FINAL         (0x00 << SIGN_TYPE_SHIFT)
+#define SIGN_TYPE_FINAL_RESTORE (0x01 << SIGN_TYPE_SHIFT)
+#define SIGN_TYPE_FINAL_NONZERO (0x02 << SIGN_TYPE_SHIFT)
+#define SIGN_TYPE_IMM_2         (0x0a << SIGN_TYPE_SHIFT)
+#define SIGN_TYPE_IMM_3         (0x0b << SIGN_TYPE_SHIFT)
+#define SIGN_TYPE_IMM_4         (0x0c << SIGN_TYPE_SHIFT)
+
+/*
+ * MOVE Command Constructs
+ */
+
+#define MOVE_AUX_SHIFT          25
+#define MOVE_AUX_MASK           (3 << MOVE_AUX_SHIFT)
+#define MOVE_AUX_MS             (2 << MOVE_AUX_SHIFT)
+#define MOVE_AUX_LS             (1 << MOVE_AUX_SHIFT)
+
+#define MOVE_WAITCOMP_SHIFT     24
+#define MOVE_WAITCOMP_MASK      (1 << MOVE_WAITCOMP_SHIFT)
+#define MOVE_WAITCOMP           (1 << MOVE_WAITCOMP_SHIFT)
+
+#define MOVE_SRC_SHIFT          20
+#define MOVE_SRC_MASK           (0x0f << MOVE_SRC_SHIFT)
+#define MOVE_SRC_CLASS1CTX      (0x00 << MOVE_SRC_SHIFT)
+#define MOVE_SRC_CLASS2CTX      (0x01 << MOVE_SRC_SHIFT)
+#define MOVE_SRC_OUTFIFO        (0x02 << MOVE_SRC_SHIFT)
+#define MOVE_SRC_DESCBUF        (0x03 << MOVE_SRC_SHIFT)
+#define MOVE_SRC_MATH0          (0x04 << MOVE_SRC_SHIFT)
+#define MOVE_SRC_MATH1          (0x05 << MOVE_SRC_SHIFT)
+#define MOVE_SRC_MATH2          (0x06 << MOVE_SRC_SHIFT)
+#define MOVE_SRC_MATH3          (0x07 << MOVE_SRC_SHIFT)
+#define MOVE_SRC_INFIFO         (0x08 << MOVE_SRC_SHIFT)
+#define MOVE_SRC_INFIFO_CL      (0x09 << MOVE_SRC_SHIFT)
+
+#define MOVE_DEST_SHIFT         16
+#define MOVE_DEST_MASK          (0x0f << MOVE_DEST_SHIFT)
+#define MOVE_DEST_CLASS1CTX     (0x00 << MOVE_DEST_SHIFT)
+#define MOVE_DEST_CLASS2CTX     (0x01 << MOVE_DEST_SHIFT)
+#define MOVE_DEST_OUTFIFO       (0x02 << MOVE_DEST_SHIFT)
+#define MOVE_DEST_DESCBUF       (0x03 << MOVE_DEST_SHIFT)
+#define MOVE_DEST_MATH0         (0x04 << MOVE_DEST_SHIFT)
+#define MOVE_DEST_MATH1         (0x05 << MOVE_DEST_SHIFT)
+#define MOVE_DEST_MATH2         (0x06 << MOVE_DEST_SHIFT)
+#define MOVE_DEST_MATH3         (0x07 << MOVE_DEST_SHIFT)
+#define MOVE_DEST_CLASS1INFIFO  (0x08 << MOVE_DEST_SHIFT)
+#define MOVE_DEST_CLASS2INFIFO  (0x09 << MOVE_DEST_SHIFT)
+#define MOVE_DEST_PK_A          (0x0c << MOVE_DEST_SHIFT)
+#define MOVE_DEST_CLASS1KEY     (0x0d << MOVE_DEST_SHIFT)
+#define MOVE_DEST_CLASS2KEY     (0x0e << MOVE_DEST_SHIFT)
+
+#define MOVE_OFFSET_SHIFT       8
+#define MOVE_OFFSET_MASK        (0xff << MOVE_OFFSET_SHIFT)
+
+#define MOVE_LEN_SHIFT          0
+#define MOVE_LEN_MASK           (0xff << MOVE_LEN_SHIFT)
+
+#define MOVELEN_MRSEL_SHIFT     0
+#define MOVELEN_MRSEL_MASK      (0x3 << MOVE_LEN_SHIFT)
+
+/*
+ * MATH Command Constructs
+ */
+
+#define MATH_IFB_SHIFT          26
+#define MATH_IFB_MASK           (1 << MATH_IFB_SHIFT)
+#define MATH_IFB                (1 << MATH_IFB_SHIFT)
+
+#define MATH_NFU_SHIFT          25
+#define MATH_NFU_MASK           (1 << MATH_NFU_SHIFT)
+#define MATH_NFU                (1 << MATH_NFU_SHIFT)
+
+#define MATH_STL_SHIFT          24
+#define MATH_STL_MASK           (1 << MATH_STL_SHIFT)
+#define MATH_STL                (1 << MATH_STL_SHIFT)
+
+/* Function selectors */
+#define MATH_FUN_SHIFT          20
+#define MATH_FUN_MASK           (0x0f << MATH_FUN_SHIFT)
+#define MATH_FUN_ADD            (0x00 << MATH_FUN_SHIFT)
+#define MATH_FUN_ADDC           (0x01 << MATH_FUN_SHIFT)
+#define MATH_FUN_SUB            (0x02 << MATH_FUN_SHIFT)
+#define MATH_FUN_SUBB           (0x03 << MATH_FUN_SHIFT)
+#define MATH_FUN_OR             (0x04 << MATH_FUN_SHIFT)
+#define MATH_FUN_AND            (0x05 << MATH_FUN_SHIFT)
+#define MATH_FUN_XOR            (0x06 << MATH_FUN_SHIFT)
+#define MATH_FUN_LSHIFT         (0x07 << MATH_FUN_SHIFT)
+#define MATH_FUN_RSHIFT         (0x08 << MATH_FUN_SHIFT)
+#define MATH_FUN_SHLD           (0x09 << MATH_FUN_SHIFT)
+#define MATH_FUN_ZBYT           (0x0a << MATH_FUN_SHIFT)
+
+/* Source 0 selectors */
+#define MATH_SRC0_SHIFT         16
+#define MATH_SRC0_MASK          (0x0f << MATH_SRC0_SHIFT)
+#define MATH_SRC0_REG0          (0x00 << MATH_SRC0_SHIFT)
+#define MATH_SRC0_REG1          (0x01 << MATH_SRC0_SHIFT)
+#define MATH_SRC0_REG2          (0x02 << MATH_SRC0_SHIFT)
+#define MATH_SRC0_REG3          (0x03 << MATH_SRC0_SHIFT)
+#define MATH_SRC0_IMM           (0x04 << MATH_SRC0_SHIFT)
+#define MATH_SRC0_SEQINLEN      (0x08 << MATH_SRC0_SHIFT)
+#define MATH_SRC0_SEQOUTLEN     (0x09 << MATH_SRC0_SHIFT)
+#define MATH_SRC0_VARSEQINLEN   (0x0a << MATH_SRC0_SHIFT)
+#define MATH_SRC0_VARSEQOUTLEN  (0x0b << MATH_SRC0_SHIFT)
+#define MATH_SRC0_ZERO          (0x0c << MATH_SRC0_SHIFT)
+
+/* Source 1 selectors */
+#define MATH_SRC1_SHIFT         12
+#define MATH_SRC1_MASK          (0x0f << MATH_SRC1_SHIFT)
+#define MATH_SRC1_REG0          (0x00 << MATH_SRC1_SHIFT)
+#define MATH_SRC1_REG1          (0x01 << MATH_SRC1_SHIFT)
+#define MATH_SRC1_REG2          (0x02 << MATH_SRC1_SHIFT)
+#define MATH_SRC1_REG3          (0x03 << MATH_SRC1_SHIFT)
+#define MATH_SRC1_IMM           (0x04 << MATH_SRC1_SHIFT)
+#define MATH_SRC1_INFIFO        (0x0a << MATH_SRC1_SHIFT)
+#define MATH_SRC1_OUTFIFO       (0x0b << MATH_SRC1_SHIFT)
+#define MATH_SRC1_ONE           (0x0c << MATH_SRC1_SHIFT)
+
+/* Destination selectors */
+#define MATH_DEST_SHIFT         8
+#define MATH_DEST_MASK          (0x0f << MATH_DEST_SHIFT)
+#define MATH_DEST_REG0          (0x00 << MATH_DEST_SHIFT)
+#define MATH_DEST_REG1          (0x01 << MATH_DEST_SHIFT)
+#define MATH_DEST_REG2          (0x02 << MATH_DEST_SHIFT)
+#define MATH_DEST_REG3          (0x03 << MATH_DEST_SHIFT)
+#define MATH_DEST_SEQINLEN      (0x08 << MATH_DEST_SHIFT)
+#define MATH_DEST_SEQOUTLEN     (0x09 << MATH_DEST_SHIFT)
+#define MATH_DEST_VARSEQINLEN   (0x0a << MATH_DEST_SHIFT)
+#define MATH_DEST_VARSEQOUTLEN  (0x0b << MATH_DEST_SHIFT)
+#define MATH_DEST_NONE          (0x0f << MATH_DEST_SHIFT)
+
+/* Length selectors */
+#define MATH_LEN_SHIFT          0
+#define MATH_LEN_MASK           (0x0f << MATH_LEN_SHIFT)
+#define MATH_LEN_1BYTE          0x01
+#define MATH_LEN_2BYTE          0x02
+#define MATH_LEN_4BYTE          0x04
+#define MATH_LEN_8BYTE          0x08
+
+/*
+ * JUMP Command Constructs
+ */
+
+#define JUMP_CLASS_SHIFT        25
+#define JUMP_CLASS_MASK		(3 << JUMP_CLASS_SHIFT)
+#define JUMP_CLASS_NONE		0
+#define JUMP_CLASS_CLASS1	(1 << JUMP_CLASS_SHIFT)
+#define JUMP_CLASS_CLASS2	(2 << JUMP_CLASS_SHIFT)
+#define JUMP_CLASS_BOTH		(3 << JUMP_CLASS_SHIFT)
+
+#define JUMP_JSL_SHIFT          24
+#define JUMP_JSL_MASK           (1 << JUMP_JSL_SHIFT)
+#define JUMP_JSL                (1 << JUMP_JSL_SHIFT)
+
+#define JUMP_TYPE_SHIFT         22
+#define JUMP_TYPE_MASK          (0x03 << JUMP_TYPE_SHIFT)
+#define JUMP_TYPE_LOCAL         (0x00 << JUMP_TYPE_SHIFT)
+#define JUMP_TYPE_NONLOCAL      (0x01 << JUMP_TYPE_SHIFT)
+#define JUMP_TYPE_HALT          (0x02 << JUMP_TYPE_SHIFT)
+#define JUMP_TYPE_HALT_USER     (0x03 << JUMP_TYPE_SHIFT)
+
+#define JUMP_TEST_SHIFT         16
+#define JUMP_TEST_MASK          (0x03 << JUMP_TEST_SHIFT)
+#define JUMP_TEST_ALL           (0x00 << JUMP_TEST_SHIFT)
+#define JUMP_TEST_INVALL        (0x01 << JUMP_TEST_SHIFT)
+#define JUMP_TEST_ANY           (0x02 << JUMP_TEST_SHIFT)
+#define JUMP_TEST_INVANY        (0x03 << JUMP_TEST_SHIFT)
+
+/* Condition codes. JSL bit is factored in */
+#define JUMP_COND_SHIFT         8
+#define JUMP_COND_MASK          (0x100ff << JUMP_COND_SHIFT)
+#define JUMP_COND_PK_0          (0x80 << JUMP_COND_SHIFT)
+#define JUMP_COND_PK_GCD_1      (0x40 << JUMP_COND_SHIFT)
+#define JUMP_COND_PK_PRIME      (0x20 << JUMP_COND_SHIFT)
+#define JUMP_COND_MATH_N        (0x08 << JUMP_COND_SHIFT)
+#define JUMP_COND_MATH_Z        (0x04 << JUMP_COND_SHIFT)
+#define JUMP_COND_MATH_C        (0x02 << JUMP_COND_SHIFT)
+#define JUMP_COND_MATH_NV       (0x01 << JUMP_COND_SHIFT)
+
+#define JUMP_COND_JRP           ((0x80 << JUMP_COND_SHIFT) | JUMP_JSL)
+#define JUMP_COND_SHRD          ((0x40 << JUMP_COND_SHIFT) | JUMP_JSL)
+#define JUMP_COND_SELF          ((0x20 << JUMP_COND_SHIFT) | JUMP_JSL)
+#define JUMP_COND_CALM          ((0x10 << JUMP_COND_SHIFT) | JUMP_JSL)
+#define JUMP_COND_NIP           ((0x08 << JUMP_COND_SHIFT) | JUMP_JSL)
+#define JUMP_COND_NIFP          ((0x04 << JUMP_COND_SHIFT) | JUMP_JSL)
+#define JUMP_COND_NOP           ((0x02 << JUMP_COND_SHIFT) | JUMP_JSL)
+#define JUMP_COND_NCP           ((0x01 << JUMP_COND_SHIFT) | JUMP_JSL)
+
+#define JUMP_OFFSET_SHIFT       0
+#define JUMP_OFFSET_MASK        (0xff << JUMP_OFFSET_SHIFT)
+
+/*
+ * NFIFO ENTRY
+ * Data Constructs
+ *
+ */
+#define NFIFOENTRY_DEST_SHIFT	30
+#define NFIFOENTRY_DEST_MASK	(3 << NFIFOENTRY_DEST_SHIFT)
+#define NFIFOENTRY_DEST_DECO	(0 << NFIFOENTRY_DEST_SHIFT)
+#define NFIFOENTRY_DEST_CLASS1	(1 << NFIFOENTRY_DEST_SHIFT)
+#define NFIFOENTRY_DEST_CLASS2	(2 << NFIFOENTRY_DEST_SHIFT)
+#define NFIFOENTRY_DEST_BOTH	(3 << NFIFOENTRY_DEST_SHIFT)
+
+#define NFIFOENTRY_LC2_SHIFT	29
+#define NFIFOENTRY_LC2_MASK		(1 << NFIFOENTRY_LC2_SHIFT)
+#define NFIFOENTRY_LC2			(1 << NFIFOENTRY_LC2_SHIFT)
+
+#define NFIFOENTRY_LC1_SHIFT	28
+#define NFIFOENTRY_LC1_MASK		(1 << NFIFOENTRY_LC1_SHIFT)
+#define NFIFOENTRY_LC1			(1 << NFIFOENTRY_LC1_SHIFT)
+
+#define NFIFOENTRY_FC2_SHIFT	27
+#define NFIFOENTRY_FC2_MASK		(1 << NFIFOENTRY_FC2_SHIFT)
+#define NFIFOENTRY_FC2			(1 << NFIFOENTRY_FC2_SHIFT)
+
+#define NFIFOENTRY_FC1_SHIFT	26
+#define NFIFOENTRY_FC1_MASK		(1 << NFIFOENTRY_FC1_SHIFT)
+#define NFIFOENTRY_FC1			(1 << NFIFOENTRY_FC1_SHIFT)
+
+#define NFIFOENTRY_STYPE_SHIFT	24
+#define NFIFOENTRY_STYPE_MASK	(3 << NFIFOENTRY_STYPE_SHIFT)
+#define NFIFOENTRY_STYPE_DFIFO	(0 << NFIFOENTRY_STYPE_SHIFT)
+#define NFIFOENTRY_STYPE_OFIFO	(1 << NFIFOENTRY_STYPE_SHIFT)
+#define NFIFOENTRY_STYPE_PAD	(2 << NFIFOENTRY_STYPE_SHIFT)
+#define NFIFOENTRY_STYPE_SNOOP	(3 << NFIFOENTRY_STYPE_SHIFT)
+
+#define NFIFOENTRY_DTYPE_SHIFT	20
+#define NFIFOENTRY_DTYPE_MASK	(0xF << NFIFOENTRY_DTYPE_SHIFT)
+
+#define NFIFOENTRY_DTYPE_SBOX      (0x0  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_AAD       (0x1  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_IV        (0x2  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_SAD       (0x3  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_ICV       (0xA  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_SKIP      (0xE  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_MSG       (0xF  << NFIFOENTRY_DTYPE_SHIFT)
+
+#define NFIFOENTRY_DTYPE_PK_A0     (0x0  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_PK_A1     (0x1  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_PK_A2     (0x2  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_PK_A3     (0x3  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_PK_B0     (0x4  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_PK_B1     (0x5  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_PK_B2     (0x6  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_PK_B3     (0x7  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_PK_N      (0x8  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_PK_E      (0x9  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_PK_A      (0xC  << NFIFOENTRY_DTYPE_SHIFT)
+#define NFIFOENTRY_DTYPE_PK_B      (0xD  << NFIFOENTRY_DTYPE_SHIFT)
+
+
+#define NFIFOENTRY_BND_SHIFT	19
+#define NFIFOENTRY_BND_MASK		(1 << NFIFOENTRY_BND_SHIFT)
+#define NFIFOENTRY_BND			(1 << NFIFOENTRY_BND_SHIFT)
+
+#define NFIFOENTRY_PTYPE_SHIFT	16
+#define NFIFOENTRY_PTYPE_MASK	(0x7 << NFIFOENTRY_PTYPE_SHIFT)
+
+#define NFIFOENTRY_PTYPE_ZEROS         (0x0  << NFIFOENTRY_PTYPE_SHIFT)
+#define NFIFOENTRY_PTYPE_RND_NOZEROS   (0x1  << NFIFOENTRY_PTYPE_SHIFT)
+#define NFIFOENTRY_PTYPE_INCREMENT     (0x2  << NFIFOENTRY_PTYPE_SHIFT)
+#define NFIFOENTRY_PTYPE_RND           (0x3  << NFIFOENTRY_PTYPE_SHIFT)
+#define NFIFOENTRY_PTYPE_ZEROS_NZ      (0x4  << NFIFOENTRY_PTYPE_SHIFT)
+#define NFIFOENTRY_PTYPE_RND_NZ_LZ     (0x5  << NFIFOENTRY_PTYPE_SHIFT)
+#define NFIFOENTRY_PTYPE_N             (0x6  << NFIFOENTRY_PTYPE_SHIFT)
+#define NFIFOENTRY_PTYPE_RND_NZ_N      (0x7  << NFIFOENTRY_PTYPE_SHIFT)
+
+#define NFIFOENTRY_OC_SHIFT		15
+#define NFIFOENTRY_OC_MASK		(1 << NFIFOENTRY_OC_SHIFT)
+#define NFIFOENTRY_OC			(1 << NFIFOENTRY_OC_SHIFT)
+
+#define NFIFOENTRY_AST_SHIFT	14
+#define NFIFOENTRY_AST_MASK		(1 << NFIFOENTRY_OC_SHIFT)
+#define NFIFOENTRY_AST			(1 << NFIFOENTRY_OC_SHIFT)
+
+#define NFIFOENTRY_BM_SHIFT		11
+#define NFIFOENTRY_BM_MASK		(1 << NFIFOENTRY_BM_SHIFT)
+#define NFIFOENTRY_BM			(1 << NFIFOENTRY_BM_SHIFT)
+
+#define NFIFOENTRY_PS_SHIFT		10
+#define NFIFOENTRY_PS_MASK		(1 << NFIFOENTRY_PS_SHIFT)
+#define NFIFOENTRY_PS			(1 << NFIFOENTRY_PS_SHIFT)
+
+
+#define NFIFOENTRY_DLEN_SHIFT	0
+#define NFIFOENTRY_DLEN_MASK	(0xFFF << NFIFOENTRY_DLEN_SHIFT)
+
+#define NFIFOENTRY_PLEN_SHIFT	0
+#define NFIFOENTRY_PLEN_MASK	(0xFF << NFIFOENTRY_PLEN_SHIFT)
+
+/*
+ * PDB internal definitions
+ */
+
+/* IPSec ESP CBC Encap/Decap Options */
+#define PDBOPTS_ESPCBC_ARSNONE  0x00   /* no antireplay window              */
+#define PDBOPTS_ESPCBC_ARS32    0x40   /* 32-entry antireplay window        */
+#define PDBOPTS_ESPCBC_ARS64    0xc0   /* 64-entry antireplay window        */
+#define PDBOPTS_ESPCBC_IVSRC    0x20   /* IV comes from internal random gen */
+#define PDBOPTS_ESPCBC_ESN      0x10   /* extended sequence included        */
+#define PDBOPTS_ESPCBC_OUTFMT   0x08   /* output only decapsulation (decap) */
+#define PDBOPTS_ESPCBC_IPHDRSRC 0x08   /* IP header comes from PDB (encap)  */
+#define PDBOPTS_ESPCBC_INCIPHDR 0x04   /* Prepend IP header to output frame */
+#define PDBOPTS_ESPCBC_IPVSN    0x02   /* process IPv6 header               */
+#define PDBOPTS_ESPCBC_TUNNEL   0x01   /* tunnel mode next-header byte      */
+
+#endif /* DESC_H */
diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h
new file mode 100644
index 000000000000..c224f39e94a7
--- /dev/null
+++ b/drivers/crypto/caam/desc_constr.h
@@ -0,0 +1,204 @@
+/*
+ * caam descriptor construction helper functions
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ */
+
+#include "desc.h"
+
+#define IMMEDIATE (1 << 23)
+#define CAAM_CMD_SZ sizeof(u32)
+#define CAAM_PTR_SZ sizeof(dma_addr_t)
+
+#ifdef DEBUG
+#define PRINT_POS do { printk(KERN_DEBUG "%02d: %s\n", desc_len(desc),\
+			      &__func__[sizeof("append")]); } while (0)
+#else
+#define PRINT_POS
+#endif
+
+#define DISABLE_AUTO_INFO_FIFO (IMMEDIATE | LDST_CLASS_DECO | \
+				LDST_SRCDST_WORD_DECOCTRL | \
+				(LDOFF_DISABLE_AUTO_NFIFO << LDST_OFFSET_SHIFT))
+#define ENABLE_AUTO_INFO_FIFO (IMMEDIATE | LDST_CLASS_DECO | \
+			       LDST_SRCDST_WORD_DECOCTRL | \
+			       (LDOFF_ENABLE_AUTO_NFIFO << LDST_OFFSET_SHIFT))
+
+static inline int desc_len(u32 *desc)
+{
+	return *desc & HDR_DESCLEN_MASK;
+}
+
+static inline int desc_bytes(void *desc)
+{
+	return desc_len(desc) * CAAM_CMD_SZ;
+}
+
+static inline u32 *desc_end(u32 *desc)
+{
+	return desc + desc_len(desc);
+}
+
+static inline void *sh_desc_pdb(u32 *desc)
+{
+	return desc + 1;
+}
+
+static inline void init_desc(u32 *desc, u32 options)
+{
+	*desc = options | HDR_ONE | 1;
+}
+
+static inline void init_sh_desc(u32 *desc, u32 options)
+{
+	PRINT_POS;
+	init_desc(desc, CMD_SHARED_DESC_HDR | options);
+}
+
+static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes)
+{
+	u32 pdb_len = pdb_bytes / CAAM_CMD_SZ + 1;
+
+	init_sh_desc(desc, ((pdb_len << HDR_START_IDX_SHIFT) + pdb_len) |
+		     options);
+}
+
+static inline void init_job_desc(u32 *desc, u32 options)
+{
+	init_desc(desc, CMD_DESC_HDR | options);
+}
+
+static inline void append_ptr(u32 *desc, dma_addr_t ptr)
+{
+	dma_addr_t *offset = (dma_addr_t *)desc_end(desc);
+
+	*offset = ptr;
+
+	(*desc) += CAAM_PTR_SZ / CAAM_CMD_SZ;
+}
+
+static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len,
+					u32 options)
+{
+	PRINT_POS;
+	init_job_desc(desc, HDR_SHARED | options |
+		      (len << HDR_START_IDX_SHIFT));
+	append_ptr(desc, ptr);
+}
+
+static inline void append_data(u32 *desc, void *data, int len)
+{
+	u32 *offset = desc_end(desc);
+
+	if (len) /* avoid sparse warning: memcpy with byte count of 0 */
+		memcpy(offset, data, len);
+
+	(*desc) += (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ;
+}
+
+static inline void append_cmd(u32 *desc, u32 command)
+{
+	u32 *cmd = desc_end(desc);
+
+	*cmd = command;
+
+	(*desc)++;
+}
+
+static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len,
+				  u32 command)
+{
+	append_cmd(desc, command | len);
+	append_ptr(desc, ptr);
+}
+
+static inline void append_cmd_data(u32 *desc, void *data, int len,
+				   u32 command)
+{
+	append_cmd(desc, command | IMMEDIATE | len);
+	append_data(desc, data, len);
+}
+
+static inline u32 *append_jump(u32 *desc, u32 options)
+{
+	u32 *cmd = desc_end(desc);
+
+	PRINT_POS;
+	append_cmd(desc, CMD_JUMP | options);
+
+	return cmd;
+}
+
+static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd)
+{
+	*jump_cmd = *jump_cmd | (desc_len(desc) - (jump_cmd - desc));
+}
+
+#define APPEND_CMD(cmd, op) \
+static inline void append_##cmd(u32 *desc, u32 options) \
+{ \
+	PRINT_POS; \
+	append_cmd(desc, CMD_##op | options); \
+}
+APPEND_CMD(operation, OPERATION)
+APPEND_CMD(move, MOVE)
+
+#define APPEND_CMD_LEN(cmd, op) \
+static inline void append_##cmd(u32 *desc, unsigned int len, u32 options) \
+{ \
+	PRINT_POS; \
+	append_cmd(desc, CMD_##op | len | options); \
+}
+APPEND_CMD_LEN(seq_store, SEQ_STORE)
+APPEND_CMD_LEN(seq_fifo_load, SEQ_FIFO_LOAD)
+APPEND_CMD_LEN(seq_fifo_store, SEQ_FIFO_STORE)
+
+#define APPEND_CMD_PTR(cmd, op) \
+static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \
+				u32 options) \
+{ \
+	PRINT_POS; \
+	append_cmd_ptr(desc, ptr, len, CMD_##op | options); \
+}
+APPEND_CMD_PTR(key, KEY)
+APPEND_CMD_PTR(seq_in_ptr, SEQ_IN_PTR)
+APPEND_CMD_PTR(seq_out_ptr, SEQ_OUT_PTR)
+APPEND_CMD_PTR(load, LOAD)
+APPEND_CMD_PTR(store, STORE)
+APPEND_CMD_PTR(fifo_load, FIFO_LOAD)
+APPEND_CMD_PTR(fifo_store, FIFO_STORE)
+
+#define APPEND_CMD_PTR_TO_IMM(cmd, op) \
+static inline void append_##cmd##_as_imm(u32 *desc, void *data, \
+					 unsigned int len, u32 options) \
+{ \
+	PRINT_POS; \
+	append_cmd_data(desc, data, len, CMD_##op | options); \
+}
+APPEND_CMD_PTR_TO_IMM(load, LOAD);
+APPEND_CMD_PTR_TO_IMM(fifo_load, FIFO_LOAD);
+
+/*
+ * 2nd variant for commands whose specified immediate length differs
+ * from length of immediate data provided, e.g., split keys
+ */
+#define APPEND_CMD_PTR_TO_IMM2(cmd, op) \
+static inline void append_##cmd##_as_imm(u32 *desc, void *data, \
+					 unsigned int data_len, \
+					 unsigned int len, u32 options) \
+{ \
+	PRINT_POS; \
+	append_cmd(desc, CMD_##op | IMMEDIATE | len | options); \
+	append_data(desc, data, data_len); \
+}
+APPEND_CMD_PTR_TO_IMM2(key, KEY);
+
+#define APPEND_CMD_RAW_IMM(cmd, op, type) \
+static inline void append_##cmd##_imm_##type(u32 *desc, type immediate, \
+					     u32 options) \
+{ \
+	PRINT_POS; \
+	append_cmd(desc, CMD_##op | IMMEDIATE | options | sizeof(type)); \
+	append_cmd(desc, immediate); \
+}
+APPEND_CMD_RAW_IMM(load, LOAD, u32);
diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c
new file mode 100644
index 000000000000..bd57a6825f57
--- /dev/null
+++ b/drivers/crypto/caam/error.c
@@ -0,0 +1,248 @@
+/*
+ * CAAM Error Reporting
+ *
+ * Copyright 2009-2011 Freescale Semiconductor, Inc.
+ */
+
+#include "compat.h"
+#include "regs.h"
+#include "intern.h"
+#include "desc.h"
+#include "jr.h"
+#include "error.h"
+
+#define SPRINTFCAT(str, format, param, max_alloc)		\
+{								\
+	char *tmp;						\
+								\
+	tmp = kmalloc(sizeof(format) + max_alloc, GFP_ATOMIC);	\
+	sprintf(tmp, format, param);				\
+	strcat(str, tmp);					\
+	kfree(tmp);						\
+}
+
+static void report_jump_idx(u32 status, char *outstr)
+{
+	u8 idx = (status & JRSTA_DECOERR_INDEX_MASK) >>
+		  JRSTA_DECOERR_INDEX_SHIFT;
+
+	if (status & JRSTA_DECOERR_JUMP)
+		strcat(outstr, "jump tgt desc idx ");
+	else
+		strcat(outstr, "desc idx ");
+
+	SPRINTFCAT(outstr, "%d: ", idx, sizeof("255"));
+}
+
+static void report_ccb_status(u32 status, char *outstr)
+{
+	char *cha_id_list[] = {
+		"",
+		"AES",
+		"DES, 3DES",
+		"ARC4",
+		"MD5, SHA-1, SH-224, SHA-256, SHA-384, SHA-512",
+		"RNG",
+		"SNOW f8",
+		"Kasumi f8, f9",
+		"All Public Key Algorithms",
+		"CRC",
+		"SNOW f9",
+	};
+	char *err_id_list[] = {
+		"None. No error.",
+		"Mode error.",
+		"Data size error.",
+		"Key size error.",
+		"PKHA A memory size error.",
+		"PKHA B memory size error.",
+		"Data arrived out of sequence error.",
+		"PKHA divide-by-zero error.",
+		"PKHA modulus even error.",
+		"DES key parity error.",
+		"ICV check failed.",
+		"Hardware error.",
+		"Unsupported CCM AAD size.",
+		"Class 1 CHA is not reset",
+		"Invalid CHA combination was selected",
+		"Invalid CHA selected.",
+	};
+	u8 cha_id = (status & JRSTA_CCBERR_CHAID_MASK) >>
+		    JRSTA_CCBERR_CHAID_SHIFT;
+	u8 err_id = status & JRSTA_CCBERR_ERRID_MASK;
+
+	report_jump_idx(status, outstr);
+
+	if (cha_id < sizeof(cha_id_list)) {
+		SPRINTFCAT(outstr, "%s: ", cha_id_list[cha_id],
+			   strlen(cha_id_list[cha_id]));
+	} else {
+		SPRINTFCAT(outstr, "unidentified cha_id value 0x%02x: ",
+			   cha_id, sizeof("ff"));
+	}
+
+	if (err_id < sizeof(err_id_list)) {
+		SPRINTFCAT(outstr, "%s", err_id_list[err_id],
+			   strlen(err_id_list[err_id]));
+	} else {
+		SPRINTFCAT(outstr, "unidentified err_id value 0x%02x",
+			   err_id, sizeof("ff"));
+	}
+}
+
+static void report_jump_status(u32 status, char *outstr)
+{
+	SPRINTFCAT(outstr, "%s() not implemented", __func__, sizeof(__func__));
+}
+
+static void report_deco_status(u32 status, char *outstr)
+{
+	const struct {
+		u8 value;
+		char *error_text;
+	} desc_error_list[] = {
+		{ 0x00, "None. No error." },
+		{ 0x01, "SGT Length Error. The descriptor is trying to read "
+			"more data than is contained in the SGT table." },
+		{ 0x02, "Reserved." },
+		{ 0x03, "Job Ring Control Error. There is a bad value in the "
+			"Job Ring Control register." },
+		{ 0x04, "Invalid Descriptor Command. The Descriptor Command "
+			"field is invalid." },
+		{ 0x05, "Reserved." },
+		{ 0x06, "Invalid KEY Command" },
+		{ 0x07, "Invalid LOAD Command" },
+		{ 0x08, "Invalid STORE Command" },
+		{ 0x09, "Invalid OPERATION Command" },
+		{ 0x0A, "Invalid FIFO LOAD Command" },
+		{ 0x0B, "Invalid FIFO STORE Command" },
+		{ 0x0C, "Invalid MOVE Command" },
+		{ 0x0D, "Invalid JUMP Command. A nonlocal JUMP Command is "
+			"invalid because the target is not a Job Header "
+			"Command, or the jump is from a Trusted Descriptor to "
+			"a Job Descriptor, or because the target Descriptor "
+			"contains a Shared Descriptor." },
+		{ 0x0E, "Invalid MATH Command" },
+		{ 0x0F, "Invalid SIGNATURE Command" },
+		{ 0x10, "Invalid Sequence Command. A SEQ IN PTR OR SEQ OUT PTR "
+			"Command is invalid or a SEQ KEY, SEQ LOAD, SEQ FIFO "
+			"LOAD, or SEQ FIFO STORE decremented the input or "
+			"output sequence length below 0. This error may result "
+			"if a built-in PROTOCOL Command has encountered a "
+			"malformed PDU." },
+		{ 0x11, "Skip data type invalid. The type must be 0xE or 0xF."},
+		{ 0x12, "Shared Descriptor Header Error" },
+		{ 0x13, "Header Error. Invalid length or parity, or certain "
+			"other problems." },
+		{ 0x14, "Burster Error. Burster has gotten to an illegal "
+			"state" },
+		{ 0x15, "Context Register Length Error. The descriptor is "
+			"trying to read or write past the end of the Context "
+			"Register. A SEQ LOAD or SEQ STORE with the VLF bit "
+			"set was executed with too large a length in the "
+			"variable length register (VSOL for SEQ STORE or VSIL "
+			"for SEQ LOAD)." },
+		{ 0x16, "DMA Error" },
+		{ 0x17, "Reserved." },
+		{ 0x1A, "Job failed due to JR reset" },
+		{ 0x1B, "Job failed due to Fail Mode" },
+		{ 0x1C, "DECO Watchdog timer timeout error" },
+		{ 0x1D, "DECO tried to copy a key from another DECO but the "
+			"other DECO's Key Registers were locked" },
+		{ 0x1E, "DECO attempted to copy data from a DECO that had an "
+			"unmasked Descriptor error" },
+		{ 0x1F, "LIODN error. DECO was trying to share from itself or "
+			"from another DECO but the two Non-SEQ LIODN values "
+			"didn't match or the 'shared from' DECO's Descriptor "
+			"required that the SEQ LIODNs be the same and they "
+			"aren't." },
+		{ 0x20, "DECO has completed a reset initiated via the DRR "
+			"register" },
+		{ 0x21, "Nonce error. When using EKT (CCM) key encryption "
+			"option in the FIFO STORE Command, the Nonce counter "
+			"reached its maximum value and this encryption mode "
+			"can no longer be used." },
+		{ 0x22, "Meta data is too large (> 511 bytes) for TLS decap "
+			"(input frame; block ciphers) and IPsec decap (output "
+			"frame, when doing the next header byte update) and "
+			"DCRC (output frame)." },
+		{ 0x80, "DNR (do not run) error" },
+		{ 0x81, "undefined protocol command" },
+		{ 0x82, "invalid setting in PDB" },
+		{ 0x83, "Anti-replay LATE error" },
+		{ 0x84, "Anti-replay REPLAY error" },
+		{ 0x85, "Sequence number overflow" },
+		{ 0x86, "Sigver invalid signature" },
+		{ 0x87, "DSA Sign Illegal test descriptor" },
+		{ 0x88, "Protocol Format Error - A protocol has seen an error "
+			"in the format of data received. When running RSA, "
+			"this means that formatting with random padding was "
+			"used, and did not follow the form: 0x00, 0x02, 8-to-N "
+			"bytes of non-zero pad, 0x00, F data." },
+		{ 0x89, "Protocol Size Error - A protocol has seen an error in "
+			"size. When running RSA, pdb size N < (size of F) when "
+			"no formatting is used; or pdb size N < (F + 11) when "
+			"formatting is used." },
+		{ 0xC1, "Blob Command error: Undefined mode" },
+		{ 0xC2, "Blob Command error: Secure Memory Blob mode error" },
+		{ 0xC4, "Blob Command error: Black Blob key or input size "
+			"error" },
+		{ 0xC5, "Blob Command error: Invalid key destination" },
+		{ 0xC8, "Blob Command error: Trusted/Secure mode error" },
+		{ 0xF0, "IPsec TTL or hop limit field either came in as 0, "
+			"or was decremented to 0" },
+		{ 0xF1, "3GPP HFN matches or exceeds the Threshold" },
+	};
+	u8 desc_error = status & JRSTA_DECOERR_ERROR_MASK;
+	int i;
+
+	report_jump_idx(status, outstr);
+
+	for (i = 0; i < sizeof(desc_error_list); i++)
+		if (desc_error_list[i].value == desc_error)
+			break;
+
+	if (i != sizeof(desc_error_list) && desc_error_list[i].error_text) {
+		SPRINTFCAT(outstr, "%s", desc_error_list[i].error_text,
+			   strlen(desc_error_list[i].error_text));
+	} else {
+		SPRINTFCAT(outstr, "unidentified error value 0x%02x",
+			   desc_error, sizeof("ff"));
+	}
+}
+
+static void report_jr_status(u32 status, char *outstr)
+{
+	SPRINTFCAT(outstr, "%s() not implemented", __func__, sizeof(__func__));
+}
+
+static void report_cond_code_status(u32 status, char *outstr)
+{
+	SPRINTFCAT(outstr, "%s() not implemented", __func__, sizeof(__func__));
+}
+
+char *caam_jr_strstatus(char *outstr, u32 status)
+{
+	struct stat_src {
+		void (*report_ssed)(u32 status, char *outstr);
+		char *error;
+	} status_src[] = {
+		{ NULL, "No error" },
+		{ NULL, NULL },
+		{ report_ccb_status, "CCB" },
+		{ report_jump_status, "Jump" },
+		{ report_deco_status, "DECO" },
+		{ NULL, NULL },
+		{ report_jr_status, "Job Ring" },
+		{ report_cond_code_status, "Condition Code" },
+	};
+	u32 ssrc = status >> JRSTA_SSRC_SHIFT;
+
+	sprintf(outstr, "%s: ", status_src[ssrc].error);
+
+	if (status_src[ssrc].report_ssed)
+		status_src[ssrc].report_ssed(status, outstr);
+
+	return outstr;
+}
+EXPORT_SYMBOL(caam_jr_strstatus);
diff --git a/drivers/crypto/caam/error.h b/drivers/crypto/caam/error.h
new file mode 100644
index 000000000000..067afc120132
--- /dev/null
+++ b/drivers/crypto/caam/error.h
@@ -0,0 +1,10 @@
+/*
+ * CAAM Error Reporting code header
+ *
+ * Copyright 2009-2011 Freescale Semiconductor, Inc.
+ */
+
+#ifndef CAAM_ERROR_H
+#define CAAM_ERROR_H
+extern char *caam_jr_strstatus(char *outstr, u32 status);
+#endif /* CAAM_ERROR_H */
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
new file mode 100644
index 000000000000..a34be01b0b29
--- /dev/null
+++ b/drivers/crypto/caam/intern.h
@@ -0,0 +1,113 @@
+/*
+ * CAAM/SEC 4.x driver backend
+ * Private/internal definitions between modules
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ *
+ */
+
+#ifndef INTERN_H
+#define INTERN_H
+
+#define JOBR_UNASSIGNED 0
+#define JOBR_ASSIGNED 1
+
+/* Currently comes from Kconfig param as a ^2 (driver-required) */
+#define JOBR_DEPTH (1 << CONFIG_CRYPTO_DEV_FSL_CAAM_RINGSIZE)
+
+/* Kconfig params for interrupt coalescing if selected (else zero) */
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_INTC
+#define JOBR_INTC JRCFG_ICEN
+#define JOBR_INTC_TIME_THLD CONFIG_CRYPTO_DEV_FSL_CAAM_INTC_TIME_THLD
+#define JOBR_INTC_COUNT_THLD CONFIG_CRYPTO_DEV_FSL_CAAM_INTC_COUNT_THLD
+#else
+#define JOBR_INTC 0
+#define JOBR_INTC_TIME_THLD 0
+#define JOBR_INTC_COUNT_THLD 0
+#endif
+
+/*
+ * Storage for tracking each in-process entry moving across a ring
+ * Each entry on an output ring needs one of these
+ */
+struct caam_jrentry_info {
+	void (*callbk)(struct device *dev, u32 *desc, u32 status, void *arg);
+	void *cbkarg;	/* Argument per ring entry */
+	u32 *desc_addr_virt;	/* Stored virt addr for postprocessing */
+	dma_addr_t desc_addr_dma;	/* Stored bus addr for done matching */
+	u32 desc_size;	/* Stored size for postprocessing, header derived */
+};
+
+/* Private sub-storage for a single JobR */
+struct caam_drv_private_jr {
+	struct device *parentdev;	/* points back to controller dev */
+	int ridx;
+	struct caam_job_ring __iomem *rregs;	/* JobR's register space */
+	struct tasklet_struct irqtask[NR_CPUS];
+	int irq;			/* One per queue */
+	int assign;			/* busy/free */
+
+	/* Job ring info */
+	int ringsize;	/* Size of rings (assume input = output) */
+	struct caam_jrentry_info *entinfo;	/* Alloc'ed 1 per ring entry */
+	spinlock_t inplock ____cacheline_aligned; /* Input ring index lock */
+	int inp_ring_write_index;	/* Input index "tail" */
+	int head;			/* entinfo (s/w ring) head index */
+	dma_addr_t *inpring;	/* Base of input ring, alloc DMA-safe */
+	spinlock_t outlock ____cacheline_aligned; /* Output ring index lock */
+	int out_ring_read_index;	/* Output index "tail" */
+	int tail;			/* entinfo (s/w ring) tail index */
+	struct jr_outentry *outring;	/* Base of output ring, DMA-safe */
+};
+
+/*
+ * Driver-private storage for a single CAAM block instance
+ */
+struct caam_drv_private {
+
+	struct device *dev;
+	struct device **jrdev; /* Alloc'ed array per sub-device */
+	spinlock_t jr_alloc_lock;
+	struct platform_device *pdev;
+
+	/* Physical-presence section */
+	struct caam_ctrl *ctrl; /* controller region */
+	struct caam_deco **deco; /* DECO/CCB views */
+	struct caam_assurance *ac;
+	struct caam_queue_if *qi; /* QI control region */
+
+	/*
+	 * Detected geometry block. Filled in from device tree if powerpc,
+	 * or from register-based version detection code
+	 */
+	u8 total_jobrs;		/* Total Job Rings in device */
+	u8 qi_present;		/* Nonzero if QI present in device */
+	int secvio_irq;		/* Security violation interrupt number */
+
+	/* which jr allocated to scatterlist crypto */
+	atomic_t tfm_count ____cacheline_aligned;
+	int num_jrs_for_algapi;
+	struct device **algapi_jr;
+	/* list of registered crypto algorithms (mk generic context handle?) */
+	struct list_head alg_list;
+
+	/*
+	 * debugfs entries for developer view into driver/device
+	 * variables at runtime.
+	 */
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *dfs_root;
+	struct dentry *ctl; /* controller dir */
+	struct dentry *ctl_rq_dequeued, *ctl_ob_enc_req, *ctl_ib_dec_req;
+	struct dentry *ctl_ob_enc_bytes, *ctl_ob_prot_bytes;
+	struct dentry *ctl_ib_dec_bytes, *ctl_ib_valid_bytes;
+	struct dentry *ctl_faultaddr, *ctl_faultdetail, *ctl_faultstatus;
+
+	struct debugfs_blob_wrapper ctl_kek_wrap, ctl_tkek_wrap, ctl_tdsk_wrap;
+	struct dentry *ctl_kek, *ctl_tkek, *ctl_tdsk;
+#endif
+};
+
+void caam_jr_algapi_init(struct device *dev);
+void caam_jr_algapi_remove(struct device *dev);
+#endif /* INTERN_H */
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
new file mode 100644
index 000000000000..68cb9af4d1a3
--- /dev/null
+++ b/drivers/crypto/caam/jr.c
@@ -0,0 +1,523 @@
+/*
+ * CAAM/SEC 4.x transport/backend driver
+ * JobR backend functionality
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ */
+
+#include "compat.h"
+#include "regs.h"
+#include "jr.h"
+#include "desc.h"
+#include "intern.h"
+
+/* Main per-ring interrupt handler */
+static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
+{
+	struct device *dev = st_dev;
+	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
+	u32 irqstate;
+
+	/*
+	 * Check the output ring for ready responses, kick
+	 * tasklet if jobs done.
+	 */
+	irqstate = rd_reg32(&jrp->rregs->jrintstatus);
+	if (!irqstate)
+		return IRQ_NONE;
+
+	/*
+	 * If JobR error, we got more development work to do
+	 * Flag a bug now, but we really need to shut down and
+	 * restart the queue (and fix code).
+	 */
+	if (irqstate & JRINT_JR_ERROR) {
+		dev_err(dev, "job ring error: irqstate: %08x\n", irqstate);
+		BUG();
+	}
+
+	/* mask valid interrupts */
+	setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK);
+
+	/* Have valid interrupt at this point, just ACK and trigger */
+	wr_reg32(&jrp->rregs->jrintstatus, irqstate);
+
+	preempt_disable();
+	tasklet_schedule(&jrp->irqtask[smp_processor_id()]);
+	preempt_enable();
+
+	return IRQ_HANDLED;
+}
+
+/* Deferred service handler, run as interrupt-fired tasklet */
+static void caam_jr_dequeue(unsigned long devarg)
+{
+	int hw_idx, sw_idx, i, head, tail;
+	struct device *dev = (struct device *)devarg;
+	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
+	void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
+	u32 *userdesc, userstatus;
+	void *userarg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&jrp->outlock, flags);
+
+	head = ACCESS_ONCE(jrp->head);
+	sw_idx = tail = jrp->tail;
+
+	while (CIRC_CNT(head, tail, JOBR_DEPTH) >= 1 &&
+	       rd_reg32(&jrp->rregs->outring_used)) {
+
+		hw_idx = jrp->out_ring_read_index;
+		for (i = 0; CIRC_CNT(head, tail + i, JOBR_DEPTH) >= 1; i++) {
+			sw_idx = (tail + i) & (JOBR_DEPTH - 1);
+
+			smp_read_barrier_depends();
+
+			if (jrp->outring[hw_idx].desc ==
+			    jrp->entinfo[sw_idx].desc_addr_dma)
+				break; /* found */
+		}
+		/* we should never fail to find a matching descriptor */
+		BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0);
+
+		/* Unmap just-run descriptor so we can post-process */
+		dma_unmap_single(dev, jrp->outring[hw_idx].desc,
+				 jrp->entinfo[sw_idx].desc_size,
+				 DMA_TO_DEVICE);
+
+		/* mark completed, avoid matching on a recycled desc addr */
+		jrp->entinfo[sw_idx].desc_addr_dma = 0;
+
+		/* Stash callback params for use outside of lock */
+		usercall = jrp->entinfo[sw_idx].callbk;
+		userarg = jrp->entinfo[sw_idx].cbkarg;
+		userdesc = jrp->entinfo[sw_idx].desc_addr_virt;
+		userstatus = jrp->outring[hw_idx].jrstatus;
+
+		smp_mb();
+
+		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
+					   (JOBR_DEPTH - 1);
+
+		/*
+		 * if this job completed out-of-order, do not increment
+		 * the tail.  Otherwise, increment tail by 1 plus the
+		 * number of subsequent jobs already completed out-of-order
+		 */
+		if (sw_idx == tail) {
+			do {
+				tail = (tail + 1) & (JOBR_DEPTH - 1);
+				smp_read_barrier_depends();
+			} while (CIRC_CNT(head, tail, JOBR_DEPTH) >= 1 &&
+				 jrp->entinfo[tail].desc_addr_dma == 0);
+
+			jrp->tail = tail;
+		}
+
+		/* set done */
+		wr_reg32(&jrp->rregs->outring_rmvd, 1);
+
+		spin_unlock_irqrestore(&jrp->outlock, flags);
+
+		/* Finally, execute user's callback */
+		usercall(dev, userdesc, userstatus, userarg);
+
+		spin_lock_irqsave(&jrp->outlock, flags);
+
+		head = ACCESS_ONCE(jrp->head);
+		sw_idx = tail = jrp->tail;
+	}
+
+	spin_unlock_irqrestore(&jrp->outlock, flags);
+
+	/* reenable / unmask IRQs */
+	clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK);
+}
+
+/**
+ * caam_jr_register() - Alloc a ring for someone to use as needed. Returns
+ * an ordinal of the rings allocated, else returns -ENODEV if no rings
+ * are available.
+ * @ctrldev: points to the controller level dev (parent) that
+ *           owns rings available for use.
+ * @dev:     points to where a pointer to the newly allocated queue's
+ *           dev can be written to if successful.
+ **/
+int caam_jr_register(struct device *ctrldev, struct device **rdev)
+{
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev);
+	struct caam_drv_private_jr *jrpriv = NULL;
+	unsigned long flags;
+	int ring;
+
+	/* Lock, if free ring - assign, unlock */
+	spin_lock_irqsave(&ctrlpriv->jr_alloc_lock, flags);
+	for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) {
+		jrpriv = dev_get_drvdata(ctrlpriv->jrdev[ring]);
+		if (jrpriv->assign == JOBR_UNASSIGNED) {
+			jrpriv->assign = JOBR_ASSIGNED;
+			*rdev = ctrlpriv->jrdev[ring];
+			spin_unlock_irqrestore(&ctrlpriv->jr_alloc_lock, flags);
+			return ring;
+		}
+	}
+
+	/* If assigned, write dev where caller needs it */
+	spin_unlock_irqrestore(&ctrlpriv->jr_alloc_lock, flags);
+	*rdev = NULL;
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL(caam_jr_register);
+
+/**
+ * caam_jr_deregister() - Deregister an API and release the queue.
+ * Returns 0 if OK, -EBUSY if queue still contains pending entries
+ * or unprocessed results at the time of the call
+ * @dev     - points to the dev that identifies the queue to
+ *            be released.
+ **/
+int caam_jr_deregister(struct device *rdev)
+{
+	struct caam_drv_private_jr *jrpriv = dev_get_drvdata(rdev);
+	struct caam_drv_private *ctrlpriv;
+	unsigned long flags;
+
+	/* Get the owning controller's private space */
+	ctrlpriv = dev_get_drvdata(jrpriv->parentdev);
+
+	/*
+	 * Make sure ring empty before release
+	 */
+	if (rd_reg32(&jrpriv->rregs->outring_used) ||
+	    (rd_reg32(&jrpriv->rregs->inpring_avail) != JOBR_DEPTH))
+		return -EBUSY;
+
+	/* Release ring */
+	spin_lock_irqsave(&ctrlpriv->jr_alloc_lock, flags);
+	jrpriv->assign = JOBR_UNASSIGNED;
+	spin_unlock_irqrestore(&ctrlpriv->jr_alloc_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(caam_jr_deregister);
+
+/**
+ * caam_jr_enqueue() - Enqueue a job descriptor head. Returns 0 if OK,
+ * -EBUSY if the queue is full, -EIO if it cannot map the caller's
+ * descriptor.
+ * @dev:  device of the job ring to be used. This device should have
+ *        been assigned prior by caam_jr_register().
+ * @desc: points to a job descriptor that execute our request. All
+ *        descriptors (and all referenced data) must be in a DMAable
+ *        region, and all data references must be physical addresses
+ *        accessible to CAAM (i.e. within a PAMU window granted
+ *        to it).
+ * @cbk:  pointer to a callback function to be invoked upon completion
+ *        of this request. This has the form:
+ *        callback(struct device *dev, u32 *desc, u32 stat, void *arg)
+ *        where:
+ *        @dev:    contains the job ring device that processed this
+ *                 response.
+ *        @desc:   descriptor that initiated the request, same as
+ *                 "desc" being argued to caam_jr_enqueue().
+ *        @status: untranslated status received from CAAM. See the
+ *                 reference manual for a detailed description of
+ *                 error meaning, or see the JRSTA definitions in the
+ *                 register header file
+ *        @areq:   optional pointer to an argument passed with the
+ *                 original request
+ * @areq: optional pointer to a user argument for use at callback
+ *        time.
+ **/
+int caam_jr_enqueue(struct device *dev, u32 *desc,
+		    void (*cbk)(struct device *dev, u32 *desc,
+				u32 status, void *areq),
+		    void *areq)
+{
+	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
+	struct caam_jrentry_info *head_entry;
+	unsigned long flags;
+	int head, tail, desc_size;
+	dma_addr_t desc_dma;
+
+	desc_size = (*desc & HDR_JD_LENGTH_MASK) * sizeof(u32);
+	desc_dma = dma_map_single(dev, desc, desc_size, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, desc_dma)) {
+		dev_err(dev, "caam_jr_enqueue(): can't map jobdesc\n");
+		return -EIO;
+	}
+
+	spin_lock_irqsave(&jrp->inplock, flags);
+
+	head = jrp->head;
+	tail = ACCESS_ONCE(jrp->tail);
+
+	if (!rd_reg32(&jrp->rregs->inpring_avail) ||
+	    CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) {
+		spin_unlock_irqrestore(&jrp->inplock, flags);
+		dma_unmap_single(dev, desc_dma, desc_size, DMA_TO_DEVICE);
+		return -EBUSY;
+	}
+
+	head_entry = &jrp->entinfo[head];
+	head_entry->desc_addr_virt = desc;
+	head_entry->desc_size = desc_size;
+	head_entry->callbk = (void *)cbk;
+	head_entry->cbkarg = areq;
+	head_entry->desc_addr_dma = desc_dma;
+
+	jrp->inpring[jrp->inp_ring_write_index] = desc_dma;
+
+	smp_wmb();
+
+	jrp->inp_ring_write_index = (jrp->inp_ring_write_index + 1) &
+				    (JOBR_DEPTH - 1);
+	jrp->head = (head + 1) & (JOBR_DEPTH - 1);
+
+	wmb();
+
+	wr_reg32(&jrp->rregs->inpring_jobadd, 1);
+
+	spin_unlock_irqrestore(&jrp->inplock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(caam_jr_enqueue);
+
+static int caam_reset_hw_jr(struct device *dev)
+{
+	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
+	unsigned int timeout = 100000;
+
+	/*
+	 * FIXME: disabling IRQs here inhibits proper job completion
+	 * and error propagation
+	 */
+	disable_irq(jrp->irq);
+
+	/* initiate flush (required prior to reset) */
+	wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET);
+	while (((rd_reg32(&jrp->rregs->jrintstatus) & JRINT_ERR_HALT_MASK) ==
+		JRINT_ERR_HALT_INPROGRESS) && --timeout)
+		cpu_relax();
+
+	if ((rd_reg32(&jrp->rregs->jrintstatus) & JRINT_ERR_HALT_MASK) !=
+	    JRINT_ERR_HALT_COMPLETE || timeout == 0) {
+		dev_err(dev, "failed to flush job ring %d\n", jrp->ridx);
+		return -EIO;
+	}
+
+	/* initiate reset */
+	timeout = 100000;
+	wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET);
+	while ((rd_reg32(&jrp->rregs->jrcommand) & JRCR_RESET) && --timeout)
+		cpu_relax();
+
+	if (timeout == 0) {
+		dev_err(dev, "failed to reset job ring %d\n", jrp->ridx);
+		return -EIO;
+	}
+
+	enable_irq(jrp->irq);
+
+	return 0;
+}
+
+/*
+ * Init JobR independent of platform property detection
+ */
+static int caam_jr_init(struct device *dev)
+{
+	struct caam_drv_private_jr *jrp;
+	dma_addr_t inpbusaddr, outbusaddr;
+	int i, error;
+
+	jrp = dev_get_drvdata(dev);
+
+	error = caam_reset_hw_jr(dev);
+	if (error)
+		return error;
+
+	jrp->inpring = kzalloc(sizeof(dma_addr_t) * JOBR_DEPTH,
+			       GFP_KERNEL | GFP_DMA);
+	jrp->outring = kzalloc(sizeof(struct jr_outentry) *
+			       JOBR_DEPTH, GFP_KERNEL | GFP_DMA);
+
+	jrp->entinfo = kzalloc(sizeof(struct caam_jrentry_info) * JOBR_DEPTH,
+			       GFP_KERNEL);
+
+	if ((jrp->inpring == NULL) || (jrp->outring == NULL) ||
+	    (jrp->entinfo == NULL)) {
+		dev_err(dev, "can't allocate job rings for %d\n",
+			jrp->ridx);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < JOBR_DEPTH; i++)
+		jrp->entinfo[i].desc_addr_dma = !0;
+
+	/* Setup rings */
+	inpbusaddr = dma_map_single(dev, jrp->inpring,
+				    sizeof(u32 *) * JOBR_DEPTH,
+				    DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, inpbusaddr)) {
+		dev_err(dev, "caam_jr_init(): can't map input ring\n");
+		kfree(jrp->inpring);
+		kfree(jrp->outring);
+		kfree(jrp->entinfo);
+		return -EIO;
+	}
+
+	outbusaddr = dma_map_single(dev, jrp->outring,
+				    sizeof(struct jr_outentry) * JOBR_DEPTH,
+				    DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, outbusaddr)) {
+		dev_err(dev, "caam_jr_init(): can't map output ring\n");
+			dma_unmap_single(dev, inpbusaddr,
+					 sizeof(u32 *) * JOBR_DEPTH,
+					 DMA_BIDIRECTIONAL);
+		kfree(jrp->inpring);
+		kfree(jrp->outring);
+		kfree(jrp->entinfo);
+		return -EIO;
+	}
+
+	jrp->inp_ring_write_index = 0;
+	jrp->out_ring_read_index = 0;
+	jrp->head = 0;
+	jrp->tail = 0;
+
+	wr_reg64(&jrp->rregs->inpring_base, inpbusaddr);
+	wr_reg64(&jrp->rregs->outring_base, outbusaddr);
+	wr_reg32(&jrp->rregs->inpring_size, JOBR_DEPTH);
+	wr_reg32(&jrp->rregs->outring_size, JOBR_DEPTH);
+
+	jrp->ringsize = JOBR_DEPTH;
+
+	spin_lock_init(&jrp->inplock);
+	spin_lock_init(&jrp->outlock);
+
+	/* Select interrupt coalescing parameters */
+	setbits32(&jrp->rregs->rconfig_lo, JOBR_INTC |
+		  (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) |
+		  (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT));
+
+	/* Connect job ring interrupt handler. */
+	for_each_possible_cpu(i)
+		tasklet_init(&jrp->irqtask[i], caam_jr_dequeue,
+			     (unsigned long)dev);
+
+	error = request_irq(jrp->irq, caam_jr_interrupt, 0,
+			    "caam-jobr", dev);
+	if (error) {
+		dev_err(dev, "can't connect JobR %d interrupt (%d)\n",
+			jrp->ridx, jrp->irq);
+		irq_dispose_mapping(jrp->irq);
+		jrp->irq = 0;
+		dma_unmap_single(dev, inpbusaddr, sizeof(u32 *) * JOBR_DEPTH,
+				 DMA_BIDIRECTIONAL);
+		dma_unmap_single(dev, outbusaddr, sizeof(u32 *) * JOBR_DEPTH,
+				 DMA_BIDIRECTIONAL);
+		kfree(jrp->inpring);
+		kfree(jrp->outring);
+		kfree(jrp->entinfo);
+		return -EINVAL;
+	}
+
+	jrp->assign = JOBR_UNASSIGNED;
+	return 0;
+}
+
+/*
+ * Shutdown JobR independent of platform property code
+ */
+int caam_jr_shutdown(struct device *dev)
+{
+	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
+	dma_addr_t inpbusaddr, outbusaddr;
+	int ret, i;
+
+	ret = caam_reset_hw_jr(dev);
+
+	for_each_possible_cpu(i)
+		tasklet_kill(&jrp->irqtask[i]);
+
+	/* Release interrupt */
+	free_irq(jrp->irq, dev);
+
+	/* Free rings */
+	inpbusaddr = rd_reg64(&jrp->rregs->inpring_base);
+	outbusaddr = rd_reg64(&jrp->rregs->outring_base);
+	dma_unmap_single(dev, outbusaddr,
+			 sizeof(struct jr_outentry) * JOBR_DEPTH,
+			 DMA_BIDIRECTIONAL);
+	dma_unmap_single(dev, inpbusaddr, sizeof(u32 *) * JOBR_DEPTH,
+			 DMA_BIDIRECTIONAL);
+	kfree(jrp->outring);
+	kfree(jrp->inpring);
+	kfree(jrp->entinfo);
+
+	return ret;
+}
+
+/*
+ * Probe routine for each detected JobR subsystem. It assumes that
+ * property detection was picked up externally.
+ */
+int caam_jr_probe(struct platform_device *pdev, struct device_node *np,
+		  int ring)
+{
+	struct device *ctrldev, *jrdev;
+	struct platform_device *jr_pdev;
+	struct caam_drv_private *ctrlpriv;
+	struct caam_drv_private_jr *jrpriv;
+	u32 *jroffset;
+	int error;
+
+	ctrldev = &pdev->dev;
+	ctrlpriv = dev_get_drvdata(ctrldev);
+
+	jrpriv = kmalloc(sizeof(struct caam_drv_private_jr),
+			 GFP_KERNEL);
+	if (jrpriv == NULL) {
+		dev_err(ctrldev, "can't alloc private mem for job ring %d\n",
+			ring);
+		return -ENOMEM;
+	}
+	jrpriv->parentdev = ctrldev; /* point back to parent */
+	jrpriv->ridx = ring; /* save ring identity relative to detection */
+
+	/*
+	 * Derive a pointer to the detected JobRs regs
+	 * Driver has already iomapped the entire space, we just
+	 * need to add in the offset to this JobR. Don't know if I
+	 * like this long-term, but it'll run
+	 */
+	jroffset = (u32 *)of_get_property(np, "reg", NULL);
+	jrpriv->rregs = (struct caam_job_ring __iomem *)((void *)ctrlpriv->ctrl
+							 + *jroffset);
+
+	/* Build a local dev for each detected queue */
+	jr_pdev = of_platform_device_create(np, NULL, ctrldev);
+	if (jr_pdev == NULL) {
+		kfree(jrpriv);
+		return -EINVAL;
+	}
+	jrdev = &jr_pdev->dev;
+	dev_set_drvdata(jrdev, jrpriv);
+	ctrlpriv->jrdev[ring] = jrdev;
+
+	/* Identify the interrupt */
+	jrpriv->irq = of_irq_to_resource(np, 0, NULL);
+
+	/* Now do the platform independent part */
+	error = caam_jr_init(jrdev); /* now turn on hardware */
+	if (error) {
+		kfree(jrpriv);
+		return error;
+	}
+
+	return error;
+}
diff --git a/drivers/crypto/caam/jr.h b/drivers/crypto/caam/jr.h
new file mode 100644
index 000000000000..c23df395b622
--- /dev/null
+++ b/drivers/crypto/caam/jr.h
@@ -0,0 +1,21 @@
+/*
+ * CAAM public-level include definitions for the JobR backend
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ */
+
+#ifndef JR_H
+#define JR_H
+
+/* Prototypes for backend-level services exposed to APIs */
+int caam_jr_register(struct device *ctrldev, struct device **rdev);
+int caam_jr_deregister(struct device *rdev);
+int caam_jr_enqueue(struct device *dev, u32 *desc,
+		    void (*cbk)(struct device *dev, u32 *desc, u32 status,
+				void *areq),
+		    void *areq);
+
+extern int caam_jr_probe(struct platform_device *pdev, struct device_node *np,
+			 int ring);
+extern int caam_jr_shutdown(struct device *dev);
+#endif /* JR_H */
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
new file mode 100644
index 000000000000..d063a260958b
--- /dev/null
+++ b/drivers/crypto/caam/regs.h
@@ -0,0 +1,663 @@
+/*
+ * CAAM hardware register-level view
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ */
+
+#ifndef REGS_H
+#define REGS_H
+
+#include <linux/types.h>
+#include <linux/io.h>
+
+/*
+ * Architecture-specific register access methods
+ *
+ * CAAM's bus-addressable registers are 64 bits internally.
+ * They have been wired to be safely accessible on 32-bit
+ * architectures, however. Registers were organized such
+ * that (a) they can be contained in 32 bits, (b) if not, then they
+ * can be treated as two 32-bit entities, or finally (c) if they
+ * must be treated as a single 64-bit value, then this can safely
+ * be done with two 32-bit cycles.
+ *
+ * For 32-bit operations on 64-bit values, CAAM follows the same
+ * 64-bit register access conventions as it's predecessors, in that
+ * writes are "triggered" by a write to the register at the numerically
+ * higher address, thus, a full 64-bit write cycle requires a write
+ * to the lower address, followed by a write to the higher address,
+ * which will latch/execute the write cycle.
+ *
+ * For example, let's assume a SW reset of CAAM through the master
+ * configuration register.
+ * - SWRST is in bit 31 of MCFG.
+ * - MCFG begins at base+0x0000.
+ * - Bits 63-32 are a 32-bit word at base+0x0000 (numerically-lower)
+ * - Bits 31-0 are a 32-bit word at base+0x0004 (numerically-higher)
+ *
+ * (and on Power, the convention is 0-31, 32-63, I know...)
+ *
+ * Assuming a 64-bit write to this MCFG to perform a software reset
+ * would then require a write of 0 to base+0x0000, followed by a
+ * write of 0x80000000 to base+0x0004, which would "execute" the
+ * reset.
+ *
+ * Of course, since MCFG 63-32 is all zero, we could cheat and simply
+ * write 0x8000000 to base+0x0004, and the reset would work fine.
+ * However, since CAAM does contain some write-and-read-intended
+ * 64-bit registers, this code defines 64-bit access methods for
+ * the sake of internal consistency and simplicity, and so that a
+ * clean transition to 64-bit is possible when it becomes necessary.
+ *
+ * There are limitations to this that the developer must recognize.
+ * 32-bit architectures cannot enforce an atomic-64 operation,
+ * Therefore:
+ *
+ * - On writes, since the HW is assumed to latch the cycle on the
+ *   write of the higher-numeric-address word, then ordered
+ *   writes work OK.
+ *
+ * - For reads, where a register contains a relevant value of more
+ *   that 32 bits, the hardware employs logic to latch the other
+ *   "half" of the data until read, ensuring an accurate value.
+ *   This is of particular relevance when dealing with CAAM's
+ *   performance counters.
+ *
+ */
+
+#ifdef __BIG_ENDIAN
+#define wr_reg32(reg, data) out_be32(reg, data)
+#define rd_reg32(reg) in_be32(reg)
+#ifdef CONFIG_64BIT
+#define wr_reg64(reg, data) out_be64(reg, data)
+#define rd_reg64(reg) in_be64(reg)
+#endif
+#else
+#ifdef __LITTLE_ENDIAN
+#define wr_reg32(reg, data) __raw_writel(reg, data)
+#define rd_reg32(reg) __raw_readl(reg)
+#ifdef CONFIG_64BIT
+#define wr_reg64(reg, data) __raw_writeq(reg, data)
+#define rd_reg64(reg) __raw_readq(reg)
+#endif
+#endif
+#endif
+
+#ifndef CONFIG_64BIT
+static inline void wr_reg64(u64 __iomem *reg, u64 data)
+{
+	wr_reg32((u32 __iomem *)reg, (data & 0xffffffff00000000ull) >> 32);
+	wr_reg32((u32 __iomem *)reg + 1, data & 0x00000000ffffffffull);
+}
+
+static inline u64 rd_reg64(u64 __iomem *reg)
+{
+	return (((u64)rd_reg32((u32 __iomem *)reg)) << 32) |
+		((u64)rd_reg32((u32 __iomem *)reg + 1));
+}
+#endif
+
+/*
+ * jr_outentry
+ * Represents each entry in a JobR output ring
+ */
+struct jr_outentry {
+	dma_addr_t desc;/* Pointer to completed descriptor */
+	u32 jrstatus;	/* Status for completed descriptor */
+} __packed;
+
+/*
+ * caam_perfmon - Performance Monitor/Secure Memory Status/
+ *                CAAM Global Status/Component Version IDs
+ *
+ * Spans f00-fff wherever instantiated
+ */
+
+/* Number of DECOs */
+#define CHA_NUM_DECONUM_SHIFT	56
+#define CHA_NUM_DECONUM_MASK	(0xfull << CHA_NUM_DECONUM_SHIFT)
+
+struct caam_perfmon {
+	/* Performance Monitor Registers			f00-f9f */
+	u64 req_dequeued;	/* PC_REQ_DEQ - Dequeued Requests	     */
+	u64 ob_enc_req;	/* PC_OB_ENC_REQ - Outbound Encrypt Requests */
+	u64 ib_dec_req;	/* PC_IB_DEC_REQ - Inbound Decrypt Requests  */
+	u64 ob_enc_bytes;	/* PC_OB_ENCRYPT - Outbound Bytes Encrypted  */
+	u64 ob_prot_bytes;	/* PC_OB_PROTECT - Outbound Bytes Protected  */
+	u64 ib_dec_bytes;	/* PC_IB_DECRYPT - Inbound Bytes Decrypted   */
+	u64 ib_valid_bytes;	/* PC_IB_VALIDATED Inbound Bytes Validated   */
+	u64 rsvd[13];
+
+	/* CAAM Hardware Instantiation Parameters		fa0-fbf */
+	u64 cha_rev;		/* CRNR - CHA Revision Number		*/
+#define CTPR_QI_SHIFT		57
+#define CTPR_QI_MASK		(0x1ull << CHA_NUM_DECONUM_SHIFT)
+	u64 comp_parms;	/* CTPR - Compile Parameters Register	*/
+	u64 rsvd1[2];
+
+	/* CAAM Global Status					fc0-fdf */
+	u64 faultaddr;	/* FAR  - Fault Address		*/
+	u32 faultliodn;	/* FALR - Fault Address LIODN	*/
+	u32 faultdetail;	/* FADR - Fault Addr Detail	*/
+	u32 rsvd2;
+	u32 status;		/* CSTA - CAAM Status */
+	u64 rsvd3;
+
+	/* Component Instantiation Parameters			fe0-fff */
+	u32 rtic_id;		/* RVID - RTIC Version ID	*/
+	u32 ccb_id;		/* CCBVID - CCB Version ID	*/
+	u64 cha_id;		/* CHAVID - CHA Version ID	*/
+	u64 cha_num;		/* CHANUM - CHA Number		*/
+	u64 caam_id;		/* CAAMVID - CAAM Version ID	*/
+};
+
+/* LIODN programming for DMA configuration */
+#define MSTRID_LOCK_LIODN	0x80000000
+#define MSTRID_LOCK_MAKETRUSTED	0x00010000	/* only for JR masterid */
+
+#define MSTRID_LIODN_MASK	0x0fff
+struct masterid {
+	u32 liodn_ms;	/* lock and make-trusted control bits */
+	u32 liodn_ls;	/* LIODN for non-sequence and seq access */
+};
+
+/* Partition ID for DMA configuration */
+struct partid {
+	u32 rsvd1;
+	u32 pidr;	/* partition ID, DECO */
+};
+
+/* RNG test mode (replicated twice in some configurations) */
+/* Padded out to 0x100 */
+struct rngtst {
+	u32 mode;		/* RTSTMODEx - Test mode */
+	u32 rsvd1[3];
+	u32 reset;		/* RTSTRESETx - Test reset control */
+	u32 rsvd2[3];
+	u32 status;		/* RTSTSSTATUSx - Test status */
+	u32 rsvd3;
+	u32 errstat;		/* RTSTERRSTATx - Test error status */
+	u32 rsvd4;
+	u32 errctl;		/* RTSTERRCTLx - Test error control */
+	u32 rsvd5;
+	u32 entropy;		/* RTSTENTROPYx - Test entropy */
+	u32 rsvd6[15];
+	u32 verifctl;	/* RTSTVERIFCTLx - Test verification control */
+	u32 rsvd7;
+	u32 verifstat;	/* RTSTVERIFSTATx - Test verification status */
+	u32 rsvd8;
+	u32 verifdata;	/* RTSTVERIFDx - Test verification data */
+	u32 rsvd9;
+	u32 xkey;		/* RTSTXKEYx - Test XKEY */
+	u32 rsvd10;
+	u32 oscctctl;	/* RTSTOSCCTCTLx - Test osc. counter control */
+	u32 rsvd11;
+	u32 oscct;		/* RTSTOSCCTx - Test oscillator counter */
+	u32 rsvd12;
+	u32 oscctstat;	/* RTSTODCCTSTATx - Test osc counter status */
+	u32 rsvd13[2];
+	u32 ofifo[4];	/* RTSTOFIFOx - Test output FIFO */
+	u32 rsvd14[15];
+};
+
+/*
+ * caam_ctrl - basic core configuration
+ * starts base + 0x0000 padded out to 0x1000
+ */
+
+#define KEK_KEY_SIZE		8
+#define TKEK_KEY_SIZE		8
+#define TDSK_KEY_SIZE		8
+
+#define DECO_RESET	1	/* Use with DECO reset/availability regs */
+#define DECO_RESET_0	(DECO_RESET << 0)
+#define DECO_RESET_1	(DECO_RESET << 1)
+#define DECO_RESET_2	(DECO_RESET << 2)
+#define DECO_RESET_3	(DECO_RESET << 3)
+#define DECO_RESET_4	(DECO_RESET << 4)
+
+struct caam_ctrl {
+	/* Basic Configuration Section				000-01f */
+	/* Read/Writable					        */
+	u32 rsvd1;
+	u32 mcr;		/* MCFG      Master Config Register  */
+	u32 rsvd2[2];
+
+	/* Bus Access Configuration Section			010-11f */
+	/* Read/Writable                                                */
+	struct masterid jr_mid[4];	/* JRxLIODNR - JobR LIODN setup */
+	u32 rsvd3[12];
+	struct masterid rtic_mid[4];	/* RTICxLIODNR - RTIC LIODN setup */
+	u32 rsvd4[7];
+	u32 deco_rq;			/* DECORR - DECO Request */
+	struct partid deco_mid[5];	/* DECOxLIODNR - 1 per DECO */
+	u32 rsvd5[22];
+
+	/* DECO Availability/Reset Section			120-3ff */
+	u32 deco_avail;		/* DAR - DECO availability */
+	u32 deco_reset;		/* DRR - DECO reset */
+	u32 rsvd6[182];
+
+	/* Key Encryption/Decryption Configuration              400-5ff */
+	/* Read/Writable only while in Non-secure mode                  */
+	u32 kek[KEK_KEY_SIZE];	/* JDKEKR - Key Encryption Key */
+	u32 tkek[TKEK_KEY_SIZE];	/* TDKEKR - Trusted Desc KEK */
+	u32 tdsk[TDSK_KEY_SIZE];	/* TDSKR - Trusted Desc Signing Key */
+	u32 rsvd7[32];
+	u64 sknonce;			/* SKNR - Secure Key Nonce */
+	u32 rsvd8[70];
+
+	/* RNG Test/Verification/Debug Access                   600-7ff */
+	/* (Useful in Test/Debug modes only...)                         */
+	struct rngtst rtst[2];
+
+	u32 rsvd9[448];
+
+	/* Performance Monitor                                  f00-fff */
+	struct caam_perfmon perfmon;
+};
+
+/*
+ * Controller master config register defs
+ */
+#define MCFGR_SWRESET		0x80000000 /* software reset */
+#define MCFGR_WDENABLE		0x40000000 /* DECO watchdog enable */
+#define MCFGR_WDFAIL		0x20000000 /* DECO watchdog force-fail */
+#define MCFGR_DMA_RESET		0x10000000
+#define MCFGR_LONG_PTR		0x00010000 /* Use >32-bit desc addressing */
+
+/* AXI read cache control */
+#define MCFGR_ARCACHE_SHIFT	12
+#define MCFGR_ARCACHE_MASK	(0xf << MCFGR_ARCACHE_SHIFT)
+
+/* AXI write cache control */
+#define MCFGR_AWCACHE_SHIFT	8
+#define MCFGR_AWCACHE_MASK	(0xf << MCFGR_AWCACHE_SHIFT)
+
+/* AXI pipeline depth */
+#define MCFGR_AXIPIPE_SHIFT	4
+#define MCFGR_AXIPIPE_MASK	(0xf << MCFGR_AXIPIPE_SHIFT)
+
+#define MCFGR_AXIPRI		0x00000008 /* Assert AXI priority sideband */
+#define MCFGR_BURST_64		0x00000001 /* Max burst size */
+
+/*
+ * caam_job_ring - direct job ring setup
+ * 1-4 possible per instantiation, base + 1000/2000/3000/4000
+ * Padded out to 0x1000
+ */
+struct caam_job_ring {
+	/* Input ring */
+	u64 inpring_base;	/* IRBAx -  Input desc ring baseaddr */
+	u32 rsvd1;
+	u32 inpring_size;	/* IRSx - Input ring size */
+	u32 rsvd2;
+	u32 inpring_avail;	/* IRSAx - Input ring room remaining */
+	u32 rsvd3;
+	u32 inpring_jobadd;	/* IRJAx - Input ring jobs added */
+
+	/* Output Ring */
+	u64 outring_base;	/* ORBAx - Output status ring base addr */
+	u32 rsvd4;
+	u32 outring_size;	/* ORSx - Output ring size */
+	u32 rsvd5;
+	u32 outring_rmvd;	/* ORJRx - Output ring jobs removed */
+	u32 rsvd6;
+	u32 outring_used;	/* ORSFx - Output ring slots full */
+
+	/* Status/Configuration */
+	u32 rsvd7;
+	u32 jroutstatus;	/* JRSTAx - JobR output status */
+	u32 rsvd8;
+	u32 jrintstatus;	/* JRINTx - JobR interrupt status */
+	u32 rconfig_hi;	/* JRxCFG - Ring configuration */
+	u32 rconfig_lo;
+
+	/* Indices. CAAM maintains as "heads" of each queue */
+	u32 rsvd9;
+	u32 inp_rdidx;	/* IRRIx - Input ring read index */
+	u32 rsvd10;
+	u32 out_wtidx;	/* ORWIx - Output ring write index */
+
+	/* Command/control */
+	u32 rsvd11;
+	u32 jrcommand;	/* JRCRx - JobR command */
+
+	u32 rsvd12[932];
+
+	/* Performance Monitor                                  f00-fff */
+	struct caam_perfmon perfmon;
+};
+
+#define JR_RINGSIZE_MASK	0x03ff
+/*
+ * jrstatus - Job Ring Output Status
+ * All values in lo word
+ * Also note, same values written out as status through QI
+ * in the command/status field of a frame descriptor
+ */
+#define JRSTA_SSRC_SHIFT            28
+#define JRSTA_SSRC_MASK             0xf0000000
+
+#define JRSTA_SSRC_NONE             0x00000000
+#define JRSTA_SSRC_CCB_ERROR        0x20000000
+#define JRSTA_SSRC_JUMP_HALT_USER   0x30000000
+#define JRSTA_SSRC_DECO             0x40000000
+#define JRSTA_SSRC_JRERROR          0x60000000
+#define JRSTA_SSRC_JUMP_HALT_CC     0x70000000
+
+#define JRSTA_DECOERR_JUMP          0x08000000
+#define JRSTA_DECOERR_INDEX_SHIFT   8
+#define JRSTA_DECOERR_INDEX_MASK    0xff00
+#define JRSTA_DECOERR_ERROR_MASK    0x00ff
+
+#define JRSTA_DECOERR_NONE          0x00
+#define JRSTA_DECOERR_LINKLEN       0x01
+#define JRSTA_DECOERR_LINKPTR       0x02
+#define JRSTA_DECOERR_JRCTRL        0x03
+#define JRSTA_DECOERR_DESCCMD       0x04
+#define JRSTA_DECOERR_ORDER         0x05
+#define JRSTA_DECOERR_KEYCMD        0x06
+#define JRSTA_DECOERR_LOADCMD       0x07
+#define JRSTA_DECOERR_STORECMD      0x08
+#define JRSTA_DECOERR_OPCMD         0x09
+#define JRSTA_DECOERR_FIFOLDCMD     0x0a
+#define JRSTA_DECOERR_FIFOSTCMD     0x0b
+#define JRSTA_DECOERR_MOVECMD       0x0c
+#define JRSTA_DECOERR_JUMPCMD       0x0d
+#define JRSTA_DECOERR_MATHCMD       0x0e
+#define JRSTA_DECOERR_SHASHCMD      0x0f
+#define JRSTA_DECOERR_SEQCMD        0x10
+#define JRSTA_DECOERR_DECOINTERNAL  0x11
+#define JRSTA_DECOERR_SHDESCHDR     0x12
+#define JRSTA_DECOERR_HDRLEN        0x13
+#define JRSTA_DECOERR_BURSTER       0x14
+#define JRSTA_DECOERR_DESCSIGNATURE 0x15
+#define JRSTA_DECOERR_DMA           0x16
+#define JRSTA_DECOERR_BURSTFIFO     0x17
+#define JRSTA_DECOERR_JRRESET       0x1a
+#define JRSTA_DECOERR_JOBFAIL       0x1b
+#define JRSTA_DECOERR_DNRERR        0x80
+#define JRSTA_DECOERR_UNDEFPCL      0x81
+#define JRSTA_DECOERR_PDBERR        0x82
+#define JRSTA_DECOERR_ANRPLY_LATE   0x83
+#define JRSTA_DECOERR_ANRPLY_REPLAY 0x84
+#define JRSTA_DECOERR_SEQOVF        0x85
+#define JRSTA_DECOERR_INVSIGN       0x86
+#define JRSTA_DECOERR_DSASIGN       0x87
+
+#define JRSTA_CCBERR_JUMP           0x08000000
+#define JRSTA_CCBERR_INDEX_MASK     0xff00
+#define JRSTA_CCBERR_INDEX_SHIFT    8
+#define JRSTA_CCBERR_CHAID_MASK     0x00f0
+#define JRSTA_CCBERR_CHAID_SHIFT    4
+#define JRSTA_CCBERR_ERRID_MASK     0x000f
+
+#define JRSTA_CCBERR_CHAID_AES      (0x01 << JRSTA_CCBERR_CHAID_SHIFT)
+#define JRSTA_CCBERR_CHAID_DES      (0x02 << JRSTA_CCBERR_CHAID_SHIFT)
+#define JRSTA_CCBERR_CHAID_ARC4     (0x03 << JRSTA_CCBERR_CHAID_SHIFT)
+#define JRSTA_CCBERR_CHAID_MD       (0x04 << JRSTA_CCBERR_CHAID_SHIFT)
+#define JRSTA_CCBERR_CHAID_RNG      (0x05 << JRSTA_CCBERR_CHAID_SHIFT)
+#define JRSTA_CCBERR_CHAID_SNOW     (0x06 << JRSTA_CCBERR_CHAID_SHIFT)
+#define JRSTA_CCBERR_CHAID_KASUMI   (0x07 << JRSTA_CCBERR_CHAID_SHIFT)
+#define JRSTA_CCBERR_CHAID_PK       (0x08 << JRSTA_CCBERR_CHAID_SHIFT)
+#define JRSTA_CCBERR_CHAID_CRC      (0x09 << JRSTA_CCBERR_CHAID_SHIFT)
+
+#define JRSTA_CCBERR_ERRID_NONE     0x00
+#define JRSTA_CCBERR_ERRID_MODE     0x01
+#define JRSTA_CCBERR_ERRID_DATASIZ  0x02
+#define JRSTA_CCBERR_ERRID_KEYSIZ   0x03
+#define JRSTA_CCBERR_ERRID_PKAMEMSZ 0x04
+#define JRSTA_CCBERR_ERRID_PKBMEMSZ 0x05
+#define JRSTA_CCBERR_ERRID_SEQUENCE 0x06
+#define JRSTA_CCBERR_ERRID_PKDIVZRO 0x07
+#define JRSTA_CCBERR_ERRID_PKMODEVN 0x08
+#define JRSTA_CCBERR_ERRID_KEYPARIT 0x09
+#define JRSTA_CCBERR_ERRID_ICVCHK   0x0a
+#define JRSTA_CCBERR_ERRID_HARDWARE 0x0b
+#define JRSTA_CCBERR_ERRID_CCMAAD   0x0c
+#define JRSTA_CCBERR_ERRID_INVCHA   0x0f
+
+#define JRINT_ERR_INDEX_MASK        0x3fff0000
+#define JRINT_ERR_INDEX_SHIFT       16
+#define JRINT_ERR_TYPE_MASK         0xf00
+#define JRINT_ERR_TYPE_SHIFT        8
+#define JRINT_ERR_HALT_MASK         0xc
+#define JRINT_ERR_HALT_SHIFT        2
+#define JRINT_ERR_HALT_INPROGRESS   0x4
+#define JRINT_ERR_HALT_COMPLETE     0x8
+#define JRINT_JR_ERROR              0x02
+#define JRINT_JR_INT                0x01
+
+#define JRINT_ERR_TYPE_WRITE        1
+#define JRINT_ERR_TYPE_BAD_INPADDR  3
+#define JRINT_ERR_TYPE_BAD_OUTADDR  4
+#define JRINT_ERR_TYPE_INV_INPWRT   5
+#define JRINT_ERR_TYPE_INV_OUTWRT   6
+#define JRINT_ERR_TYPE_RESET        7
+#define JRINT_ERR_TYPE_REMOVE_OFL   8
+#define JRINT_ERR_TYPE_ADD_OFL      9
+
+#define JRCFG_SOE		0x04
+#define JRCFG_ICEN		0x02
+#define JRCFG_IMSK		0x01
+#define JRCFG_ICDCT_SHIFT	8
+#define JRCFG_ICTT_SHIFT	16
+
+#define JRCR_RESET                  0x01
+
+/*
+ * caam_assurance - Assurance Controller View
+ * base + 0x6000 padded out to 0x1000
+ */
+
+struct rtic_element {
+	u64 address;
+	u32 rsvd;
+	u32 length;
+};
+
+struct rtic_block {
+	struct rtic_element element[2];
+};
+
+struct rtic_memhash {
+	u32 memhash_be[32];
+	u32 memhash_le[32];
+};
+
+struct caam_assurance {
+    /* Status/Command/Watchdog */
+	u32 rsvd1;
+	u32 status;		/* RSTA - Status */
+	u32 rsvd2;
+	u32 cmd;		/* RCMD - Command */
+	u32 rsvd3;
+	u32 ctrl;		/* RCTL - Control */
+	u32 rsvd4;
+	u32 throttle;	/* RTHR - Throttle */
+	u32 rsvd5[2];
+	u64 watchdog;	/* RWDOG - Watchdog Timer */
+	u32 rsvd6;
+	u32 rend;		/* REND - Endian corrections */
+	u32 rsvd7[50];
+
+	/* Block access/configuration @ 100/110/120/130 */
+	struct rtic_block memblk[4];	/* Memory Blocks A-D */
+	u32 rsvd8[32];
+
+	/* Block hashes @ 200/300/400/500 */
+	struct rtic_memhash hash[4];	/* Block hash values A-D */
+	u32 rsvd_3[640];
+};
+
+/*
+ * caam_queue_if - QI configuration and control
+ * starts base + 0x7000, padded out to 0x1000 long
+ */
+
+struct caam_queue_if {
+	u32 qi_control_hi;	/* QICTL  - QI Control */
+	u32 qi_control_lo;
+	u32 rsvd1;
+	u32 qi_status;	/* QISTA  - QI Status */
+	u32 qi_deq_cfg_hi;	/* QIDQC  - QI Dequeue Configuration */
+	u32 qi_deq_cfg_lo;
+	u32 qi_enq_cfg_hi;	/* QISEQC - QI Enqueue Command     */
+	u32 qi_enq_cfg_lo;
+	u32 rsvd2[1016];
+};
+
+/* QI control bits - low word */
+#define QICTL_DQEN      0x01              /* Enable frame pop          */
+#define QICTL_STOP      0x02              /* Stop dequeue/enqueue      */
+#define QICTL_SOE       0x04              /* Stop on error             */
+
+/* QI control bits - high word */
+#define QICTL_MBSI	0x01
+#define QICTL_MHWSI	0x02
+#define QICTL_MWSI	0x04
+#define QICTL_MDWSI	0x08
+#define QICTL_CBSI	0x10		/* CtrlDataByteSwapInput     */
+#define QICTL_CHWSI	0x20		/* CtrlDataHalfSwapInput     */
+#define QICTL_CWSI	0x40		/* CtrlDataWordSwapInput     */
+#define QICTL_CDWSI	0x80		/* CtrlDataDWordSwapInput    */
+#define QICTL_MBSO	0x0100
+#define QICTL_MHWSO	0x0200
+#define QICTL_MWSO	0x0400
+#define QICTL_MDWSO	0x0800
+#define QICTL_CBSO	0x1000		/* CtrlDataByteSwapOutput    */
+#define QICTL_CHWSO	0x2000		/* CtrlDataHalfSwapOutput    */
+#define QICTL_CWSO	0x4000		/* CtrlDataWordSwapOutput    */
+#define QICTL_CDWSO     0x8000		/* CtrlDataDWordSwapOutput   */
+#define QICTL_DMBS	0x010000
+#define QICTL_EPO	0x020000
+
+/* QI status bits */
+#define QISTA_PHRDERR   0x01              /* PreHeader Read Error      */
+#define QISTA_CFRDERR   0x02              /* Compound Frame Read Error */
+#define QISTA_OFWRERR   0x04              /* Output Frame Read Error   */
+#define QISTA_BPDERR    0x08              /* Buffer Pool Depleted      */
+#define QISTA_BTSERR    0x10              /* Buffer Undersize          */
+#define QISTA_CFWRERR   0x20              /* Compound Frame Write Err  */
+#define QISTA_STOPD     0x80000000        /* QI Stopped (see QICTL)    */
+
+/* deco_sg_table - DECO view of scatter/gather table */
+struct deco_sg_table {
+	u64 addr;		/* Segment Address */
+	u32 elen;		/* E, F bits + 30-bit length */
+	u32 bpid_offset;	/* Buffer Pool ID + 16-bit length */
+};
+
+/*
+ * caam_deco - descriptor controller - CHA cluster block
+ *
+ * Only accessible when direct DECO access is turned on
+ * (done in DECORR, via MID programmed in DECOxMID
+ *
+ * 5 typical, base + 0x8000/9000/a000/b000
+ * Padded out to 0x1000 long
+ */
+struct caam_deco {
+	u32 rsvd1;
+	u32 cls1_mode;	/* CxC1MR -  Class 1 Mode */
+	u32 rsvd2;
+	u32 cls1_keysize;	/* CxC1KSR - Class 1 Key Size */
+	u32 cls1_datasize_hi;	/* CxC1DSR - Class 1 Data Size */
+	u32 cls1_datasize_lo;
+	u32 rsvd3;
+	u32 cls1_icvsize;	/* CxC1ICVSR - Class 1 ICV size */
+	u32 rsvd4[5];
+	u32 cha_ctrl;	/* CCTLR - CHA control */
+	u32 rsvd5;
+	u32 irq_crtl;	/* CxCIRQ - CCB interrupt done/error/clear */
+	u32 rsvd6;
+	u32 clr_written;	/* CxCWR - Clear-Written */
+	u32 ccb_status_hi;	/* CxCSTA - CCB Status/Error */
+	u32 ccb_status_lo;
+	u32 rsvd7[3];
+	u32 aad_size;	/* CxAADSZR - Current AAD Size */
+	u32 rsvd8;
+	u32 cls1_iv_size;	/* CxC1IVSZR - Current Class 1 IV Size */
+	u32 rsvd9[7];
+	u32 pkha_a_size;	/* PKASZRx - Size of PKHA A */
+	u32 rsvd10;
+	u32 pkha_b_size;	/* PKBSZRx - Size of PKHA B */
+	u32 rsvd11;
+	u32 pkha_n_size;	/* PKNSZRx - Size of PKHA N */
+	u32 rsvd12;
+	u32 pkha_e_size;	/* PKESZRx - Size of PKHA E */
+	u32 rsvd13[24];
+	u32 cls1_ctx[16];	/* CxC1CTXR - Class 1 Context @100 */
+	u32 rsvd14[48];
+	u32 cls1_key[8];	/* CxC1KEYR - Class 1 Key @200 */
+	u32 rsvd15[121];
+	u32 cls2_mode;	/* CxC2MR - Class 2 Mode */
+	u32 rsvd16;
+	u32 cls2_keysize;	/* CxX2KSR - Class 2 Key Size */
+	u32 cls2_datasize_hi;	/* CxC2DSR - Class 2 Data Size */
+	u32 cls2_datasize_lo;
+	u32 rsvd17;
+	u32 cls2_icvsize;	/* CxC2ICVSZR - Class 2 ICV Size */
+	u32 rsvd18[56];
+	u32 cls2_ctx[18];	/* CxC2CTXR - Class 2 Context @500 */
+	u32 rsvd19[46];
+	u32 cls2_key[32];	/* CxC2KEYR - Class2 Key @600 */
+	u32 rsvd20[84];
+	u32 inp_infofifo_hi;	/* CxIFIFO - Input Info FIFO @7d0 */
+	u32 inp_infofifo_lo;
+	u32 rsvd21[2];
+	u64 inp_datafifo;	/* CxDFIFO - Input Data FIFO */
+	u32 rsvd22[2];
+	u64 out_datafifo;	/* CxOFIFO - Output Data FIFO */
+	u32 rsvd23[2];
+	u32 jr_ctl_hi;	/* CxJRR - JobR Control Register      @800 */
+	u32 jr_ctl_lo;
+	u64 jr_descaddr;	/* CxDADR - JobR Descriptor Address */
+	u32 op_status_hi;	/* DxOPSTA - DECO Operation Status */
+	u32 op_status_lo;
+	u32 rsvd24[2];
+	u32 liodn;		/* DxLSR - DECO LIODN Status - non-seq */
+	u32 td_liodn;	/* DxLSR - DECO LIODN Status - trustdesc */
+	u32 rsvd26[6];
+	u64 math[4];		/* DxMTH - Math register */
+	u32 rsvd27[8];
+	struct deco_sg_table gthr_tbl[4];	/* DxGTR - Gather Tables */
+	u32 rsvd28[16];
+	struct deco_sg_table sctr_tbl[4];	/* DxSTR - Scatter Tables */
+	u32 rsvd29[48];
+	u32 descbuf[64];	/* DxDESB - Descriptor buffer */
+	u32 rsvd30[320];
+};
+
+/*
+ * Current top-level view of memory map is:
+ *
+ * 0x0000 - 0x0fff - CAAM Top-Level Control
+ * 0x1000 - 0x1fff - Job Ring 0
+ * 0x2000 - 0x2fff - Job Ring 1
+ * 0x3000 - 0x3fff - Job Ring 2
+ * 0x4000 - 0x4fff - Job Ring 3
+ * 0x5000 - 0x5fff - (unused)
+ * 0x6000 - 0x6fff - Assurance Controller
+ * 0x7000 - 0x7fff - Queue Interface
+ * 0x8000 - 0x8fff - DECO-CCB 0
+ * 0x9000 - 0x9fff - DECO-CCB 1
+ * 0xa000 - 0xafff - DECO-CCB 2
+ * 0xb000 - 0xbfff - DECO-CCB 3
+ * 0xc000 - 0xcfff - DECO-CCB 4
+ *
+ * caam_full describes the full register view of CAAM if useful,
+ * although many configurations may choose to implement parts of
+ * the register map separately, in differing privilege regions
+ */
+struct caam_full {
+	struct caam_ctrl __iomem ctrl;
+	struct caam_job_ring jr[4];
+	u64 rsvd[512];
+	struct caam_assurance assure;
+	struct caam_queue_if qi;
+	struct caam_deco *deco;
+};
+
+#endif /* REGS_H */
-- 
cgit v1.2.3


From 54e198d4c162b36ba864ecc658c829454074523f Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@freescale.com>
Date: Wed, 23 Mar 2011 21:15:44 +0800
Subject: crypto: caam - standardize device tree naming convention to utilize
 '-vX.Y'

Help clarify that the number trailing in compatible nomenclature
is the version number of the device, i.e., change:

"fsl,p4080-sec4.0", "fsl,sec4.0";

to:

"fsl,p4080-sec-v4.0", "fsl,sec-v4.0";

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Cc: Kumar Gala <kumar.gala@freescale.com>
Cc: Steve Cornelius <sec@pobox.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../devicetree/bindings/crypto/fsl-sec4.txt        | 68 +++++++++++-----------
 arch/powerpc/boot/dts/p4080ds.dts                  | 41 ++++++-------
 drivers/crypto/caam/caamalg.c                      |  4 +-
 drivers/crypto/caam/ctrl.c                         |  6 +-
 4 files changed, 60 insertions(+), 59 deletions(-)

(limited to 'arch')

diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
index fce16a85e2c5..568aa3cb5276 100644
--- a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
+++ b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
@@ -53,7 +53,7 @@ PROPERTIES
    - compatible
       Usage: required
       Value type: <string>
-      Definition: Must include "fsl,p4080-sec4.0","fsl,sec-4.0"
+      Definition: Must include "fsl,p4080-sec-v4.0","fsl,sec-v4.0"
 
    - #address-cells
        Usage: required
@@ -72,7 +72,7 @@ PROPERTIES
       Usage: required
       Value type: <prop-encoded-array>
       Definition: A standard property.  Specifies the physical
-          address and length of the SEC4.0 configuration registers.
+          address and length of the SEC4 configuration registers.
           registers
 
    - ranges
@@ -105,7 +105,7 @@ PROPERTIES
 
 EXAMPLE
 	crypto@300000 {
-		compatible = "fsl,p4080-sec4.0", "fsl,sec4.0";
+		compatible = "fsl,p4080-sec-v4.0", "fsl,sec-v4.0";
 		#address-cells = <1>;
 		#size-cells = <1>;
 		reg = <0x300000 0x10000>;
@@ -127,7 +127,7 @@ P4080 Job Ring (JR) Node
   - compatible
       Usage: required
       Value type: <string>
-      Definition: Must include "fsl,p4080-sec4.0-job-ring","fsl,sec4.0-job-ring"
+      Definition: Must include "fsl,p4080-sec-v4.0-job-ring","fsl,sec-v4.0-job-ring"
 
   - reg
       Usage: required
@@ -163,8 +163,8 @@ P4080 Job Ring (JR) Node
 
 EXAMPLE
 	jr@1000 {
-		compatible = "fsl,p4080-sec4.0-job-ring",
-			     "fsl,sec4.0-job-ring";
+		compatible = "fsl,p4080-sec-v4.0-job-ring",
+			     "fsl,sec-v4.0-job-ring";
 		reg = <0x1000 0x1000>;
 		fsl,liodn = <0x081>;
 		interrupt-parent = <&mpic>;
@@ -186,7 +186,7 @@ P4080 Run Time Integrity Check (RTIC) Node
   - compatible
       Usage: required
       Value type: <string>
-      Definition: Must include "fsl,p4080-sec4.0-rtic","fsl,sec4.0-rtic".
+      Definition: Must include "fsl,p4080-sec-v4.0-rtic","fsl,sec-v4.0-rtic".
 
    - #address-cells
        Usage: required
@@ -219,8 +219,8 @@ P4080 Run Time Integrity Check (RTIC) Node
 
 EXAMPLE
 	rtic@6000 {
-		compatible = "fsl,p4080-sec4.0-rtic",
-			     "fsl,sec4.0-rtic";
+		compatible = "fsl,p4080-sec-v4.0-rtic",
+			     "fsl,sec-v4.0-rtic";
 		#address-cells = <1>;
 		#size-cells = <1>;
 		reg = <0x6000 0x100>;
@@ -238,7 +238,7 @@ P4080 Run Time Integrity Check (RTIC) Memory Node
   - compatible
       Usage: required
       Value type: <string>
-      Definition: Must include "fsl,p4080-sec4.0-rtic-memory","fsl,sec4.0-rtic-memory".
+      Definition: Must include "fsl,p4080-sec-v4.0-rtic-memory","fsl,sec-v4.0-rtic-memory".
 
   - reg
       Usage: required
@@ -270,8 +270,8 @@ P4080 Run Time Integrity Check (RTIC) Memory Node
 
 EXAMPLE
 	rtic-a@0 {
-		compatible = "fsl,p4080-sec4.0-rtic-memory",
-			     "fsl,sec4.0-rtic-memory";
+		compatible = "fsl,p4080-sec-v4.0-rtic-memory",
+			     "fsl,sec-v4.0-rtic-memory";
 		reg = <0x00 0x20 0x100 0x80>;
 		fsl,liodn   = <0x03c>;
 		fsl,rtic-region  = <0x12345678 0x12345678 0x12345678>;
@@ -288,7 +288,7 @@ P4080 Secure Non-Volatile Storage (SNVS) Node
   - compatible
       Usage: required
       Value type: <string>
-      Definition: Must include "fsl,p4080-sec4.0-mon", "fsl,sec4.0-mon".
+      Definition: Must include "fsl,p4080-sec-v4.0-mon", "fsl,sec-v4.0-mon".
 
   - reg
       Usage: required
@@ -315,7 +315,7 @@ P4080 Secure Non-Volatile Storage (SNVS) Node
 
 EXAMPLE
 	sec_mon@314000 {
-		compatible = "fsl,p4080-sec4.0-mon", "fsl,sec4.0-mon";
+		compatible = "fsl,p4080-sec-v4.0-mon", "fsl,sec-v4.0-mon";
 		reg = <0x314000 0x1000>;
 		interrupt-parent = <&mpic>;
 		interrupts = <93 2>;
@@ -325,7 +325,7 @@ EXAMPLE
 FULL EXAMPLE
 
 	crypto: crypto@300000 {
-		compatible = "fsl,p4080-sec4.0", "fsl,sec4.0";
+		compatible = "fsl,p4080-sec-v4.0", "fsl,sec-v4.0";
 		#address-cells = <1>;
 		#size-cells = <1>;
 		reg = <0x300000 0x10000>;
@@ -334,73 +334,73 @@ FULL EXAMPLE
 		interrupts = <92 2>;
 
 		sec_jr0: jr@1000 {
-			compatible = "fsl,p4080-sec4.0-job-ring",
-				     "fsl,sec4.0-job-ring";
+			compatible = "fsl,p4080-sec-v4.0-job-ring",
+				     "fsl,sec-v4.0-job-ring";
 			reg = <0x1000 0x1000>;
 			interrupt-parent = <&mpic>;
 			interrupts = <88 2>;
 		};
 
 		sec_jr1: jr@2000 {
-			compatible = "fsl,p4080-sec4.0-job-ring",
-				     "fsl,sec4.0-job-ring";
+			compatible = "fsl,p4080-sec-v4.0-job-ring",
+				     "fsl,sec-v4.0-job-ring";
 			reg = <0x2000 0x1000>;
 			interrupt-parent = <&mpic>;
 			interrupts = <89 2>;
 		};
 
 		sec_jr2: jr@3000 {
-			compatible = "fsl,p4080-sec4.0-job-ring",
-				     "fsl,sec4.0-job-ring";
+			compatible = "fsl,p4080-sec-v4.0-job-ring",
+				     "fsl,sec-v4.0-job-ring";
 			reg = <0x3000 0x1000>;
 			interrupt-parent = <&mpic>;
 			interrupts = <90 2>;
 		};
 
 		sec_jr3: jr@4000 {
-			compatible = "fsl,p4080-sec4.0-job-ring",
-				     "fsl,sec4.0-job-ring";
+			compatible = "fsl,p4080-sec-v4.0-job-ring",
+				     "fsl,sec-v4.0-job-ring";
 			reg = <0x4000 0x1000>;
 			interrupt-parent = <&mpic>;
 			interrupts = <91 2>;
 		};
 
 		rtic@6000 {
-			compatible = "fsl,p4080-sec4.0-rtic",
-				     "fsl,sec4.0-rtic";
+			compatible = "fsl,p4080-sec-v4.0-rtic",
+				     "fsl,sec-v4.0-rtic";
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x6000 0x100>;
 			ranges = <0x0 0x6100 0xe00>;
 
 			rtic_a: rtic-a@0 {
-				compatible = "fsl,p4080-sec4.0-rtic-memory",
-					     "fsl,sec4.0-rtic-memory";
+				compatible = "fsl,p4080-sec-v4.0-rtic-memory",
+					     "fsl,sec-v4.0-rtic-memory";
 				reg = <0x00 0x20 0x100 0x80>;
 			};
 
 			rtic_b: rtic-b@20 {
-				compatible = "fsl,p4080-sec4.0-rtic-memory",
-					     "fsl,sec4.0-rtic-memory";
+				compatible = "fsl,p4080-sec-v4.0-rtic-memory",
+					     "fsl,sec-v4.0-rtic-memory";
 				reg = <0x20 0x20 0x200 0x80>;
 			};
 
 			rtic_c: rtic-c@40 {
-				compatible = "fsl,p4080-sec4.0-rtic-memory",
-					     "fsl,sec4.0-rtic-memory";
+				compatible = "fsl,p4080-sec-v4.0-rtic-memory",
+					     "fsl,sec-v4.0-rtic-memory";
 				reg = <0x40 0x20 0x300 0x80>;
 			};
 
 			rtic_d: rtic-d@60 {
-				compatible = "fsl,p4080-sec4.0-rtic-memory",
-					     "fsl,sec4.0-rtic-memory";
+				compatible = "fsl,p4080-sec-v4.0-rtic-memory",
+					     "fsl,sec-v4.0-rtic-memory";
 				reg = <0x60 0x20 0x500 0x80>;
 			};
 		};
 	};
 
 	sec_mon: sec_mon@314000 {
-		compatible = "fsl,p4080-sec4.0-mon", "fsl,sec4.0-mon";
+		compatible = "fsl,p4080-sec-v4.0-mon", "fsl,sec-v4.0-mon";
 		reg = <0x314000 0x1000>;
 		interrupt-parent = <&mpic>;
 		interrupts = <93 2>;
diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts
index c78e80155019..0430d241fde8 100644
--- a/arch/powerpc/boot/dts/p4080ds.dts
+++ b/arch/powerpc/boot/dts/p4080ds.dts
@@ -423,7 +423,7 @@
 		};
 
 		crypto: crypto@300000 {
-			compatible = "fsl,p4080-sec4.0", "fsl,sec4.0";
+			compatible = "fsl,p4080-sec-v4.0", "fsl,sec-v4.0";
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x300000 0x10000>;
@@ -432,73 +432,74 @@
 			interrupts = <92 2>;
 
 			sec_jr0: jr@1000 {
-				compatible = "fsl,p4080-sec4.0-job-ring",
-					     "fsl,sec4.0-job-ring";
+				compatible = "fsl,p4080-sec-v4.0-job-ring",
+					     "fsl,sec-v4.0-job-ring";
 				reg = <0x1000 0x1000>;
 				interrupt-parent = <&mpic>;
 				interrupts = <88 2>;
 			};
 
 			sec_jr1: jr@2000 {
-				compatible = "fsl,p4080-sec4.0-job-ring",
-					     "fsl,sec4.0-job-ring";
+				compatible = "fsl,p4080-sec-v4.0-job-ring",
+					     "fsl,sec-v4.0-job-ring";
 				reg = <0x2000 0x1000>;
 				interrupt-parent = <&mpic>;
 				interrupts = <89 2>;
 			};
 
 			sec_jr2: jr@3000 {
-				compatible = "fsl,p4080-sec4.0-job-ring",
-					     "fsl,sec4.0-job-ring";
+				compatible = "fsl,p4080-sec-v4.0-job-ring",
+					     "fsl,sec-v4.0-job-ring";
 				reg = <0x3000 0x1000>;
 				interrupt-parent = <&mpic>;
 				interrupts = <90 2>;
 			};
 
 			sec_jr3: jr@4000 {
-				compatible = "fsl,p4080-sec4.0-job-ring",
-					     "fsl,sec4.0-job-ring";
+				compatible = "fsl,p4080-sec-v4.0-job-ring",
+					     "fsl,sec-v4.0-job-ring";
 				reg = <0x4000 0x1000>;
 				interrupt-parent = <&mpic>;
 				interrupts = <91 2>;
 			};
 
 			rtic@6000 {
-				compatible = "fsl,p4080-sec4.0-rtic",
-					     "fsl,sec4.0-rtic";
+				compatible = "fsl,p4080-sec-v4.0-rtic",
+					     "fsl,sec-v4.0-rtic";
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0x6000 0x100>;
 				ranges = <0x0 0x6100 0xe00>;
 
 				rtic_a: rtic-a@0 {
-					compatible = "fsl,p4080-sec4.0-rtic-memory",
-						     "fsl,sec4.0-rtic-memory";
+					compatible = "fsl,p4080-sec-v4.0-rtic-memory",
+						     "fsl,sec-v4.0-rtic-memory";
 					reg = <0x00 0x20 0x100 0x80>;
 				};
 
 				rtic_b: rtic-b@20 {
-					compatible = "fsl,p4080-sec4.0-rtic-memory",
-						     "fsl,sec4.0-rtic-memory";
+					compatible = "fsl,p4080-sec-v4.0-rtic-memory",
+						     "fsl,sec-v4.0-rtic-memory";
 					reg = <0x20 0x20 0x200 0x80>;
 				};
 
 				rtic_c: rtic-c@40 {
-					compatible = "fsl,p4080-sec4.0-rtic-memory",
-						     "fsl,sec4.0-rtic-memory";
+					compatible = "fsl,p4080-sec-v4.0-rtic-memory",
+						     "fsl,sec-v4.0-rtic-memory";
 					reg = <0x40 0x20 0x300 0x80>;
 				};
 
 				rtic_d: rtic-d@60 {
-					compatible = "fsl,p4080-sec4.0-rtic-memory",
-						     "fsl,sec4.0-rtic-memory";
+					compatible = "fsl,p4080-sec-v4.0-rtic-memory",
+						     "fsl,sec-v4.0-rtic-memory";
 					reg = <0x60 0x20 0x500 0x80>;
 				};
 			};
 		};
 
 		sec_mon: sec_mon@314000 {
-			compatible = "fsl,p4080-sec4.0-mon", "fsl,sec4.0-mon";
+			compatible = "fsl,p4080-sec-v4.0-mon",
+				     "fsl,sec-v4.0-mon";
 			reg = <0x314000 0x1000>;
 			interrupt-parent = <&mpic>;
 			interrupts = <93 2>;
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index d7fe3d3d7db9..140343496096 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -1020,7 +1020,7 @@ static void __exit caam_algapi_exit(void)
 	struct caam_crypto_alg *t_alg, *n;
 	int i, err;
 
-	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
+	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
 	if (!dev_node)
 		return;
 
@@ -1094,7 +1094,7 @@ static int __init caam_algapi_init(void)
 	struct caam_drv_private *priv;
 	int i = 0, err = 0;
 
-	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0");
+	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
 	if (!dev_node)
 		return -ENODEV;
 
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index aa2216160103..59aae4e3b54b 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -108,7 +108,7 @@ static int caam_probe(struct platform_device *pdev,
 	 * for all, then go probe each one.
 	 */
 	rspec = 0;
-	for_each_compatible_node(np, NULL, "fsl,sec4.0-job-ring")
+	for_each_compatible_node(np, NULL, "fsl,sec-v4.0-job-ring")
 		rspec++;
 	ctrlpriv->jrdev = kzalloc(sizeof(struct device *) * rspec, GFP_KERNEL);
 	if (ctrlpriv->jrdev == NULL) {
@@ -118,7 +118,7 @@ static int caam_probe(struct platform_device *pdev,
 
 	ring = 0;
 	ctrlpriv->total_jobrs = 0;
-	for_each_compatible_node(np, NULL, "fsl,sec4.0-job-ring") {
+	for_each_compatible_node(np, NULL, "fsl,sec-v4.0-job-ring") {
 		caam_jr_probe(pdev, np, ring);
 		ctrlpriv->total_jobrs++;
 		ring++;
@@ -236,7 +236,7 @@ static int caam_probe(struct platform_device *pdev,
 
 static struct of_device_id caam_match[] = {
 	{
-		.compatible = "fsl,sec4.0",
+		.compatible = "fsl,sec-v4.0",
 	},
 	{},
 };
-- 
cgit v1.2.3


From 7dfc2179ec7339a180e822a5af7eb1294da245cf Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@freescale.com>
Date: Wed, 23 Mar 2011 21:23:36 +0800
Subject: crypto: caam - de-CHIP-ify device tree compatibles

- all the integration parameters have been captured by the binding.
- the block name really uniquely identifies this hardware.

Some advocate putting SoC names everywhere in case software needs
to work around some chip-specific bug, but more precise SoC
information already exists in SVR, and board information already
exists in the top-level device tree node.

Note that sometimes the SoC name is a worse identifier than the
block version, as the block version can change between revisions
of the same SoC.

As a matter of historical reference, neither SEC versions 2.x
nor 3.x (driven by talitos) ever needed CHIP references.

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Cc: Kumar Gala <kumar.gala@freescale.com>
Cc: Scott Wood <scottwood@freescale.com>
Acked-off-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../devicetree/bindings/crypto/fsl-sec4.txt        | 64 +++++++++-------------
 arch/powerpc/boot/dts/p4080ds.dts                  | 32 ++++-------
 2 files changed, 37 insertions(+), 59 deletions(-)

(limited to 'arch')

diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
index 568aa3cb5276..bf57ecd5d73a 100644
--- a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
+++ b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
@@ -38,7 +38,7 @@ in the memory partition devoted to a particular core.  The P4080 has 4 JRs, so
 up to 4 JRs can be configured; and all 4 JRs process requests in parallel.
 
 =====================================================================
-P4080 SEC 4 Node
+SEC 4 Node
 
 Description
 
@@ -53,7 +53,7 @@ PROPERTIES
    - compatible
       Usage: required
       Value type: <string>
-      Definition: Must include "fsl,p4080-sec-v4.0","fsl,sec-v4.0"
+      Definition: Must include "fsl,sec-v4.0"
 
    - #address-cells
        Usage: required
@@ -105,7 +105,7 @@ PROPERTIES
 
 EXAMPLE
 	crypto@300000 {
-		compatible = "fsl,p4080-sec-v4.0", "fsl,sec-v4.0";
+		compatible = "fsl,sec-v4.0";
 		#address-cells = <1>;
 		#size-cells = <1>;
 		reg = <0x300000 0x10000>;
@@ -115,7 +115,7 @@ EXAMPLE
 	};
 
 =====================================================================
-P4080 Job Ring (JR) Node
+Job Ring (JR) Node
 
     Child of the crypto node defines data processing interface to SEC 4
     across the peripheral bus for purposes of processing
@@ -127,7 +127,7 @@ P4080 Job Ring (JR) Node
   - compatible
       Usage: required
       Value type: <string>
-      Definition: Must include "fsl,p4080-sec-v4.0-job-ring","fsl,sec-v4.0-job-ring"
+      Definition: Must include "fsl,sec-v4.0-job-ring"
 
   - reg
       Usage: required
@@ -163,8 +163,7 @@ P4080 Job Ring (JR) Node
 
 EXAMPLE
 	jr@1000 {
-		compatible = "fsl,p4080-sec-v4.0-job-ring",
-			     "fsl,sec-v4.0-job-ring";
+		compatible = "fsl,sec-v4.0-job-ring";
 		reg = <0x1000 0x1000>;
 		fsl,liodn = <0x081>;
 		interrupt-parent = <&mpic>;
@@ -173,7 +172,7 @@ EXAMPLE
 
 
 =====================================================================
-P4080 Run Time Integrity Check (RTIC) Node
+Run Time Integrity Check (RTIC) Node
 
   Child node of the crypto node.  Defines a register space that
   contains up to 5 sets of addresses and their lengths (sizes) that
@@ -186,7 +185,7 @@ P4080 Run Time Integrity Check (RTIC) Node
   - compatible
       Usage: required
       Value type: <string>
-      Definition: Must include "fsl,p4080-sec-v4.0-rtic","fsl,sec-v4.0-rtic".
+      Definition: Must include "fsl,sec-v4.0-rtic".
 
    - #address-cells
        Usage: required
@@ -219,8 +218,7 @@ P4080 Run Time Integrity Check (RTIC) Node
 
 EXAMPLE
 	rtic@6000 {
-		compatible = "fsl,p4080-sec-v4.0-rtic",
-			     "fsl,sec-v4.0-rtic";
+		compatible = "fsl,sec-v4.0-rtic";
 		#address-cells = <1>;
 		#size-cells = <1>;
 		reg = <0x6000 0x100>;
@@ -228,7 +226,7 @@ EXAMPLE
 	};
 
 =====================================================================
-P4080 Run Time Integrity Check (RTIC) Memory Node
+Run Time Integrity Check (RTIC) Memory Node
   A child node that defines individual RTIC memory regions that are used to
   perform run-time integrity check of memory areas that should not modified.
   The node defines a register that contains the memory address &
@@ -238,7 +236,7 @@ P4080 Run Time Integrity Check (RTIC) Memory Node
   - compatible
       Usage: required
       Value type: <string>
-      Definition: Must include "fsl,p4080-sec-v4.0-rtic-memory","fsl,sec-v4.0-rtic-memory".
+      Definition: Must include "fsl,sec-v4.0-rtic-memory".
 
   - reg
       Usage: required
@@ -270,15 +268,14 @@ P4080 Run Time Integrity Check (RTIC) Memory Node
 
 EXAMPLE
 	rtic-a@0 {
-		compatible = "fsl,p4080-sec-v4.0-rtic-memory",
-			     "fsl,sec-v4.0-rtic-memory";
+		compatible = "fsl,sec-v4.0-rtic-memory";
 		reg = <0x00 0x20 0x100 0x80>;
 		fsl,liodn   = <0x03c>;
 		fsl,rtic-region  = <0x12345678 0x12345678 0x12345678>;
 	};
 
 =====================================================================
-P4080 Secure Non-Volatile Storage (SNVS) Node
+Secure Non-Volatile Storage (SNVS) Node
 
     Node defines address range and the associated
     interrupt for the SNVS function.  This function
@@ -288,7 +285,7 @@ P4080 Secure Non-Volatile Storage (SNVS) Node
   - compatible
       Usage: required
       Value type: <string>
-      Definition: Must include "fsl,p4080-sec-v4.0-mon", "fsl,sec-v4.0-mon".
+      Definition: Must include "fsl,sec-v4.0-mon".
 
   - reg
       Usage: required
@@ -315,7 +312,7 @@ P4080 Secure Non-Volatile Storage (SNVS) Node
 
 EXAMPLE
 	sec_mon@314000 {
-		compatible = "fsl,p4080-sec-v4.0-mon", "fsl,sec-v4.0-mon";
+		compatible = "fsl,sec-v4.0-mon";
 		reg = <0x314000 0x1000>;
 		interrupt-parent = <&mpic>;
 		interrupts = <93 2>;
@@ -325,7 +322,7 @@ EXAMPLE
 FULL EXAMPLE
 
 	crypto: crypto@300000 {
-		compatible = "fsl,p4080-sec-v4.0", "fsl,sec-v4.0";
+		compatible = "fsl,sec-v4.0";
 		#address-cells = <1>;
 		#size-cells = <1>;
 		reg = <0x300000 0x10000>;
@@ -334,73 +331,64 @@ FULL EXAMPLE
 		interrupts = <92 2>;
 
 		sec_jr0: jr@1000 {
-			compatible = "fsl,p4080-sec-v4.0-job-ring",
-				     "fsl,sec-v4.0-job-ring";
+			compatible = "fsl,sec-v4.0-job-ring";
 			reg = <0x1000 0x1000>;
 			interrupt-parent = <&mpic>;
 			interrupts = <88 2>;
 		};
 
 		sec_jr1: jr@2000 {
-			compatible = "fsl,p4080-sec-v4.0-job-ring",
-				     "fsl,sec-v4.0-job-ring";
+			compatible = "fsl,sec-v4.0-job-ring";
 			reg = <0x2000 0x1000>;
 			interrupt-parent = <&mpic>;
 			interrupts = <89 2>;
 		};
 
 		sec_jr2: jr@3000 {
-			compatible = "fsl,p4080-sec-v4.0-job-ring",
-				     "fsl,sec-v4.0-job-ring";
+			compatible = "fsl,sec-v4.0-job-ring";
 			reg = <0x3000 0x1000>;
 			interrupt-parent = <&mpic>;
 			interrupts = <90 2>;
 		};
 
 		sec_jr3: jr@4000 {
-			compatible = "fsl,p4080-sec-v4.0-job-ring",
-				     "fsl,sec-v4.0-job-ring";
+			compatible = "fsl,sec-v4.0-job-ring";
 			reg = <0x4000 0x1000>;
 			interrupt-parent = <&mpic>;
 			interrupts = <91 2>;
 		};
 
 		rtic@6000 {
-			compatible = "fsl,p4080-sec-v4.0-rtic",
-				     "fsl,sec-v4.0-rtic";
+			compatible = "fsl,sec-v4.0-rtic";
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x6000 0x100>;
 			ranges = <0x0 0x6100 0xe00>;
 
 			rtic_a: rtic-a@0 {
-				compatible = "fsl,p4080-sec-v4.0-rtic-memory",
-					     "fsl,sec-v4.0-rtic-memory";
+				compatible = "fsl,sec-v4.0-rtic-memory";
 				reg = <0x00 0x20 0x100 0x80>;
 			};
 
 			rtic_b: rtic-b@20 {
-				compatible = "fsl,p4080-sec-v4.0-rtic-memory",
-					     "fsl,sec-v4.0-rtic-memory";
+				compatible = "fsl,sec-v4.0-rtic-memory";
 				reg = <0x20 0x20 0x200 0x80>;
 			};
 
 			rtic_c: rtic-c@40 {
-				compatible = "fsl,p4080-sec-v4.0-rtic-memory",
-					     "fsl,sec-v4.0-rtic-memory";
+				compatible = "fsl,sec-v4.0-rtic-memory";
 				reg = <0x40 0x20 0x300 0x80>;
 			};
 
 			rtic_d: rtic-d@60 {
-				compatible = "fsl,p4080-sec-v4.0-rtic-memory",
-					     "fsl,sec-v4.0-rtic-memory";
+				compatible = "fsl,sec-v4.0-rtic-memory";
 				reg = <0x60 0x20 0x500 0x80>;
 			};
 		};
 	};
 
 	sec_mon: sec_mon@314000 {
-		compatible = "fsl,p4080-sec-v4.0-mon", "fsl,sec-v4.0-mon";
+		compatible = "fsl,sec-v4.0-mon";
 		reg = <0x314000 0x1000>;
 		interrupt-parent = <&mpic>;
 		interrupts = <93 2>;
diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts
index 0430d241fde8..927f94d16e9b 100644
--- a/arch/powerpc/boot/dts/p4080ds.dts
+++ b/arch/powerpc/boot/dts/p4080ds.dts
@@ -423,7 +423,7 @@
 		};
 
 		crypto: crypto@300000 {
-			compatible = "fsl,p4080-sec-v4.0", "fsl,sec-v4.0";
+			compatible = "fsl,sec-v4.0";
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x300000 0x10000>;
@@ -432,74 +432,64 @@
 			interrupts = <92 2>;
 
 			sec_jr0: jr@1000 {
-				compatible = "fsl,p4080-sec-v4.0-job-ring",
-					     "fsl,sec-v4.0-job-ring";
+				compatible = "fsl,sec-v4.0-job-ring";
 				reg = <0x1000 0x1000>;
 				interrupt-parent = <&mpic>;
 				interrupts = <88 2>;
 			};
 
 			sec_jr1: jr@2000 {
-				compatible = "fsl,p4080-sec-v4.0-job-ring",
-					     "fsl,sec-v4.0-job-ring";
+				compatible = "fsl,sec-v4.0-job-ring";
 				reg = <0x2000 0x1000>;
 				interrupt-parent = <&mpic>;
 				interrupts = <89 2>;
 			};
 
 			sec_jr2: jr@3000 {
-				compatible = "fsl,p4080-sec-v4.0-job-ring",
-					     "fsl,sec-v4.0-job-ring";
+				compatible = "fsl,sec-v4.0-job-ring";
 				reg = <0x3000 0x1000>;
 				interrupt-parent = <&mpic>;
 				interrupts = <90 2>;
 			};
 
 			sec_jr3: jr@4000 {
-				compatible = "fsl,p4080-sec-v4.0-job-ring",
-					     "fsl,sec-v4.0-job-ring";
+				compatible = "fsl,sec-v4.0-job-ring";
 				reg = <0x4000 0x1000>;
 				interrupt-parent = <&mpic>;
 				interrupts = <91 2>;
 			};
 
 			rtic@6000 {
-				compatible = "fsl,p4080-sec-v4.0-rtic",
-					     "fsl,sec-v4.0-rtic";
+				compatible = "fsl,sec-v4.0-rtic";
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0x6000 0x100>;
 				ranges = <0x0 0x6100 0xe00>;
 
 				rtic_a: rtic-a@0 {
-					compatible = "fsl,p4080-sec-v4.0-rtic-memory",
-						     "fsl,sec-v4.0-rtic-memory";
+					compatible = "fsl,sec-v4.0-rtic-memory";
 					reg = <0x00 0x20 0x100 0x80>;
 				};
 
 				rtic_b: rtic-b@20 {
-					compatible = "fsl,p4080-sec-v4.0-rtic-memory",
-						     "fsl,sec-v4.0-rtic-memory";
+					compatible = "fsl,sec-v4.0-rtic-memory";
 					reg = <0x20 0x20 0x200 0x80>;
 				};
 
 				rtic_c: rtic-c@40 {
-					compatible = "fsl,p4080-sec-v4.0-rtic-memory",
-						     "fsl,sec-v4.0-rtic-memory";
+					compatible = "fsl,sec-v4.0-rtic-memory";
 					reg = <0x40 0x20 0x300 0x80>;
 				};
 
 				rtic_d: rtic-d@60 {
-					compatible = "fsl,p4080-sec-v4.0-rtic-memory",
-						     "fsl,sec-v4.0-rtic-memory";
+					compatible = "fsl,sec-v4.0-rtic-memory";
 					reg = <0x60 0x20 0x500 0x80>;
 				};
 			};
 		};
 
 		sec_mon: sec_mon@314000 {
-			compatible = "fsl,p4080-sec-v4.0-mon",
-				     "fsl,sec-v4.0-mon";
+			compatible = "fsl,sec-v4.0-mon";
 			reg = <0x314000 0x1000>;
 			interrupt-parent = <&mpic>;
 			interrupts = <93 2>;
-- 
cgit v1.2.3


From 1d321881afb955bba1e0a8f50d3a04e111fcb581 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Mon, 28 Mar 2011 00:46:11 +0400
Subject: perf, x86: P4 PMU - clean up the code a bit

No change on the functional level, just align the table properly.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Lin Ming <ming.m.lin@intel.com>
LKML-Reference: <4D8FA213.5050108@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_p4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index c2520e178d32..8ff882fdb1c0 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -468,7 +468,7 @@ static struct p4_event_bind p4_event_bind_map[] = {
 		.opcode		= P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
 		.escr_msr	= { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
 		.escr_emask	=
-		P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
+			P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
 		.cntr		= { {12, 13, 16}, {14, 15, 17} },
 	},
 	[P4_EVENT_X87_ASSIST] = {
-- 
cgit v1.2.3


From 349c004e3d31fda23ad225b61861be38047fff16 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Sat, 12 Mar 2011 12:50:10 +0100
Subject: x86: A fast way to check capabilities of the current cpu

Add this_cpu_has() which determines if the current cpu has a certain
ability using a segment prefix and a bit test operation.

For that we need to add bit operations to x86s percpu.h.

Many uses of cpu_has use a pointer passed to a function to determine
the current flags. That is no longer necessary after this patch.

However, this patch only converts the straightforward cases where
cpu_has is used with this_cpu_ptr. The rest is work for later.

-tj: Rolled up patch to add x86_ prefix and use percpu_read() instead
     of percpu_read_stable().

Signed-off-by: Christoph Lameter <cl@linux.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/cpufeature.h | 13 +++++++++----
 arch/x86/include/asm/percpu.h     | 27 +++++++++++++++++++++++++++
 arch/x86/kernel/apic/apic.c       |  2 +-
 arch/x86/kernel/process.c         |  4 ++--
 arch/x86/kernel/smpboot.c         |  4 ++--
 5 files changed, 41 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 91f3e087cf21..50c0d30e676d 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -207,8 +207,7 @@ extern const char * const x86_power_flags[32];
 #define test_cpu_cap(c, bit)						\
 	 test_bit(bit, (unsigned long *)((c)->x86_capability))
 
-#define cpu_has(c, bit)							\
-	(__builtin_constant_p(bit) &&					\
+#define REQUIRED_MASK_BIT_SET(bit)					\
 	 ( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) ||	\
 	   (((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) ||	\
 	   (((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) ||	\
@@ -218,10 +217,16 @@ extern const char * const x86_power_flags[32];
 	   (((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) ||	\
 	   (((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) ||	\
 	   (((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) ||	\
-	   (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )	\
-	  ? 1 :								\
+	   (((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
+
+#define cpu_has(c, bit)							\
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
 	 test_cpu_cap(c, bit))
 
+#define this_cpu_has(bit)						\
+	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : 	\
+	 x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
+
 #define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
 
 #define set_cpu_cap(c, bit)	set_bit(bit, (unsigned long *)((c)->x86_capability))
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index d475b4398d8b..76042d981596 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -542,6 +542,33 @@ do {									\
 	old__;								\
 })
 
+static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
+                        const unsigned long __percpu *addr)
+{
+	unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
+
+	return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
+}
+
+static inline int x86_this_cpu_variable_test_bit(int nr,
+                        const unsigned long __percpu *addr)
+{
+	int oldbit;
+
+	asm volatile("bt "__percpu_arg(2)",%1\n\t"
+			"sbb %0,%0"
+			: "=r" (oldbit)
+			: "m" (*(unsigned long *)addr), "Ir" (nr));
+
+	return oldbit;
+}
+
+#define x86_this_cpu_test_bit(nr, addr)			\
+	(__builtin_constant_p((nr))			\
+	 ? x86_this_cpu_constant_test_bit((nr), (addr))	\
+	 : x86_this_cpu_variable_test_bit((nr), (addr)))
+
+
 #include <asm-generic/percpu.h>
 
 /* We can use this directly for local CPU (faster). */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index fabf01eff771..2bc503bf9e99 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -505,7 +505,7 @@ static void __cpuinit setup_APIC_timer(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
-	if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
+	if (this_cpu_has(X86_FEATURE_ARAT)) {
 		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
 		/* Make LAPIC timer preferrable over percpu HPET */
 		lapic_clockevent.rating = 150;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d46cbe46b7ab..88a90a977f8e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
 	if (!need_resched()) {
-		if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -465,7 +465,7 @@ static void mwait_idle(void)
 	if (!need_resched()) {
 		trace_power_start(POWER_CSTATE, 1, smp_processor_id());
 		trace_cpu_idle(1, smp_processor_id());
-		if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c2871d3c71b6..a3c430bdfb60 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1332,9 +1332,9 @@ static inline void mwait_play_dead(void)
 	void *mwait_ptr;
 	struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
 
-	if (!(cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)))
+	if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))
 		return;
-	if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
+	if (!this_cpu_has(X86_FEATURE_CLFLSH))
 		return;
 	if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
 		return;
-- 
cgit v1.2.3


From fe5042138b6fc60edde3b60025078884c2eb71ac Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Sat, 12 Mar 2011 12:50:46 +0100
Subject: x86: Use this_cpu_has for thermal_interrupt current cpu

It is more effective to use a segment prefix instead of calculating the
address of the current cpu area amd then testing flags.

Signed-off-by: Christoph Lameter <cl@linux.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/cpu/mcheck/therm_throt.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97f..6b0f4cde7a22 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -355,7 +355,6 @@ static void notify_thresholds(__u64 msr_val)
 static void intel_thermal_interrupt(void)
 {
 	__u64 msr_val;
-	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 
@@ -367,19 +366,19 @@ static void intel_thermal_interrupt(void)
 				CORE_LEVEL) != 0)
 		mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
 
-	if (cpu_has(c, X86_FEATURE_PLN))
+	if (this_cpu_has(X86_FEATURE_PLN))
 		if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
 					CORE_LEVEL) != 0)
 			mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
 
-	if (cpu_has(c, X86_FEATURE_PTS)) {
+	if (this_cpu_has(X86_FEATURE_PTS)) {
 		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
 		if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
 					THERMAL_THROTTLING_EVENT,
 					PACKAGE_LEVEL) != 0)
 			mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
-		if (cpu_has(c, X86_FEATURE_PLN))
+		if (this_cpu_has(X86_FEATURE_PLN))
 			if (therm_throt_process(msr_val &
 					PACKAGE_THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
-- 
cgit v1.2.3


From 62f0988ee5280ac03f787e3abd70bd91366e3778 Mon Sep 17 00:00:00 2001
From: David Brown <davidb@codeaurora.org>
Date: Tue, 29 Mar 2011 11:48:45 -0700
Subject: msm: Remove extraneous ffa device check

The qsd8x50 board file contains a few references to machine_is_...
macros that are otherwise unused, and contain no machine definition.
The recent purge of unused machine definitions breaks the compilation
of this target.

Since the machine cannot ever be used, just remove the bogus checks.

Signed-off-by: David Brown <davidb@codeaurora.org>
---
 arch/arm/mach-msm/board-qsd8x50.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-msm/board-qsd8x50.c b/arch/arm/mach-msm/board-qsd8x50.c
index 7f568611547e..6a96911b0ad5 100644
--- a/arch/arm/mach-msm/board-qsd8x50.c
+++ b/arch/arm/mach-msm/board-qsd8x50.c
@@ -160,10 +160,7 @@ static struct msm_mmc_platform_data qsd8x50_sdc1_data = {
 
 static void __init qsd8x50_init_mmc(void)
 {
-	if (machine_is_qsd8x50_ffa() || machine_is_qsd8x50a_ffa())
-		vreg_mmc = vreg_get(NULL, "gp6");
-	else
-		vreg_mmc = vreg_get(NULL, "gp5");
+	vreg_mmc = vreg_get(NULL, "gp5");
 
 	if (IS_ERR(vreg_mmc)) {
 		pr_err("vreg get for vreg_mmc failed (%ld)\n",
-- 
cgit v1.2.3


From 893b66c39da812e7dd0d7b32aa0633e5d90d950c Mon Sep 17 00:00:00 2001
From: David Brown <davidb@codeaurora.org>
Date: Wed, 30 Mar 2011 11:26:57 -0700
Subject: msm: timer: fix missing return value

Change af90f10d38 "ARM: 6759/1: smp: Select local timers vs broadcast
timer support runtime" missed a return statement, causing a compile
warning:

  arch/arm/mach-msm/timer.c:272: warning: 'return' with no value, in
  function returning non-void

Trivially return 0 for success when running on cpu 0 (to match the
comment and previous behavior).

Signed-off-by: David Brown <davidb@codeaurora.org>
---
 arch/arm/mach-msm/timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 56f920c55b6a..38b95e949d13 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -269,7 +269,7 @@ int __cpuinit local_timer_setup(struct clock_event_device *evt)
 
 	/* Use existing clock_event for cpu 0 */
 	if (!smp_processor_id())
-		return;
+		return 0;
 
 	writel(DGT_CLK_CTL_DIV_4, MSM_TMR_BASE + DGT_CLK_CTL);
 
-- 
cgit v1.2.3


From 052936080c8fb2f791103995b21bd4018c8df886 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 1 Apr 2011 11:15:12 +0200
Subject: x86-64, NUMA: Remove custom phys_to_nid() implementation

phys_to_nid() maps physical address to NUMA node id.  This is
implemented by building perfect hash in compute_hash_shift() during
initialization.

However, with SPARSE memory model, the nid is encoded in page flags.
The perfect hash implementation was for DISCONTIG memory model which
got removed years ago by b263295dbf (x86: 64-bit, make sparsemem
vmemmap the only memory model).

So, the perfect hash ends up being used only during initialization
when the core SPARSE code already provides perfectly acceptable
generic early_pfn_to_nid() implementation.

Drop phys_to_nid() and use the generic ealry_pfn_to_nid() instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig                 |   4 --
 arch/x86/include/asm/mmzone_64.h |  23 --------
 arch/x86/mm/numa_64.c            | 123 +--------------------------------------
 3 files changed, 1 insertion(+), 149 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a95bfd..b034814bf975 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1703,10 +1703,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
 	def_bool y
 	depends on MEMORY_HOTPLUG
 
-config HAVE_ARCH_EARLY_PFN_TO_NID
-	def_bool X86_64
-	depends on NUMA
-
 config USE_PERCPU_NUMA_NODE_ID
 	def_bool y
 	depends on NUMA
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index 288b96f815a6..b3f88d7867c7 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -4,36 +4,13 @@
 #ifndef _ASM_X86_MMZONE_64_H
 #define _ASM_X86_MMZONE_64_H
 
-
 #ifdef CONFIG_NUMA
 
 #include <linux/mmdebug.h>
-
 #include <asm/smp.h>
 
-/* Simple perfect hash to map physical addresses to node numbers */
-struct memnode {
-	int shift;
-	unsigned int mapsize;
-	s16 *map;
-	s16 embedded_map[64 - 8];
-} ____cacheline_aligned; /* total size = 128 bytes */
-extern struct memnode memnode;
-#define memnode_shift memnode.shift
-#define memnodemap memnode.map
-#define memnodemapsize memnode.mapsize
-
 extern struct pglist_data *node_data[];
 
-static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
-{
-	unsigned nid;
-	VIRTUAL_BUG_ON(!memnodemap);
-	nid = memnodemap[addr >> memnode_shift];
-	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
-	return nid;
-}
-
 #define NODE_DATA(nid)		(node_data[nid])
 
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e8c00cc72033..3951ee6eade7 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -28,125 +28,10 @@ EXPORT_SYMBOL(node_data);
 
 nodemask_t numa_nodes_parsed __initdata;
 
-struct memnode memnode;
-
-static unsigned long __initdata nodemap_addr;
-static unsigned long __initdata nodemap_size;
-
 static struct numa_meminfo numa_meminfo __initdata;
-
 static int numa_distance_cnt;
 static u8 *numa_distance;
 
-/*
- * Given a shift value, try to populate memnodemap[]
- * Returns :
- * 1 if OK
- * 0 if memnodmap[] too small (of shift too small)
- * -1 if node overlap or lost ram (shift too big)
- */
-static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift)
-{
-	unsigned long addr, end;
-	int i, res = -1;
-
-	memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
-	for (i = 0; i < mi->nr_blks; i++) {
-		addr = mi->blk[i].start;
-		end = mi->blk[i].end;
-		if (addr >= end)
-			continue;
-		if ((end >> shift) >= memnodemapsize)
-			return 0;
-		do {
-			if (memnodemap[addr >> shift] != NUMA_NO_NODE)
-				return -1;
-			memnodemap[addr >> shift] = mi->blk[i].nid;
-			addr += (1UL << shift);
-		} while (addr < end);
-		res = 1;
-	}
-	return res;
-}
-
-static int __init allocate_cachealigned_memnodemap(void)
-{
-	unsigned long addr;
-
-	memnodemap = memnode.embedded_map;
-	if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map))
-		return 0;
-
-	addr = 0x8000;
-	nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
-	nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
-				      nodemap_size, L1_CACHE_BYTES);
-	if (nodemap_addr == MEMBLOCK_ERROR) {
-		printk(KERN_ERR
-		       "NUMA: Unable to allocate Memory to Node hash map\n");
-		nodemap_addr = nodemap_size = 0;
-		return -1;
-	}
-	memnodemap = phys_to_virt(nodemap_addr);
-	memblock_x86_reserve_range(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP");
-
-	printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
-	       nodemap_addr, nodemap_addr + nodemap_size);
-	return 0;
-}
-
-/*
- * The LSB of all start and end addresses in the node map is the value of the
- * maximum possible shift.
- */
-static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi)
-{
-	int i, nodes_used = 0;
-	unsigned long start, end;
-	unsigned long bitfield = 0, memtop = 0;
-
-	for (i = 0; i < mi->nr_blks; i++) {
-		start = mi->blk[i].start;
-		end = mi->blk[i].end;
-		if (start >= end)
-			continue;
-		bitfield |= start;
-		nodes_used++;
-		if (end > memtop)
-			memtop = end;
-	}
-	if (nodes_used <= 1)
-		i = 63;
-	else
-		i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
-	memnodemapsize = (memtop >> i)+1;
-	return i;
-}
-
-static int __init compute_hash_shift(const struct numa_meminfo *mi)
-{
-	int shift;
-
-	shift = extract_lsb_from_nodes(mi);
-	if (allocate_cachealigned_memnodemap())
-		return -1;
-	printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
-		shift);
-
-	if (populate_memnodemap(mi, shift) != 1) {
-		printk(KERN_INFO "Your memory is not aligned you need to "
-		       "rebuild your kernel with a bigger NODEMAPSIZE "
-		       "shift=%d\n", shift);
-		return -1;
-	}
-	return shift;
-}
-
-int __meminit  __early_pfn_to_nid(unsigned long pfn)
-{
-	return phys_to_nid(pfn << PAGE_SHIFT);
-}
-
 static void * __init early_node_mem(int nodeid, unsigned long start,
 				    unsigned long end, unsigned long size,
 				    unsigned long align)
@@ -270,7 +155,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 	memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
 	printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
 		nodedata_phys + pgdat_size - 1);
-	nid = phys_to_nid(nodedata_phys);
+	nid = early_pfn_to_nid(nodedata_phys >> PAGE_SHIFT);
 	if (nid != nodeid)
 		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nodeid, nid);
 
@@ -527,12 +412,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 	if (WARN_ON(nodes_empty(node_possible_map)))
 		return -EINVAL;
 
-	memnode_shift = compute_hash_shift(mi);
-	if (memnode_shift < 0) {
-		printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n");
-		return -EINVAL;
-	}
-
 	for (i = 0; i < mi->nr_blks; i++)
 		memblock_x86_register_active_regions(mi->blk[i].nid,
 					mi->blk[i].start >> PAGE_SHIFT,
-- 
cgit v1.2.3


From 3b16651f806d35b5c404f2525fbce76afa3c9297 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 1 Apr 2011 11:15:12 +0200
Subject: x86: Clean up memory model related configs in arch/x86/Kconfig

* Remove bogus dependency on ARCH_SELECT_MEMORY_MODEL from
  ARCH_FLATMEM_ENABLE.  ENABLE configs don't interfere with
  SELECT_MEMORY_MODEL.  They just need to indicate whether the
  specific memory model is supported.

* Relocate HAVE_ARCH_ALLOC_REMAP, ARCH_PROC_KCORE_TEXT and
  ARCH_SPARSEMEM_DEFAULT so that memory model related configs are
  together in consistent order.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Christoph Lameter <cl@linux.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b034814bf975..8db4fbf30b59 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1223,6 +1223,10 @@ config HAVE_ARCH_BOOTMEM
 	def_bool y
 	depends on X86_32 && NUMA
 
+config HAVE_ARCH_ALLOC_REMAP
+	def_bool y
+	depends on X86_32 && NUMA
+
 config ARCH_HAVE_MEMORY_PRESENT
 	def_bool y
 	depends on X86_32 && DISCONTIGMEM
@@ -1231,13 +1235,9 @@ config NEED_NODE_MEMMAP_SIZE
 	def_bool y
 	depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
 
-config HAVE_ARCH_ALLOC_REMAP
-	def_bool y
-	depends on X86_32 && NUMA
-
 config ARCH_FLATMEM_ENABLE
 	def_bool y
-	depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
+	depends on X86_32 && !NUMA
 
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool y
@@ -1247,20 +1247,16 @@ config ARCH_DISCONTIGMEM_DEFAULT
 	def_bool y
 	depends on NUMA && X86_32
 
-config ARCH_PROC_KCORE_TEXT
-	def_bool y
-	depends on X86_64 && PROC_KCORE
-
-config ARCH_SPARSEMEM_DEFAULT
-	def_bool y
-	depends on X86_64
-
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD
 	select SPARSEMEM_STATIC if X86_32
 	select SPARSEMEM_VMEMMAP_ENABLE if X86_64
 
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool y
+	depends on X86_64
+
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 	depends on ARCH_SPARSEMEM_ENABLE
@@ -1269,6 +1265,10 @@ config ARCH_MEMORY_PROBE
 	def_bool X86_64
 	depends on MEMORY_HOTPLUG
 
+config ARCH_PROC_KCORE_TEXT
+	def_bool y
+	depends on X86_64 && PROC_KCORE
+
 config ILLEGAL_POINTER_VALUE
        hex
        default 0 if X86_32
-- 
cgit v1.2.3


From 711b8c87a5fe6de78e90411cb67b506babfef74a Mon Sep 17 00:00:00 2001
From: Florian Mickler <florian@mickler.org>
Date: Mon, 4 Apr 2011 01:17:40 +0200
Subject: x86-64, NUMA: Remove unused variable

In case !CONFIG_ACPI_NUMA and !CONFIG_AMD_NUMA gcc emits a warning
about the unused variable ret.

As that variable is in fact not needed I choose to remove it.

Signed-off-by: Florian Mickler <florian@mickler.org>
LKML-Reference: <1301843624-22364-1-git-send-email-florian@mickler.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/mm/numa_64.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 3951ee6eade7..13f5b068e8c2 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -505,17 +505,13 @@ static int __init numa_init(int (*init_func)(void))
 
 void __init initmem_init(void)
 {
-	int ret;
-
 	if (!numa_off) {
 #ifdef CONFIG_ACPI_NUMA
-		ret = numa_init(x86_acpi_numa_init);
-		if (!ret)
+		if (!numa_init(x86_acpi_numa_init))
 			return;
 #endif
 #ifdef CONFIG_AMD_NUMA
-		ret = numa_init(amd_numa_init);
-		if (!ret)
+		if (!numa_init(amd_numa_init))
 			return;
 #endif
 	}
-- 
cgit v1.2.3


From 64d21fc194e12bdf7347019bf10325a4b3d77e7b Mon Sep 17 00:00:00 2001
From: Rakib Mullick <rakib.mullick@gmail.com>
Date: Sat, 2 Apr 2011 13:17:48 +0600
Subject: x86, mpparse: Put check_slot() into .init section

check_slot() is only called from replace_intsrc_all() - which is
in the .init section.

So, put check_slot into the .init section as well, so it can be freed
after system boot.

Signed-off-by: Rakib Mullick <rakib.mullick@gmail.com>
LKML-Reference: <AANLkTing52ntzRcHkODCWDKOfRF=0uhXw5-cCUhx6M54@mail.gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mpparse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 5a532ce646bf..f1b27181de04 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -715,7 +715,7 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
 	}
 }
 
-static int
+static int __init
 check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
 {
 	int ret = 0;
-- 
cgit v1.2.3


From d430d3d7e646eb1eac2bb4aa244a644312e67c76 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Wed, 16 Mar 2011 17:29:47 -0400
Subject: jump label: Introduce static_branch() interface

Introduce:

static __always_inline bool static_branch(struct jump_label_key *key);

instead of the old JUMP_LABEL(key, label) macro.

In this way, jump labels become really easy to use:

Define:

        struct jump_label_key jump_key;

Can be used as:

        if (static_branch(&jump_key))
                do unlikely code

enable/disale via:

        jump_label_inc(&jump_key);
        jump_label_dec(&jump_key);

that's it!

For the jump labels disabled case, the static_branch() becomes an
atomic_read(), and jump_label_inc()/dec() are simply atomic_inc(),
atomic_dec() operations. We show testing results for this change below.

Thanks to H. Peter Anvin for suggesting the 'static_branch()' construct.

Since we now require a 'struct jump_label_key *key', we can store a pointer into
the jump table addresses. In this way, we can enable/disable jump labels, in
basically constant time. This change allows us to completely remove the previous
hashtable scheme. Thanks to Peter Zijlstra for this re-write.

Testing:

I ran a series of 'tbench 20' runs 5 times (with reboots) for 3
configurations, where tracepoints were disabled.

jump label configured in
avg: 815.6

jump label *not* configured in (using atomic reads)
avg: 800.1

jump label *not* configured in (regular reads)
avg: 803.4

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20110316212947.GA8792@redhat.com>
Signed-off-by: Jason Baron <jbaron@redhat.com>
Suggested-by: H. Peter Anvin <hpa@linux.intel.com>
Tested-by: David Daney <ddaney@caviumnetworks.com>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/mips/include/asm/jump_label.h  |  22 +-
 arch/sparc/include/asm/jump_label.h |  25 +-
 arch/x86/include/asm/alternative.h  |   3 +-
 arch/x86/include/asm/jump_label.h   |  26 +-
 arch/x86/kernel/alternative.c       |   2 +-
 arch/x86/kernel/module.c            |   1 +
 include/asm-generic/vmlinux.lds.h   |  14 +-
 include/linux/dynamic_debug.h       |   2 -
 include/linux/jump_label.h          |  89 +++---
 include/linux/jump_label_ref.h      |  44 ---
 include/linux/perf_event.h          |  26 +-
 include/linux/tracepoint.h          |  22 +-
 kernel/jump_label.c                 | 539 +++++++++++++++---------------------
 kernel/perf_event.c                 |   4 +-
 kernel/tracepoint.c                 |  23 +-
 15 files changed, 356 insertions(+), 486 deletions(-)
 delete mode 100644 include/linux/jump_label_ref.h

(limited to 'arch')

diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 7622ccf75076..1881b316ca45 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -20,16 +20,18 @@
 #define WORD_INSN ".word"
 #endif
 
-#define JUMP_LABEL(key, label)						\
-	do {								\
-		asm goto("1:\tnop\n\t"					\
-			"nop\n\t"					\
-			".pushsection __jump_table,  \"a\"\n\t"		\
-			WORD_INSN " 1b, %l[" #label "], %0\n\t"		\
-			".popsection\n\t"				\
-			: :  "i" (key) :  : label);			\
-	} while (0)
-
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("1:\tnop\n\t"
+		"nop\n\t"
+		".pushsection __jump_table,  \"aw\"\n\t"
+		WORD_INSN " 1b, %l[l_yes], %0\n\t"
+		".popsection\n\t"
+		: :  "i" (key) : : l_yes);
+	return false;
+l_yes:
+	return true;
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index 427d4684e0d2..fc73a82366f8 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -7,17 +7,20 @@
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-#define JUMP_LABEL(key, label)					\
-	do {							\
-		asm goto("1:\n\t"				\
-			 "nop\n\t"				\
-			 "nop\n\t"				\
-			 ".pushsection __jump_table,  \"a\"\n\t"\
-			 ".align 4\n\t"				\
-			 ".word 1b, %l[" #label "], %c0\n\t"	\
-			 ".popsection \n\t"			\
-			 : :  "i" (key) :  : label);\
-	} while (0)
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+		asm goto("1:\n\t"
+			 "nop\n\t"
+			 "nop\n\t"
+			 ".pushsection __jump_table,  \"aw\"\n\t"
+			 ".align 4\n\t"
+			 ".word 1b, %l[l_yes], %c0\n\t"
+			 ".popsection \n\t"
+			 : :  "i" (key) : : l_yes);
+	return false;
+l_yes:
+	return true;
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 13009d1af99a..8cdd1e247975 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,7 +4,6 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/stringify.h>
-#include <linux/jump_label.h>
 #include <asm/asm.h>
 
 /*
@@ -191,7 +190,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_JUMP_LABEL)
 #define IDEAL_NOP_SIZE_5 5
 extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
 extern void arch_init_ideal_nop5(void);
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 574dbc22893a..f217cee86533 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -5,20 +5,24 @@
 
 #include <linux/types.h>
 #include <asm/nops.h>
+#include <asm/asm.h>
 
 #define JUMP_LABEL_NOP_SIZE 5
 
-# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
-
-# define JUMP_LABEL(key, label)					\
-	do {							\
-		asm goto("1:"					\
-			JUMP_LABEL_INITIAL_NOP			\
-			".pushsection __jump_table,  \"aw\" \n\t"\
-			_ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
-			".popsection \n\t"			\
-			: :  "i" (key) :  : label);		\
-	} while (0)
+#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("1:"
+		JUMP_LABEL_INITIAL_NOP
+		".pushsection __jump_table,  \"aw\" \n\t"
+		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
+		".popsection \n\t"
+		: :  "i" (key) : : l_yes);
+	return false;
+l_yes:
+	return true;
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4a234677e213..651454b0c811 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -679,7 +679,7 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
 	__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
 }
 
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_JUMP_LABEL)
 
 #ifdef CONFIG_X86_64
 unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index ab23f1ad4bf1..52f256f2cc81 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -24,6 +24,7 @@
 #include <linux/bug.h>
 #include <linux/mm.h>
 #include <linux/gfp.h>
+#include <linux/jump_label.h>
 
 #include <asm/system.h>
 #include <asm/page.h>
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 32c45e5fe0ab..79522166d7f1 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -170,6 +170,10 @@
 	STRUCT_ALIGN();							\
 	*(__tracepoints)						\
 	/* implement dynamic printk debug */				\
+	. = ALIGN(8);                                                   \
+	VMLINUX_SYMBOL(__start___jump_table) = .;                       \
+	*(__jump_table)                                                 \
+	VMLINUX_SYMBOL(__stop___jump_table) = .;                        \
 	. = ALIGN(8);							\
 	VMLINUX_SYMBOL(__start___verbose) = .;                          \
 	*(__verbose)                                                    \
@@ -228,8 +232,6 @@
 									\
 	BUG_TABLE							\
 									\
-	JUMP_TABLE							\
-									\
 	/* PCI quirks */						\
 	.pci_fixup        : AT(ADDR(.pci_fixup) - LOAD_OFFSET) {	\
 		VMLINUX_SYMBOL(__start_pci_fixups_early) = .;		\
@@ -589,14 +591,6 @@
 #define BUG_TABLE
 #endif
 
-#define JUMP_TABLE							\
-	. = ALIGN(8);							\
-	__jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) {		\
-		VMLINUX_SYMBOL(__start___jump_table) = .;		\
-		*(__jump_table)						\
-		VMLINUX_SYMBOL(__stop___jump_table) = .;		\
-	}
-
 #ifdef CONFIG_PM_TRACE
 #define TRACEDATA							\
 	. = ALIGN(4);							\
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 0c9653f11c18..e747ecd48e1c 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -1,8 +1,6 @@
 #ifndef _DYNAMIC_DEBUG_H
 #define _DYNAMIC_DEBUG_H
 
-#include <linux/jump_label.h>
-
 /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which
  * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
  * use independent hash functions, to reduce the chance of false positives.
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 7880f18e4b86..83e745f3ead7 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -1,20 +1,43 @@
 #ifndef _LINUX_JUMP_LABEL_H
 #define _LINUX_JUMP_LABEL_H
 
+#include <linux/types.h>
+#include <linux/compiler.h>
+
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
+
+struct jump_label_key {
+	atomic_t enabled;
+	struct jump_entry *entries;
+#ifdef CONFIG_MODULES
+	struct jump_label_mod *next;
+#endif
+};
+
 # include <asm/jump_label.h>
 # define HAVE_JUMP_LABEL
 #endif
 
 enum jump_label_type {
+	JUMP_LABEL_DISABLE = 0,
 	JUMP_LABEL_ENABLE,
-	JUMP_LABEL_DISABLE
 };
 
 struct module;
 
 #ifdef HAVE_JUMP_LABEL
 
+#ifdef CONFIG_MODULES
+#define JUMP_LABEL_INIT {{ 0 }, NULL, NULL}
+#else
+#define JUMP_LABEL_INIT {{ 0 }, NULL}
+#endif
+
+static __always_inline bool static_branch(struct jump_label_key *key)
+{
+	return arch_static_branch(key);
+}
+
 extern struct jump_entry __start___jump_table[];
 extern struct jump_entry __stop___jump_table[];
 
@@ -23,37 +46,37 @@ extern void jump_label_unlock(void);
 extern void arch_jump_label_transform(struct jump_entry *entry,
 				 enum jump_label_type type);
 extern void arch_jump_label_text_poke_early(jump_label_t addr);
-extern void jump_label_update(unsigned long key, enum jump_label_type type);
-extern void jump_label_apply_nops(struct module *mod);
 extern int jump_label_text_reserved(void *start, void *end);
+extern void jump_label_inc(struct jump_label_key *key);
+extern void jump_label_dec(struct jump_label_key *key);
+extern bool jump_label_enabled(struct jump_label_key *key);
+extern void jump_label_apply_nops(struct module *mod);
 
-#define jump_label_enable(key) \
-	jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE);
+#else
 
-#define jump_label_disable(key) \
-	jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE);
+#include <asm/atomic.h>
 
-#else
+#define JUMP_LABEL_INIT {ATOMIC_INIT(0)}
 
-#define JUMP_LABEL(key, label)			\
-do {						\
-	if (unlikely(*key))			\
-		goto label;			\
-} while (0)
+struct jump_label_key {
+	atomic_t enabled;
+};
 
-#define jump_label_enable(cond_var)	\
-do {					\
-       *(cond_var) = 1;			\
-} while (0)
+static __always_inline bool static_branch(struct jump_label_key *key)
+{
+	if (unlikely(atomic_read(&key->enabled)))
+		return true;
+	return false;
+}
 
-#define jump_label_disable(cond_var)	\
-do {					\
-       *(cond_var) = 0;			\
-} while (0)
+static inline void jump_label_inc(struct jump_label_key *key)
+{
+	atomic_inc(&key->enabled);
+}
 
-static inline int jump_label_apply_nops(struct module *mod)
+static inline void jump_label_dec(struct jump_label_key *key)
 {
-	return 0;
+	atomic_dec(&key->enabled);
 }
 
 static inline int jump_label_text_reserved(void *start, void *end)
@@ -64,16 +87,16 @@ static inline int jump_label_text_reserved(void *start, void *end)
 static inline void jump_label_lock(void) {}
 static inline void jump_label_unlock(void) {}
 
-#endif
+static inline bool jump_label_enabled(struct jump_label_key *key)
+{
+	return !!atomic_read(&key->enabled);
+}
 
-#define COND_STMT(key, stmt)					\
-do {								\
-	__label__ jl_enabled;					\
-	JUMP_LABEL(key, jl_enabled);				\
-	if (0) {						\
-jl_enabled:							\
-		stmt;						\
-	}							\
-} while (0)
+static inline int jump_label_apply_nops(struct module *mod)
+{
+	return 0;
+}
+
+#endif
 
 #endif
diff --git a/include/linux/jump_label_ref.h b/include/linux/jump_label_ref.h
deleted file mode 100644
index e5d012ad92c6..000000000000
--- a/include/linux/jump_label_ref.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef _LINUX_JUMP_LABEL_REF_H
-#define _LINUX_JUMP_LABEL_REF_H
-
-#include <linux/jump_label.h>
-#include <asm/atomic.h>
-
-#ifdef HAVE_JUMP_LABEL
-
-static inline void jump_label_inc(atomic_t *key)
-{
-	if (atomic_add_return(1, key) == 1)
-		jump_label_enable(key);
-}
-
-static inline void jump_label_dec(atomic_t *key)
-{
-	if (atomic_dec_and_test(key))
-		jump_label_disable(key);
-}
-
-#else /* !HAVE_JUMP_LABEL */
-
-static inline void jump_label_inc(atomic_t *key)
-{
-	atomic_inc(key);
-}
-
-static inline void jump_label_dec(atomic_t *key)
-{
-	atomic_dec(key);
-}
-
-#undef JUMP_LABEL
-#define JUMP_LABEL(key, label)						\
-do {									\
-	if (unlikely(__builtin_choose_expr(				\
-	      __builtin_types_compatible_p(typeof(key), atomic_t *),	\
-	      atomic_read((atomic_t *)(key)), *(key))))			\
-		goto label;						\
-} while (0)
-
-#endif /* HAVE_JUMP_LABEL */
-
-#endif /* _LINUX_JUMP_LABEL_REF_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 311b4dc785a1..730b7821690f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -505,7 +505,7 @@ struct perf_guest_info_callbacks {
 #include <linux/ftrace.h>
 #include <linux/cpu.h>
 #include <linux/irq_work.h>
-#include <linux/jump_label_ref.h>
+#include <linux/jump_label.h>
 #include <asm/atomic.h>
 #include <asm/local.h>
 
@@ -1034,7 +1034,7 @@ static inline int is_software_event(struct perf_event *event)
 	return event->pmu->task_ctx_nr == perf_sw_context;
 }
 
-extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
+extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
 
@@ -1063,22 +1063,21 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 {
 	struct pt_regs hot_regs;
 
-	JUMP_LABEL(&perf_swevent_enabled[event_id], have_event);
-	return;
-
-have_event:
-	if (!regs) {
-		perf_fetch_caller_regs(&hot_regs);
-		regs = &hot_regs;
+	if (static_branch(&perf_swevent_enabled[event_id])) {
+		if (!regs) {
+			perf_fetch_caller_regs(&hot_regs);
+			regs = &hot_regs;
+		}
+		__perf_sw_event(event_id, nr, nmi, regs, addr);
 	}
-	__perf_sw_event(event_id, nr, nmi, regs, addr);
 }
 
-extern atomic_t perf_sched_events;
+extern struct jump_label_key perf_sched_events;
 
 static inline void perf_event_task_sched_in(struct task_struct *task)
 {
-	COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task));
+	if (static_branch(&perf_sched_events))
+		__perf_event_task_sched_in(task);
 }
 
 static inline
@@ -1086,7 +1085,8 @@ void perf_event_task_sched_out(struct task_struct *task, struct task_struct *nex
 {
 	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
 
-	COND_STMT(&perf_sched_events, __perf_event_task_sched_out(task, next));
+	if (static_branch(&perf_sched_events))
+		__perf_event_task_sched_out(task, next);
 }
 
 extern void perf_event_mmap(struct vm_area_struct *vma);
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 97c84a58efb8..d530a4460a0b 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -29,7 +29,7 @@ struct tracepoint_func {
 
 struct tracepoint {
 	const char *name;		/* Tracepoint name */
-	int state;			/* State. */
+	struct jump_label_key key;
 	void (*regfunc)(void);
 	void (*unregfunc)(void);
 	struct tracepoint_func __rcu *funcs;
@@ -146,9 +146,7 @@ void tracepoint_update_probe_range(struct tracepoint * const *begin,
 	extern struct tracepoint __tracepoint_##name;			\
 	static inline void trace_##name(proto)				\
 	{								\
-		JUMP_LABEL(&__tracepoint_##name.state, do_trace);	\
-		return;							\
-do_trace:								\
+		if (static_branch(&__tracepoint_##name.key))		\
 			__DO_TRACE(&__tracepoint_##name,		\
 				TP_PROTO(data_proto),			\
 				TP_ARGS(data_args),			\
@@ -176,14 +174,14 @@ do_trace:								\
  * structures, so we create an array of pointers that will be used for iteration
  * on the tracepoints.
  */
-#define DEFINE_TRACE_FN(name, reg, unreg)				\
-	static const char __tpstrtab_##name[]				\
-	__attribute__((section("__tracepoints_strings"))) = #name;	\
-	struct tracepoint __tracepoint_##name				\
-	__attribute__((section("__tracepoints"))) =			\
-		{ __tpstrtab_##name, 0, reg, unreg, NULL };		\
-	static struct tracepoint * const __tracepoint_ptr_##name __used	\
-	__attribute__((section("__tracepoints_ptrs"))) =		\
+#define DEFINE_TRACE_FN(name, reg, unreg)				 \
+	static const char __tpstrtab_##name[]				 \
+	__attribute__((section("__tracepoints_strings"))) = #name;	 \
+	struct tracepoint __tracepoint_##name				 \
+	__attribute__((section("__tracepoints"))) =			 \
+		{ __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\
+	static struct tracepoint * const __tracepoint_ptr_##name __used	 \
+	__attribute__((section("__tracepoints_ptrs"))) =		 \
 		&__tracepoint_##name;
 
 #define DEFINE_TRACE(name)						\
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 3b79bd938330..74d1c099fbd1 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -2,43 +2,23 @@
  * jump label support
  *
  * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
+ * Copyright (C) 2011 Peter Zijlstra <pzijlstr@redhat.com>
  *
  */
-#include <linux/jump_label.h>
 #include <linux/memory.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/list.h>
-#include <linux/jhash.h>
 #include <linux/slab.h>
 #include <linux/sort.h>
 #include <linux/err.h>
+#include <linux/jump_label.h>
 
 #ifdef HAVE_JUMP_LABEL
 
-#define JUMP_LABEL_HASH_BITS 6
-#define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS)
-static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE];
-
 /* mutex to protect coming/going of the the jump_label table */
 static DEFINE_MUTEX(jump_label_mutex);
 
-struct jump_label_entry {
-	struct hlist_node hlist;
-	struct jump_entry *table;
-	int nr_entries;
-	/* hang modules off here */
-	struct hlist_head modules;
-	unsigned long key;
-};
-
-struct jump_label_module_entry {
-	struct hlist_node hlist;
-	struct jump_entry *table;
-	int nr_entries;
-	struct module *mod;
-};
-
 void jump_label_lock(void)
 {
 	mutex_lock(&jump_label_mutex);
@@ -49,6 +29,11 @@ void jump_label_unlock(void)
 	mutex_unlock(&jump_label_mutex);
 }
 
+bool jump_label_enabled(struct jump_label_key *key)
+{
+	return !!atomic_read(&key->enabled);
+}
+
 static int jump_label_cmp(const void *a, const void *b)
 {
 	const struct jump_entry *jea = a;
@@ -64,7 +49,7 @@ static int jump_label_cmp(const void *a, const void *b)
 }
 
 static void
-sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop)
+jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
 {
 	unsigned long size;
 
@@ -73,118 +58,25 @@ sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop)
 	sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL);
 }
 
-static struct jump_label_entry *get_jump_label_entry(jump_label_t key)
-{
-	struct hlist_head *head;
-	struct hlist_node *node;
-	struct jump_label_entry *e;
-	u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0);
-
-	head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)];
-	hlist_for_each_entry(e, node, head, hlist) {
-		if (key == e->key)
-			return e;
-	}
-	return NULL;
-}
+static void jump_label_update(struct jump_label_key *key, int enable);
 
-static struct jump_label_entry *
-add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table)
+void jump_label_inc(struct jump_label_key *key)
 {
-	struct hlist_head *head;
-	struct jump_label_entry *e;
-	u32 hash;
-
-	e = get_jump_label_entry(key);
-	if (e)
-		return ERR_PTR(-EEXIST);
-
-	e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL);
-	if (!e)
-		return ERR_PTR(-ENOMEM);
-
-	hash = jhash((void *)&key, sizeof(jump_label_t), 0);
-	head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)];
-	e->key = key;
-	e->table = table;
-	e->nr_entries = nr_entries;
-	INIT_HLIST_HEAD(&(e->modules));
-	hlist_add_head(&e->hlist, head);
-	return e;
-}
+	if (atomic_inc_not_zero(&key->enabled))
+		return;
 
-static int
-build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop)
-{
-	struct jump_entry *iter, *iter_begin;
-	struct jump_label_entry *entry;
-	int count;
-
-	sort_jump_label_entries(start, stop);
-	iter = start;
-	while (iter < stop) {
-		entry = get_jump_label_entry(iter->key);
-		if (!entry) {
-			iter_begin = iter;
-			count = 0;
-			while ((iter < stop) &&
-				(iter->key == iter_begin->key)) {
-				iter++;
-				count++;
-			}
-			entry = add_jump_label_entry(iter_begin->key,
-							count, iter_begin);
-			if (IS_ERR(entry))
-				return PTR_ERR(entry);
-		 } else {
-			WARN_ONCE(1, KERN_ERR "build_jump_hashtable: unexpected entry!\n");
-			return -1;
-		}
-	}
-	return 0;
+	jump_label_lock();
+	if (atomic_add_return(1, &key->enabled) == 1)
+		jump_label_update(key, JUMP_LABEL_ENABLE);
+	jump_label_unlock();
 }
 
-/***
- * jump_label_update - update jump label text
- * @key -  key value associated with a a jump label
- * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE
- *
- * Will enable/disable the jump for jump label @key, depending on the
- * value of @type.
- *
- */
-
-void jump_label_update(unsigned long key, enum jump_label_type type)
+void jump_label_dec(struct jump_label_key *key)
 {
-	struct jump_entry *iter;
-	struct jump_label_entry *entry;
-	struct hlist_node *module_node;
-	struct jump_label_module_entry *e_module;
-	int count;
+	if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex))
+		return;
 
-	jump_label_lock();
-	entry = get_jump_label_entry((jump_label_t)key);
-	if (entry) {
-		count = entry->nr_entries;
-		iter = entry->table;
-		while (count--) {
-			if (kernel_text_address(iter->code))
-				arch_jump_label_transform(iter, type);
-			iter++;
-		}
-		/* eanble/disable jump labels in modules */
-		hlist_for_each_entry(e_module, module_node, &(entry->modules),
-							hlist) {
-			count = e_module->nr_entries;
-			iter = e_module->table;
-			while (count--) {
-				if (iter->key &&
-						kernel_text_address(iter->code))
-					arch_jump_label_transform(iter, type);
-				iter++;
-			}
-		}
-	}
+	jump_label_update(key, JUMP_LABEL_DISABLE);
 	jump_label_unlock();
 }
 
@@ -197,77 +89,33 @@ static int addr_conflict(struct jump_entry *entry, void *start, void *end)
 	return 0;
 }
 
-#ifdef CONFIG_MODULES
-
-static int module_conflict(void *start, void *end)
+static int __jump_label_text_reserved(struct jump_entry *iter_start,
+		struct jump_entry *iter_stop, void *start, void *end)
 {
-	struct hlist_head *head;
-	struct hlist_node *node, *node_next, *module_node, *module_node_next;
-	struct jump_label_entry *e;
-	struct jump_label_module_entry *e_module;
 	struct jump_entry *iter;
-	int i, count;
-	int conflict = 0;
-
-	for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
-		head = &jump_label_table[i];
-		hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
-			hlist_for_each_entry_safe(e_module, module_node,
-							module_node_next,
-							&(e->modules), hlist) {
-				count = e_module->nr_entries;
-				iter = e_module->table;
-				while (count--) {
-					if (addr_conflict(iter, start, end)) {
-						conflict = 1;
-						goto out;
-					}
-					iter++;
-				}
-			}
-		}
-	}
-out:
-	return conflict;
-}
-
-#endif
-
-/***
- * jump_label_text_reserved - check if addr range is reserved
- * @start: start text addr
- * @end: end text addr
- *
- * checks if the text addr located between @start and @end
- * overlaps with any of the jump label patch addresses. Code
- * that wants to modify kernel text should first verify that
- * it does not overlap with any of the jump label addresses.
- * Caller must hold jump_label_mutex.
- *
- * returns 1 if there is an overlap, 0 otherwise
- */
-int jump_label_text_reserved(void *start, void *end)
-{
-	struct jump_entry *iter;
-	struct jump_entry *iter_start = __start___jump_table;
-	struct jump_entry *iter_stop = __start___jump_table;
-	int conflict = 0;
 
 	iter = iter_start;
 	while (iter < iter_stop) {
-		if (addr_conflict(iter, start, end)) {
-			conflict = 1;
-			goto out;
-		}
+		if (addr_conflict(iter, start, end))
+			return 1;
 		iter++;
 	}
 
-	/* now check modules */
-#ifdef CONFIG_MODULES
-	conflict = module_conflict(start, end);
-#endif
-out:
-	return conflict;
+	return 0;
+}
+
+static void __jump_label_update(struct jump_label_key *key,
+		struct jump_entry *entry, int enable)
+{
+	for (; entry->key == (jump_label_t)(unsigned long)key; entry++) {
+		/*
+		 * entry->code set to 0 invalidates module init text sections
+		 * kernel_text_address() verifies we are not in core kernel
+		 * init code, see jump_label_invalidate_module_init().
+		 */
+		if (entry->code && kernel_text_address(entry->code))
+			arch_jump_label_transform(entry, enable);
+	}
 }
 
 /*
@@ -277,142 +125,173 @@ void __weak arch_jump_label_text_poke_early(jump_label_t addr)
 {
 }
 
-static __init int init_jump_label(void)
+static __init int jump_label_init(void)
 {
-	int ret;
 	struct jump_entry *iter_start = __start___jump_table;
 	struct jump_entry *iter_stop = __stop___jump_table;
+	struct jump_label_key *key = NULL;
 	struct jump_entry *iter;
 
 	jump_label_lock();
-	ret = build_jump_label_hashtable(__start___jump_table,
-					 __stop___jump_table);
-	iter = iter_start;
-	while (iter < iter_stop) {
+	jump_label_sort_entries(iter_start, iter_stop);
+
+	for (iter = iter_start; iter < iter_stop; iter++) {
 		arch_jump_label_text_poke_early(iter->code);
-		iter++;
+		if (iter->key == (jump_label_t)(unsigned long)key)
+			continue;
+
+		key = (struct jump_label_key *)(unsigned long)iter->key;
+		atomic_set(&key->enabled, 0);
+		key->entries = iter;
+#ifdef CONFIG_MODULES
+		key->next = NULL;
+#endif
 	}
 	jump_label_unlock();
-	return ret;
+
+	return 0;
 }
-early_initcall(init_jump_label);
+early_initcall(jump_label_init);
 
 #ifdef CONFIG_MODULES
 
-static struct jump_label_module_entry *
-add_jump_label_module_entry(struct jump_label_entry *entry,
-			    struct jump_entry *iter_begin,
-			    int count, struct module *mod)
+struct jump_label_mod {
+	struct jump_label_mod *next;
+	struct jump_entry *entries;
+	struct module *mod;
+};
+
+static int __jump_label_mod_text_reserved(void *start, void *end)
+{
+	struct module *mod;
+
+	mod = __module_text_address((unsigned long)start);
+	if (!mod)
+		return 0;
+
+	WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod);
+
+	return __jump_label_text_reserved(mod->jump_entries,
+				mod->jump_entries + mod->num_jump_entries,
+				start, end);
+}
+
+static void __jump_label_mod_update(struct jump_label_key *key, int enable)
+{
+	struct jump_label_mod *mod = key->next;
+
+	while (mod) {
+		__jump_label_update(key, mod->entries, enable);
+		mod = mod->next;
+	}
+}
+
+/***
+ * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop()
+ * @mod: module to patch
+ *
+ * Allow for run-time selection of the optimal nops. Before the module
+ * loads patch these with arch_get_jump_label_nop(), which is specified by
+ * the arch specific jump label code.
+ */
+void jump_label_apply_nops(struct module *mod)
 {
-	struct jump_label_module_entry *e;
-
-	e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL);
-	if (!e)
-		return ERR_PTR(-ENOMEM);
-	e->mod = mod;
-	e->nr_entries = count;
-	e->table = iter_begin;
-	hlist_add_head(&e->hlist, &entry->modules);
-	return e;
+	struct jump_entry *iter_start = mod->jump_entries;
+	struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
+	struct jump_entry *iter;
+
+	/* if the module doesn't have jump label entries, just return */
+	if (iter_start == iter_stop)
+		return;
+
+	for (iter = iter_start; iter < iter_stop; iter++)
+		arch_jump_label_text_poke_early(iter->code);
 }
 
-static int add_jump_label_module(struct module *mod)
+static int jump_label_add_module(struct module *mod)
 {
-	struct jump_entry *iter, *iter_begin;
-	struct jump_label_entry *entry;
-	struct jump_label_module_entry *module_entry;
-	int count;
+	struct jump_entry *iter_start = mod->jump_entries;
+	struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
+	struct jump_entry *iter;
+	struct jump_label_key *key = NULL;
+	struct jump_label_mod *jlm;
 
 	/* if the module doesn't have jump label entries, just return */
-	if (!mod->num_jump_entries)
+	if (iter_start == iter_stop)
 		return 0;
 
-	sort_jump_label_entries(mod->jump_entries,
-				mod->jump_entries + mod->num_jump_entries);
-	iter = mod->jump_entries;
-	while (iter < mod->jump_entries + mod->num_jump_entries) {
-		entry = get_jump_label_entry(iter->key);
-		iter_begin = iter;
-		count = 0;
-		while ((iter < mod->jump_entries + mod->num_jump_entries) &&
-			(iter->key == iter_begin->key)) {
-				iter++;
-				count++;
-		}
-		if (!entry) {
-			entry = add_jump_label_entry(iter_begin->key, 0, NULL);
-			if (IS_ERR(entry))
-				return PTR_ERR(entry);
+	jump_label_sort_entries(iter_start, iter_stop);
+
+	for (iter = iter_start; iter < iter_stop; iter++) {
+		if (iter->key == (jump_label_t)(unsigned long)key)
+			continue;
+
+		key = (struct jump_label_key *)(unsigned long)iter->key;
+
+		if (__module_address(iter->key) == mod) {
+			atomic_set(&key->enabled, 0);
+			key->entries = iter;
+			key->next = NULL;
+			continue;
 		}
-		module_entry = add_jump_label_module_entry(entry, iter_begin,
-							   count, mod);
-		if (IS_ERR(module_entry))
-			return PTR_ERR(module_entry);
+
+		jlm = kzalloc(sizeof(struct jump_label_mod), GFP_KERNEL);
+		if (!jlm)
+			return -ENOMEM;
+
+		jlm->mod = mod;
+		jlm->entries = iter;
+		jlm->next = key->next;
+		key->next = jlm;
+
+		if (jump_label_enabled(key))
+			__jump_label_update(key, iter, JUMP_LABEL_ENABLE);
 	}
+
 	return 0;
 }
 
-static void remove_jump_label_module(struct module *mod)
+static void jump_label_del_module(struct module *mod)
 {
-	struct hlist_head *head;
-	struct hlist_node *node, *node_next, *module_node, *module_node_next;
-	struct jump_label_entry *e;
-	struct jump_label_module_entry *e_module;
-	int i;
+	struct jump_entry *iter_start = mod->jump_entries;
+	struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
+	struct jump_entry *iter;
+	struct jump_label_key *key = NULL;
+	struct jump_label_mod *jlm, **prev;
 
-	/* if the module doesn't have jump label entries, just return */
-	if (!mod->num_jump_entries)
-		return;
+	for (iter = iter_start; iter < iter_stop; iter++) {
+		if (iter->key == (jump_label_t)(unsigned long)key)
+			continue;
+
+		key = (struct jump_label_key *)(unsigned long)iter->key;
+
+		if (__module_address(iter->key) == mod)
+			continue;
+
+		prev = &key->next;
+		jlm = key->next;
 
-	for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
-		head = &jump_label_table[i];
-		hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
-			hlist_for_each_entry_safe(e_module, module_node,
-						  module_node_next,
-						  &(e->modules), hlist) {
-				if (e_module->mod == mod) {
-					hlist_del(&e_module->hlist);
-					kfree(e_module);
-				}
-			}
-			if (hlist_empty(&e->modules) && (e->nr_entries == 0)) {
-				hlist_del(&e->hlist);
-				kfree(e);
-			}
+		while (jlm && jlm->mod != mod) {
+			prev = &jlm->next;
+			jlm = jlm->next;
+		}
+
+		if (jlm) {
+			*prev = jlm->next;
+			kfree(jlm);
 		}
 	}
 }
 
-static void remove_jump_label_module_init(struct module *mod)
+static void jump_label_invalidate_module_init(struct module *mod)
 {
-	struct hlist_head *head;
-	struct hlist_node *node, *node_next, *module_node, *module_node_next;
-	struct jump_label_entry *e;
-	struct jump_label_module_entry *e_module;
+	struct jump_entry *iter_start = mod->jump_entries;
+	struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
 	struct jump_entry *iter;
-	int i, count;
-
-	/* if the module doesn't have jump label entries, just return */
-	if (!mod->num_jump_entries)
-		return;
 
-	for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
-		head = &jump_label_table[i];
-		hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
-			hlist_for_each_entry_safe(e_module, module_node,
-						  module_node_next,
-						  &(e->modules), hlist) {
-				if (e_module->mod != mod)
-					continue;
-				count = e_module->nr_entries;
-				iter = e_module->table;
-				while (count--) {
-					if (within_module_init(iter->code, mod))
-						iter->key = 0;
-					iter++;
-				}
-			}
-		}
+	for (iter = iter_start; iter < iter_stop; iter++) {
+		if (within_module_init(iter->code, mod))
+			iter->code = 0;
 	}
 }
 
@@ -426,59 +305,77 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
 	switch (val) {
 	case MODULE_STATE_COMING:
 		jump_label_lock();
-		ret = add_jump_label_module(mod);
+		ret = jump_label_add_module(mod);
 		if (ret)
-			remove_jump_label_module(mod);
+			jump_label_del_module(mod);
 		jump_label_unlock();
 		break;
 	case MODULE_STATE_GOING:
 		jump_label_lock();
-		remove_jump_label_module(mod);
+		jump_label_del_module(mod);
 		jump_label_unlock();
 		break;
 	case MODULE_STATE_LIVE:
 		jump_label_lock();
-		remove_jump_label_module_init(mod);
+		jump_label_invalidate_module_init(mod);
 		jump_label_unlock();
 		break;
 	}
-	return ret;
-}
 
-/***
- * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop()
- * @mod: module to patch
- *
- * Allow for run-time selection of the optimal nops. Before the module
- * loads patch these with arch_get_jump_label_nop(), which is specified by
- * the arch specific jump label code.
- */
-void jump_label_apply_nops(struct module *mod)
-{
-	struct jump_entry *iter;
-
-	/* if the module doesn't have jump label entries, just return */
-	if (!mod->num_jump_entries)
-		return;
-
-	iter = mod->jump_entries;
-	while (iter < mod->jump_entries + mod->num_jump_entries) {
-		arch_jump_label_text_poke_early(iter->code);
-		iter++;
-	}
+	return notifier_from_errno(ret);
 }
 
 struct notifier_block jump_label_module_nb = {
 	.notifier_call = jump_label_module_notify,
-	.priority = 0,
+	.priority = 1, /* higher than tracepoints */
 };
 
-static __init int init_jump_label_module(void)
+static __init int jump_label_init_module(void)
 {
 	return register_module_notifier(&jump_label_module_nb);
 }
-early_initcall(init_jump_label_module);
+early_initcall(jump_label_init_module);
 
 #endif /* CONFIG_MODULES */
 
+/***
+ * jump_label_text_reserved - check if addr range is reserved
+ * @start: start text addr
+ * @end: end text addr
+ *
+ * checks if the text addr located between @start and @end
+ * overlaps with any of the jump label patch addresses. Code
+ * that wants to modify kernel text should first verify that
+ * it does not overlap with any of the jump label addresses.
+ * Caller must hold jump_label_mutex.
+ *
+ * returns 1 if there is an overlap, 0 otherwise
+ */
+int jump_label_text_reserved(void *start, void *end)
+{
+	int ret = __jump_label_text_reserved(__start___jump_table,
+			__stop___jump_table, start, end);
+
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_MODULES
+	ret = __jump_label_mod_text_reserved(start, end);
+#endif
+	return ret;
+}
+
+static void jump_label_update(struct jump_label_key *key, int enable)
+{
+	struct jump_entry *entry = key->entries;
+
+	/* if there are no users, entry can be NULL */
+	if (entry)
+		__jump_label_update(key, entry, enable);
+
+#ifdef CONFIG_MODULES
+	__jump_label_mod_update(key, enable);
+#endif
+}
+
 #endif
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index c75925c4d1e2..d665e92fbd44 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -125,7 +125,7 @@ enum event_type_t {
  * perf_sched_events : >0 events exist
  * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
  */
-atomic_t perf_sched_events __read_mostly;
+struct jump_label_key perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 
 static atomic_t nr_mmap_events __read_mostly;
@@ -5417,7 +5417,7 @@ fail:
 	return err;
 }
 
-atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
+struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 static void sw_perf_event_destroy(struct perf_event *event)
 {
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 68187af4889e..b219f1449c54 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -251,9 +251,9 @@ static void set_tracepoint(struct tracepoint_entry **entry,
 {
 	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
 
-	if (elem->regfunc && !elem->state && active)
+	if (elem->regfunc && !jump_label_enabled(&elem->key) && active)
 		elem->regfunc();
-	else if (elem->unregfunc && elem->state && !active)
+	else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active)
 		elem->unregfunc();
 
 	/*
@@ -264,13 +264,10 @@ static void set_tracepoint(struct tracepoint_entry **entry,
 	 * is used.
 	 */
 	rcu_assign_pointer(elem->funcs, (*entry)->funcs);
-	if (!elem->state && active) {
-		jump_label_enable(&elem->state);
-		elem->state = active;
-	} else if (elem->state && !active) {
-		jump_label_disable(&elem->state);
-		elem->state = active;
-	}
+	if (active && !jump_label_enabled(&elem->key))
+		jump_label_inc(&elem->key);
+	else if (!active && jump_label_enabled(&elem->key))
+		jump_label_dec(&elem->key);
 }
 
 /*
@@ -281,13 +278,11 @@ static void set_tracepoint(struct tracepoint_entry **entry,
  */
 static void disable_tracepoint(struct tracepoint *elem)
 {
-	if (elem->unregfunc && elem->state)
+	if (elem->unregfunc && jump_label_enabled(&elem->key))
 		elem->unregfunc();
 
-	if (elem->state) {
-		jump_label_disable(&elem->state);
-		elem->state = 0;
-	}
+	if (jump_label_enabled(&elem->key))
+		jump_label_dec(&elem->key);
 	rcu_assign_pointer(elem->funcs, NULL);
 }
 
-- 
cgit v1.2.3


From ef64789413c73f32faa5e5f1bc393e5843b0aa51 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Wed, 16 Mar 2011 15:58:27 -0400
Subject: jump label: Add _ASM_ALIGN for x86 and x86_64

The linker should not be adding holes to word size aligned pointers, but
out of paranoia we are explicitly specifying that alignment. I have not
seen any holes in the jump label section in practice.

Signed-off-by: Jason Baron <jbaron@redhat.com>
LKML-Reference: <e119fbd060c9452c56063ea6148ba1070e7434cc.1300299760.git.jbaron@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/jump_label.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index f217cee86533..a32b18ce6ead 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -16,6 +16,7 @@ static __always_inline bool arch_static_branch(struct jump_label_key *key)
 	asm goto("1:"
 		JUMP_LABEL_INITIAL_NOP
 		".pushsection __jump_table,  \"aw\" \n\t"
+		_ASM_ALIGN "\n\t"
 		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
 		".popsection \n\t"
 		: :  "i" (key) : : l_yes);
-- 
cgit v1.2.3


From 5373db886b791b2bc7811e2c115377916c409a5d Mon Sep 17 00:00:00 2001
From: Jan Glauber <jang@linux.vnet.ibm.com>
Date: Wed, 16 Mar 2011 15:58:30 -0400
Subject: jump label: Add s390 support

Implement the architecture backend for jump label support on s390.

For a shared kernel booted from a NSS silently disable jump labels
because the NSS is read-only. Therefore jump labels will be disabled
in a shared kernel and can't be activated.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
LKML-Reference: <6935d2c41ce111e1719176ed4bbd3dbe4de80855.1300299760.git.jbaron@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/s390/Kconfig                  |  1 +
 arch/s390/include/asm/jump_label.h | 37 ++++++++++++++++++++++++
 arch/s390/kernel/Makefile          |  2 +-
 arch/s390/kernel/jump_label.c      | 59 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+), 1 deletion(-)
 create mode 100644 arch/s390/include/asm/jump_label.h
 create mode 100644 arch/s390/kernel/jump_label.c

(limited to 'arch')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 2508a6f31588..4a7f14079e03 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -88,6 +88,7 @@ config S390
 	select HAVE_KERNEL_XZ
 	select HAVE_GET_USER_PAGES_FAST
 	select HAVE_ARCH_MUTEX_CPU_RELAX
+	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
 	select ARCH_INLINE_SPIN_TRYLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK_BH
 	select ARCH_INLINE_SPIN_LOCK
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
new file mode 100644
index 000000000000..95a6cf2b5b67
--- /dev/null
+++ b/arch/s390/include/asm/jump_label.h
@@ -0,0 +1,37 @@
+#ifndef _ASM_S390_JUMP_LABEL_H
+#define _ASM_S390_JUMP_LABEL_H
+
+#include <linux/types.h>
+
+#define JUMP_LABEL_NOP_SIZE 6
+
+#ifdef CONFIG_64BIT
+#define ASM_PTR ".quad"
+#define ASM_ALIGN ".balign 8"
+#else
+#define ASM_PTR ".long"
+#define ASM_ALIGN ".balign 4"
+#endif
+
+static __always_inline bool arch_static_branch(struct jump_label_key *key)
+{
+	asm goto("0:	brcl 0,0\n"
+		".pushsection __jump_table, \"aw\"\n"
+		ASM_ALIGN "\n"
+		ASM_PTR " 0b, %l[label], %0\n"
+		".popsection\n"
+		: : "X" (key) : : label);
+	return false;
+label:
+	return true;
+}
+
+typedef unsigned long jump_label_t;
+
+struct jump_entry {
+	jump_label_t code;
+	jump_label_t target;
+	jump_label_t key;
+};
+
+#endif
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 64230bc392fa..5ff15dacb571 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
 obj-y	:=  bitmap.o traps.o time.o process.o base.o early.o setup.o \
 	    processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
 	    s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o \
-	    vdso.o vtime.o sysinfo.o nmi.o sclp.o
+	    vdso.o vtime.o sysinfo.o nmi.o sclp.o jump_label.o
 
 obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)
 obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
new file mode 100644
index 000000000000..44cc06bedf77
--- /dev/null
+++ b/arch/s390/kernel/jump_label.c
@@ -0,0 +1,59 @@
+/*
+ * Jump label s390 support
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/stop_machine.h>
+#include <linux/jump_label.h>
+#include <asm/ipl.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+struct insn {
+	u16 opcode;
+	s32 offset;
+} __packed;
+
+struct insn_args {
+	unsigned long *target;
+	struct insn *insn;
+	ssize_t size;
+};
+
+static int __arch_jump_label_transform(void *data)
+{
+	struct insn_args *args = data;
+	int rc;
+
+	rc = probe_kernel_write(args->target, args->insn, args->size);
+	WARN_ON_ONCE(rc < 0);
+	return 0;
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	struct insn_args args;
+	struct insn insn;
+
+	if (type == JUMP_LABEL_ENABLE) {
+		/* brcl 15,offset */
+		insn.opcode = 0xc0f4;
+		insn.offset = (entry->target - entry->code) >> 1;
+	} else {
+		/* brcl 0,0 */
+		insn.opcode = 0xc004;
+		insn.offset = 0;
+	}
+
+	args.target = (void *) entry->code;
+	args.insn = &insn;
+	args.size = JUMP_LABEL_NOP_SIZE;
+
+	stop_machine(__arch_jump_label_transform, &args, NULL);
+}
+
+#endif
-- 
cgit v1.2.3


From 660e34cebf0a11d54f2d5dd8838607452355f321 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Mon, 4 Apr 2011 13:55:05 -0400
Subject: x86: Reorder reboot method preferences

We have a never ending stream of 'reboot quirks' for new boxes
that will not reboot properly under Linux (they will hang on
reboot).

The reason is widespread 'Windows compatible' assumption of modern
x86 hardware, which expects the following reboot sequence:

 - hitting the ACPI reboot vector (if available)
 - trying the keyboard controller
 - hitting the ACPI reboot vector again
 - then giving the keyboard controller one last go

This sequence expectation gets more and more embedded in modern
hardware, which often lacks a keyboard controller and may even
lock up if the legacy io ports are hit - and which hardware is
often not tested with Linux during development.

The end result is that reboot works under Windows-alike OSs but not
under Linux.

Rework our reboot process to meet this hardware externality a little
better and match this assumption of newer x86 hardware.

In addition to the ACPI,kbd,ACPI,kbd sequence we'll still fall
through to attempting a legacy triple fault if nothing else
works - and keep trying that and the kbd reset.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
[ this commit will also save special casing Oaktrail boards ]
Acked-by: Alan Cox <alan@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Leann Ogasawara <leann.ogasawara@canonical.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Len Brown <len.brown@intel.com>
LKML-Reference: <1301939705-2404-1-git-send-email-mjg@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/reboot.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 08c44b08bf5b..0c016f727695 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(pm_power_off);
 
 static const struct desc_ptr no_idt = {};
 static int reboot_mode;
-enum reboot_type reboot_type = BOOT_KBD;
+enum reboot_type reboot_type = BOOT_ACPI;
 int reboot_force;
 
 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
@@ -478,9 +478,24 @@ void __attribute__((weak)) mach_reboot_fixups(void)
 {
 }
 
+/*
+ * Windows compatible x86 hardware expects the following on reboot:
+ *
+ * 1) If the FADT has the ACPI reboot register flag set, try it
+ * 2) If still alive, write to the keyboard controller
+ * 3) If still alive, write to the ACPI reboot register again
+ * 4) If still alive, write to the keyboard controller again
+ *
+ * If the machine is still alive at this stage, it gives up. We default to
+ * following the same pattern, except that if we're still alive after (4) we'll
+ * try to force a triple fault and then cycle between hitting the keyboard
+ * controller and doing that
+ */
 static void native_machine_emergency_restart(void)
 {
 	int i;
+	int attempt = 0;
+	int orig_reboot_type = reboot_type;
 
 	if (reboot_emergency)
 		emergency_vmx_disable_all();
@@ -502,6 +517,13 @@ static void native_machine_emergency_restart(void)
 				outb(0xfe, 0x64); /* pulse reset low */
 				udelay(50);
 			}
+			if (attempt == 0 && orig_reboot_type == BOOT_ACPI) {
+				attempt = 1;
+				reboot_type = BOOT_ACPI;
+			} else {
+				reboot_type = BOOT_TRIPLE;
+			}
+			break;
 
 		case BOOT_TRIPLE:
 			load_idt(&no_idt);
-- 
cgit v1.2.3


From ded467374a34eb80020c2213456b1d9ca946b88c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 6 Apr 2011 10:53:48 +0200
Subject: x86/amd-iommu: Move compl-wait command building to own function

This patch introduces a seperate function for building
completion-wait commands.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 57ca77787220..eebd504519c6 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -383,6 +383,13 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
  *
  ****************************************************************************/
 
+static void build_completion_wait(struct iommu_cmd *cmd)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[0] = CMD_COMPL_WAIT_INT_MASK;
+	CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
+}
+
 /*
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command. Must be called with iommu->lock held.
@@ -458,9 +465,7 @@ static int __iommu_completion_wait(struct amd_iommu *iommu)
 {
 	struct iommu_cmd cmd;
 
-	 memset(&cmd, 0, sizeof(cmd));
-	 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
-	 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+	build_completion_wait(&cmd);
 
 	 return __iommu_queue_command(iommu, &cmd);
 }
-- 
cgit v1.2.3


From 94fe79e2f100bfcd8e7689cbf8838634779b80a2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 6 Apr 2011 11:07:21 +0200
Subject: x86/amd-iommu: Move inv-dte command building to own function

This patch moves command building for the invalidate-dte
command into its own function.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index eebd504519c6..4e5631a433aa 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -390,6 +390,13 @@ static void build_completion_wait(struct iommu_cmd *cmd)
 	CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
 }
 
+static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[0] = devid;
+	CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
+}
+
 /*
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command. Must be called with iommu->lock held.
@@ -533,10 +540,7 @@ static int iommu_flush_device(struct device *dev)
 	devid = get_device_id(dev);
 	iommu = amd_iommu_rlookup_table[devid];
 
-	/* Build command */
-	memset(&cmd, 0, sizeof(cmd));
-	CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
-	cmd.data[0] = devid;
+	build_inv_dte(&cmd, devid);
 
 	return iommu_queue_command(iommu, &cmd);
 }
-- 
cgit v1.2.3


From 11b6402c6673b530fac9920c5640c75e99fee956 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 6 Apr 2011 11:49:28 +0200
Subject: x86/amd-iommu: Cleanup inv_pages command handling

This patch reworks the processing of invalidate-pages
commands to the IOMMU. The function building the the command
is extended so we can get rid of another function. It was
also renamed to match with the other function names.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 83 ++++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 47 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 4e5631a433aa..f8ec28ea3314 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -397,6 +397,37 @@ static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
 	CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
 }
 
+static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
+				  size_t size, u16 domid, int pde)
+{
+	u64 pages;
+	int s;
+
+	pages = iommu_num_pages(address, size, PAGE_SIZE);
+	s     = 0;
+
+	if (pages > 1) {
+		/*
+		 * If we have to flush more than one page, flush all
+		 * TLB entries for this domain
+		 */
+		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+		s = 1;
+	}
+
+	address &= PAGE_MASK;
+
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[1] |= domid;
+	cmd->data[2]  = lower_32_bits(address);
+	cmd->data[3]  = upper_32_bits(address);
+	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+	if (s) /* size bit - we flush more than one 4kb page */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+}
+
 /*
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command. Must be called with iommu->lock held.
@@ -545,37 +576,6 @@ static int iommu_flush_device(struct device *dev)
 	return iommu_queue_command(iommu, &cmd);
 }
 
-static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
-					  u16 domid, int pde, int s)
-{
-	memset(cmd, 0, sizeof(*cmd));
-	address &= PAGE_MASK;
-	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
-	cmd->data[1] |= domid;
-	cmd->data[2] = lower_32_bits(address);
-	cmd->data[3] = upper_32_bits(address);
-	if (s) /* size bit - we flush more than one 4kb page */
-		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
-	if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
-		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
-}
-
-/*
- * Generic command send function for invalidaing TLB entries
- */
-static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
-		u64 address, u16 domid, int pde, int s)
-{
-	struct iommu_cmd cmd;
-	int ret;
-
-	__iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
-
-	ret = iommu_queue_command(iommu, &cmd);
-
-	return ret;
-}
-
 /*
  * TLB invalidation function which is called from the mapping functions.
  * It invalidates a single PTE if the range to flush is within a single
@@ -584,20 +584,10 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 static void __iommu_flush_pages(struct protection_domain *domain,
 				u64 address, size_t size, int pde)
 {
-	int s = 0, i;
-	unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE);
-
-	address &= PAGE_MASK;
-
-	if (pages > 1) {
-		/*
-		 * If we have to flush more than one page, flush all
-		 * TLB entries for this domain
-		 */
-		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
-		s = 1;
-	}
+	struct iommu_cmd cmd;
+	int ret = 0, i;
 
+	build_inv_iommu_pages(&cmd, address, size, domain->id, pde);
 
 	for (i = 0; i < amd_iommus_present; ++i) {
 		if (!domain->dev_iommu[i])
@@ -607,11 +597,10 @@ static void __iommu_flush_pages(struct protection_domain *domain,
 		 * Devices of this domain are behind this IOMMU
 		 * We need a TLB flush
 		 */
-		iommu_queue_inv_iommu_pages(amd_iommus[i], address,
-					    domain->id, pde, s);
+		ret |= iommu_queue_command(amd_iommus[i], &cmd);
 	}
 
-	return;
+	WARN_ON(ret);
 }
 
 static void iommu_flush_pages(struct protection_domain *domain,
-- 
cgit v1.2.3


From 3fe14ab541cd9b0d1f243afb7556046f12c8743c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:47 +0200
Subject: x86-32, numa: Fix failure condition check in alloc_remap()

node_remap_{start|end}_vaddr[] describe [start, end) ranges; however,
alloc_remap() incorrectly failed when the current allocation + size
equaled the end but it should fail only when it goes over.  Fix it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-2-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index bde3906420df..84aac47c3887 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -200,7 +200,7 @@ void *alloc_remap(int nid, unsigned long size)
 
 	size = ALIGN(size, L1_CACHE_BYTES);
 
-	if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
+	if (!allocation || (allocation + size) > node_remap_end_vaddr[nid])
 		return NULL;
 
 	node_remap_alloc_vaddr[nid] += size;
-- 
cgit v1.2.3


From a6c24f7a705d939ddd2fcaa443fa3d8e852b933d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:48 +0200
Subject: x86-32, numa: Align pgdat size while initializing alloc_remap

When pgdat is reserved in init_remap_allocator(), PAGE_SIZE aligned
size will be used.  Match the size alignment in initialization to
avoid allocation failure down the road.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-3-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84aac47c3887..50e82507eab4 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -287,7 +287,8 @@ static __init unsigned long calculate_numa_remap_pages(void)
 			node_end_pfn[nid] = max_pfn;
 
 		/* ensure the remap includes space for the pgdat. */
-		size = node_remap_size[nid] + sizeof(pg_data_t);
+		size = node_remap_size[nid];
+		size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
 
 		/* convert size to large (pmd size) pages, rounding up */
 		size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
-- 
cgit v1.2.3


From 5b8443b25c0f323ec190d094e4b441957b02664e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:49 +0200
Subject: x86-32, numa: Remove redundant top-down alloc code from remap
 initialization

memblock_find_in_range() now does top-down allocation by default, so
there's no reason for its callers to explicitly implement it by
gradually lowering the start address.

Remove redundant top-down allocation logic from init_meminit() and
calculate_numa_remap_pages().

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-4-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 43 ++++++++++++++-----------------------------
 1 file changed, 14 insertions(+), 29 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 50e82507eab4..60701a5e0de0 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -270,8 +270,7 @@ static __init unsigned long calculate_numa_remap_pages(void)
 	unsigned long size, reserve_pages = 0;
 
 	for_each_online_node(nid) {
-		u64 node_kva_target;
-		u64 node_kva_final;
+		u64 node_kva;
 
 		/*
 		 * The acpi/srat node info can show hot-add memroy zones
@@ -295,19 +294,11 @@ static __init unsigned long calculate_numa_remap_pages(void)
 		/* now the roundup is correct, convert to PAGE_SIZE pages */
 		size = size * PTRS_PER_PTE;
 
-		node_kva_target = round_down(node_end_pfn[nid] - size,
-						 PTRS_PER_PTE);
-		node_kva_target <<= PAGE_SHIFT;
-		do {
-			node_kva_final = memblock_find_in_range(node_kva_target,
+		node_kva = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT,
 					((u64)node_end_pfn[nid])<<PAGE_SHIFT,
-						((u64)size)<<PAGE_SHIFT,
-						LARGE_PAGE_BYTES);
-			node_kva_target -= LARGE_PAGE_BYTES;
-		} while (node_kva_final == MEMBLOCK_ERROR &&
-			 (node_kva_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
-
-		if (node_kva_final == MEMBLOCK_ERROR)
+					((u64)size)<<PAGE_SHIFT,
+					LARGE_PAGE_BYTES);
+		if (node_kva == MEMBLOCK_ERROR)
 			panic("Can not get kva ram\n");
 
 		node_remap_size[nid] = size;
@@ -315,7 +306,7 @@ static __init unsigned long calculate_numa_remap_pages(void)
 		reserve_pages += size;
 		printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of"
 				  " node %d at %llx\n",
-				size, nid, node_kva_final>>PAGE_SHIFT);
+				size, nid, node_kva >> PAGE_SHIFT);
 
 		/*
 		 *  prevent kva address below max_low_pfn want it on system
@@ -328,11 +319,11 @@ static __init unsigned long calculate_numa_remap_pages(void)
 		 *  to use it as free.
 		 *  So memblock_x86_reserve_range here, hope we don't run out of that array
 		 */
-		memblock_x86_reserve_range(node_kva_final,
-			      node_kva_final+(((u64)size)<<PAGE_SHIFT),
-			      "KVA RAM");
+		memblock_x86_reserve_range(node_kva,
+					   node_kva + (((u64)size)<<PAGE_SHIFT),
+					   "KVA RAM");
 
-		node_remap_start_pfn[nid] = node_kva_final>>PAGE_SHIFT;
+		node_remap_start_pfn[nid] = node_kva >> PAGE_SHIFT;
 	}
 	printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
 			reserve_pages);
@@ -356,7 +347,6 @@ static void init_remap_allocator(int nid)
 void __init initmem_init(void)
 {
 	int nid;
-	long kva_target_pfn;
 
 	/*
 	 * When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -371,15 +361,10 @@ void __init initmem_init(void)
 
 	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
-	kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
-	do {
-		kva_start_pfn = memblock_find_in_range(kva_target_pfn<<PAGE_SHIFT,
-					max_low_pfn<<PAGE_SHIFT,
-					kva_pages<<PAGE_SHIFT,
-					PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
-		kva_target_pfn -= PTRS_PER_PTE;
-	} while (kva_start_pfn == MEMBLOCK_ERROR && kva_target_pfn > min_low_pfn);
-
+	kva_start_pfn = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
+				max_low_pfn << PAGE_SHIFT,
+				kva_pages << PAGE_SHIFT,
+				PTRS_PER_PTE << PAGE_SHIFT) >> PAGE_SHIFT;
 	if (kva_start_pfn == MEMBLOCK_ERROR)
 		panic("Can not get kva space\n");
 
-- 
cgit v1.2.3


From 5510db9c1be111528ce46c57f0bec1c9dce258f4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:50 +0200
Subject: x86-32, numa: Reorganize calculate_numa_remap_page()

Separate the outer node walking loop and per-node logic from
calculate_numa_remap_pages().  The outer loop is collapsed into
initmem_init() and the per-node logic is moved into a new function -
init_alloc_remap().

The new function name is confusing with the existing
init_remap_allocator() and the behavior is the function isn't very
clean either at this point, but this is to prepare for further
cleanups and it will become prettier.

This function doesn't introduce any behavior change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-5-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 125 +++++++++++++++++++++++++-------------------------
 1 file changed, 62 insertions(+), 63 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 60701a5e0de0..5039e9b21d9e 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -264,70 +264,64 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 }
 #endif
 
-static __init unsigned long calculate_numa_remap_pages(void)
+static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 {
-	int nid;
-	unsigned long size, reserve_pages = 0;
+	unsigned long size;
+	u64 node_kva;
 
-	for_each_online_node(nid) {
-		u64 node_kva;
-
-		/*
-		 * The acpi/srat node info can show hot-add memroy zones
-		 * where memory could be added but not currently present.
-		 */
-		printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
-			nid, node_start_pfn[nid], node_end_pfn[nid]);
-		if (node_start_pfn[nid] > max_pfn)
-			continue;
-		if (!node_end_pfn[nid])
-			continue;
-		if (node_end_pfn[nid] > max_pfn)
-			node_end_pfn[nid] = max_pfn;
-
-		/* ensure the remap includes space for the pgdat. */
-		size = node_remap_size[nid];
-		size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
-
-		/* convert size to large (pmd size) pages, rounding up */
-		size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
-		/* now the roundup is correct, convert to PAGE_SIZE pages */
-		size = size * PTRS_PER_PTE;
-
-		node_kva = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT,
-					((u64)node_end_pfn[nid])<<PAGE_SHIFT,
-					((u64)size)<<PAGE_SHIFT,
-					LARGE_PAGE_BYTES);
-		if (node_kva == MEMBLOCK_ERROR)
-			panic("Can not get kva ram\n");
-
-		node_remap_size[nid] = size;
-		node_remap_offset[nid] = reserve_pages;
-		reserve_pages += size;
-		printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of"
-				  " node %d at %llx\n",
-				size, nid, node_kva >> PAGE_SHIFT);
-
-		/*
-		 *  prevent kva address below max_low_pfn want it on system
-		 *  with less memory later.
-		 *  layout will be: KVA address , KVA RAM
-		 *
-		 *  we are supposed to only record the one less then max_low_pfn
-		 *  but we could have some hole in high memory, and it will only
-		 *  check page_is_ram(pfn) && !page_is_reserved_early(pfn) to decide
-		 *  to use it as free.
-		 *  So memblock_x86_reserve_range here, hope we don't run out of that array
-		 */
-		memblock_x86_reserve_range(node_kva,
-					   node_kva + (((u64)size)<<PAGE_SHIFT),
-					   "KVA RAM");
-
-		node_remap_start_pfn[nid] = node_kva >> PAGE_SHIFT;
-	}
-	printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
-			reserve_pages);
-	return reserve_pages;
+	/*
+	 * The acpi/srat node info can show hot-add memroy zones where
+	 * memory could be added but not currently present.
+	 */
+	printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
+	       nid, node_start_pfn[nid], node_end_pfn[nid]);
+	if (node_start_pfn[nid] > max_pfn)
+		return 0;
+	if (!node_end_pfn[nid])
+		return 0;
+	if (node_end_pfn[nid] > max_pfn)
+		node_end_pfn[nid] = max_pfn;
+
+	/* ensure the remap includes space for the pgdat. */
+	size = node_remap_size[nid];
+	size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+
+	/* convert size to large (pmd size) pages, rounding up */
+	size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
+	/* now the roundup is correct, convert to PAGE_SIZE pages */
+	size = size * PTRS_PER_PTE;
+
+	node_kva = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT,
+					  (u64)node_end_pfn[nid] << PAGE_SHIFT,
+					  (u64)size << PAGE_SHIFT,
+					  LARGE_PAGE_BYTES);
+	if (node_kva == MEMBLOCK_ERROR)
+		panic("Can not get kva ram\n");
+
+	node_remap_size[nid] = size;
+	node_remap_offset[nid] = offset;
+	printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of node %d at %llx\n",
+	       size, nid, node_kva >> PAGE_SHIFT);
+
+	/*
+	 *  prevent kva address below max_low_pfn want it on system
+	 *  with less memory later.
+	 *  layout will be: KVA address , KVA RAM
+	 *
+	 *  we are supposed to only record the one less then
+	 *  max_low_pfn but we could have some hole in high memory,
+	 *  and it will only check page_is_ram(pfn) &&
+	 *  !page_is_reserved_early(pfn) to decide to use it as free.
+	 *  So memblock_x86_reserve_range here, hope we don't run out
+	 *  of that array
+	 */
+	memblock_x86_reserve_range(node_kva,
+				   node_kva + ((u64)size << PAGE_SHIFT),
+				   "KVA RAM");
+
+	node_remap_start_pfn[nid] = node_kva >> PAGE_SHIFT;
+
+	return size;
 }
 
 static void init_remap_allocator(int nid)
@@ -346,6 +340,7 @@ static void init_remap_allocator(int nid)
 
 void __init initmem_init(void)
 {
+	unsigned long reserve_pages = 0;
 	int nid;
 
 	/*
@@ -359,7 +354,11 @@ void __init initmem_init(void)
 	get_memcfg_numa();
 	numa_init_array();
 
-	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
+	for_each_online_node(nid)
+		reserve_pages += init_alloc_remap(nid, reserve_pages);
+	kva_pages = roundup(reserve_pages, PTRS_PER_PTE);
+	printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
+			reserve_pages);
 
 	kva_start_pfn = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
 				max_low_pfn << PAGE_SHIFT,
-- 
cgit v1.2.3


From c4d4f577d49c441ab4f1bb6068247dafb366e635 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:51 +0200
Subject: x86-32, numa: Rename @node_kva to @node_pa in init_alloc_remap()

init_alloc_remap() is about to do more and using _kva suffix for
physical address becomes confusing because the function will be
handling both physical and virtual addresses.  Rename @node_kva to
@node_pa.

This is trivial rename and doesn't cause any behavior difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-6-git-send-email-tj@kernel.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 5039e9b21d9e..30933fec8f75 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -267,7 +267,7 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 {
 	unsigned long size;
-	u64 node_kva;
+	u64 node_pa;
 
 	/*
 	 * The acpi/srat node info can show hot-add memroy zones where
@@ -291,17 +291,17 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 	/* now the roundup is correct, convert to PAGE_SIZE pages */
 	size = size * PTRS_PER_PTE;
 
-	node_kva = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT,
-					  (u64)node_end_pfn[nid] << PAGE_SHIFT,
-					  (u64)size << PAGE_SHIFT,
-					  LARGE_PAGE_BYTES);
-	if (node_kva == MEMBLOCK_ERROR)
+	node_pa = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT,
+					 (u64)node_end_pfn[nid] << PAGE_SHIFT,
+					 (u64)size << PAGE_SHIFT,
+					 LARGE_PAGE_BYTES);
+	if (node_pa == MEMBLOCK_ERROR)
 		panic("Can not get kva ram\n");
 
 	node_remap_size[nid] = size;
 	node_remap_offset[nid] = offset;
 	printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of node %d at %llx\n",
-	       size, nid, node_kva >> PAGE_SHIFT);
+	       size, nid, node_pa >> PAGE_SHIFT);
 
 	/*
 	 *  prevent kva address below max_low_pfn want it on system
@@ -315,11 +315,10 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 	 *  So memblock_x86_reserve_range here, hope we don't run out
 	 *  of that array
 	 */
-	memblock_x86_reserve_range(node_kva,
-				   node_kva + ((u64)size << PAGE_SHIFT),
+	memblock_x86_reserve_range(node_pa, node_pa + ((u64)size << PAGE_SHIFT),
 				   "KVA RAM");
 
-	node_remap_start_pfn[nid] = node_kva >> PAGE_SHIFT;
+	node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
 
 	return size;
 }
-- 
cgit v1.2.3


From af7c1a6e8374e05aab4a98ce4d2fb07b66506a02 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:52 +0200
Subject: x86-32, numa: Make @size in init_aloc_remap() represent bytes

@size variable in init_alloc_remap() is confusing in that it starts as
number of bytes as its name implies and then becomes number of pages.
Make it consistently represent bytes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-7-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 30933fec8f75..99310d26fe34 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -286,22 +286,19 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 	size = node_remap_size[nid];
 	size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
 
-	/* convert size to large (pmd size) pages, rounding up */
-	size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
-	/* now the roundup is correct, convert to PAGE_SIZE pages */
-	size = size * PTRS_PER_PTE;
+	/* align to large page */
+	size = ALIGN(size, LARGE_PAGE_BYTES);
 
 	node_pa = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT,
 					 (u64)node_end_pfn[nid] << PAGE_SHIFT,
-					 (u64)size << PAGE_SHIFT,
-					 LARGE_PAGE_BYTES);
+					 size, LARGE_PAGE_BYTES);
 	if (node_pa == MEMBLOCK_ERROR)
 		panic("Can not get kva ram\n");
 
-	node_remap_size[nid] = size;
+	node_remap_size[nid] = size >> PAGE_SHIFT;
 	node_remap_offset[nid] = offset;
 	printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of node %d at %llx\n",
-	       size, nid, node_pa >> PAGE_SHIFT);
+	       size >> PAGE_SHIFT, nid, node_pa >> PAGE_SHIFT);
 
 	/*
 	 *  prevent kva address below max_low_pfn want it on system
@@ -315,12 +312,11 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 	 *  So memblock_x86_reserve_range here, hope we don't run out
 	 *  of that array
 	 */
-	memblock_x86_reserve_range(node_pa, node_pa + ((u64)size << PAGE_SHIFT),
-				   "KVA RAM");
+	memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
 
 	node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
 
-	return size;
+	return size >> PAGE_SHIFT;
 }
 
 static void init_remap_allocator(int nid)
-- 
cgit v1.2.3


From 7210cf9217937e470a9acbc113a590f476b9c047 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:53 +0200
Subject: x86-32, numa: Calculate remap size in common code

Only pgdat and memmap use remap area and there isn't much benefit in
allowing per-node override.  In addition, the use of node_remap_size[]
is confusing in that it contains number of bytes before remap
initialization and then number of pages afterwards.

Move remap size calculation for memap from specific NUMA config
implementations to init_alloc_remap() and make node_remap_size[]
static.

The only behavior difference is that, before this patch, numaq_32
didn't consider max_pfn when calculating the memmap size but it's
enforced after this patch, which is the right thing to do.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-8-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/topology.h |  1 -
 arch/x86/kernel/apic/numaq_32.c |  4 ----
 arch/x86/mm/numa_32.c           | 10 ++++------
 arch/x86/mm/srat_32.c           |  1 -
 4 files changed, 4 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 910a7084f7f2..8dba76972fd7 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -95,7 +95,6 @@ extern void setup_node_to_cpumask_map(void);
 #ifdef CONFIG_X86_32
 extern unsigned long node_start_pfn[];
 extern unsigned long node_end_pfn[];
-extern unsigned long node_remap_size[];
 #define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
 
 # define SD_CACHE_NICE_TRIES	1
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 6273eee5134b..0aced70815f0 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -93,10 +93,6 @@ static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
 						node_end_pfn[node]);
 
 	memory_present(node, node_start_pfn[node], node_end_pfn[node]);
-
-	node_remap_size[node] = node_memmap_size_bytes(node,
-					node_start_pfn[node],
-					node_end_pfn[node]);
 }
 
 /*
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 99310d26fe34..9a7336550f0d 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -104,7 +104,7 @@ extern unsigned long highend_pfn, highstart_pfn;
 
 #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
 
-unsigned long node_remap_size[MAX_NUMNODES];
+static unsigned long node_remap_size[MAX_NUMNODES];
 static void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
@@ -129,7 +129,6 @@ int __init get_memcfg_numa_flat(void)
 	node_end_pfn[0] = max_pfn;
 	memblock_x86_register_active_regions(0, 0, max_pfn);
 	memory_present(0, 0, max_pfn);
-	node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
 
         /* Indicate there is one node available. */
 	nodes_clear(node_online_map);
@@ -282,11 +281,10 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 	if (node_end_pfn[nid] > max_pfn)
 		node_end_pfn[nid] = max_pfn;
 
-	/* ensure the remap includes space for the pgdat. */
-	size = node_remap_size[nid];
+	/* calculate the necessary space aligned to large page size */
+	size = node_memmap_size_bytes(nid, node_start_pfn[nid],
+				      min(node_end_pfn[nid], max_pfn));
 	size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
-
-	/* align to large page */
 	size = ALIGN(size, LARGE_PAGE_BYTES);
 
 	node_pa = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT,
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 48651c6f657d..1b9e82c96dc5 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -276,7 +276,6 @@ int __init get_memcfg_from_srat(void)
 		unsigned long end = min(node_end_pfn[nid], max_pfn);
 
 		memory_present(nid, start, end);
-		node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
 	}
 	return 1;
 out_fail:
-- 
cgit v1.2.3


From 82044c328d6f6b22882c2a936e487e6d2240817a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:54 +0200
Subject: x86-32, numa: Make init_alloc_remap() less panicky

Remap allocator failure isn't fatal.  The callers are required to fall
back to regular early memory allocation mechanisms on failure anyway,
so there's no reason to panic on remap init failure.  Whining and
returning are enough.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-9-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 9a7336550f0d..c127543372f5 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -290,8 +290,11 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 	node_pa = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT,
 					 (u64)node_end_pfn[nid] << PAGE_SHIFT,
 					 size, LARGE_PAGE_BYTES);
-	if (node_pa == MEMBLOCK_ERROR)
-		panic("Can not get kva ram\n");
+	if (node_pa == MEMBLOCK_ERROR) {
+		pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
+			   size, nid);
+		return 0;
+	}
 
 	node_remap_size[nid] = size >> PAGE_SHIFT;
 	node_remap_offset[nid] = offset;
-- 
cgit v1.2.3


From 0e9f93c1c04c8ab10cc564df54a7ad0f83c67796 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:55 +0200
Subject: x86-32, numa: Move lowmem address space reservation to
 init_alloc_remap()

Remap alloc init is done in the following stages.

1. init_alloc_remap() calculates how much memory is necessary for each
   node and reserves node local memory.

2. initmem_init() collects how much each node needs and reserves a
   single contiguous lowmem area which can contain all.

3. init_remap_allocator() initializes allocator parameters from the
   determined lowmem address and per-node offsets.

4. Actual remap happens.

There is no reason for the lowmem remap area to be reserved as a
single contiguous area at one go.  They don't interact with each other
and the memblock allocator will put them side-by-side anyway.

This patch breaks up the single lowmem address reservation and put
per-node lowmem address reservation into init_alloc_remap() and
initializes allocator parameters directly in the function as all the
addresses are determined there.  This merges steps 2 and 3 into 1.

While at it, remove now largely irrelevant comments in
init_alloc_remap().

This change causes the following behavior changes.

* Remap lowmem areas are allocated in smaller per-node chunks.

* Remap lowmem area reservation failure fail future remap allocations
  instead of panicking.

* Remap allocator initialization is less verbose.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-10-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 82 ++++++++++++++++-----------------------------------
 1 file changed, 25 insertions(+), 57 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index c127543372f5..12bb34c434ea 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -108,9 +108,6 @@ static unsigned long node_remap_size[MAX_NUMNODES];
 static void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-static unsigned long kva_start_pfn;
-static unsigned long kva_pages;
-
 int __cpuinit numa_cpu_node(int cpu)
 {
 	return apic->x86_32_numa_cpu_node(cpu);
@@ -266,7 +263,8 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 {
 	unsigned long size;
-	u64 node_pa;
+	u64 node_pa, remap_pa;
+	void *remap_va;
 
 	/*
 	 * The acpi/srat node info can show hot-add memroy zones where
@@ -287,6 +285,7 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 	size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
 	size = ALIGN(size, LARGE_PAGE_BYTES);
 
+	/* allocate node memory and the lowmem remap area */
 	node_pa = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT,
 					 (u64)node_end_pfn[nid] << PAGE_SHIFT,
 					 size, LARGE_PAGE_BYTES);
@@ -295,45 +294,35 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 			   size, nid);
 		return 0;
 	}
+	memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
+
+	remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
+					  max_low_pfn << PAGE_SHIFT,
+					  size, LARGE_PAGE_BYTES);
+	if (remap_pa == MEMBLOCK_ERROR) {
+		pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
+			   size, nid);
+		memblock_x86_free_range(node_pa, node_pa + size);
+		return 0;
+	}
+	memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
+	remap_va = phys_to_virt(remap_pa);
 
+	/* initialize remap allocator parameters */
+	node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
 	node_remap_size[nid] = size >> PAGE_SHIFT;
 	node_remap_offset[nid] = offset;
-	printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of node %d at %llx\n",
-	       size >> PAGE_SHIFT, nid, node_pa >> PAGE_SHIFT);
 
-	/*
-	 *  prevent kva address below max_low_pfn want it on system
-	 *  with less memory later.
-	 *  layout will be: KVA address , KVA RAM
-	 *
-	 *  we are supposed to only record the one less then
-	 *  max_low_pfn but we could have some hole in high memory,
-	 *  and it will only check page_is_ram(pfn) &&
-	 *  !page_is_reserved_early(pfn) to decide to use it as free.
-	 *  So memblock_x86_reserve_range here, hope we don't run out
-	 *  of that array
-	 */
-	memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
+	node_remap_start_vaddr[nid] = remap_va;
+	node_remap_end_vaddr[nid] = remap_va + size;
+	node_remap_alloc_vaddr[nid] = remap_va + ALIGN(sizeof(pg_data_t), PAGE_SIZE);
 
-	node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
+	printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
+	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
 
 	return size >> PAGE_SHIFT;
 }
 
-static void init_remap_allocator(int nid)
-{
-	node_remap_start_vaddr[nid] = pfn_to_kaddr(
-			kva_start_pfn + node_remap_offset[nid]);
-	node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
-		(node_remap_size[nid] * PAGE_SIZE);
-	node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
-		ALIGN(sizeof(pg_data_t), PAGE_SIZE);
-
-	printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid,
-		(ulong) node_remap_start_vaddr[nid],
-		(ulong) node_remap_end_vaddr[nid]);
-}
-
 void __init initmem_init(void)
 {
 	unsigned long reserve_pages = 0;
@@ -352,25 +341,7 @@ void __init initmem_init(void)
 
 	for_each_online_node(nid)
 		reserve_pages += init_alloc_remap(nid, reserve_pages);
-	kva_pages = roundup(reserve_pages, PTRS_PER_PTE);
-	printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
-			reserve_pages);
-
-	kva_start_pfn = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
-				max_low_pfn << PAGE_SHIFT,
-				kva_pages << PAGE_SHIFT,
-				PTRS_PER_PTE << PAGE_SHIFT) >> PAGE_SHIFT;
-	if (kva_start_pfn == MEMBLOCK_ERROR)
-		panic("Can not get kva space\n");
-
-	printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n",
-		kva_start_pfn, max_low_pfn);
-	printk(KERN_INFO "max_pfn = %lx\n", max_pfn);
-
-	/* avoid clash with initrd */
-	memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT,
-		      (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
-		     "KVA PG");
+
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
@@ -390,11 +361,8 @@ void __init initmem_init(void)
 
 	printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(max_low_pfn));
-	for_each_online_node(nid) {
-		init_remap_allocator(nid);
-
+	for_each_online_node(nid)
 		allocate_pgdat(nid);
-	}
 	remap_numa_kva();
 
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
-- 
cgit v1.2.3


From 2a286344f06d6341740b284494379373e87648f7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:56 +0200
Subject: x86-32, numa: Move remapping for remap allocator into
 init_alloc_remap()

There's no reason to perform the actual remapping separately.
Collapse remap_numa_kva() into init_alloc_remap() and, while at it,
make it less verbose.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-11-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 29 +++++++----------------------
 1 file changed, 7 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 12bb34c434ea..53ec13a17b9a 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -205,26 +205,6 @@ void *alloc_remap(int nid, unsigned long size)
 	return allocation;
 }
 
-static void __init remap_numa_kva(void)
-{
-	void *vaddr;
-	unsigned long pfn;
-	int node;
-
-	for_each_online_node(node) {
-		printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
-		for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
-			vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
-			printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
-				(unsigned long)vaddr,
-				node_remap_start_pfn[node] + pfn);
-			set_pmd_pfn((ulong) vaddr, 
-				node_remap_start_pfn[node] + pfn, 
-				PAGE_KERNEL_LARGE);
-		}
-	}
-}
-
 #ifdef CONFIG_HIBERNATION
 /**
  * resume_map_numa_kva - add KVA mapping to the temporary page tables created
@@ -262,7 +242,7 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 
 static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 {
-	unsigned long size;
+	unsigned long size, pfn;
 	u64 node_pa, remap_pa;
 	void *remap_va;
 
@@ -308,6 +288,12 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 	memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
 	remap_va = phys_to_virt(remap_pa);
 
+	/* perform actual remap */
+	for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE)
+		set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT),
+			    (node_pa >> PAGE_SHIFT) + pfn,
+			    PAGE_KERNEL_LARGE);
+
 	/* initialize remap allocator parameters */
 	node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
 	node_remap_size[nid] = size >> PAGE_SHIFT;
@@ -363,7 +349,6 @@ void __init initmem_init(void)
 			(ulong) pfn_to_kaddr(max_low_pfn));
 	for_each_online_node(nid)
 		allocate_pgdat(nid);
-	remap_numa_kva();
 
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
-- 
cgit v1.2.3


From b2e3e4fa3eee752b893687783f2a427106c93423 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:57 +0200
Subject: x86-32, numa: Make pgdat allocation use alloc_remap()

pgdat allocation is handled differnetly from other remap allocations -
it's reserved during initialization.  There's no reason to handle this
any differnetly.  Remap allocator is initialized for every node and if
init failed the allocation will fail and pgdat allocation can fall
back to generic code like anyone else.

Remove special init-time pgdat reservation and make allocate_pgdat()
use alloc_remap() like everyone else.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-12-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 53ec13a17b9a..0184a9f5a345 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -160,9 +160,8 @@ static void __init allocate_pgdat(int nid)
 {
 	char buf[16];
 
-	if (node_has_online_mem(nid) && node_remap_start_vaddr[nid])
-		NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
-	else {
+	NODE_DATA(nid) = alloc_remap(nid, ALIGN(sizeof(pg_data_t), PAGE_SIZE));
+	if (!NODE_DATA(nid)) {
 		unsigned long pgdat_phys;
 		pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT,
 				 max_pfn_mapped<<PAGE_SHIFT,
@@ -301,7 +300,7 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 
 	node_remap_start_vaddr[nid] = remap_va;
 	node_remap_end_vaddr[nid] = remap_va + size;
-	node_remap_alloc_vaddr[nid] = remap_va + ALIGN(sizeof(pg_data_t), PAGE_SIZE);
+	node_remap_alloc_vaddr[nid] = remap_va;
 
 	printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
 	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
-- 
cgit v1.2.3


From 1d85b61baf0334dd6bb88261bec42b808204d694 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:58 +0200
Subject: x86-32, numa: Remove now useless node_remap_offset[]

With lowmem address reservation moved into init_alloc_remap(),
node_remap_offset[] is no longer useful.  Remove it and related offset
handling code.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-13-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 0184a9f5a345..960ea7bc0ac7 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -187,7 +187,6 @@ static void __init allocate_pgdat(int nid)
 static unsigned long node_remap_start_pfn[MAX_NUMNODES];
 static void *node_remap_end_vaddr[MAX_NUMNODES];
 static void *node_remap_alloc_vaddr[MAX_NUMNODES];
-static unsigned long node_remap_offset[MAX_NUMNODES];
 
 void *alloc_remap(int nid, unsigned long size)
 {
@@ -239,7 +238,7 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 }
 #endif
 
-static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
+static __init void init_alloc_remap(int nid)
 {
 	unsigned long size, pfn;
 	u64 node_pa, remap_pa;
@@ -252,9 +251,9 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 	printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
 	       nid, node_start_pfn[nid], node_end_pfn[nid]);
 	if (node_start_pfn[nid] > max_pfn)
-		return 0;
+		return;
 	if (!node_end_pfn[nid])
-		return 0;
+		return;
 	if (node_end_pfn[nid] > max_pfn)
 		node_end_pfn[nid] = max_pfn;
 
@@ -271,7 +270,7 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 	if (node_pa == MEMBLOCK_ERROR) {
 		pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
 			   size, nid);
-		return 0;
+		return;
 	}
 	memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
 
@@ -282,7 +281,7 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 		pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
 			   size, nid);
 		memblock_x86_free_range(node_pa, node_pa + size);
-		return 0;
+		return;
 	}
 	memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
 	remap_va = phys_to_virt(remap_pa);
@@ -296,7 +295,6 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 	/* initialize remap allocator parameters */
 	node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
 	node_remap_size[nid] = size >> PAGE_SHIFT;
-	node_remap_offset[nid] = offset;
 
 	node_remap_start_vaddr[nid] = remap_va;
 	node_remap_end_vaddr[nid] = remap_va + size;
@@ -304,13 +302,10 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
 
 	printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
 	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
-
-	return size >> PAGE_SHIFT;
 }
 
 void __init initmem_init(void)
 {
-	unsigned long reserve_pages = 0;
 	int nid;
 
 	/*
@@ -325,7 +320,7 @@ void __init initmem_init(void)
 	numa_init_array();
 
 	for_each_online_node(nid)
-		reserve_pages += init_alloc_remap(nid, reserve_pages);
+		init_alloc_remap(nid);
 
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
-- 
cgit v1.2.3


From 198bd06bbfde2984027e91f64c55eb19a7034a27 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:23:59 +0200
Subject: x86-32, numa: Remove redundant node_remap_size[]

Remap area size can be determined from node_remap_start_vaddr[] and
node_remap_end_vaddr[] making node_remap_size[] redundant.  Remove it.

While at it, make resume_map_numa_kva() use @nr_pages for number of
pages instead of @size.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-14-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 960ea7bc0ac7..f325e6fab75b 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -104,7 +104,6 @@ extern unsigned long highend_pfn, highstart_pfn;
 
 #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
 
-static unsigned long node_remap_size[MAX_NUMNODES];
 static void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
@@ -214,15 +213,16 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 	int node;
 
 	for_each_online_node(node) {
-		unsigned long start_va, start_pfn, size, pfn;
+		unsigned long start_va, start_pfn, nr_pages, pfn;
 
 		start_va = (unsigned long)node_remap_start_vaddr[node];
 		start_pfn = node_remap_start_pfn[node];
-		size = node_remap_size[node];
+		nr_pages = (node_remap_end_vaddr[node] -
+			    node_remap_start_vaddr[node]) >> PAGE_SHIFT;
 
 		printk(KERN_DEBUG "%s: node %d\n", __func__, node);
 
-		for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
+		for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) {
 			unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
 			pgd_t *pgd = pgd_base + pgd_index(vaddr);
 			pud_t *pud = pud_offset(pgd, vaddr);
@@ -294,8 +294,6 @@ static __init void init_alloc_remap(int nid)
 
 	/* initialize remap allocator parameters */
 	node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
-	node_remap_size[nid] = size >> PAGE_SHIFT;
-
 	node_remap_start_vaddr[nid] = remap_va;
 	node_remap_end_vaddr[nid] = remap_va + size;
 	node_remap_alloc_vaddr[nid] = remap_va;
-- 
cgit v1.2.3


From 993ba1585cbb03fab012e41d1a5d24330a283b31 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 5 Apr 2011 00:24:00 +0200
Subject: x86-32, numa: Update remap allocator comments

Now that remap allocator is cleaned up, update comments such that they
are in docbook function description format and reflect the actual
implementation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-15-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 56 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index f325e6fab75b..c757c0a3b529 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -176,17 +176,31 @@ static void __init allocate_pgdat(int nid)
 }
 
 /*
- * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel
- * virtual address space (KVA) is reserved and portions of nodes are mapped
- * using it. This is to allow node-local memory to be allocated for
- * structures that would normally require ZONE_NORMAL. The memory is
- * allocated with alloc_remap() and callers should be prepared to allocate
- * from the bootmem allocator instead.
+ * Remap memory allocator
  */
 static unsigned long node_remap_start_pfn[MAX_NUMNODES];
 static void *node_remap_end_vaddr[MAX_NUMNODES];
 static void *node_remap_alloc_vaddr[MAX_NUMNODES];
 
+/**
+ * alloc_remap - Allocate remapped memory
+ * @nid: NUMA node to allocate memory from
+ * @size: The size of allocation
+ *
+ * Allocate @size bytes from the remap area of NUMA node @nid.  The
+ * size of the remap area is predetermined by init_alloc_remap() and
+ * only the callers considered there should call this function.  For
+ * more info, please read the comment on top of init_alloc_remap().
+ *
+ * The caller must be ready to handle allocation failure from this
+ * function and fall back to regular memory allocator in such cases.
+ *
+ * CONTEXT:
+ * Single CPU early boot context.
+ *
+ * RETURNS:
+ * Pointer to the allocated memory on success, %NULL on failure.
+ */
 void *alloc_remap(int nid, unsigned long size)
 {
 	void *allocation = node_remap_alloc_vaddr[nid];
@@ -238,6 +252,28 @@ void resume_map_numa_kva(pgd_t *pgd_base)
 }
 #endif
 
+/**
+ * init_alloc_remap - Initialize remap allocator for a NUMA node
+ * @nid: NUMA node to initizlie remap allocator for
+ *
+ * NUMA nodes may end up without any lowmem.  As allocating pgdat and
+ * memmap on a different node with lowmem is inefficient, a special
+ * remap allocator is implemented which can be used by alloc_remap().
+ *
+ * For each node, the amount of memory which will be necessary for
+ * pgdat and memmap is calculated and two memory areas of the size are
+ * allocated - one in the node and the other in lowmem; then, the area
+ * in the node is remapped to the lowmem area.
+ *
+ * As pgdat and memmap must be allocated in lowmem anyway, this
+ * doesn't waste lowmem address space; however, the actual lowmem
+ * which gets remapped over is wasted.  The amount shouldn't be
+ * problematic on machines this feature will be used.
+ *
+ * Initialization failure isn't fatal.  alloc_remap() is used
+ * opportunistically and the callers will fall back to other memory
+ * allocation mechanisms on failure.
+ */
 static __init void init_alloc_remap(int nid)
 {
 	unsigned long size, pfn;
@@ -306,14 +342,6 @@ void __init initmem_init(void)
 {
 	int nid;
 
-	/*
-	 * When mapping a NUMA machine we allocate the node_mem_map arrays
-	 * from node local memory.  They are then mapped directly into KVA
-	 * between zone normal and vmalloc space.  Calculate the size of
-	 * this space and use it to adjust the boundary between ZONE_NORMAL
-	 * and ZONE_HIGHMEM.
-	 */
-
 	get_memcfg_numa();
 	numa_init_array();
 
-- 
cgit v1.2.3


From 815b33fdc279d34ab40a8bfe1866623a4cc5669b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 6 Apr 2011 17:26:49 +0200
Subject: x86/amd-iommu: Cleanup completion-wait handling

This patch cleans up the implementation of completion-wait
command sending. It also switches the completion indicator
from the MMIO bit to a memory store which can be checked
without IOMMU locking.
As a side effect this patch makes the __iommu_queue_command
function obsolete and so it is removed too.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 107 ++++++++++++--------------------------------
 1 file changed, 28 insertions(+), 79 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f8ec28ea3314..073c64b1994b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -25,6 +25,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/iommu-helper.h>
 #include <linux/iommu.h>
+#include <linux/delay.h>
 #include <asm/proto.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -34,7 +35,7 @@
 
 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
 
-#define EXIT_LOOP_COUNT 10000000
+#define LOOP_TIMEOUT	100000
 
 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 
@@ -383,10 +384,14 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
  *
  ****************************************************************************/
 
-static void build_completion_wait(struct iommu_cmd *cmd)
+static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
 {
+	WARN_ON(address & 0x7ULL);
+
 	memset(cmd, 0, sizeof(*cmd));
-	cmd->data[0] = CMD_COMPL_WAIT_INT_MASK;
+	cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK;
+	cmd->data[1] = upper_32_bits(__pa(address));
+	cmd->data[2] = 1;
 	CMD_SET_TYPE(cmd, CMD_COMPL_WAIT);
 }
 
@@ -432,12 +437,14 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command. Must be called with iommu->lock held.
  */
-static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
+static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 {
+	unsigned long flags;
 	u32 tail, head;
 	u8 *target;
 
 	WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
+	spin_lock_irqsave(&iommu->lock, flags);
 	tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 	target = iommu->cmd_buf + tail;
 	memcpy_toio(target, cmd, sizeof(*cmd));
@@ -446,99 +453,41 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 	if (tail == head)
 		return -ENOMEM;
 	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
-
-	return 0;
-}
-
-/*
- * General queuing function for commands. Takes iommu->lock and calls
- * __iommu_queue_command().
- */
-static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
-{
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&iommu->lock, flags);
-	ret = __iommu_queue_command(iommu, cmd);
-	if (!ret)
-		iommu->need_sync = true;
+	iommu->need_sync = true;
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	return ret;
-}
-
-/*
- * This function waits until an IOMMU has completed a completion
- * wait command
- */
-static void __iommu_wait_for_completion(struct amd_iommu *iommu)
-{
-	int ready = 0;
-	unsigned status = 0;
-	unsigned long i = 0;
-
-	INC_STATS_COUNTER(compl_wait);
-
-	while (!ready && (i < EXIT_LOOP_COUNT)) {
-		++i;
-		/* wait for the bit to become one */
-		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
-		ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
-	}
-
-	/* set bit back to zero */
-	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
-	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
-	if (unlikely(i == EXIT_LOOP_COUNT))
-		iommu->reset_in_progress = true;
+	return 0;
 }
 
 /*
  * This function queues a completion wait command into the command
  * buffer of an IOMMU
  */
-static int __iommu_completion_wait(struct amd_iommu *iommu)
-{
-	struct iommu_cmd cmd;
-
-	build_completion_wait(&cmd);
-
-	 return __iommu_queue_command(iommu, &cmd);
-}
-
-/*
- * This function is called whenever we need to ensure that the IOMMU has
- * completed execution of all commands we sent. It sends a
- * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
- * us about that by writing a value to a physical address we pass with
- * the command.
- */
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
-	int ret = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&iommu->lock, flags);
+	struct iommu_cmd cmd;
+	volatile u64 sem = 0;
+	int ret, i = 0;
 
 	if (!iommu->need_sync)
-		goto out;
-
-	ret = __iommu_completion_wait(iommu);
+		return 0;
 
-	iommu->need_sync = false;
+	build_completion_wait(&cmd, (u64)&sem);
 
+	ret = iommu_queue_command(iommu, &cmd);
 	if (ret)
-		goto out;
-
-	__iommu_wait_for_completion(iommu);
+		return ret;
 
-out:
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	while (sem == 0 && i < LOOP_TIMEOUT) {
+		udelay(1);
+		i += 1;
+	}
 
-	if (iommu->reset_in_progress)
+	if (i == LOOP_TIMEOUT) {
+		pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
+		iommu->reset_in_progress = true;
 		reset_iommu_command_buffer(iommu);
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From 61985a040f17c03b09a2772508ee02729571365b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 6 Apr 2011 17:46:49 +0200
Subject: x86/amd-iommu: Remove command buffer resetting logic

The logic to reset the command buffer caused more problems
than it actually helped. The logic jumped in when the IOMMU
hardware doesn't execute commands anymore but the reasons
for this are usually not fixed by just resetting the command
buffer. So the code can be removed to reduce complexity.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h |  3 ---
 arch/x86/kernel/amd_iommu.c            | 20 +-------------------
 2 files changed, 1 insertion(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index e3509fc303bf..878ae008eb04 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -409,9 +409,6 @@ struct amd_iommu {
 	/* if one, we need to send a completion wait command */
 	bool need_sync;
 
-	/* becomes true if a command buffer reset is running */
-	bool reset_in_progress;
-
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
 
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 073c64b1994b..0147c5c87aa8 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -58,7 +58,6 @@ struct iommu_cmd {
 	u32 data[4];
 };
 
-static void reset_iommu_command_buffer(struct amd_iommu *iommu);
 static void update_domain(struct protection_domain *domain);
 
 /****************************************************************************
@@ -323,8 +322,6 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 		break;
 	case EVENT_TYPE_ILL_CMD:
 		printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
-		iommu->reset_in_progress = true;
-		reset_iommu_command_buffer(iommu);
 		dump_command(address);
 		break;
 	case EVENT_TYPE_CMD_HARD_ERR:
@@ -485,8 +482,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 
 	if (i == LOOP_TIMEOUT) {
 		pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
-		iommu->reset_in_progress = true;
-		reset_iommu_command_buffer(iommu);
+		ret = -EIO;
 	}
 
 	return 0;
@@ -628,20 +624,6 @@ void amd_iommu_flush_all_domains(void)
 	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
 }
 
-static void reset_iommu_command_buffer(struct amd_iommu *iommu)
-{
-	pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
-
-	if (iommu->reset_in_progress)
-		panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
-
-	amd_iommu_reset_cmd_buffer(iommu);
-	amd_iommu_flush_all_devices();
-	amd_iommu_flush_all_domains();
-
-	iommu->reset_in_progress = false;
-}
-
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
-- 
cgit v1.2.3


From 17b124bf1463582005d662d4dd95f037ad863c57 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 6 Apr 2011 18:01:35 +0200
Subject: x86/amd-iommu: Rename iommu_flush* to domain_flush*

These functions all operate on protection domains and not on
singe IOMMUs. Represent that in their name.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 87 +++++++++++++++++++++++----------------------
 1 file changed, 44 insertions(+), 43 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0147c5c87aa8..9d66b2092ae1 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -488,22 +488,6 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	return 0;
 }
 
-static void iommu_flush_complete(struct protection_domain *domain)
-{
-	int i;
-
-	for (i = 0; i < amd_iommus_present; ++i) {
-		if (!domain->dev_iommu[i])
-			continue;
-
-		/*
-		 * Devices of this domain are behind this IOMMU
-		 * We need to wait for completion of all commands.
-		 */
-		iommu_completion_wait(amd_iommus[i]);
-	}
-}
-
 /*
  * Command send function for invalidating a device table entry
  */
@@ -526,8 +510,8 @@ static int iommu_flush_device(struct device *dev)
  * It invalidates a single PTE if the range to flush is within a single
  * page. Otherwise it flushes the whole TLB of the IOMMU.
  */
-static void __iommu_flush_pages(struct protection_domain *domain,
-				u64 address, size_t size, int pde)
+static void __domain_flush_pages(struct protection_domain *domain,
+				 u64 address, size_t size, int pde)
 {
 	struct iommu_cmd cmd;
 	int ret = 0, i;
@@ -548,29 +532,45 @@ static void __iommu_flush_pages(struct protection_domain *domain,
 	WARN_ON(ret);
 }
 
-static void iommu_flush_pages(struct protection_domain *domain,
-			     u64 address, size_t size)
+static void domain_flush_pages(struct protection_domain *domain,
+			       u64 address, size_t size)
 {
-	__iommu_flush_pages(domain, address, size, 0);
+	__domain_flush_pages(domain, address, size, 0);
 }
 
 /* Flush the whole IO/TLB for a given protection domain */
-static void iommu_flush_tlb(struct protection_domain *domain)
+static void domain_flush_tlb(struct protection_domain *domain)
 {
-	__iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
+	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
 }
 
 /* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void iommu_flush_tlb_pde(struct protection_domain *domain)
+static void domain_flush_tlb_pde(struct protection_domain *domain)
 {
-	__iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
+	__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
+}
+
+static void domain_flush_complete(struct protection_domain *domain)
+{
+	int i;
+
+	for (i = 0; i < amd_iommus_present; ++i) {
+		if (!domain->dev_iommu[i])
+			continue;
+
+		/*
+		 * Devices of this domain are behind this IOMMU
+		 * We need to wait for completion of all commands.
+		 */
+		iommu_completion_wait(amd_iommus[i]);
+	}
 }
 
 
 /*
  * This function flushes the DTEs for all devices in domain
  */
-static void iommu_flush_domain_devices(struct protection_domain *domain)
+static void domain_flush_devices(struct protection_domain *domain)
 {
 	struct iommu_dev_data *dev_data;
 	unsigned long flags;
@@ -591,8 +591,8 @@ static void iommu_flush_all_domain_devices(void)
 	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
 
 	list_for_each_entry(domain, &amd_iommu_pd_list, list) {
-		iommu_flush_domain_devices(domain);
-		iommu_flush_complete(domain);
+		domain_flush_devices(domain);
+		domain_flush_complete(domain);
 	}
 
 	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
@@ -616,8 +616,8 @@ void amd_iommu_flush_all_domains(void)
 
 	list_for_each_entry(domain, &amd_iommu_pd_list, list) {
 		spin_lock(&domain->lock);
-		iommu_flush_tlb_pde(domain);
-		iommu_flush_complete(domain);
+		domain_flush_tlb_pde(domain);
+		domain_flush_complete(domain);
 		spin_unlock(&domain->lock);
 	}
 
@@ -1480,7 +1480,7 @@ static int attach_device(struct device *dev,
 	 * left the caches in the IOMMU dirty. So we have to flush
 	 * here to evict all dirty stuff.
 	 */
-	iommu_flush_tlb_pde(domain);
+	domain_flush_tlb_pde(domain);
 
 	return ret;
 }
@@ -1693,8 +1693,9 @@ static void update_domain(struct protection_domain *domain)
 		return;
 
 	update_device_table(domain);
-	iommu_flush_domain_devices(domain);
-	iommu_flush_tlb_pde(domain);
+
+	domain_flush_devices(domain);
+	domain_flush_tlb_pde(domain);
 
 	domain->updated = false;
 }
@@ -1853,10 +1854,10 @@ retry:
 	ADD_STATS_COUNTER(alloced_io_mem, size);
 
 	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
-		iommu_flush_tlb(&dma_dom->domain);
+		domain_flush_tlb(&dma_dom->domain);
 		dma_dom->need_flush = false;
 	} else if (unlikely(amd_iommu_np_cache))
-		iommu_flush_pages(&dma_dom->domain, address, size);
+		domain_flush_pages(&dma_dom->domain, address, size);
 
 out:
 	return address;
@@ -1905,7 +1906,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
 	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
-		iommu_flush_pages(&dma_dom->domain, flush_addr, size);
+		domain_flush_pages(&dma_dom->domain, flush_addr, size);
 		dma_dom->need_flush = false;
 	}
 }
@@ -1941,7 +1942,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
 	if (addr == DMA_ERROR_CODE)
 		goto out;
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 out:
 	spin_unlock_irqrestore(&domain->lock, flags);
@@ -1968,7 +1969,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 
 	__unmap_single(domain->priv, dma_addr, size, dir);
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -2033,7 +2034,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 			goto unmap;
 	}
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 out:
 	spin_unlock_irqrestore(&domain->lock, flags);
@@ -2079,7 +2080,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 		s->dma_address = s->dma_length = 0;
 	}
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -2129,7 +2130,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
 		goto out_free;
 	}
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 
@@ -2161,7 +2162,7 @@ static void free_coherent(struct device *dev, size_t size,
 
 	__unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
 
-	iommu_flush_complete(domain);
+	domain_flush_complete(domain);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 
@@ -2471,7 +2472,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
 	unmap_size = iommu_unmap_page(domain, iova, page_size);
 	mutex_unlock(&domain->api_lock);
 
-	iommu_flush_tlb_pde(domain);
+	domain_flush_tlb_pde(domain);
 
 	return get_order(unmap_size);
 }
-- 
cgit v1.2.3


From ac0ea6e92b2227c86fe4f7f9eb429071d617a25d Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 6 Apr 2011 18:38:20 +0200
Subject: x86/amd-iommu: Improve handling of full command buffer

This patch improved the handling of commands when the IOMMU
command buffer is nearly full. In this case it issues an
completion wait command and waits until the IOMMU has
processed it before continuing queuing new commands.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 88 +++++++++++++++++++++++++++++++++------------
 1 file changed, 65 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 9d66b2092ae1..75c7f8c3fe12 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -381,6 +381,39 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
  *
  ****************************************************************************/
 
+static int wait_on_sem(volatile u64 *sem)
+{
+	int i = 0;
+
+	while (*sem == 0 && i < LOOP_TIMEOUT) {
+		udelay(1);
+		i += 1;
+	}
+
+	if (i == LOOP_TIMEOUT) {
+		pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void copy_cmd_to_buffer(struct amd_iommu *iommu,
+			       struct iommu_cmd *cmd,
+			       u32 tail)
+{
+	u8 *target;
+
+	target = iommu->cmd_buf + tail;
+	tail   = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
+
+	/* Copy command to buffer */
+	memcpy(target, cmd, sizeof(*cmd));
+
+	/* Tell the IOMMU about it */
+	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+}
+
 static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
 {
 	WARN_ON(address & 0x7ULL);
@@ -432,25 +465,44 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
 
 /*
  * Writes the command to the IOMMUs command buffer and informs the
- * hardware about the new command. Must be called with iommu->lock held.
+ * hardware about the new command.
  */
 static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 {
+	u32 left, tail, head, next_tail;
 	unsigned long flags;
-	u32 tail, head;
-	u8 *target;
 
 	WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
+
+again:
 	spin_lock_irqsave(&iommu->lock, flags);
-	tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
-	target = iommu->cmd_buf + tail;
-	memcpy_toio(target, cmd, sizeof(*cmd));
-	tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
-	head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
-	if (tail == head)
-		return -ENOMEM;
-	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+
+	head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+	tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+	next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
+	left      = (head - next_tail) % iommu->cmd_buf_size;
+
+	if (left <= 2) {
+		struct iommu_cmd sync_cmd;
+		volatile u64 sem = 0;
+		int ret;
+
+		build_completion_wait(&sync_cmd, (u64)&sem);
+		copy_cmd_to_buffer(iommu, &sync_cmd, tail);
+
+		spin_unlock_irqrestore(&iommu->lock, flags);
+
+		if ((ret = wait_on_sem(&sem)) != 0)
+			return ret;
+
+		goto again;
+	}
+
+	copy_cmd_to_buffer(iommu, cmd, tail);
+
+	/* We need to sync now to make sure all commands are processed */
 	iommu->need_sync = true;
+
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return 0;
@@ -464,7 +516,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 {
 	struct iommu_cmd cmd;
 	volatile u64 sem = 0;
-	int ret, i = 0;
+	int ret;
 
 	if (!iommu->need_sync)
 		return 0;
@@ -475,17 +527,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	if (ret)
 		return ret;
 
-	while (sem == 0 && i < LOOP_TIMEOUT) {
-		udelay(1);
-		i += 1;
-	}
-
-	if (i == LOOP_TIMEOUT) {
-		pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
-		ret = -EIO;
-	}
-
-	return 0;
+	return wait_on_sem(&sem);
 }
 
 /*
-- 
cgit v1.2.3


From d8c13085775c72e2d46edc54ed0c803c3a944ddb Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 6 Apr 2011 18:51:26 +0200
Subject: x86/amd-iommu: Rename iommu_flush_device

This function operates on a struct device, so give it a name
that represents that. As a side effect a new function is
introduced which operates on am iommu and a device-id. It
will be used again in a later patch.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 75c7f8c3fe12..3557f223f40b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -530,21 +530,27 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	return wait_on_sem(&sem);
 }
 
+static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
+{
+	struct iommu_cmd cmd;
+
+	build_inv_dte(&cmd, devid);
+
+	return iommu_queue_command(iommu, &cmd);
+}
+
 /*
  * Command send function for invalidating a device table entry
  */
-static int iommu_flush_device(struct device *dev)
+static int device_flush_dte(struct device *dev)
 {
 	struct amd_iommu *iommu;
-	struct iommu_cmd cmd;
 	u16 devid;
 
 	devid = get_device_id(dev);
 	iommu = amd_iommu_rlookup_table[devid];
 
-	build_inv_dte(&cmd, devid);
-
-	return iommu_queue_command(iommu, &cmd);
+	return iommu_flush_dte(iommu, devid);
 }
 
 /*
@@ -620,7 +626,7 @@ static void domain_flush_devices(struct protection_domain *domain)
 	spin_lock_irqsave(&domain->lock, flags);
 
 	list_for_each_entry(dev_data, &domain->dev_list, list)
-		iommu_flush_device(dev_data->dev);
+		device_flush_dte(dev_data->dev);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -1424,7 +1430,7 @@ static void do_attach(struct device *dev, struct protection_domain *domain)
 	domain->dev_cnt                 += 1;
 
 	/* Flush the DTE entry */
-	iommu_flush_device(dev);
+	device_flush_dte(dev);
 }
 
 static void do_detach(struct device *dev)
@@ -1447,7 +1453,7 @@ static void do_detach(struct device *dev)
 	clear_dte_entry(devid);
 
 	/* Flush the DTE entry */
-	iommu_flush_device(dev);
+	device_flush_dte(dev);
 }
 
 /*
@@ -1663,7 +1669,7 @@ static int device_change_notifier(struct notifier_block *nb,
 		goto out;
 	}
 
-	iommu_flush_device(dev);
+	device_flush_dte(dev);
 	iommu_completion_wait(iommu);
 
 out:
@@ -2448,7 +2454,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
 	if (!iommu)
 		return;
 
-	iommu_flush_device(dev);
+	device_flush_dte(dev);
 	iommu_completion_wait(iommu);
 }
 
-- 
cgit v1.2.3


From 7d0c5cc5be73f7ce26fdcca7b8ec2203f661eb93 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 7 Apr 2011 08:16:10 +0200
Subject: x86/amd-iommu: Flush all internal TLBs when IOMMUs are enabled

The old code only flushed a DTE or a domain TLB before it is
actually used by the IOMMU driver. While this is efficient
and works when done right it is more likely to introduce new
bugs when changing code (which happened in the past).
This patch adds code to flush all DTEs and all domain TLBs
in each IOMMU right after it is enabled (at boot and after
resume). This reduces the complexity of the driver and makes
it less likely to introduce stale-TLB bugs in the future.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_proto.h |  2 -
 arch/x86/kernel/amd_iommu.c            | 75 +++++++++++++++-------------------
 arch/x86/kernel/amd_iommu_init.c       | 11 ++++-
 3 files changed, 43 insertions(+), 45 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index 916bc8111a01..1223c0fe03f5 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -24,8 +24,6 @@ struct amd_iommu;
 extern int amd_iommu_init_dma_ops(void);
 extern int amd_iommu_init_passthrough(void);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
-extern void amd_iommu_flush_all_domains(void);
-extern void amd_iommu_flush_all_devices(void);
 extern void amd_iommu_apply_erratum_63(u16 devid);
 extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
 extern int amd_iommu_init_devices(void);
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 3557f223f40b..bcf58ea55cfa 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -539,6 +539,40 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
 	return iommu_queue_command(iommu, &cmd);
 }
 
+static void iommu_flush_dte_all(struct amd_iommu *iommu)
+{
+	u32 devid;
+
+	for (devid = 0; devid <= 0xffff; ++devid)
+		iommu_flush_dte(iommu, devid);
+
+	iommu_completion_wait(iommu);
+}
+
+/*
+ * This function uses heavy locking and may disable irqs for some time. But
+ * this is no issue because it is only called during resume.
+ */
+static void iommu_flush_tlb_all(struct amd_iommu *iommu)
+{
+	u32 dom_id;
+
+	for (dom_id = 0; dom_id <= 0xffff; ++dom_id) {
+		struct iommu_cmd cmd;
+		build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+				      dom_id, 1);
+		iommu_queue_command(iommu, &cmd);
+	}
+
+	iommu_completion_wait(iommu);
+}
+
+void iommu_flush_all_caches(struct amd_iommu *iommu)
+{
+	iommu_flush_dte_all(iommu);
+	iommu_flush_tlb_all(iommu);
+}
+
 /*
  * Command send function for invalidating a device table entry
  */
@@ -631,47 +665,6 @@ static void domain_flush_devices(struct protection_domain *domain)
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
-static void iommu_flush_all_domain_devices(void)
-{
-	struct protection_domain *domain;
-	unsigned long flags;
-
-	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-
-	list_for_each_entry(domain, &amd_iommu_pd_list, list) {
-		domain_flush_devices(domain);
-		domain_flush_complete(domain);
-	}
-
-	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
-void amd_iommu_flush_all_devices(void)
-{
-	iommu_flush_all_domain_devices();
-}
-
-/*
- * This function uses heavy locking and may disable irqs for some time. But
- * this is no issue because it is only called during resume.
- */
-void amd_iommu_flush_all_domains(void)
-{
-	struct protection_domain *domain;
-	unsigned long flags;
-
-	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
-
-	list_for_each_entry(domain, &amd_iommu_pd_list, list) {
-		spin_lock(&domain->lock);
-		domain_flush_tlb_pde(domain);
-		domain_flush_complete(domain);
-		spin_unlock(&domain->lock);
-	}
-
-	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-}
-
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 246d727b65b7..8848dda808e2 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -180,6 +180,12 @@ static u32 dev_table_size;	/* size of the device table */
 static u32 alias_table_size;	/* size of the alias table */
 static u32 rlookup_table_size;	/* size if the rlookup table */
 
+/*
+ * This function flushes all internal caches of
+ * the IOMMU used by this driver.
+ */
+extern void iommu_flush_all_caches(struct amd_iommu *iommu);
+
 static inline void update_last_devid(u16 devid)
 {
 	if (devid > amd_iommu_last_bdf)
@@ -1244,6 +1250,7 @@ static void enable_iommus(void)
 		iommu_set_exclusion_range(iommu);
 		iommu_init_msi(iommu);
 		iommu_enable(iommu);
+		iommu_flush_all_caches(iommu);
 	}
 }
 
@@ -1274,8 +1281,8 @@ static void amd_iommu_resume(void)
 	 * we have to flush after the IOMMUs are enabled because a
 	 * disabled IOMMU will never execute the commands we send
 	 */
-	amd_iommu_flush_all_devices();
-	amd_iommu_flush_all_domains();
+	for_each_iommu(iommu)
+		iommu_flush_all_caches(iommu);
 }
 
 static int amd_iommu_suspend(void)
-- 
cgit v1.2.3


From ff00c2a5b2308219ab952d01e5bb17b9ea772d7e Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Thu, 7 Apr 2011 11:05:56 -0700
Subject: [IA64] fix build warning in arch/ia64/oprofile/backtrace.c

arch/ia64/oprofile/backtrace.c:63: warning: comparison of distinct pointer types lacks a cast

Comparing a "u64 *" with an "unsigned long *". Make them match.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/oprofile/backtrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c
index 5cdd7e4a597c..f7b798993cea 100644
--- a/arch/ia64/oprofile/backtrace.c
+++ b/arch/ia64/oprofile/backtrace.c
@@ -29,7 +29,7 @@ typedef struct
 	unsigned int depth;
 	struct pt_regs *regs;
 	struct unw_frame_info frame;
-	u64 *prev_pfs_loc;	/* state for WAR for old spinlock ool code */
+	unsigned long *prev_pfs_loc;	/* state for WAR for old spinlock ool code */
 } ia64_backtrace_t;
 
 /* Returns non-zero if the PC is in the Interrupt Vector Table */
-- 
cgit v1.2.3


From ce9c99af8d4b3b0b9463654fd252d8640d804dc3 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ijc@hellion.org.uk>
Date: Fri, 8 Apr 2011 07:42:29 +0100
Subject: x86, cpu: Move AMD Elan Kconfig under "Processor family"

Currently the option resides under X86_EXTENDED_PLATFORM due to historical
nonstandard A20M# handling. However that is no longer the case and so Elan can
be treated as part of the standard processor choice Kconfig option.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Link: http://lkml.kernel.org/r/1302245177.31620.47.camel@localhost.localdomain
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/Kconfig                    | 11 -----------
 arch/x86/Kconfig.cpu                | 16 ++++++++++------
 arch/x86/Makefile_32.cpu            |  2 +-
 arch/x86/include/asm/module.h       |  2 +-
 arch/x86/kernel/cpu/cpufreq/Kconfig |  4 ++--
 5 files changed, 14 insertions(+), 21 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a95bfd..f00a3f377cc0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -365,17 +365,6 @@ config X86_UV
 # Following is an alphabetically sorted list of 32 bit extended platforms
 # Please maintain the alphabetic order if and when there are additions
 
-config X86_ELAN
-	bool "AMD Elan"
-	depends on X86_32
-	depends on X86_EXTENDED_PLATFORM
-	---help---
-	  Select this for an AMD Elan processor.
-
-	  Do not use this option for K6/Athlon/Opteron processors!
-
-	  If unsure, choose "PC-compatible" instead.
-
 config X86_INTEL_CE
 	bool "CE4100 TV platform"
 	depends on PCI
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index d161e939df62..6a7cfdf8ff69 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -1,6 +1,4 @@
 # Put here option for CPU selection and depending optimization
-if !X86_ELAN
-
 choice
 	prompt "Processor family"
 	default M686 if X86_32
@@ -203,6 +201,14 @@ config MWINCHIP3D
 	  stores for this CPU, which can increase performance of some
 	  operations.
 
+config MELAN
+	bool "AMD Elan"
+	depends on X86_32
+	---help---
+	  Select this for an AMD Elan processor.
+
+	  Do not use this option for K6/Athlon/Opteron processors!
+
 config MGEODEGX1
 	bool "GeodeGX1"
 	depends on X86_32
@@ -292,8 +298,6 @@ config X86_GENERIC
 	  This is really intended for distributors who need more
 	  generic optimizations.
 
-endif
-
 #
 # Define implied options from the CPU selection here
 config X86_INTERNODE_CACHE_SHIFT
@@ -312,7 +316,7 @@ config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC
 	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
-	default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
+	default "4" if MELAN || M486 || M386 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
 config X86_XADD
@@ -358,7 +362,7 @@ config X86_POPAD_OK
 
 config X86_ALIGNMENT_16
 	def_bool y
-	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
+	depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
 
 config X86_INTEL_USERCOPY
 	def_bool y
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index f2ee1abb1df9..86cee7b749e1 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -37,7 +37,7 @@ cflags-$(CONFIG_MATOM)		+= $(call cc-option,-march=atom,$(call cc-option,-march=
 	$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
 
 # AMD Elan support
-cflags-$(CONFIG_X86_ELAN)	+= -march=i486
+cflags-$(CONFIG_MELAN)		+= -march=i486
 
 # Geode GX1 support
 cflags-$(CONFIG_MGEODEGX1)	+= -march=pentium-mmx
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 67763c5d8b4e..9eae7752ae9b 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -35,7 +35,7 @@
 #define MODULE_PROC_FAMILY "K7 "
 #elif defined CONFIG_MK8
 #define MODULE_PROC_FAMILY "K8 "
-#elif defined CONFIG_X86_ELAN
+#elif defined CONFIG_MELAN
 #define MODULE_PROC_FAMILY "ELAN "
 #elif defined CONFIG_MCRUSOE
 #define MODULE_PROC_FAMILY "CRUSOE "
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 870e6cc6ad28..0ab9b22557c4 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -43,7 +43,7 @@ config X86_ACPI_CPUFREQ
 config ELAN_CPUFREQ
 	tristate "AMD Elan SC400 and SC410"
 	select CPU_FREQ_TABLE
-	depends on X86_ELAN
+	depends on MELAN
 	---help---
 	  This adds the CPUFreq driver for AMD Elan SC400 and SC410
 	  processors.
@@ -59,7 +59,7 @@ config ELAN_CPUFREQ
 config SC520_CPUFREQ
 	tristate "AMD Elan SC520"
 	select CPU_FREQ_TABLE
-	depends on X86_ELAN
+	depends on MELAN
 	---help---
 	  This adds the CPUFreq driver for AMD Elan SC520 processor.
 
-- 
cgit v1.2.3


From 9844b4e5dd1932e175a23d84ce09702bdf4b5689 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 5 Apr 2011 09:22:56 +0200
Subject: x86/amd-iommu: Select PCI_IOV with AMD IOMMU driver

In order to support ATS in the AMD IOMMU driver this patch
makes sure that the generic support for ATS is compiled in.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a95bfd..8cc29da2d689 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -690,6 +690,7 @@ config AMD_IOMMU
 	bool "AMD IOMMU support"
 	select SWIOTLB
 	select PCI_MSI
+	select PCI_IOV
 	depends on X86_64 && PCI && ACPI
 	---help---
 	  With this option you can enable support for AMD IOMMU hardware in
-- 
cgit v1.2.3


From cb41ed85efa01e633388314c03a4f3004c6b783b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 5 Apr 2011 11:00:53 +0200
Subject: x86/amd-iommu: Flush device IOTLB if ATS is enabled

This patch implements a function to flush the IOTLB on
devices supporting ATS and makes sure that this TLB is also
flushed if necessary.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h |  3 +-
 arch/x86/kernel/amd_iommu.c            | 74 +++++++++++++++++++++++++++++++++-
 2 files changed, 75 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 878ae008eb04..f5d184e7d5be 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -113,7 +113,8 @@
 /* command specific defines */
 #define CMD_COMPL_WAIT          0x01
 #define CMD_INV_DEV_ENTRY       0x02
-#define CMD_INV_IOMMU_PAGES     0x03
+#define CMD_INV_IOMMU_PAGES	0x03
+#define CMD_INV_IOTLB_PAGES	0x04
 
 #define CMD_COMPL_WAIT_STORE_MASK	0x01
 #define CMD_COMPL_WAIT_INT_MASK		0x02
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index bcf58ea55cfa..f3ce4338dade 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/pci.h>
+#include <linux/pci-ats.h>
 #include <linux/bitmap.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
@@ -463,6 +464,37 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
 		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
 }
 
+static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
+				  u64 address, size_t size)
+{
+	u64 pages;
+	int s;
+
+	pages = iommu_num_pages(address, size, PAGE_SIZE);
+	s     = 0;
+
+	if (pages > 1) {
+		/*
+		 * If we have to flush more than one page, flush all
+		 * TLB entries for this domain
+		 */
+		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+		s = 1;
+	}
+
+	address &= PAGE_MASK;
+
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->data[0]  = devid;
+	cmd->data[0] |= (qdep & 0xff) << 24;
+	cmd->data[1]  = devid;
+	cmd->data[2]  = lower_32_bits(address);
+	cmd->data[3]  = upper_32_bits(address);
+	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
+	if (s)
+		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+}
+
 /*
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command.
@@ -573,18 +605,48 @@ void iommu_flush_all_caches(struct amd_iommu *iommu)
 	iommu_flush_tlb_all(iommu);
 }
 
+/*
+ * Command send function for flushing on-device TLB
+ */
+static int device_flush_iotlb(struct device *dev, u64 address, size_t size)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct amd_iommu *iommu;
+	struct iommu_cmd cmd;
+	u16 devid;
+	int qdep;
+
+	qdep  = pci_ats_queue_depth(pdev);
+	devid = get_device_id(dev);
+	iommu = amd_iommu_rlookup_table[devid];
+
+	build_inv_iotlb_pages(&cmd, devid, qdep, address, size);
+
+	return iommu_queue_command(iommu, &cmd);
+}
+
 /*
  * Command send function for invalidating a device table entry
  */
 static int device_flush_dte(struct device *dev)
 {
 	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
 	u16 devid;
+	int ret;
 
+	pdev  = to_pci_dev(dev);
 	devid = get_device_id(dev);
 	iommu = amd_iommu_rlookup_table[devid];
 
-	return iommu_flush_dte(iommu, devid);
+	ret = iommu_flush_dte(iommu, devid);
+	if (ret)
+		return ret;
+
+	if (pci_ats_enabled(pdev))
+		ret = device_flush_iotlb(dev, 0, ~0UL);
+
+	return ret;
 }
 
 /*
@@ -595,6 +657,7 @@ static int device_flush_dte(struct device *dev)
 static void __domain_flush_pages(struct protection_domain *domain,
 				 u64 address, size_t size, int pde)
 {
+	struct iommu_dev_data *dev_data;
 	struct iommu_cmd cmd;
 	int ret = 0, i;
 
@@ -611,6 +674,15 @@ static void __domain_flush_pages(struct protection_domain *domain,
 		ret |= iommu_queue_command(amd_iommus[i], &cmd);
 	}
 
+	list_for_each_entry(dev_data, &domain->dev_list, list) {
+		struct pci_dev *pdev = to_pci_dev(dev_data->dev);
+
+		if (!pci_ats_enabled(pdev))
+			continue;
+
+		ret |= device_flush_iotlb(dev_data->dev, address, size);
+	}
+
 	WARN_ON(ret);
 }
 
-- 
cgit v1.2.3


From 60f723b4117507c05c8b0b5c8b98ecc12a76878e Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 5 Apr 2011 12:50:24 +0200
Subject: x86/amd-iommu: Add flag to indicate IOTLB support

This patch adds a flag to the AMD IOMMU driver to indicate
that all IOMMUs present in the system support device IOTLBs.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h | 2 ++
 arch/x86/kernel/amd_iommu_init.c       | 4 ++++
 2 files changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index f5d184e7d5be..cb811c965548 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -250,6 +250,8 @@ extern bool amd_iommu_dump;
 
 /* global flag if IOMMUs cache non-present entries */
 extern bool amd_iommu_np_cache;
+/* Only true if all IOMMUs support device IOTLBs */
+extern bool amd_iommu_iotlb_sup;
 
 /*
  * Make iterating over all IOMMUs easier
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 8848dda808e2..b6c634f3dc07 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -137,6 +137,7 @@ int amd_iommus_present;
 
 /* IOMMUs have a non-present cache? */
 bool amd_iommu_np_cache __read_mostly;
+bool amd_iommu_iotlb_sup __read_mostly = true;
 
 /*
  * The ACPI table parsing functions set this variable on an error
@@ -673,6 +674,9 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 					MMIO_GET_LD(range));
 	iommu->evt_msi_num = MMIO_MSI_NUM(misc);
 
+	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
+		amd_iommu_iotlb_sup = false;
+
 	if (!is_rd890_iommu(iommu->dev))
 		return;
 
-- 
cgit v1.2.3


From fd7b5535e10ce820f030842da3f289f80ec0d4f3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 5 Apr 2011 15:31:08 +0200
Subject: x86/amd-iommu: Add ATS enable/disable code

This patch adds the necessary code to the AMD IOMMU driver
for enabling and disabling the ATS capability on a device
and to setup the IOMMU data structures correctly.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h |  2 ++
 arch/x86/kernel/amd_iommu.c            | 34 ++++++++++++++++++++++++++++------
 2 files changed, 30 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index cb811c965548..7434377b2ab9 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -216,6 +216,8 @@
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
+#define DTE_FLAG_IOTLB	0x01
+
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
 #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
 #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index f3ce4338dade..e4791f66aa38 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1452,17 +1452,22 @@ static bool dma_ops_domain(struct protection_domain *domain)
 	return domain->flags & PD_DMA_OPS_MASK;
 }
 
-static void set_dte_entry(u16 devid, struct protection_domain *domain)
+static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
 {
 	u64 pte_root = virt_to_phys(domain->pt_root);
+	u32 flags = 0;
 
 	pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
 		    << DEV_ENTRY_MODE_SHIFT;
 	pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
 
-	amd_iommu_dev_table[devid].data[2] = domain->id;
-	amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
-	amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
+	if (ats)
+		flags |= DTE_FLAG_IOTLB;
+
+	amd_iommu_dev_table[devid].data[3] |= flags;
+	amd_iommu_dev_table[devid].data[2]  = domain->id;
+	amd_iommu_dev_table[devid].data[1]  = upper_32_bits(pte_root);
+	amd_iommu_dev_table[devid].data[0]  = lower_32_bits(pte_root);
 }
 
 static void clear_dte_entry(u16 devid)
@@ -1479,16 +1484,22 @@ static void do_attach(struct device *dev, struct protection_domain *domain)
 {
 	struct iommu_dev_data *dev_data;
 	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	bool ats = false;
 	u16 devid;
 
 	devid    = get_device_id(dev);
 	iommu    = amd_iommu_rlookup_table[devid];
 	dev_data = get_dev_data(dev);
+	pdev     = to_pci_dev(dev);
+
+	if (amd_iommu_iotlb_sup)
+		ats = pci_ats_enabled(pdev);
 
 	/* Update data structures */
 	dev_data->domain = domain;
 	list_add(&dev_data->list, &domain->dev_list);
-	set_dte_entry(devid, domain);
+	set_dte_entry(devid, domain, ats);
 
 	/* Do reference counting */
 	domain->dev_iommu[iommu->index] += 1;
@@ -1502,11 +1513,13 @@ static void do_detach(struct device *dev)
 {
 	struct iommu_dev_data *dev_data;
 	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
 	u16 devid;
 
 	devid    = get_device_id(dev);
 	iommu    = amd_iommu_rlookup_table[devid];
 	dev_data = get_dev_data(dev);
+	pdev     = to_pci_dev(dev);
 
 	/* decrease reference counters */
 	dev_data->domain->dev_iommu[iommu->index] -= 1;
@@ -1581,9 +1594,13 @@ out_unlock:
 static int attach_device(struct device *dev,
 			 struct protection_domain *domain)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	unsigned long flags;
 	int ret;
 
+	if (amd_iommu_iotlb_sup)
+		pci_enable_ats(pdev, PAGE_SHIFT);
+
 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	ret = __attach_device(dev, domain);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
@@ -1640,12 +1657,16 @@ static void __detach_device(struct device *dev)
  */
 static void detach_device(struct device *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	unsigned long flags;
 
 	/* lock device table */
 	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	__detach_device(dev);
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+
+	if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev))
+		pci_disable_ats(pdev);
 }
 
 /*
@@ -1795,8 +1816,9 @@ static void update_device_table(struct protection_domain *domain)
 	struct iommu_dev_data *dev_data;
 
 	list_for_each_entry(dev_data, &domain->dev_list, list) {
+		struct pci_dev *pdev = to_pci_dev(dev_data->dev);
 		u16 devid = get_device_id(dev_data->dev);
-		set_dte_entry(devid, domain);
+		set_dte_entry(devid, domain, pci_ats_enabled(pdev));
 	}
 }
 
-- 
cgit v1.2.3


From 3905c54f2bd2c6f937f87307987ca072eabc3e7b Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 12 Apr 2011 14:00:40 +1000
Subject: sched, sparc64: Turn cpu_coregroup_mask() into a real function

This compile error triggers on Sparc64:

   kernel/sched.c:7140: error: 'cpu_coregroup_mask' undeclared here (not in a function)

Because after the recent scheduler domain cleanups the scheduler
uses this arch method as a function pointer in a scheduler
topology data structure - which is not possible with a macro.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20110412140040.3020ef55.sfr@canb.auug.org.au
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/sparc/include/asm/topology_64.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index 1c79f32734a0..8b9c556d630b 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -65,6 +65,10 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
 #define smt_capable()				(sparc64_multi_core)
 #endif /* CONFIG_SMP */
 
-#define cpu_coregroup_mask(cpu)			(&cpu_core_map[cpu])
+extern cpumask_t cpu_core_map[NR_CPUS];
+static inline const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+        return &cpu_core_map[cpu];
+}
 
 #endif /* _ASM_SPARC64_TOPOLOGY_H */
-- 
cgit v1.2.3


From d99ddec3eee0be8a43b2c1ff624b9dfaaa26b959 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 11 Apr 2011 11:03:18 +0200
Subject: x86/amd-iommu: Add extended feature detection

This patch adds detection of the extended features of an
AMD IOMMU. The available features are printed to dmesg on
boot.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_proto.h | 10 +++++++++-
 arch/x86/include/asm/amd_iommu_types.h | 17 +++++++++++++++++
 arch/x86/kernel/amd_iommu_init.c       | 24 ++++++++++++++++++++++--
 3 files changed, 48 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index 1223c0fe03f5..a4ae6c3875eb 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -19,7 +19,7 @@
 #ifndef _ASM_X86_AMD_IOMMU_PROTO_H
 #define _ASM_X86_AMD_IOMMU_PROTO_H
 
-struct amd_iommu;
+#include <asm/amd_iommu_types.h>
 
 extern int amd_iommu_init_dma_ops(void);
 extern int amd_iommu_init_passthrough(void);
@@ -42,4 +42,12 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev)
 	       (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
 }
 
+static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
+{
+	if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
+		return false;
+
+	return !!(iommu->features & f);
+}
+
 #endif /* _ASM_X86_AMD_IOMMU_PROTO_H  */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 878ae008eb04..5c24e4652347 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -68,12 +68,25 @@
 #define MMIO_CONTROL_OFFSET     0x0018
 #define MMIO_EXCL_BASE_OFFSET   0x0020
 #define MMIO_EXCL_LIMIT_OFFSET  0x0028
+#define MMIO_EXT_FEATURES	0x0030
 #define MMIO_CMD_HEAD_OFFSET	0x2000
 #define MMIO_CMD_TAIL_OFFSET	0x2008
 #define MMIO_EVT_HEAD_OFFSET	0x2010
 #define MMIO_EVT_TAIL_OFFSET	0x2018
 #define MMIO_STATUS_OFFSET	0x2020
 
+
+/* Extended Feature Bits */
+#define FEATURE_PREFETCH	(1ULL<<0)
+#define FEATURE_PPR		(1ULL<<1)
+#define FEATURE_X2APIC		(1ULL<<2)
+#define FEATURE_NX		(1ULL<<3)
+#define FEATURE_GT		(1ULL<<4)
+#define FEATURE_IA		(1ULL<<6)
+#define FEATURE_GA		(1ULL<<7)
+#define FEATURE_HE		(1ULL<<8)
+#define FEATURE_PC		(1ULL<<9)
+
 /* MMIO status bits */
 #define MMIO_STATUS_COM_WAIT_INT_MASK	0x04
 
@@ -227,6 +240,7 @@
 /* IOMMU capabilities */
 #define IOMMU_CAP_IOTLB   24
 #define IOMMU_CAP_NPCACHE 26
+#define IOMMU_CAP_EFR     27
 
 #define MAX_DOMAIN_ID 65536
 
@@ -371,6 +385,9 @@ struct amd_iommu {
 	/* flags read from acpi table */
 	u8 acpi_flags;
 
+	/* Extended features */
+	u64 features;
+
 	/*
 	 * Capability pointer. There could be more than one IOMMU per PCI
 	 * device function if there are more than one AMD IOMMU capability
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 8848dda808e2..047905dc3e14 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -299,9 +299,23 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 /* Function to enable the hardware */
 static void iommu_enable(struct amd_iommu *iommu)
 {
-	printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n",
+	static const char * const feat_str[] = {
+		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
+		"IA", "GA", "HE", "PC", NULL
+	};
+	int i;
+
+	printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx",
 	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
 
+	if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
+		printk(KERN_CONT " extended features: ");
+		for (i = 0; feat_str[i]; ++i)
+			if (iommu_feature(iommu, (1ULL << i)))
+				printk(KERN_CONT " %s", feat_str[i]);
+	}
+	printk(KERN_CONT "\n");
+
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
 
@@ -657,7 +671,7 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
 static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 {
 	int cap_ptr = iommu->cap_ptr;
-	u32 range, misc;
+	u32 range, misc, low, high;
 	int i, j;
 
 	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
@@ -673,6 +687,12 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 					MMIO_GET_LD(range));
 	iommu->evt_msi_num = MMIO_MSI_NUM(misc);
 
+	/* read extended feature bits */
+	low  = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
+	high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
+
+	iommu->features = ((u64)high << 32) | low;
+
 	if (!is_rd890_iommu(iommu->dev))
 		return;
 
-- 
cgit v1.2.3


From 58fc7f1419560efa9c426b829c195050e0147d7f Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 11 Apr 2011 11:13:24 +0200
Subject: x86/amd-iommu: Add support for invalidate_all command

This patch adds support for the invalidate_all command
present in new versions of the AMD IOMMU.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h |  1 +
 arch/x86/kernel/amd_iommu.c            | 24 ++++++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 5c24e4652347..df62d26ed2ab 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -127,6 +127,7 @@
 #define CMD_COMPL_WAIT          0x01
 #define CMD_INV_DEV_ENTRY       0x02
 #define CMD_INV_IOMMU_PAGES     0x03
+#define CMD_INV_ALL		0x08
 
 #define CMD_COMPL_WAIT_STORE_MASK	0x01
 #define CMD_COMPL_WAIT_INT_MASK		0x02
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index bcf58ea55cfa..d6192bcf9f09 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -463,6 +463,12 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
 		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
 }
 
+static void build_inv_all(struct iommu_cmd *cmd)
+{
+	memset(cmd, 0, sizeof(*cmd));
+	CMD_SET_TYPE(cmd, CMD_INV_ALL);
+}
+
 /*
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command.
@@ -567,10 +573,24 @@ static void iommu_flush_tlb_all(struct amd_iommu *iommu)
 	iommu_completion_wait(iommu);
 }
 
+static void iommu_flush_all(struct amd_iommu *iommu)
+{
+	struct iommu_cmd cmd;
+
+	build_inv_all(&cmd);
+
+	iommu_queue_command(iommu, &cmd);
+	iommu_completion_wait(iommu);
+}
+
 void iommu_flush_all_caches(struct amd_iommu *iommu)
 {
-	iommu_flush_dte_all(iommu);
-	iommu_flush_tlb_all(iommu);
+	if (iommu_feature(iommu, FEATURE_IA)) {
+		iommu_flush_all(iommu);
+	} else {
+		iommu_flush_dte_all(iommu);
+		iommu_flush_tlb_all(iommu);
+	}
 }
 
 /*
-- 
cgit v1.2.3


From 32a90b6e65792260d6212ac52e8f5be140b6f5be Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Fri, 8 Apr 2011 12:01:51 +0200
Subject: ARM: imx: fix usb related build failure for mach-vpr200
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This was broken by

	4bd597b (ARM i.MX ehci: do ehci init in board specific functions)

and fixes:

  CC      arch/arm/mach-mx3/mach-vpr200.o
arch/arm/mach-mx3/mach-vpr200.c:263: error: unknown field 'flags' specified in initializer
arch/arm/mach-mx3/mach-vpr200.c:264: warning: initialization makes pointer from integer without a cast

by just applying the change to mach-vpr200.c that the other machine files
got by 4bd597b.

LAKML-Reference: 1302257029-17397-1-git-send-email-u.kleine-koenig@pengutronix.de
Acked-by: Marc Reilly <marc@cpdesign.com.au>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mach-mx3/mach-vpr200.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-mx3/mach-vpr200.c b/arch/arm/mach-mx3/mach-vpr200.c
index 2cf390fbd980..47a69cbc31a8 100644
--- a/arch/arm/mach-mx3/mach-vpr200.c
+++ b/arch/arm/mach-mx3/mach-vpr200.c
@@ -257,11 +257,16 @@ static const struct fsl_usb2_platform_data otg_device_pdata __initconst = {
 	.workaround	= FLS_USB2_WORKAROUND_ENGCM09152,
 };
 
+static int vpr200_usbh_init(struct platform_device *pdev)
+{
+	return mx35_initialize_usb_hw(pdev->id,
+			MXC_EHCI_INTERFACE_SINGLE_UNI | MXC_EHCI_INTERNAL_PHY);
+}
+
 /* USB HOST config */
 static const struct mxc_usbh_platform_data usb_host_pdata __initconst = {
-	.portsc		= MXC_EHCI_MODE_SERIAL,
-	.flags		= MXC_EHCI_INTERFACE_SINGLE_UNI |
-			  MXC_EHCI_INTERNAL_PHY,
+	.init = vpr200_usbh_init,
+	.portsc = MXC_EHCI_MODE_SERIAL,
 };
 
 static struct platform_device *devices[] __initdata = {
-- 
cgit v1.2.3


From f61b9fc27e5b61dbc330696f040cc66ba1dbcbaa Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 4 Apr 2011 16:06:12 +0200
Subject: ARM: mxs/clock-mx28: fix up name##_set_rate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For the lcdif clock get_rate looks as follows:

	read div from HW_CLKCTRL_DIS_LCDIF.DIV
	return clk_get_rate(clk->parent) / div

with clk->parent being ref_pix_clk on my system.

ref_pix_clk's rate depends on HW_CLKCTRL_FRAC1.PIXFRAC.

The set_rate function for lcdif does:

	parent_rate = clk_get_rate(clk->parent);
	based on that calculate frac and div such that
	  parent_rate * 18 / frac / div is near the requested rate.
	HW_CLKCTRL_FRAC1.PIXFRAC is updated with frac
	HW_CLKCTRL_DIS_LCDIF.DIV is updated with div

For this calculation to be correct parent_rate needs to be
initialized not with the clock rate of lcdif's parent (i.e. ref_pix) but
that of its grandparent (i.e. ref_pix' parent == pll0_clk).

The obvious downside of this patch is that now set_rate(lcdif) changes
its parent's rate, too.  Still this is better than a wrong rate.

Acked-by: Shawn Guo <shawn.guo@freescale.com>
LAKML-Reference: 20110225084950.GA13684@S2101-09.ap.freescale.net
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mach-mxs/clock-mx28.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-mxs/clock-mx28.c b/arch/arm/mach-mxs/clock-mx28.c
index 1ad97fed1e94..5dcc59d5b9ec 100644
--- a/arch/arm/mach-mxs/clock-mx28.c
+++ b/arch/arm/mach-mxs/clock-mx28.c
@@ -295,11 +295,11 @@ static int name##_set_rate(struct clk *clk, unsigned long rate)		\
 	unsigned long diff, parent_rate, calc_rate;			\
 	int i;								\
 									\
-	parent_rate = clk_get_rate(clk->parent);			\
 	div_max = BM_CLKCTRL_##dr##_DIV >> BP_CLKCTRL_##dr##_DIV;	\
 	bm_busy = BM_CLKCTRL_##dr##_BUSY;				\
 									\
 	if (clk->parent == &ref_xtal_clk) {				\
+		parent_rate = clk_get_rate(clk->parent);		\
 		div = DIV_ROUND_UP(parent_rate, rate);			\
 		if (clk == &cpu_clk) {					\
 			div_max = BM_CLKCTRL_CPU_DIV_XTAL >>		\
@@ -309,6 +309,11 @@ static int name##_set_rate(struct clk *clk, unsigned long rate)		\
 		if (div == 0 || div > div_max)				\
 			return -EINVAL;					\
 	} else {							\
+		/*							\
+		 * hack alert: this block modifies clk->parent, too,	\
+		 * so the base to use it the grand parent.		\
+		 */							\
+		parent_rate = clk_get_rate(clk->parent->parent);	\
 		rate >>= PARENT_RATE_SHIFT;				\
 		parent_rate >>= PARENT_RATE_SHIFT;			\
 		diff = parent_rate;					\
-- 
cgit v1.2.3


From 0575b4b83edb0a766a9c1518a5da57780f386340 Mon Sep 17 00:00:00 2001
From: Dave Martin <dave.martin@linaro.org>
Date: Tue, 16 Nov 2010 13:13:37 +0000
Subject: ARM: mxc: Correct data alignment in headsmp.S for
 CONFIG_THUMB2_KERNEL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Directives such as .long and .word do not magically cause the
assembler location counter to become aligned in gas.  As a
result, using these directives in code sections can result in
misaligned data words when building a Thumb-2 kernel
(CONFIG_THUMB2_KERNEL).

This is a Bad Thing, since the ABI permits the compiler to
assume that fundamental types of word size or above are word-
aligned when accessing them from C.  If the data is not really
word-aligned, this can cause impaired performance and stray
alignment faults in some circumstances.

In general, the following rules should be applied when using
data word declaration directives inside code sections:

    * .quad and .double:
         .align 3

    * .long, .word, .single, .float:
         .align (or .align 2)

    * .short:
        No explicit alignment required, since Thumb-2
        instructions are always 2 or 4 bytes in size.
        immediately after an instruction.

Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
LAKML-Reference: 1289913217-8672-1-git-send-email-dave.martin@linaro.org
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/plat-mxc/ssi-fiq.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/plat-mxc/ssi-fiq.S b/arch/arm/plat-mxc/ssi-fiq.S
index 4ddce565b353..8397a2dd19f2 100644
--- a/arch/arm/plat-mxc/ssi-fiq.S
+++ b/arch/arm/plat-mxc/ssi-fiq.S
@@ -124,6 +124,8 @@ imx_ssi_fiq_start:
 1:
 		@ return from FIQ
 		subs	pc, lr, #4
+
+		.align
 imx_ssi_fiq_base:
 		.word 0x0
 imx_ssi_fiq_rx_buffer:
-- 
cgit v1.2.3


From b5eee2fdefc89df312034e5e0e6f5dd55c4e9bae Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 4 Apr 2011 14:29:58 +0200
Subject: ARM: mxc: Add missing lockdep annotation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The irq_set_wake() function of the gpio irq_chip calls
enable/disable_irq_wake() on the demultiplex interrupt. That leads to
a lockdep warning "INFO: possible recursive locking detected" because
irq_set_type() is called under irq_desc->lock and the *_irq_wake()
calls take irq_desc->lock of the demux interrupt.

Tell lockdep that the gpio irqs are in a different lock class.

Documentation/SubmitChecklist:
 15: All codepaths have been exercised with all lockdep features enabled.

That's a non-optional requirement, AFAICT.

Reported-and-tested-by: Arnaud Patard <arnaud.patard@rtp-net.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
LAKML-Reference: alpine.LFD.2.00.1104041416290.19945@localhost6.localdomain6
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/plat-mxc/gpio.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/plat-mxc/gpio.c b/arch/arm/plat-mxc/gpio.c
index 7a107246fd98..6cd6d7f686f6 100644
--- a/arch/arm/plat-mxc/gpio.c
+++ b/arch/arm/plat-mxc/gpio.c
@@ -295,6 +295,12 @@ static int mxc_gpio_direction_output(struct gpio_chip *chip,
 	return 0;
 }
 
+/*
+ * This lock class tells lockdep that GPIO irqs are in a different
+ * category than their parents, so it won't report false recursion.
+ */
+static struct lock_class_key gpio_lock_class;
+
 int __init mxc_gpio_init(struct mxc_gpio_port *port, int cnt)
 {
 	int i, j;
@@ -311,6 +317,7 @@ int __init mxc_gpio_init(struct mxc_gpio_port *port, int cnt)
 		__raw_writel(~0, port[i].base + GPIO_ISR);
 		for (j = port[i].virtual_irq_start;
 			j < port[i].virtual_irq_start + 32; j++) {
+			irq_set_lockdep_class(j, &gpio_lock_class);
 			irq_set_chip_and_handler(j, &gpio_irq_chip,
 						 handle_level_irq);
 			set_irq_flags(j, IRQF_VALID);
-- 
cgit v1.2.3


From ef1fd2df85f5ea8ca8876a45d0ebb152d3a144d1 Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <prabhakar@freescale.com>
Date: Thu, 31 Mar 2011 12:31:09 +0530
Subject: powerpc/85xx: Don't add disabled PCIe devices

PCIe nodes with the property status="disabled" are not usable and so
avoid adding "disabled" PCIe bridge with the system.

Signed-off-by: Prabhakar Kushwaha <prabhakar@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/sysdev/fsl_pci.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index f8f7f28c6343..68ca9290df94 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -324,6 +324,11 @@ int __init fsl_add_bridge(struct device_node *dev, int is_primary)
 	struct resource rsrc;
 	const int *bus_range;
 
+	if (!of_device_is_available(dev)) {
+		pr_warning("%s: disabled\n", dev->full_name);
+		return -ENODEV;
+	}
+
 	pr_debug("Adding PCI host bridge %s\n", dev->full_name);
 
 	/* Fetch host bridge registers address */
-- 
cgit v1.2.3


From 07d9fce24d871785dbd25458469032fea73f17b8 Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <prabhakar@freescale.com>
Date: Wed, 6 Apr 2011 12:56:29 +0530
Subject: powerpc: Check device status before adding serial device

serial port nodes with the property status="disabled" are not usable and so
avoid adding "disabled" port with the system.

Signed-off-by: Prabhakar Kushwaha <prabhakar@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/kernel/legacy_serial.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index c834757bebc0..2b97b80d6d7d 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -330,9 +330,11 @@ void __init find_legacy_serial_ports(void)
 		if (!parent)
 			continue;
 		if (of_match_node(legacy_serial_parents, parent) != NULL) {
-			index = add_legacy_soc_port(np, np);
-			if (index >= 0 && np == stdout)
-				legacy_serial_console = index;
+			if (of_device_is_available(np)) {
+				index = add_legacy_soc_port(np, np);
+				if (index >= 0 && np == stdout)
+					legacy_serial_console = index;
+			}
 		}
 		of_node_put(parent);
 	}
-- 
cgit v1.2.3


From 11ed0db9f6c7811233632d2ab79c50c011b89902 Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Wed, 6 Apr 2011 00:11:06 -0500
Subject: powerpc/book3e: Fix CPU feature handling on 64-bit e5500

The CPU_FTRS_POSSIBLE and CPU_FTRS_ALWAYS defines did not encompass
e5500 CPU features when built for 64-bit.  This causes issues with
cpu_has_feature() as it utilizes the POSSIBLE & ALWAYS defines as part
of its check.

Create a unique CPU_FTRS_E5500 (as its different from CPU_FTRS_E500MC),
created a new group for 64-bit Book3e based CPUs and add CPU_FTRS_E5500
to that group.

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/include/asm/cputable.h | 13 +++++++++++++
 arch/powerpc/kernel/cputable.c      |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index be3cdf9134ce..f1fbf6092ed2 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -386,6 +386,9 @@ extern const char *powerpc_base_platform;
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
 	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
 	    CPU_FTR_DBELL)
+#define CPU_FTRS_E5500	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
+	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+	    CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
 #define CPU_FTRS_GENERIC_32	(CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
 
 /* 64-bit CPUs */
@@ -435,11 +438,15 @@ extern const char *powerpc_base_platform;
 #define CPU_FTRS_COMPATIBLE	(CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
 
 #ifdef __powerpc64__
+#ifdef CONFIG_PPC_BOOK3E
+#define CPU_FTRS_POSSIBLE	(CPU_FTRS_E5500)
+#else
 #define CPU_FTRS_POSSIBLE	\
 	    (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |	\
 	    CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 |	\
 	    CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T |		\
 	    CPU_FTR_1T_SEGMENT | CPU_FTR_VSX)
+#endif
 #else
 enum {
 	CPU_FTRS_POSSIBLE =
@@ -473,16 +480,21 @@ enum {
 #endif
 #ifdef CONFIG_E500
 	    CPU_FTRS_E500 | CPU_FTRS_E500_2 | CPU_FTRS_E500MC |
+	    CPU_FTRS_E5500 |
 #endif
 	    0,
 };
 #endif /* __powerpc64__ */
 
 #ifdef __powerpc64__
+#ifdef CONFIG_PPC_BOOK3E
+#define CPU_FTRS_ALWAYS		(CPU_FTRS_E5500)
+#else
 #define CPU_FTRS_ALWAYS		\
 	    (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 &	\
 	    CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & CPU_FTRS_POWER6 &	\
 	    CPU_FTRS_POWER7 & CPU_FTRS_CELL & CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE)
+#endif
 #else
 enum {
 	CPU_FTRS_ALWAYS =
@@ -513,6 +525,7 @@ enum {
 #endif
 #ifdef CONFIG_E500
 	    CPU_FTRS_E500 & CPU_FTRS_E500_2 & CPU_FTRS_E500MC &
+	    CPU_FTRS_E5500 &
 #endif
 	    CPU_FTRS_POSSIBLE,
 };
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index c9b68d07ac4f..b9602ee06deb 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1973,7 +1973,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x80240000,
 		.cpu_name		= "e5500",
-		.cpu_features		= CPU_FTRS_E500MC,
+		.cpu_features		= CPU_FTRS_E5500,
 		.cpu_user_features	= COMMON_USER_BOOKE,
 		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
 			MMU_FTR_USE_TLBILX,
-- 
cgit v1.2.3


From d51ad91535b75c043f074f093ef913fe20ff2b5e Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Thu, 27 May 2010 17:35:12 -0500
Subject: powerpc/e500mc: Remove CPU_FTR_MAYBE_CAN_NAP/CPU_FTR_MAYBE_CAN_DOZE

e500mc does not support the HID0/MSR mechanism that is used by e500_idle
(and there are also issues with waking on certain types of interrupts).

Further, even if napping is never actually enabled, just having
CPU_FTR_CAN_NAP will cause machine_init() to overwrite the board's supplied
ppc_md.power_save().

We drop CPU_FTR_MAYBE_CAN_DOZE becuase we should use 'wait' instead on
e500mc.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/include/asm/cputable.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index f1fbf6092ed2..1833d1a07e79 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -382,8 +382,7 @@ extern const char *powerpc_base_platform;
 #define CPU_FTRS_E500_2	(CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
 	    CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
 	    CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500MC	(CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
-	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
+#define CPU_FTRS_E500MC	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
 	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
 	    CPU_FTR_DBELL)
 #define CPU_FTRS_E5500	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
-- 
cgit v1.2.3


From e5462d16f76ad7a9156a82a97fbafba298da9ca6 Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Fri, 8 Apr 2011 04:20:54 -0500
Subject: powerpc/85xx: disable Suspend support if SMP enabled

We currently dont have CPU Hotplug support working on 85xx so we need to
disable Suspsend support as it will force enabling of CPU Hotplug.

arch/powerpc/kernel/built-in.o: In function `cpu_die': arch/powerpc/kernel/smp.c:702: undefined reference to `start_secondary_resume'
make: *** [.tmp_vmlinux1] Error 1

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b6ff882f695b..8f4d50b0adfa 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -209,7 +209,7 @@ config ARCH_HIBERNATION_POSSIBLE
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 	depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \
-		   PPC_85xx || PPC_86xx || PPC_PSERIES || 44x || 40x
+		   (PPC_85xx && !SMP) || PPC_86xx || PPC_PSERIES || 44x || 40x
 
 config PPC_DCR_NATIVE
 	bool
-- 
cgit v1.2.3


From 6caa15d0b84d2ea688fd31f4f172c8353463e109 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 12 Apr 2011 22:51:04 +0200
Subject: ARM: s3c2440: gta02; Register dfbmcs320 device for BT audio interface

Register the dfbmcs320 device which provides the PCM DAI for the bluetooth
module.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Liam Girdwood <lrg@ti.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 arch/arm/mach-s3c2440/mach-gta02.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-s3c2440/mach-gta02.c b/arch/arm/mach-s3c2440/mach-gta02.c
index 0db2411ef4bb..716662008ce2 100644
--- a/arch/arm/mach-s3c2440/mach-gta02.c
+++ b/arch/arm/mach-s3c2440/mach-gta02.c
@@ -409,6 +409,10 @@ struct platform_device s3c24xx_pwm_device = {
 	.num_resources	= 0,
 };
 
+static struct platform_device gta02_dfbmcs320_device = {
+	.name = "dfbmcs320",
+};
+
 static struct i2c_board_info gta02_i2c_devs[] __initdata = {
 	{
 		I2C_BOARD_INFO("pcf50633", 0x73),
@@ -523,6 +527,7 @@ static struct platform_device *gta02_devices[] __initdata = {
 	&s3c_device_iis,
 	&samsung_asoc_dma,
 	&s3c_device_i2c0,
+	&gta02_dfbmcs320_device,
 	&gta02_buttons_device,
 	&s3c_device_adc,
 	&s3c_device_ts,
-- 
cgit v1.2.3


From 184748cc50b2dceb8287f9fb657eda48ff8fcfe7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 5 Apr 2011 17:23:39 +0200
Subject: sched: Provide scheduler_ipi() callback in response to
 smp_send_reschedule()

For future rework of try_to_wake_up() we'd like to push part of that
function onto the CPU the task is actually going to run on.

In order to do so we need a generic callback from the existing scheduler IPI.

This patch introduces such a generic callback: scheduler_ipi() and
implements it as a NOP.

BenH notes: PowerPC might use this IPI on offline CPUs under rare conditions!

Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Reviewed-by: Frank Rowand <frank.rowand@am.sony.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110405152728.744338123@chello.nl
---
 arch/alpha/kernel/smp.c             |  3 +--
 arch/arm/kernel/smp.c               |  5 +----
 arch/blackfin/mach-common/smp.c     |  3 +++
 arch/cris/arch-v32/kernel/smp.c     | 13 ++++++++-----
 arch/ia64/kernel/irq_ia64.c         |  2 ++
 arch/ia64/xen/irq_xen.c             | 10 +++++++++-
 arch/m32r/kernel/smp.c              |  4 +---
 arch/mips/cavium-octeon/smp.c       |  2 ++
 arch/mips/kernel/smtc.c             |  2 +-
 arch/mips/mti-malta/malta-int.c     |  2 ++
 arch/mips/pmc-sierra/yosemite/smp.c |  4 ++++
 arch/mips/sgi-ip27/ip27-irq.c       |  2 ++
 arch/mips/sibyte/bcm1480/smp.c      |  7 +++----
 arch/mips/sibyte/sb1250/smp.c       |  7 +++----
 arch/mn10300/kernel/smp.c           |  5 +----
 arch/parisc/kernel/smp.c            |  5 +----
 arch/powerpc/kernel/smp.c           |  4 ++--
 arch/s390/kernel/smp.c              |  6 +++---
 arch/sh/kernel/smp.c                |  2 ++
 arch/sparc/kernel/smp_32.c          |  4 +++-
 arch/sparc/kernel/smp_64.c          |  1 +
 arch/tile/kernel/smp.c              |  6 +-----
 arch/um/kernel/smp.c                |  2 +-
 arch/x86/kernel/smp.c               |  5 ++---
 arch/x86/xen/smp.c                  |  5 ++---
 include/linux/sched.h               |  2 ++
 26 files changed, 63 insertions(+), 50 deletions(-)

(limited to 'arch')

diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 42aa078a5e4d..5a621c6d22ab 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -585,8 +585,7 @@ handle_ipi(struct pt_regs *regs)
 
 		switch (which) {
 		case IPI_RESCHEDULE:
-			/* Reschedule callback.  Everything to be done
-			   is done by the interrupt return path.  */
+			scheduler_ipi();
 			break;
 
 		case IPI_CALL_FUNC:
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 8fe05ad932e4..7a561eb731ea 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -560,10 +560,7 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
 		break;
 
 	case IPI_RESCHEDULE:
-		/*
-		 * nothing more to do - eveything is
-		 * done on the interrupt return path
-		 */
+		scheduler_ipi();
 		break;
 
 	case IPI_CALL_FUNC:
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 6e17a265c4d3..326bb86f4d29 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -164,6 +164,9 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
 	while (msg_queue->count) {
 		msg = &msg_queue->ipi_message[msg_queue->head];
 		switch (msg->type) {
+		case BFIN_IPI_RESCHEDULE:
+			scheduler_ipi();
+			break;
 		case BFIN_IPI_CALL_FUNC:
 			spin_unlock_irqrestore(&msg_queue->lock, flags);
 			ipi_call_function(cpu, msg);
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 4c9e3e1ba5d1..66cc75657e2f 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -342,15 +342,18 @@ irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id)
 
 	ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi);
 
+	if (ipi.vector & IPI_SCHEDULE) {
+		scheduler_ipi();
+	}
 	if (ipi.vector & IPI_CALL) {
-	         func(info);
+		func(info);
 	}
 	if (ipi.vector & IPI_FLUSH_TLB) {
-		     if (flush_mm == FLUSH_ALL)
-			 __flush_tlb_all();
-		     else if (flush_vma == FLUSH_ALL)
+		if (flush_mm == FLUSH_ALL)
+			__flush_tlb_all();
+		else if (flush_vma == FLUSH_ALL)
 			__flush_tlb_mm(flush_mm);
-		     else
+		else
 			__flush_tlb_page(flush_vma, flush_addr);
 	}
 
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 5b704740f160..782c3a357f24 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -31,6 +31,7 @@
 #include <linux/irq.h>
 #include <linux/ratelimit.h>
 #include <linux/acpi.h>
+#include <linux/sched.h>
 
 #include <asm/delay.h>
 #include <asm/intrinsics.h>
@@ -496,6 +497,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 			smp_local_flush_tlb();
 			kstat_incr_irqs_this_cpu(irq, desc);
 		} else if (unlikely(IS_RESCHEDULE(vector))) {
+			scheduler_ipi();
 			kstat_incr_irqs_this_cpu(irq, desc);
 		} else {
 			ia64_setreg(_IA64_REG_CR_TPR, vector);
diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c
index 108bb858acf2..b279e142c633 100644
--- a/arch/ia64/xen/irq_xen.c
+++ b/arch/ia64/xen/irq_xen.c
@@ -92,6 +92,8 @@ static unsigned short saved_irq_cnt;
 static int xen_slab_ready;
 
 #ifdef CONFIG_SMP
+#include <linux/sched.h>
+
 /* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ,
  * it ends up to issue several memory accesses upon percpu data and
  * thus adds unnecessary traffic to other paths.
@@ -99,7 +101,13 @@ static int xen_slab_ready;
 static irqreturn_t
 xen_dummy_handler(int irq, void *dev_id)
 {
+	return IRQ_HANDLED;
+}
 
+static irqreturn_t
+xen_resched_handler(int irq, void *dev_id)
+{
+	scheduler_ipi();
 	return IRQ_HANDLED;
 }
 
@@ -110,7 +118,7 @@ static struct irqaction xen_ipi_irqaction = {
 };
 
 static struct irqaction xen_resched_irqaction = {
-	.handler =	xen_dummy_handler,
+	.handler =	xen_resched_handler,
 	.flags =	IRQF_DISABLED,
 	.name =		"resched"
 };
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index 31cef20b2996..fc10b39893d4 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -122,8 +122,6 @@ void smp_send_reschedule(int cpu_id)
  *
  * Description:  This routine executes on CPU which received
  *               'RESCHEDULE_IPI'.
- *               Rescheduling is processed at the exit of interrupt
- *               operation.
  *
  * Born on Date: 2002.02.05
  *
@@ -138,7 +136,7 @@ void smp_send_reschedule(int cpu_id)
  *==========================================================================*/
 void smp_reschedule_interrupt(void)
 {
-	/* nothing to do */
+	scheduler_ipi();
 }
 
 /*==========================================================================*
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index ba78b21cc8d0..76923eeb58b9 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -44,6 +44,8 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
 
 	if (action & SMP_CALL_FUNCTION)
 		smp_call_function_interrupt();
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
 
 	/* Check if we've been told to flush the icache */
 	if (action & SMP_ICACHE_FLUSH)
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 5a88cc4ccd5a..cedac4633741 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -929,7 +929,7 @@ static void post_direct_ipi(int cpu, struct smtc_ipi *pipi)
 
 static void ipi_resched_interrupt(void)
 {
-	/* Return from interrupt should be enough to cause scheduler check */
+	scheduler_ipi();
 }
 
 static void ipi_call_interrupt(void)
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index 9027061f0ead..7d93e6fbfa5a 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -309,6 +309,8 @@ static void ipi_call_dispatch(void)
 
 static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
 {
+	scheduler_ipi();
+
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c
index efc9e889b349..2608752898c0 100644
--- a/arch/mips/pmc-sierra/yosemite/smp.c
+++ b/arch/mips/pmc-sierra/yosemite/smp.c
@@ -55,6 +55,8 @@ void titan_mailbox_irq(void)
 
 		if (status & 0x2)
 			smp_call_function_interrupt();
+		if (status & 0x4)
+			scheduler_ipi();
 		break;
 
 	case 1:
@@ -63,6 +65,8 @@ void titan_mailbox_irq(void)
 
 		if (status & 0x2)
 			smp_call_function_interrupt();
+		if (status & 0x4)
+			scheduler_ipi();
 		break;
 	}
 }
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 0a04603d577c..b18b04e48577 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -147,8 +147,10 @@ static void ip27_do_irq_mask0(void)
 #ifdef CONFIG_SMP
 	if (pend0 & (1UL << CPU_RESCHED_A_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_RESCHED_A_IRQ);
+		scheduler_ipi();
 	} else if (pend0 & (1UL << CPU_RESCHED_B_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_RESCHED_B_IRQ);
+		scheduler_ipi();
 	} else if (pend0 & (1UL << CPU_CALL_A_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ);
 		smp_call_function_interrupt();
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index 47b347c992ea..d667875be564 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -20,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/smp.h>
 #include <linux/kernel_stat.h>
+#include <linux/sched.h>
 
 #include <asm/mmu_context.h>
 #include <asm/io.h>
@@ -189,10 +190,8 @@ void bcm1480_mailbox_interrupt(void)
 	/* Clear the mailbox to clear the interrupt */
 	__raw_writeq(((u64)action)<<48, mailbox_0_clear_regs[cpu]);
 
-	/*
-	 * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the
-	 * interrupt will do the reschedule for us
-	 */
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
 
 	if (action & SMP_CALL_FUNCTION)
 		smp_call_function_interrupt();
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index c00a5cb1128d..38e7f6bd7922 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/smp.h>
 #include <linux/kernel_stat.h>
+#include <linux/sched.h>
 
 #include <asm/mmu_context.h>
 #include <asm/io.h>
@@ -177,10 +178,8 @@ void sb1250_mailbox_interrupt(void)
 	/* Clear the mailbox to clear the interrupt */
 	____raw_writeq(((u64)action) << 48, mailbox_clear_regs[cpu]);
 
-	/*
-	 * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the
-	 * interrupt will do the reschedule for us
-	 */
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
 
 	if (action & SMP_CALL_FUNCTION)
 		smp_call_function_interrupt();
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index 226c826a2194..83fb27912231 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -494,14 +494,11 @@ void smp_send_stop(void)
  * @irq: The interrupt number.
  * @dev_id: The device ID.
  *
- * We need do nothing here, since the scheduling will be effected on our way
- * back through entry.S.
- *
  * Returns IRQ_HANDLED to indicate we handled the interrupt successfully.
  */
 static irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
 {
-	/* do nothing */
+	scheduler_ipi();
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 69d63d354ef0..828305f19cff 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -155,10 +155,7 @@ ipi_interrupt(int irq, void *dev_id)
 				
 			case IPI_RESCHEDULE:
 				smp_debug(100, KERN_DEBUG "CPU%d IPI_RESCHEDULE\n", this_cpu);
-				/*
-				 * Reschedule callback.  Everything to be
-				 * done is done by the interrupt return path.
-				 */
+				scheduler_ipi();
 				break;
 
 			case IPI_CALL_FUNC:
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index cbdbb14be4b0..9f9c204bef69 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -116,7 +116,7 @@ void smp_message_recv(int msg)
 		generic_smp_call_function_interrupt();
 		break;
 	case PPC_MSG_RESCHEDULE:
-		/* we notice need_resched on exit */
+		scheduler_ipi();
 		break;
 	case PPC_MSG_CALL_FUNC_SINGLE:
 		generic_smp_call_function_single_interrupt();
@@ -146,7 +146,7 @@ static irqreturn_t call_function_action(int irq, void *data)
 
 static irqreturn_t reschedule_action(int irq, void *data)
 {
-	/* we just need the return path side effect of checking need_resched */
+	scheduler_ipi();
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 63a97db83f96..63c7d9ff220d 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -165,12 +165,12 @@ static void do_ext_call_interrupt(unsigned int ext_int_code,
 	kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++;
 	/*
 	 * handle bit signal external calls
-	 *
-	 * For the ec_schedule signal we have to do nothing. All the work
-	 * is done automatically when we return from the interrupt.
 	 */
 	bits = xchg(&S390_lowcore.ext_call_fast, 0);
 
+	if (test_bit(ec_schedule, &bits))
+		scheduler_ipi();
+
 	if (test_bit(ec_call_function, &bits))
 		generic_smp_call_function_interrupt();
 
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 509b36b45115..6207561ea34a 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 #include <asm/atomic.h>
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -323,6 +324,7 @@ void smp_message_recv(unsigned int msg)
 		generic_smp_call_function_interrupt();
 		break;
 	case SMP_MSG_RESCHEDULE:
+		scheduler_ipi();
 		break;
 	case SMP_MSG_FUNCTION_SINGLE:
 		generic_smp_call_function_single_interrupt();
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 91c10fb70858..f95690c167b6 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -125,7 +125,9 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 };
 
 void smp_send_reschedule(int cpu)
 {
-	/* See sparc64 */
+	/*
+	 * XXX missing reschedule IPI, see scheduler_ipi()
+	 */
 }
 
 void smp_send_stop(void)
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 3e94a8c23238..9478da7fdb3e 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1368,6 +1368,7 @@ void smp_send_reschedule(int cpu)
 void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
 {
 	clear_softint(1 << irq);
+	scheduler_ipi();
 }
 
 /* This is a nop because we capture all other cpus
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index a4293102ef81..c52224d5ed45 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -189,12 +189,8 @@ void flush_icache_range(unsigned long start, unsigned long end)
 /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */
 static irqreturn_t handle_reschedule_ipi(int irq, void *token)
 {
-	/*
-	 * Nothing to do here; when we return from interrupt, the
-	 * rescheduling will occur there. But do bump the interrupt
-	 * profiler count in the meantime.
-	 */
 	__get_cpu_var(irq_stat).irq_resched_count++;
+	scheduler_ipi();
 
 	return IRQ_HANDLED;
 }
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index 106bf27e2a9a..eefb107d2d73 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -173,7 +173,7 @@ void IPI_handler(int cpu)
 			break;
 
 		case 'R':
-			set_tsk_need_resched(current);
+			scheduler_ipi();
 			break;
 
 		case 'S':
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 513deac7228d..013e7eba83bb 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -194,14 +194,13 @@ static void native_stop_other_cpus(int wait)
 }
 
 /*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule call back.
  */
 void smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
 	inc_irq_stat(irq_resched_count);
+	scheduler_ipi();
 	/*
 	 * KVM uses this interrupt to force a cpu out of guest mode
 	 */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 30612441ed99..762b46ab14d5 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -46,13 +46,12 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
 
 /*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule call back.
  */
 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 {
 	inc_irq_stat(irq_resched_count);
+	scheduler_ipi();
 
 	return IRQ_HANDLED;
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4ec2c027e92c..758e27afcda5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2189,8 +2189,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
+static inline void scheduler_ipi(void) { }
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
+static inline void scheduler_ipi(void) { }
 static inline unsigned long wait_task_inactive(struct task_struct *p,
 					       long match_state)
 {
-- 
cgit v1.2.3


From c55fa78b13b32d3f19e19cd0c8b9378fdc09e521 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 8 Nov 2010 14:13:35 -0500
Subject: xen/pci: Add xen_[find|register|unregister]_device_domain_owner
 functions.

When the Xen PCI backend is told to enable or disable MSI/MSI-X functions,
the initial domain performs these operations. The initial domain needs
to know which domain (guest) is going to use the PCI device so when it
makes the appropiate hypercall to retrieve the MSI/MSI-X vector it will
also assign the PCI device to the appropiate domain (guest).

This boils down to us needing a mechanism to find, set and unset the domain
id that will be using the device.

[v2: EXPORT_SYMBOL -> EXPORT_SYMBOL_GPL.]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/include/asm/xen/pci.h | 16 +++++++++
 arch/x86/pci/xen.c             | 73 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index aa8620989162..4fbda9a3f339 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -15,10 +15,26 @@ static inline int pci_xen_hvm_init(void)
 #endif
 #if defined(CONFIG_XEN_DOM0)
 void __init xen_setup_pirqs(void);
+int xen_find_device_domain_owner(struct pci_dev *dev);
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
+int xen_unregister_device_domain_owner(struct pci_dev *dev);
 #else
 static inline void __init xen_setup_pirqs(void)
 {
 }
+static inline int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+	return -1;
+}
+static inline int xen_register_device_domain_owner(struct pci_dev *dev,
+						   uint16_t domain)
+{
+	return -1;
+}
+static inline int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+	return -1;
+}
 #endif
 
 #if defined(CONFIG_PCI_MSI)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index e37b407a0ee8..6075f2d65335 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -461,3 +461,76 @@ void __init xen_setup_pirqs(void)
 	}
 }
 #endif
+
+struct xen_device_domain_owner {
+	domid_t domain;
+	struct pci_dev *dev;
+	struct list_head list;
+};
+
+static DEFINE_SPINLOCK(dev_domain_list_spinlock);
+static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
+
+static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
+{
+	struct xen_device_domain_owner *owner;
+
+	list_for_each_entry(owner, &dev_domain_list, list) {
+		if (owner->dev == dev)
+			return owner;
+	}
+	return NULL;
+}
+
+int xen_find_device_domain_owner(struct pci_dev *dev)
+{
+	struct xen_device_domain_owner *owner;
+	int domain = -ENODEV;
+
+	spin_lock(&dev_domain_list_spinlock);
+	owner = find_device(dev);
+	if (owner)
+		domain = owner->domain;
+	spin_unlock(&dev_domain_list_spinlock);
+	return domain;
+}
+EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
+
+int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
+{
+	struct xen_device_domain_owner *owner;
+
+	owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
+	if (!owner)
+		return -ENODEV;
+
+	spin_lock(&dev_domain_list_spinlock);
+	if (find_device(dev)) {
+		spin_unlock(&dev_domain_list_spinlock);
+		kfree(owner);
+		return -EEXIST;
+	}
+	owner->domain = domain;
+	owner->dev = dev;
+	list_add_tail(&owner->list, &dev_domain_list);
+	spin_unlock(&dev_domain_list_spinlock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
+
+int xen_unregister_device_domain_owner(struct pci_dev *dev)
+{
+	struct xen_device_domain_owner *owner;
+
+	spin_lock(&dev_domain_list_spinlock);
+	owner = find_device(dev);
+	if (!owner) {
+		spin_unlock(&dev_domain_list_spinlock);
+		return -ENODEV;
+	}
+	list_del(&owner->list);
+	spin_unlock(&dev_domain_list_spinlock);
+	kfree(owner);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
-- 
cgit v1.2.3


From beafbdc1df02877612dc9039c1de0639921fddec Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 14 Apr 2011 11:17:36 -0400
Subject: xen/irq: Check if the PCI device is owned by a domain different than
 DOMID_SELF.

We check if there is a domain owner for the PCI device. In case of failure
(meaning no domain has registered for this device) we make DOMID_SELF the owner.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
[v2: deal with rebasing on v2.6.37-1]
[v3: deal with rebasing on stable/irq.cleanup]
[v4: deal with rebasing on stable/irq.ween_of_nr_irqs]
[v5: deal with rebasing on v2.6.39-rc3]
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
---
 arch/x86/pci/xen.c   | 21 ++++++++++++++++-----
 drivers/xen/events.c | 12 ++++++++----
 include/xen/events.h |  3 ++-
 3 files changed, 26 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 6075f2d65335..393981feb12f 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -108,7 +108,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 		}
 		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
 					       (type == PCI_CAP_ID_MSIX) ?
-					       "msi-x" : "msi");
+					       "msi-x" : "msi",
+					       DOMID_SELF);
 		if (irq < 0)
 			goto error;
 		dev_dbg(&dev->dev,
@@ -148,7 +149,8 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 		irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
 					       (type == PCI_CAP_ID_MSIX) ?
 					       "pcifront-msi-x" :
-					       "pcifront-msi");
+					       "pcifront-msi",
+						DOMID_SELF);
 		if (irq < 0)
 			goto free;
 		i++;
@@ -190,9 +192,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
 		struct physdev_map_pirq map_irq;
+		domid_t domid;
+
+		domid = ret = xen_find_device_domain_owner(dev);
+		/* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
+		 * hence check ret value for < 0. */
+		if (ret < 0)
+			domid = DOMID_SELF;
 
 		memset(&map_irq, 0, sizeof(map_irq));
-		map_irq.domid = DOMID_SELF;
+		map_irq.domid = domid;
 		map_irq.type = MAP_PIRQ_TYPE_MSI;
 		map_irq.index = -1;
 		map_irq.pirq = -1;
@@ -215,14 +224,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 
 		ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
 		if (ret) {
-			dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
+			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
+				 ret, domid);
 			goto out;
 		}
 
 		ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
 					       map_irq.pirq, map_irq.index,
 					       (type == PCI_CAP_ID_MSIX) ?
-					       "msi-x" : "msi");
+					       "msi-x" : "msi",
+						domid);
 		if (ret < 0)
 			goto out;
 	}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 42d6c930cc87..ac0e22826357 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -101,6 +101,7 @@ struct irq_info
 			unsigned short gsi;
 			unsigned char vector;
 			unsigned char flags;
+			uint16_t domid;
 		} pirq;
 	} u;
 };
@@ -184,6 +185,7 @@ static void xen_irq_info_pirq_init(unsigned irq,
 				   unsigned short pirq,
 				   unsigned short gsi,
 				   unsigned short vector,
+				   uint16_t domid,
 				   unsigned char flags)
 {
 	struct irq_info *info = info_for_irq(irq);
@@ -193,6 +195,7 @@ static void xen_irq_info_pirq_init(unsigned irq,
 	info->u.pirq.pirq = pirq;
 	info->u.pirq.gsi = gsi;
 	info->u.pirq.vector = vector;
+	info->u.pirq.domid = domid;
 	info->u.pirq.flags = flags;
 }
 
@@ -655,7 +658,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 		goto out;
 	}
 
-	xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector,
+	xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF,
 			       shareable ? PIRQ_SHAREABLE : 0);
 
 out:
@@ -680,7 +683,8 @@ int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
 }
 
 int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-			     int pirq, int vector, const char *name)
+			     int pirq, int vector, const char *name,
+			     domid_t domid)
 {
 	int irq, ret;
 
@@ -693,7 +697,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 	irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq,
 				      name);
 
-	xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, 0);
+	xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0);
 	ret = irq_set_msi_desc(irq, msidesc);
 	if (ret < 0)
 		goto error_irq;
@@ -722,7 +726,7 @@ int xen_destroy_irq(int irq)
 
 	if (xen_initial_domain()) {
 		unmap_irq.pirq = info->u.pirq.pirq;
-		unmap_irq.domid = DOMID_SELF;
+		unmap_irq.domid = info->u.pirq.domid;
 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
 		if (rc) {
 			printk(KERN_WARNING "unmap irq failed %d\n", rc);
diff --git a/include/xen/events.h b/include/xen/events.h
index f1b87ad48ac7..9aecc0b5a0e6 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -85,7 +85,8 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
 /* Bind an PSI pirq to an irq. */
 int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-			     int pirq, int vector, const char *name);
+			     int pirq, int vector, const char *name,
+			     domid_t domid);
 #endif
 
 /* De-allocates the above mentioned physical interrupt. */
-- 
cgit v1.2.3


From 0d58a2824d777923b2438107053c6e073c9c5ec1 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 15 Apr 2011 13:26:40 +0100
Subject: ARM: Add new syscalls

Add syscalls for name_to_handle_at, open_by_handle_at, clock_adjtime
and syncfs.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/unistd.h | 4 ++++
 arch/arm/kernel/calls.S       | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index c891eb76c0e3..87dbe3e21970 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -396,6 +396,10 @@
 #define __NR_fanotify_init		(__NR_SYSCALL_BASE+367)
 #define __NR_fanotify_mark		(__NR_SYSCALL_BASE+368)
 #define __NR_prlimit64			(__NR_SYSCALL_BASE+369)
+#define __NR_name_to_handle_at		(__NR_SYSCALL_BASE+370)
+#define __NR_open_by_handle_at		(__NR_SYSCALL_BASE+371)
+#define __NR_clock_adjtime		(__NR_SYSCALL_BASE+372)
+#define __NR_syncfs			(__NR_SYSCALL_BASE+373)
 
 /*
  * The following SWIs are ARM private.
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 5c26eccef998..7fbf28c35bb2 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -379,6 +379,10 @@
 		CALL(sys_fanotify_init)
 		CALL(sys_fanotify_mark)
 		CALL(sys_prlimit64)
+/* 370 */	CALL(sys_name_to_handle_at)
+		CALL(sys_open_by_handle_at)
+		CALL(sys_clock_adjtime)
+		CALL(sys_syncfs)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
-- 
cgit v1.2.3


From b54cd0d5053633373cd3c374aa203024cbf125a0 Mon Sep 17 00:00:00 2001
From: Meelis Roos <mroos@linux.ee>
Date: Mon, 21 Mar 2011 22:47:15 +0200
Subject: [PARISC] fix pacache .size with new binutils

Fix style of flush_user_dcache_range_asm procedure declaration in
arch/parisc/kernel/pacache.s to be consistent with other assembly
procedures.

Signed-off-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 arch/parisc/kernel/pacache.S | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index a85823668cba..93ff3d90edd1 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -817,10 +817,7 @@ ENTRY(purge_kernel_dcache_page)
 	.procend
 ENDPROC(purge_kernel_dcache_page)
 
-
-	.export flush_user_dcache_range_asm
-
-flush_user_dcache_range_asm:
+ENTRY(flush_user_dcache_range_asm)
 	.proc
 	.callinfo NO_CALLS
 	.entry
@@ -839,6 +836,7 @@ flush_user_dcache_range_asm:
 	.exit
 
 	.procend
+ENDPROC(flush_user_dcache_range_asm)
 
 ENTRY(flush_kernel_dcache_range_asm)
 	.proc
-- 
cgit v1.2.3


From d7dd2ff11b7fcd425aca5a875983c862d19a67ae Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Thu, 14 Apr 2011 18:25:21 -0500
Subject: [PARISC] only make executable areas executable

Currently parisc has the whole kernel marked as RWX, meaning any
kernel page at all is eligible to be executed.  This can cause a
theoretical problem on systems with combined I/D TLB because the act
of referencing a page causes a TLB insertion with an executable bit.
This TLB entry may be used by the CPU as the basis for speculating the
page into the I-Cache.  If this speculated page is subsequently used
for a user process, there is the possibility we will get a stale
I-cache line picked up as the binary executes.

As a point of good practise, only mark actual kernel text pages as
executable.  The same has to be done for init_text pages, but they're
converted to data pages (and the I-Cache flushed) when the init memory
is released.

Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 arch/parisc/include/asm/pgtable.h |   9 +-
 arch/parisc/kernel/entry.S        |   3 +
 arch/parisc/kernel/head.S         |   5 +-
 arch/parisc/kernel/module.c       |  10 +-
 arch/parisc/kernel/vmlinux.lds.S  |   1 +
 arch/parisc/mm/init.c             | 260 +++++++++++++++++++++-----------------
 6 files changed, 166 insertions(+), 122 deletions(-)

(limited to 'arch')

diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 5d7b8ce9fdf3..22dadeb58695 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -177,7 +177,10 @@ struct vm_area_struct;
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_ACCESSED)
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _PAGE_KERNEL	(_PAGE_PRESENT | _PAGE_EXEC | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define _PAGE_KERNEL_RO	(_PAGE_PRESENT | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define _PAGE_KERNEL_EXEC	(_PAGE_KERNEL_RO | _PAGE_EXEC)
+#define _PAGE_KERNEL_RWX	(_PAGE_KERNEL_EXEC | _PAGE_WRITE)
+#define _PAGE_KERNEL		(_PAGE_KERNEL_RO | _PAGE_WRITE)
 
 /* The pgd/pmd contains a ptr (in phys addr space); since all pgds/pmds
  * are page-aligned, we don't care about the PAGE_OFFSET bits, except
@@ -208,7 +211,9 @@ struct vm_area_struct;
 #define PAGE_COPY       PAGE_EXECREAD
 #define PAGE_RWX        __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED)
 #define PAGE_KERNEL	__pgprot(_PAGE_KERNEL)
-#define PAGE_KERNEL_RO	__pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
+#define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RWX	__pgprot(_PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_KERNEL_RO)
 #define PAGE_KERNEL_UNC	__pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE)
 #define PAGE_GATEWAY    __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_GATEWAY| _PAGE_READ)
 
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index ead8d2a1034c..6f0594439143 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -692,6 +692,9 @@ ENTRY(fault_vector_11)
 END(fault_vector_11)
 
 #endif
+	/* Fault vector is separately protected and *must* be on its own page */
+	.align		PAGE_SIZE
+ENTRY(end_fault_vector)
 
 	.import		handle_interruption,code
 	.import		do_cpu_irq_mask,code
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index 145c5e4caaa0..37aabd772fbb 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -106,8 +106,9 @@ $bss_loop:
 #endif
 
 
-	/* Now initialize the PTEs themselves */
-	ldo		0+_PAGE_KERNEL(%r0),%r3 /* Hardwired 0 phys addr start */
+	/* Now initialize the PTEs themselves.  We use RWX for
+	 * everything ... it will get remapped correctly later */
+	ldo		0+_PAGE_KERNEL_RWX(%r0),%r3 /* Hardwired 0 phys addr start */
 	ldi		(1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */
 	load32		PA(pg0),%r1
 
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index 6e81bb596e5b..cedbbb8b18d9 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -61,8 +61,10 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
+#include <linux/mm.h>
 #include <linux/slab.h>
 
+#include <asm/pgtable.h>
 #include <asm/unwind.h>
 
 #if 0
@@ -214,7 +216,13 @@ void *module_alloc(unsigned long size)
 {
 	if (size == 0)
 		return NULL;
-	return vmalloc(size);
+	/* using RWX means less protection for modules, but it's
+	 * easier than trying to map the text, data, init_text and
+	 * init_data correctly */
+	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+				    GFP_KERNEL | __GFP_HIGHMEM,
+				    PAGE_KERNEL_RWX, -1,
+				    __builtin_return_address(0));
 }
 
 #ifndef CONFIG_64BIT
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 8f1e4efd143e..bf6a43a322ec 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -134,6 +134,7 @@ SECTIONS
 	. = ALIGN(16384);
 	__init_begin = .;
 	INIT_TEXT_SECTION(16384)
+	. = ALIGN(PAGE_SIZE);
 	INIT_DATA_SECTION(16)
 	/* we have to discard exit text and such at runtime, not link time */
 	.exit.text :
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index b7ed8d7a9b33..7e6b4656f3d7 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -369,24 +369,158 @@ static void __init setup_bootmem(void)
 	request_resource(&sysram_resources[0], &pdcdata_resource);
 }
 
+static void __init map_pages(unsigned long start_vaddr,
+			     unsigned long start_paddr, unsigned long size,
+			     pgprot_t pgprot, int force)
+{
+	pgd_t *pg_dir;
+	pmd_t *pmd;
+	pte_t *pg_table;
+	unsigned long end_paddr;
+	unsigned long start_pmd;
+	unsigned long start_pte;
+	unsigned long tmp1;
+	unsigned long tmp2;
+	unsigned long address;
+	unsigned long vaddr;
+	unsigned long ro_start;
+	unsigned long ro_end;
+	unsigned long fv_addr;
+	unsigned long gw_addr;
+	extern const unsigned long fault_vector_20;
+	extern void * const linux_gateway_page;
+
+	ro_start = __pa((unsigned long)_text);
+	ro_end   = __pa((unsigned long)&data_start);
+	fv_addr  = __pa((unsigned long)&fault_vector_20) & PAGE_MASK;
+	gw_addr  = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK;
+
+	end_paddr = start_paddr + size;
+
+	pg_dir = pgd_offset_k(start_vaddr);
+
+#if PTRS_PER_PMD == 1
+	start_pmd = 0;
+#else
+	start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
+#endif
+	start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
+
+	address = start_paddr;
+	vaddr = start_vaddr;
+	while (address < end_paddr) {
+#if PTRS_PER_PMD == 1
+		pmd = (pmd_t *)__pa(pg_dir);
+#else
+		pmd = (pmd_t *)pgd_address(*pg_dir);
+
+		/*
+		 * pmd is physical at this point
+		 */
+
+		if (!pmd) {
+			pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0), PAGE_SIZE << PMD_ORDER);
+			pmd = (pmd_t *) __pa(pmd);
+		}
+
+		pgd_populate(NULL, pg_dir, __va(pmd));
+#endif
+		pg_dir++;
+
+		/* now change pmd to kernel virtual addresses */
+
+		pmd = (pmd_t *)__va(pmd) + start_pmd;
+		for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++, pmd++) {
+
+			/*
+			 * pg_table is physical at this point
+			 */
+
+			pg_table = (pte_t *)pmd_address(*pmd);
+			if (!pg_table) {
+				pg_table = (pte_t *)
+					alloc_bootmem_low_pages_node(NODE_DATA(0), PAGE_SIZE);
+				pg_table = (pte_t *) __pa(pg_table);
+			}
+
+			pmd_populate_kernel(NULL, pmd, __va(pg_table));
+
+			/* now change pg_table to kernel virtual addresses */
+
+			pg_table = (pte_t *) __va(pg_table) + start_pte;
+			for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++, pg_table++) {
+				pte_t pte;
+
+				/*
+				 * Map the fault vector writable so we can
+				 * write the HPMC checksum.
+				 */
+				if (force)
+					pte =  __mk_pte(address, pgprot);
+				else if (core_kernel_text(vaddr) &&
+					 address != fv_addr)
+					pte = __mk_pte(address, PAGE_KERNEL_EXEC);
+				else
+#if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
+				if (address >= ro_start && address < ro_end
+							&& address != fv_addr
+							&& address != gw_addr)
+					pte = __mk_pte(address, PAGE_KERNEL_RO);
+				else
+#endif
+					pte = __mk_pte(address, pgprot);
+
+				if (address >= end_paddr) {
+					if (force)
+						break;
+					else
+						pte_val(pte) = 0;
+				}
+
+				set_pte(pg_table, pte);
+
+				address += PAGE_SIZE;
+				vaddr += PAGE_SIZE;
+			}
+			start_pte = 0;
+
+			if (address >= end_paddr)
+			    break;
+		}
+		start_pmd = 0;
+	}
+}
+
 void free_initmem(void)
 {
 	unsigned long addr;
 	unsigned long init_begin = (unsigned long)__init_begin;
 	unsigned long init_end = (unsigned long)__init_end;
 
-#ifdef CONFIG_DEBUG_KERNEL
+	/* The init text pages are marked R-X.  We have to
+	 * flush the icache and mark them RW-
+	 *
+	 * This is tricky, because map_pages is in the init section.
+	 * Do a dummy remap of the data section first (the data
+	 * section is already PAGE_KERNEL) to pull in the TLB entries
+	 * for map_kernel */
+	map_pages(init_begin, __pa(init_begin), init_end - init_begin,
+		  PAGE_KERNEL_RWX, 1);
+	/* now remap at PAGE_KERNEL since the TLB is pre-primed to execute
+	 * map_pages */
+	map_pages(init_begin, __pa(init_begin), init_end - init_begin,
+		  PAGE_KERNEL, 1);
+
+	/* force the kernel to see the new TLB entries */
+	__flush_tlb_range(0, init_begin, init_end);
 	/* Attempt to catch anyone trying to execute code here
 	 * by filling the page with BRK insns.
 	 */
 	memset((void *)init_begin, 0x00, init_end - init_begin);
+	/* finally dump all the instructions which were cached, since the
+	 * pages are no-longer executable */
 	flush_icache_range(init_begin, init_end);
-#endif
 	
-	/* align __init_begin and __init_end to page size,
-	   ignoring linker script where we might have tried to save RAM */
-	init_begin = PAGE_ALIGN(init_begin);
-	init_end = PAGE_ALIGN(init_end);
 	for (addr = init_begin; addr < init_end; addr += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(addr));
 		init_page_count(virt_to_page(addr));
@@ -616,114 +750,6 @@ void show_mem(unsigned int filter)
 #endif
 }
 
-
-static void __init map_pages(unsigned long start_vaddr, unsigned long start_paddr, unsigned long size, pgprot_t pgprot)
-{
-	pgd_t *pg_dir;
-	pmd_t *pmd;
-	pte_t *pg_table;
-	unsigned long end_paddr;
-	unsigned long start_pmd;
-	unsigned long start_pte;
-	unsigned long tmp1;
-	unsigned long tmp2;
-	unsigned long address;
-	unsigned long ro_start;
-	unsigned long ro_end;
-	unsigned long fv_addr;
-	unsigned long gw_addr;
-	extern const unsigned long fault_vector_20;
-	extern void * const linux_gateway_page;
-
-	ro_start = __pa((unsigned long)_text);
-	ro_end   = __pa((unsigned long)&data_start);
-	fv_addr  = __pa((unsigned long)&fault_vector_20) & PAGE_MASK;
-	gw_addr  = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK;
-
-	end_paddr = start_paddr + size;
-
-	pg_dir = pgd_offset_k(start_vaddr);
-
-#if PTRS_PER_PMD == 1
-	start_pmd = 0;
-#else
-	start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
-#endif
-	start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
-
-	address = start_paddr;
-	while (address < end_paddr) {
-#if PTRS_PER_PMD == 1
-		pmd = (pmd_t *)__pa(pg_dir);
-#else
-		pmd = (pmd_t *)pgd_address(*pg_dir);
-
-		/*
-		 * pmd is physical at this point
-		 */
-
-		if (!pmd) {
-			pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE << PMD_ORDER);
-			pmd = (pmd_t *) __pa(pmd);
-		}
-
-		pgd_populate(NULL, pg_dir, __va(pmd));
-#endif
-		pg_dir++;
-
-		/* now change pmd to kernel virtual addresses */
-
-		pmd = (pmd_t *)__va(pmd) + start_pmd;
-		for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++,pmd++) {
-
-			/*
-			 * pg_table is physical at this point
-			 */
-
-			pg_table = (pte_t *)pmd_address(*pmd);
-			if (!pg_table) {
-				pg_table = (pte_t *)
-					alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE);
-				pg_table = (pte_t *) __pa(pg_table);
-			}
-
-			pmd_populate_kernel(NULL, pmd, __va(pg_table));
-
-			/* now change pg_table to kernel virtual addresses */
-
-			pg_table = (pte_t *) __va(pg_table) + start_pte;
-			for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++,pg_table++) {
-				pte_t pte;
-
-				/*
-				 * Map the fault vector writable so we can
-				 * write the HPMC checksum.
-				 */
-#if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
-				if (address >= ro_start && address < ro_end
-							&& address != fv_addr
-							&& address != gw_addr)
-				    pte = __mk_pte(address, PAGE_KERNEL_RO);
-				else
-#endif
-				    pte = __mk_pte(address, pgprot);
-
-				if (address >= end_paddr)
-					pte_val(pte) = 0;
-
-				set_pte(pg_table, pte);
-
-				address += PAGE_SIZE;
-			}
-			start_pte = 0;
-
-			if (address >= end_paddr)
-			    break;
-		}
-		start_pmd = 0;
-	}
-}
-
 /*
  * pagetable_init() sets up the page tables
  *
@@ -748,14 +774,14 @@ static void __init pagetable_init(void)
 		size = pmem_ranges[range].pages << PAGE_SHIFT;
 
 		map_pages((unsigned long)__va(start_paddr), start_paddr,
-			size, PAGE_KERNEL);
+			  size, PAGE_KERNEL, 0);
 	}
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (initrd_end && initrd_end > mem_limit) {
 		printk(KERN_INFO "initrd: mapping %08lx-%08lx\n", initrd_start, initrd_end);
 		map_pages(initrd_start, __pa(initrd_start),
-			initrd_end - initrd_start, PAGE_KERNEL);
+			  initrd_end - initrd_start, PAGE_KERNEL, 0);
 	}
 #endif
 
@@ -780,7 +806,7 @@ static void __init gateway_init(void)
 	 */
 
 	map_pages(linux_gateway_page_addr, __pa(&linux_gateway_page),
-		PAGE_SIZE, PAGE_GATEWAY);
+		  PAGE_SIZE, PAGE_GATEWAY, 1);
 }
 
 #ifdef CONFIG_HPUX
-- 
cgit v1.2.3


From b7d45818444a31948cfc7849136013a0ea54b2fb Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Fri, 15 Apr 2011 12:37:22 -0500
Subject: [PARISC] prevent speculative re-read on cache flush

According to Appendix F, the TLB is the primary arbiter of speculation.
Thus, if a page has a TLB entry, it may be speculatively read into the
cache.  On linux, this can cause us incoherencies because if we're about
to do a disk read, we call get_user_pages() to do the flush/invalidate
in user space, but we still potentially have the user TLB entries, and
the cache could speculate the lines back into userspace (thus causing
stale data to be used).  This is fixed by purging the TLB entries before
we flush through the tmpalias space.  Now, the only way the line could
be re-speculated is if the user actually tries to touch it (which is not
allowed).

Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 arch/parisc/include/asm/cacheflush.h |  5 ++++-
 arch/parisc/kernel/cache.c           | 13 ++++++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index d18328b3f938..da601dd34c05 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -3,6 +3,7 @@
 
 #include <linux/mm.h>
 #include <linux/uaccess.h>
+#include <asm/tlbflush.h>
 
 /* The usual comment is "Caches aren't brain-dead on the <architecture>".
  * Unfortunately, that doesn't apply to PA-RISC. */
@@ -112,8 +113,10 @@ void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
 static inline void
 flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
 {
-	if (PageAnon(page))
+	if (PageAnon(page)) {
+		flush_tlb_page(vma, vmaddr);
 		flush_dcache_page_asm(page_to_phys(page), vmaddr);
+	}
 }
 
 #ifdef CONFIG_DEBUG_RODATA
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 3f11331c2775..83335f3da5fc 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -304,10 +304,20 @@ void flush_dcache_page(struct page *page)
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		addr = mpnt->vm_start + offset;
 
+		/* The TLB is the engine of coherence on parisc: The
+		 * CPU is entitled to speculate any page with a TLB
+		 * mapping, so here we kill the mapping then flush the
+		 * page along a special flush only alias mapping.
+		 * This guarantees that the page is no-longer in the
+		 * cache for any process and nor may it be
+		 * speculatively read in (until the user or kernel
+		 * specifically accesses it, of course) */
+
+		flush_tlb_page(mpnt, addr);
 		if (old_addr == 0 || (old_addr & (SHMLBA - 1)) != (addr & (SHMLBA - 1))) {
 			__flush_cache_page(mpnt, addr, page_to_phys(page));
 			if (old_addr)
-				printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %s\n", old_addr, addr, mpnt->vm_file ? mpnt->vm_file->f_path.dentry->d_name.name : "(null)");
+				printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %s\n", old_addr, addr, mpnt->vm_file ? (char *)mpnt->vm_file->f_path.dentry->d_name.name : "(null)");
 			old_addr = addr;
 		}
 	}
@@ -499,6 +509,7 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
 {
 	BUG_ON(!vma->vm_mm->context);
 
+	flush_tlb_page(vma, vmaddr);
 	__flush_cache_page(vma, vmaddr, page_to_phys(pfn_to_page(pfn)));
 
 }
-- 
cgit v1.2.3


From 1824074b07ee66fa0f714e08579ad85075132d7b Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Fri, 15 Apr 2011 08:55:44 -0700
Subject: [PARISC] wire up fanotify syscalls

Cc: stable@kernel.org
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 arch/parisc/include/asm/unistd.h   | 4 +++-
 arch/parisc/kernel/sys_parisc32.c  | 8 ++++++++
 arch/parisc/kernel/syscall_table.S | 2 ++
 3 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 3eb82c2a5ec3..09f62a6eb069 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -814,8 +814,10 @@
 #define __NR_recvmmsg		(__NR_Linux + 319)
 #define __NR_accept4		(__NR_Linux + 320)
 #define __NR_prlimit64		(__NR_Linux + 321)
+#define __NR_fanotify_init	(__NR_Linux + 322)
+#define __NR_fanotify_mark	(__NR_Linux + 323)
 
-#define __NR_Linux_syscalls	(__NR_prlimit64 + 1)
+#define __NR_Linux_syscalls	(__NR_fanotify_mark + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index 88a0ad14a9c9..dc9a62462323 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -228,3 +228,11 @@ asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
         return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
                              ((loff_t)lenhi << 32) | lenlo);
 }
+
+asmlinkage long compat_sys_fanotify_mark(int fan_fd, int flags, u32 mask_hi,
+					 u32 mask_lo, int fd,
+					 const char __user *pathname)
+{
+	return sys_fanotify_mark(fan_fd, flags, ((u64)mask_hi << 32) | mask_lo,
+				 fd, pathname);
+}
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 4be85ee10b85..c5b01e80981a 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -420,6 +420,8 @@
 	ENTRY_COMP(recvmmsg)
 	ENTRY_SAME(accept4)		/* 320 */
 	ENTRY_SAME(prlimit64)
+	ENTRY_SAME(fanotify_init)
+	ENTRY_COMP(fanotify_mark)
 
 	/* Nothing yet */
 
-- 
cgit v1.2.3


From c3f957a22eca106bd28136943305b390b4337ebf Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Fri, 15 Apr 2011 08:55:45 -0700
Subject: [PARISC] wire up clock_adjtime syscall

Cc: stable@kernel.org
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 arch/parisc/include/asm/unistd.h   | 3 ++-
 arch/parisc/kernel/syscall_table.S | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 09f62a6eb069..9af5fab2befc 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -816,8 +816,9 @@
 #define __NR_prlimit64		(__NR_Linux + 321)
 #define __NR_fanotify_init	(__NR_Linux + 322)
 #define __NR_fanotify_mark	(__NR_Linux + 323)
+#define __NR_clock_adjtime	(__NR_Linux + 324)
 
-#define __NR_Linux_syscalls	(__NR_fanotify_mark + 1)
+#define __NR_Linux_syscalls	(__NR_clock_adjtime + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index c5b01e80981a..473bf41f2682 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -422,6 +422,7 @@
 	ENTRY_SAME(prlimit64)
 	ENTRY_SAME(fanotify_init)
 	ENTRY_COMP(fanotify_mark)
+	ENTRY_COMP(clock_adjtime)
 
 	/* Nothing yet */
 
-- 
cgit v1.2.3


From a71aae4cec120ee85cf32608fca40a4605461214 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Fri, 15 Apr 2011 08:55:46 -0700
Subject: [PARISC] wire up the fhandle syscalls

Cc: stable@kernel.org
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 arch/parisc/include/asm/unistd.h   | 4 +++-
 arch/parisc/kernel/syscall_table.S | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 9af5fab2befc..4266e44a8503 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -817,8 +817,10 @@
 #define __NR_fanotify_init	(__NR_Linux + 322)
 #define __NR_fanotify_mark	(__NR_Linux + 323)
 #define __NR_clock_adjtime	(__NR_Linux + 324)
+#define __NR_name_to_handle_at	(__NR_Linux + 325)
+#define __NR_open_by_handle_at	(__NR_Linux + 326)
 
-#define __NR_Linux_syscalls	(__NR_clock_adjtime + 1)
+#define __NR_Linux_syscalls	(__NR_open_by_handle_at + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 473bf41f2682..b5d298209ecf 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -423,6 +423,8 @@
 	ENTRY_SAME(fanotify_init)
 	ENTRY_COMP(fanotify_mark)
 	ENTRY_COMP(clock_adjtime)
+	ENTRY_SAME(name_to_handle_at)	/* 325 */
+	ENTRY_COMP(open_by_handle_at)
 
 	/* Nothing yet */
 
-- 
cgit v1.2.3


From 2e7bad5f34b5beed47542490c760ed26574e38ba Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Fri, 15 Apr 2011 08:55:47 -0700
Subject: [PARISC] wire up syncfs syscall

Cc: stable@kernel.org
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 arch/parisc/include/asm/unistd.h   | 3 ++-
 arch/parisc/kernel/syscall_table.S | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 4266e44a8503..9cbc2c3bf630 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -819,8 +819,9 @@
 #define __NR_clock_adjtime	(__NR_Linux + 324)
 #define __NR_name_to_handle_at	(__NR_Linux + 325)
 #define __NR_open_by_handle_at	(__NR_Linux + 326)
+#define __NR_syncfs		(__NR_Linux + 327)
 
-#define __NR_Linux_syscalls	(__NR_open_by_handle_at + 1)
+#define __NR_Linux_syscalls	(__NR_syncfs + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index b5d298209ecf..a5b02ce4d41e 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -425,6 +425,7 @@
 	ENTRY_COMP(clock_adjtime)
 	ENTRY_SAME(name_to_handle_at)	/* 325 */
 	ENTRY_COMP(open_by_handle_at)
+	ENTRY_SAME(syncfs)
 
 	/* Nothing yet */
 
-- 
cgit v1.2.3


From e9569c1511d2590a27b46b94bafb7acece034e5c Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@cam.ac.uk>
Date: Thu, 14 Apr 2011 12:11:42 +0100
Subject: ARM: 6881/1: cputype.h uses __attribute_const__ which requires
 including kernel.h

Issue manifests as:

In file included from arch/arm/mach-pxa/include/mach/hardware.h:62,
                  from arch/arm/mach-pxa/include/mach/gpio.h:28,
                  from /home/jic23/src/kernel/temp-remove/arch/arm/include/asm/gpio.h:5,
                  from include/linux/gpio.h:7,
                  from drivers/staging/iio/gyro/adis16080_core.c:8:
 /home/jic23/src/kernel/temp-remove/arch/arm/include/asm/cputype.h:57: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'read_cpuid_id'
...

Signed-off-by: Jonathan Cameron <jic23@cam.ac.uk>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/cputype.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index ed5bc9e05a4e..cd4458f64171 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -2,6 +2,7 @@
 #define __ASM_ARM_CPUTYPE_H
 
 #include <linux/stringify.h>
+#include <linux/kernel.h>
 
 #define CPUID_ID	0
 #define CPUID_CACHETYPE	1
-- 
cgit v1.2.3


From 7c7a81b53e581d727d069cc45df5510516faac31 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Wed, 13 Apr 2011 06:30:08 +0000
Subject: powerpc/kexec: Fix regression causing compile failure on UP

Recent commit b987812b3fcaf70fdf0037589e5d2f5f2453e6ce caused
a compile failure on UP because a considerably large block
of the file was included within CONFIG_SMP, hence making a stub
function not exposed on UP builds when it needed to be.

Relocate the stub to the #else /* ! CONFIG_SMP */ section
and also annotate the relevant else/endif so that nobody
else falls into the same trap I did.

Reported-by: Michael Guntsche <mike@it-loops.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/crash.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 3d3d416339dd..5b5e1f002a8e 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -163,7 +163,7 @@ static void crash_kexec_prepare_cpus(int cpu)
 }
 
 /* wait for all the CPUs to hit real mode but timeout if they don't come in */
-#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP)
+#ifdef CONFIG_PPC_STD_MMU_64
 static void crash_kexec_wait_realmode(int cpu)
 {
 	unsigned int msecs;
@@ -188,9 +188,7 @@ static void crash_kexec_wait_realmode(int cpu)
 	}
 	mb();
 }
-#else
-static inline void crash_kexec_wait_realmode(int cpu) {}
-#endif
+#endif	/* CONFIG_PPC_STD_MMU_64 */
 
 /*
  * This function will be called by secondary cpus or by kexec cpu
@@ -235,7 +233,9 @@ void crash_kexec_secondary(struct pt_regs *regs)
 	crash_ipi_callback(regs);
 }
 
-#else
+#else	/* ! CONFIG_SMP */
+static inline void crash_kexec_wait_realmode(int cpu) {}
+
 static void crash_kexec_prepare_cpus(int cpu)
 {
 	/*
@@ -255,7 +255,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
 {
 	cpus_in_sr = CPU_MASK_NONE;
 }
-#endif
+#endif	/* CONFIG_SMP */
 
 /*
  * Register a function to be called on shutdown.  Only use this if you
-- 
cgit v1.2.3


From 127493d5dc73589cbe00ea5ec8357cc2a4c0d82a Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Wed, 13 Apr 2011 19:45:59 +0000
Subject: powerpc/pseries: Use a kmem cache for DTL buffers

PAPR specifies that DTL buffers can not cross AMS environments (aka CMO
in the PAPR) and can not cross a memory entitlement granule boundary
(4k). This is found in section 14.11.3.2 H_REGISTER_VPA of the PAPR.
kmalloc does not guarantee an alignment of the allocation, though,
beyond 8 bytes (at least in my understanding). Create a special kmem
cache for DTL buffers with the alignment requirement.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/setup.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 000724149089..6c42cfde8415 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -287,14 +287,22 @@ static int alloc_dispatch_logs(void)
 	int cpu, ret;
 	struct paca_struct *pp;
 	struct dtl_entry *dtl;
+	struct kmem_cache *dtl_cache;
 
 	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
 		return 0;
 
+	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
+						DISPATCH_LOG_BYTES, 0, NULL);
+	if (!dtl_cache) {
+		pr_warn("Failed to create dispatch trace log buffer cache\n");
+		pr_warn("Stolen time statistics will be unreliable\n");
+		return 0;
+	}
+
 	for_each_possible_cpu(cpu) {
 		pp = &paca[cpu];
-		dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL,
-				   cpu_to_node(cpu));
+		dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
 		if (!dtl) {
 			pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
 				cpu);
-- 
cgit v1.2.3


From 84ffae55af79d7b8834fd0c08d0d1ebf2c77f91e Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 7 Apr 2011 21:44:21 +0000
Subject: powerpc: Fix oops if scan_dispatch_log is called too early

We currently enable interrupts before the dispatch log for the boot
cpu is setup. If a timer interrupt comes in early enough we oops in
scan_dispatch_log:

Unable to handle kernel paging request for data at address 0x00000010

...

.scan_dispatch_log+0xb0/0x170
.account_system_vtime+0xa0/0x220
.irq_enter+0x88/0xc0
.do_IRQ+0x48/0x230

The patch below adds a check to scan_dispatch_log to ensure the
dispatch log has been allocated.

Signed-off-by: Anton Blanchard <anton@samba.org>
Cc: <stable@kernel.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/time.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 375480c56eb9..f33acfd872ad 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -229,6 +229,9 @@ static u64 scan_dispatch_log(u64 stop_tb)
 	u64 stolen = 0;
 	u64 dtb;
 
+	if (!dtl)
+		return 0;
+
 	if (i == vpa->dtl_idx)
 		return 0;
 	while (i < vpa->dtl_idx) {
-- 
cgit v1.2.3


From 09597cfe93d3cc2c6e064a3ead5956b882511560 Mon Sep 17 00:00:00 2001
From: Stefan Roese <sr@denx.de>
Date: Thu, 14 Apr 2011 23:49:53 +0000
Subject: powerpc: Don't write protect kernel text with CONFIG_DYNAMIC_FTRACE
 enabled

This problem was noticed on an MPC855T platform. Ftrace did oops
when trying to write to the kernel text segment.

Many thanks to Joakim for finding the root cause of this problem.

Signed-off-by: Stefan Roese <sr@denx.de>
Cc: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/pte-common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index 811f04ac3660..8d1569c29042 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -162,7 +162,7 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
  * on platforms where such control is possible.
  */
 #if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
-	defined(CONFIG_KPROBES)
+	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
 #define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
 #else
 #define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
-- 
cgit v1.2.3


From 86c74ab317c1ef4d37325e0d7ca8a01a796b0bd7 Mon Sep 17 00:00:00 2001
From: Eric B Munson <emunson@mgebm.net>
Date: Fri, 15 Apr 2011 08:12:30 +0000
Subject: powerpc/perf_event: Skip updating kernel counters if register value
 shrinks

Because of speculative event roll back, it is possible for some event coutners
to decrease between reads on POWER7.  This causes a problem with the way that
counters are updated.  Delta calues are calculated in a 64 bit value and the
top 32 bits are masked.  If the register value has decreased, this leaves us
with a very large positive value added to the kernel counters.  This patch
protects against this by skipping the update if the delta would be negative.
This can lead to a lack of precision in the coutner values, but from my testing
the value is typcially fewer than 10 samples at a time.

Signed-off-by: Eric B Munson <emunson@mgebm.net>
Cc: stable@kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/perf_event.c | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index c4063b7f49a0..822f63008ae1 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -398,6 +398,25 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
 	return 0;
 }
 
+static u64 check_and_compute_delta(u64 prev, u64 val)
+{
+	u64 delta = (val - prev) & 0xfffffffful;
+
+	/*
+	 * POWER7 can roll back counter values, if the new value is smaller
+	 * than the previous value it will cause the delta and the counter to
+	 * have bogus values unless we rolled a counter over.  If a coutner is
+	 * rolled back, it will be smaller, but within 256, which is the maximum
+	 * number of events to rollback at once.  If we dectect a rollback
+	 * return 0.  This can lead to a small lack of precision in the
+	 * counters.
+	 */
+	if (prev > val && (prev - val) < 256)
+		delta = 0;
+
+	return delta;
+}
+
 static void power_pmu_read(struct perf_event *event)
 {
 	s64 val, delta, prev;
@@ -416,10 +435,11 @@ static void power_pmu_read(struct perf_event *event)
 		prev = local64_read(&event->hw.prev_count);
 		barrier();
 		val = read_pmc(event->hw.idx);
+		delta = check_and_compute_delta(prev, val);
+		if (!delta)
+			return;
 	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
-	/* The counters are only 32 bits wide */
-	delta = (val - prev) & 0xfffffffful;
 	local64_add(delta, &event->count);
 	local64_sub(delta, &event->hw.period_left);
 }
@@ -449,8 +469,9 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
 		prev = local64_read(&event->hw.prev_count);
 		event->hw.idx = 0;
-		delta = (val - prev) & 0xfffffffful;
-		local64_add(delta, &event->count);
+		delta = check_and_compute_delta(prev, val);
+		if (delta)
+			local64_add(delta, &event->count);
 	}
 }
 
@@ -458,14 +479,16 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
 				  unsigned long pmc5, unsigned long pmc6)
 {
 	struct perf_event *event;
-	u64 val;
+	u64 val, prev;
 	int i;
 
 	for (i = 0; i < cpuhw->n_limited; ++i) {
 		event = cpuhw->limited_counter[i];
 		event->hw.idx = cpuhw->limited_hwidx[i];
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
-		local64_set(&event->hw.prev_count, val);
+		prev = local64_read(&event->hw.prev_count);
+		if (check_and_compute_delta(prev, val))
+			local64_set(&event->hw.prev_count, val);
 		perf_event_update_userpage(event);
 	}
 }
@@ -1197,7 +1220,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 
 	/* we don't have to worry about interrupts here */
 	prev = local64_read(&event->hw.prev_count);
-	delta = (val - prev) & 0xfffffffful;
+	delta = check_and_compute_delta(prev, val);
 	local64_add(delta, &event->count);
 
 	/*
-- 
cgit v1.2.3


From 7b84b29b8c2711fe64e0dba4db22f02ce0f16015 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 18 Apr 2011 15:46:35 +1000
Subject: powerpc/powermac: Build fix with SMP and CPU hotplug

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/powermac/smp.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index a830c5e80657..bc5f0dc6ae1e 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -842,6 +842,7 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr)
 	mpic_setup_this_cpu();
 }
 
+#ifdef CONFIG_PPC64
 #ifdef CONFIG_HOTPLUG_CPU
 static int smp_core99_cpu_notify(struct notifier_block *self,
 				 unsigned long action, void *hcpu)
@@ -879,7 +880,6 @@ static struct notifier_block __cpuinitdata smp_core99_cpu_nb = {
 
 static void __init smp_core99_bringup_done(void)
 {
-#ifdef CONFIG_PPC64
 	extern void g5_phy_disable_cpu1(void);
 
 	/* Close i2c bus if it was used for tb sync */
@@ -894,14 +894,14 @@ static void __init smp_core99_bringup_done(void)
 		set_cpu_present(1, false);
 		g5_phy_disable_cpu1();
 	}
-#endif /* CONFIG_PPC64 */
-
 #ifdef CONFIG_HOTPLUG_CPU
 	register_cpu_notifier(&smp_core99_cpu_nb);
 #endif
+
 	if (ppc_md.progress)
 		ppc_md.progress("smp_core99_bringup_done", 0x349);
 }
+#endif /* CONFIG_PPC64 */
 
 #ifdef CONFIG_HOTPLUG_CPU
 
@@ -975,7 +975,9 @@ static void pmac_cpu_die(void)
 struct smp_ops_t core99_smp_ops = {
 	.message_pass	= smp_mpic_message_pass,
 	.probe		= smp_core99_probe,
+#ifdef CONFIG_PPC64
 	.bringup_done	= smp_core99_bringup_done,
+#endif
 	.kick_cpu	= smp_core99_kick_cpu,
 	.setup_cpu	= smp_core99_setup_cpu,
 	.give_timebase	= smp_core99_give_timebase,
-- 
cgit v1.2.3


From cf8d91633ddef9e816ccbf3da833c79ce508988d Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 28 Feb 2011 17:58:48 -0500
Subject: xen/p2m/m2p/gnttab: Support GNTMAP_host_map in the M2P override.

We only supported the M2P (and P2M) override only for the
GNTMAP_contains_pte type mappings. Meaning that we grants
operations would "contain the machine address of the PTE to update"
If the flag is unset, then the grant operation is
"contains a host virtual address". The latter case means that
the Hypervisor takes care of updating our page table
(specifically the PTE entry) with the guest's MFN. As such we should
not try to do anything with the PTE. Previous to this patch
we would try to clear the PTE which resulted in Xen hypervisor
being upset with us:

(XEN) mm.c:1066:d0 Attempt to implicitly unmap a granted PTE c0100000ccc59067
(XEN) domain_crash called from mm.c:1067
(XEN) Domain 0 (vcpu#0) crashed on cpu#3:
(XEN) ----[ Xen-4.0-110228  x86_64  debug=y  Not tainted ]----

and crashing us.

This patch allows us to inhibit the PTE clearing in the PV guest
if the GNTMAP_contains_pte is not set.

On the m2p_remove_override path we provide the same parameter.

Sadly in the grant-table driver we do not have a mechanism to
tell m2p_remove_override whether to clear the PTE or not. Since
the grant-table driver is used by user-space, we can safely assume
that it operates only on PTE's. Hence the implementation for
it to work on !GNTMAP_contains_pte returns -EOPNOTSUPP. In the future
we can implement the support for this. It will require some extra
accounting structure to keep track of the page[i], and the flag.

[v1: Added documentation details, made it return -EOPNOTSUPP instead
 of trying to do a half-way implementation]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/include/asm/xen/page.h |  5 +++--
 arch/x86/xen/p2m.c              | 10 ++++------
 drivers/xen/grant-table.c       | 31 ++++++++++++++++++++++++-------
 3 files changed, 31 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index c61934fbf22a..64a619d47d34 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -47,8 +47,9 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 extern unsigned long set_phys_range_identity(unsigned long pfn_s,
 					     unsigned long pfn_e);
 
-extern int m2p_add_override(unsigned long mfn, struct page *page);
-extern int m2p_remove_override(struct page *page);
+extern int m2p_add_override(unsigned long mfn, struct page *page,
+			    bool clear_pte);
+extern int m2p_remove_override(struct page *page, bool clear_pte);
 extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 141eb0de8b06..2d2b32af3a1d 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -650,7 +650,7 @@ static unsigned long mfn_hash(unsigned long mfn)
 }
 
 /* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page)
+int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
 {
 	unsigned long flags;
 	unsigned long pfn;
@@ -662,7 +662,6 @@ int m2p_add_override(unsigned long mfn, struct page *page)
 	if (!PageHighMem(page)) {
 		address = (unsigned long)__va(pfn << PAGE_SHIFT);
 		ptep = lookup_address(address, &level);
-
 		if (WARN(ptep == NULL || level != PG_LEVEL_4K,
 					"m2p_add_override: pfn %lx not mapped", pfn))
 			return -EINVAL;
@@ -674,10 +673,9 @@ int m2p_add_override(unsigned long mfn, struct page *page)
 	if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
 		return -ENOMEM;
 
-	if (!PageHighMem(page))
+	if (clear_pte && !PageHighMem(page))
 		/* Just zap old mapping for now */
 		pte_clear(&init_mm, address, ptep);
-
 	spin_lock_irqsave(&m2p_override_lock, flags);
 	list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
@@ -685,7 +683,7 @@ int m2p_add_override(unsigned long mfn, struct page *page)
 	return 0;
 }
 
-int m2p_remove_override(struct page *page)
+int m2p_remove_override(struct page *page, bool clear_pte)
 {
 	unsigned long flags;
 	unsigned long mfn;
@@ -713,7 +711,7 @@ int m2p_remove_override(struct page *page)
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
 	set_phys_to_machine(pfn, page->index);
 
-	if (!PageHighMem(page))
+	if (clear_pte && !PageHighMem(page))
 		set_pte_at(&init_mm, address, ptep,
 				pfn_pte(pfn, PAGE_KERNEL));
 		/* No tlb flush necessary because the caller already
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 3745a318defc..fd725cde6ad1 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -466,13 +466,30 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		if (map_ops[i].status)
 			continue;
 
-		/* m2p override only supported for GNTMAP_contains_pte mappings */
-		if (!(map_ops[i].flags & GNTMAP_contains_pte))
-			continue;
-		pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
+		if (map_ops[i].flags & GNTMAP_contains_pte) {
+			pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
 				(map_ops[i].host_addr & ~PAGE_MASK));
-		mfn = pte_mfn(*pte);
-		ret = m2p_add_override(mfn, pages[i]);
+			mfn = pte_mfn(*pte);
+		} else {
+			/* If you really wanted to do this:
+			 * mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
+			 *
+			 * The reason we do not implement it is b/c on the
+			 * unmap path (gnttab_unmap_refs) we have no means of
+			 * checking whether the page is !GNTMAP_contains_pte.
+			 *
+			 * That is without some extra data-structure to carry
+			 * the struct page, bool clear_pte, and list_head next
+			 * tuples and deal with allocation/delallocation, etc.
+			 *
+			 * The users of this API set the GNTMAP_contains_pte
+			 * flag so lets just return not supported until it
+			 * becomes neccessary to implement.
+			 */
+			return -EOPNOTSUPP;
+		}
+		ret = m2p_add_override(mfn, pages[i],
+				       map_ops[i].flags & GNTMAP_contains_pte);
 		if (ret)
 			return ret;
 	}
@@ -494,7 +511,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 		return ret;
 
 	for (i = 0; i < count; i++) {
-		ret = m2p_remove_override(pages[i]);
+		ret = m2p_remove_override(pages[i], true /* clear the PTE */);
 		if (ret)
 			return ret;
 	}
-- 
cgit v1.2.3


From af289bfe15fc92ecfbf6d8312713815b33e452c0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 18 Apr 2011 15:45:44 +0200
Subject: x86, gart: Convert spaces to tabs in enable_gart_translation

Probably by copy&paste this function was indented by spaces.
Convert this to tabs.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Link: http://lkml.kernel.org/r/1303134346-5805-3-git-send-email-joerg.roedel@amd.com
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/gart.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 43085bfc99c3..88c1ebee0db2 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -75,17 +75,17 @@ static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
 {
 	u32 tmp, ctl;
 
-        /* address of the mappings table */
-        addr >>= 12;
-        tmp = (u32) addr<<4;
-        tmp &= ~0xf;
-        pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp);
-
-        /* Enable GART translation for this hammer. */
-        pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
-        ctl |= GARTEN;
-        ctl &= ~(DISGARTCPU | DISGARTIO);
-        pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
+	/* address of the mappings table */
+	addr >>= 12;
+	tmp = (u32) addr<<4;
+	tmp &= ~0xf;
+	pci_write_config_dword(dev, AMD64_GARTTABLEBASE, tmp);
+
+	/* Enable GART translation for this hammer. */
+	pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
+	ctl |= GARTEN;
+	ctl &= ~(DISGARTCPU | DISGARTIO);
+	pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 }
 
 static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size)
-- 
cgit v1.2.3


From c34151a742d84ae65db2088ea30495063f697fbe Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 18 Apr 2011 15:45:45 +0200
Subject: x86, gart: Set DISTLBWALKPRB bit always

The DISTLBWALKPRB bit must be set for the GART because the
gatt table is mapped UC. But the current code does not set
the bit at boot when the BIOS setup the aperture correctly.
Fix that by setting this bit when enabling the GART instead
of the other places.

Cc: <stable@kernel.org>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Link: http://lkml.kernel.org/r/1303134346-5805-4-git-send-email-joerg.roedel@amd.com
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/include/asm/gart.h   | 4 ++--
 arch/x86/kernel/aperture_64.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 88c1ebee0db2..156cd5d18d2a 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -66,7 +66,7 @@ static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order)
 	 * Don't enable translation but enable GART IO and CPU accesses.
 	 * Also, set DISTLBWALKPRB since GART tables memory is UC.
 	 */
-	ctl = DISTLBWALKPRB | order << 1;
+	ctl = order << 1;
 
 	pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 }
@@ -83,7 +83,7 @@ static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
 
 	/* Enable GART translation for this hammer. */
 	pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
-	ctl |= GARTEN;
+	ctl |= GARTEN | DISTLBWALKPRB;
 	ctl &= ~(DISGARTCPU | DISGARTIO);
 	pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
 }
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 86d1ad4962a7..73fb469908c6 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -499,7 +499,7 @@ out:
 		 * Don't enable translation yet but enable GART IO and CPU
 		 * accesses and set DISTLBWALKPRB since GART table memory is UC.
 		 */
-		u32 ctl = DISTLBWALKPRB | aper_order << 1;
+		u32 ctl = aper_order << 1;
 
 		bus = amd_nb_bus_dev_ranges[i].bus;
 		dev_base = amd_nb_bus_dev_ranges[i].dev_base;
-- 
cgit v1.2.3


From 665d3e2af83c8fbd149534db8f57d82fa6fa6753 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 18 Apr 2011 15:45:46 +0200
Subject: x86, gart: Make sure GART does not map physmem above 1TB

The GART can only map physical memory below 1TB. Make sure
the gart driver in the kernel does not try to map memory
above 1TB.

Cc: <stable@kernel.org>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Link: http://lkml.kernel.org/r/1303134346-5805-5-git-send-email-joerg.roedel@amd.com
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/pci-gart_64.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 82ada01625b9..b117efd24f71 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -81,6 +81,9 @@ static u32 gart_unmapped_entry;
 #define AGPEXTERN
 #endif
 
+/* GART can only remap to physical addresses < 1TB */
+#define GART_MAX_PHYS_ADDR	(1ULL << 40)
+
 /* backdoor interface to AGP driver */
 AGPEXTERN int agp_memory_reserved;
 AGPEXTERN __u32 *agp_gatt_table;
@@ -212,9 +215,13 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 				size_t size, int dir, unsigned long align_mask)
 {
 	unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE);
-	unsigned long iommu_page = alloc_iommu(dev, npages, align_mask);
+	unsigned long iommu_page;
 	int i;
 
+	if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR))
+		return bad_dma_addr;
+
+	iommu_page = alloc_iommu(dev, npages, align_mask);
 	if (iommu_page == -1) {
 		if (!nonforced_iommu(dev, phys_mem, size))
 			return phys_mem;
-- 
cgit v1.2.3


From c387aa3a1a910ce00b86f3a85082d24f144db256 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 18 Apr 2011 15:45:43 +0200
Subject: x86, gart: Don't enforce GART aperture lower-bound by alignment

This patch changes the allocation of the GART aperture to
enforce only natural alignment instead of aligning it on
512MB. This big alignment was used to force the GART
aperture to be over 512MB. This is enforced by using 512MB
as the lower-bound address in the allocation range.

[ hpa: The actual number 512 MiB needs to be revisited, too. ]

Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Link: http://lkml.kernel.org/r/1303134346-5805-2-git-send-email-joerg.roedel@amd.com
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/aperture_64.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 73fb469908c6..3d2661ca6542 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -30,6 +30,22 @@
 #include <asm/amd_nb.h>
 #include <asm/x86_init.h>
 
+/*
+ * Using 512M as goal, in case kexec will load kernel_big
+ * that will do the on-position decompress, and could overlap with
+ * with the gart aperture that is used.
+ * Sequence:
+ * kernel_small
+ * ==> kexec (with kdump trigger path or gart still enabled)
+ * ==> kernel_small (gart area become e820_reserved)
+ * ==> kexec (with kdump trigger path or gart still enabled)
+ * ==> kerne_big (uncompressed size will be big than 64M or 128M)
+ * So don't use 512M below as gart iommu, leave the space for kernel
+ * code for safe.
+ */
+#define GART_MIN_ADDR	(512ULL << 20)
+#define GART_MAX_ADDR	(1ULL   << 32)
+
 int gart_iommu_aperture;
 int gart_iommu_aperture_disabled __initdata;
 int gart_iommu_aperture_allowed __initdata;
@@ -70,21 +86,9 @@ static u32 __init allocate_aperture(void)
 	 * memory. Unfortunately we cannot move it up because that would
 	 * make the IOMMU useless.
 	 */
-	/*
-	 * using 512M as goal, in case kexec will load kernel_big
-	 * that will do the on position decompress, and  could overlap with
-	 * that position with gart that is used.
-	 * sequende:
-	 * kernel_small
-	 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
-	 * ==> kernel_small(gart area become e820_reserved)
-	 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
-	 * ==> kerne_big (uncompressed size will be big than 64M or 128M)
-	 * so don't use 512M below as gart iommu, leave the space for kernel
-	 * code for safe
-	 */
-	addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
-	if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) {
+	addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
+				      aper_size, aper_size);
+	if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) {
 		printk(KERN_ERR
 			"Cannot allocate aperture memory hole (%lx,%uK)\n",
 				addr, aper_size>>10);
-- 
cgit v1.2.3


From bcb22a94f68cde70e820dc7280d1c731e8e695f7 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Sun, 3 Apr 2011 22:47:23 -0300
Subject: ARM: mx5/mx53_loco: Fix build warning related to gpio_keys_button
 structure

Fix the following warning:

CC      arch/arm/mach-mx5/board-mx53_loco.o
arch/arm/mach-mx5/board-mx53_loco.c:203: warning: initialization discards qualifiers from pointer target type

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 arch/arm/mach-mx5/board-mx53_loco.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-mx5/board-mx53_loco.c b/arch/arm/mach-mx5/board-mx53_loco.c
index 10a1bea10548..6206b1191fe8 100644
--- a/arch/arm/mach-mx5/board-mx53_loco.c
+++ b/arch/arm/mach-mx5/board-mx53_loco.c
@@ -193,7 +193,7 @@ static iomux_v3_cfg_t mx53_loco_pads[] = {
 	.wakeup		= wake,					\
 }
 
-static const struct gpio_keys_button loco_buttons[] __initconst = {
+static struct gpio_keys_button loco_buttons[] = {
 	GPIO_BUTTON(MX53_LOCO_POWER, KEY_POWER, 1, "power", 0),
 	GPIO_BUTTON(MX53_LOCO_UI1, KEY_VOLUMEUP, 1, "volume-up", 0),
 	GPIO_BUTTON(MX53_LOCO_UI2, KEY_VOLUMEDOWN, 1, "volume-down", 0),
-- 
cgit v1.2.3


From b1e7734f024c9ce4393016a97c8d821e1f18d9b4 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Mon, 18 Apr 2011 15:18:02 -0700
Subject: x86, percpu: Use ASM_NOP4 instead of hardcoding P6_NOP4

For use in assembly constants, use the ASM_NOP* defines.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1303166160-10315-2-git-send-email-hpa@linux.intel.com
---
 arch/x86/include/asm/percpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index d475b4398d8b..751e7f3f705c 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -517,7 +517,7 @@ do {									\
 	typeof(o2) __o2 = o2;						\
 	typeof(o2) __n2 = n2;						\
 	typeof(o2) __dummy;						\
-	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4,	\
+	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP4,	\
 		       "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t",	\
 		       X86_FEATURE_CX16,				\
 		       ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),		\
-- 
cgit v1.2.3


From dc326fca2b640fc41aed7c015d0f456935a66255 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Mon, 18 Apr 2011 15:19:51 -0700
Subject: x86, cpu: Clean up and unify the NOP selection infrastructure

Clean up and unify the NOP selection infrastructure:

- Make the atomic 5-byte NOP a part of the selection system.
- Pick NOPs once during early boot and then be done with it.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jason Baron <jbaron@redhat.com>
Link: http://lkml.kernel.org/r/1303166160-10315-3-git-send-email-hpa@linux.intel.com
---
 arch/x86/include/asm/alternative.h |   8 --
 arch/x86/include/asm/nops.h        | 146 ++++++++++++++++-------------
 arch/x86/kernel/alternative.c      | 182 ++++++++++++++++++++-----------------
 arch/x86/kernel/ftrace.c           |   4 +-
 arch/x86/kernel/jump_label.c       |   5 +-
 arch/x86/kernel/setup.c            |   6 +-
 6 files changed, 190 insertions(+), 161 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 13009d1af99a..7da168225a9d 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -191,12 +191,4 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
-#define IDEAL_NOP_SIZE_5 5
-extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
-extern void arch_init_ideal_nop5(void);
-#else
-static inline void arch_init_ideal_nop5(void) {}
-#endif
-
 #endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index af788496020b..405b4032a60b 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -1,7 +1,13 @@
 #ifndef _ASM_X86_NOPS_H
 #define _ASM_X86_NOPS_H
 
-/* Define nops for use with alternative() */
+/*
+ * Define nops for use with alternative() and for tracing.
+ *
+ * *_NOP5_ATOMIC must be a single instruction.
+ */
+
+#define NOP_DS_PREFIX 0x3e
 
 /* generic versions from gas
    1: nop
@@ -13,14 +19,15 @@
    6: leal 0x00000000(%esi),%esi
    7: leal 0x00000000(,%esi,1),%esi
 */
-#define GENERIC_NOP1 ".byte 0x90\n"
-#define GENERIC_NOP2 ".byte 0x89,0xf6\n"
-#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n"
-#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n"
-#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4
-#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7
+#define GENERIC_NOP1 0x90
+#define GENERIC_NOP2 0x89,0xf6
+#define GENERIC_NOP3 0x8d,0x76,0x00
+#define GENERIC_NOP4 0x8d,0x74,0x26,0x00
+#define GENERIC_NOP5 GENERIC_NOP1,GENERIC_NOP4
+#define GENERIC_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00
+#define GENERIC_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
+#define GENERIC_NOP8 GENERIC_NOP1,GENERIC_NOP7
+#define GENERIC_NOP5_ATOMIC NOP_DS_PREFIX,GENERIC_NOP4
 
 /* Opteron 64bit nops
    1: nop
@@ -29,13 +36,14 @@
    4: osp osp osp nop
 */
 #define K8_NOP1 GENERIC_NOP1
-#define K8_NOP2	".byte 0x66,0x90\n"
-#define K8_NOP3	".byte 0x66,0x66,0x90\n"
-#define K8_NOP4	".byte 0x66,0x66,0x66,0x90\n"
-#define K8_NOP5	K8_NOP3 K8_NOP2
-#define K8_NOP6	K8_NOP3 K8_NOP3
-#define K8_NOP7	K8_NOP4 K8_NOP3
-#define K8_NOP8	K8_NOP4 K8_NOP4
+#define K8_NOP2	0x66,K8_NOP1
+#define K8_NOP3	0x66,K8_NOP2
+#define K8_NOP4	0x66,K8_NOP3
+#define K8_NOP5	K8_NOP3,K8_NOP2
+#define K8_NOP6	K8_NOP3,K8_NOP3
+#define K8_NOP7	K8_NOP4,K8_NOP3
+#define K8_NOP8	K8_NOP4,K8_NOP4
+#define K8_NOP5_ATOMIC 0x66,K8_NOP4
 
 /* K7 nops
    uses eax dependencies (arbitrary choice)
@@ -47,13 +55,14 @@
    7: leal 0x00000000(,%eax,1),%eax
 */
 #define K7_NOP1	GENERIC_NOP1
-#define K7_NOP2	".byte 0x8b,0xc0\n"
-#define K7_NOP3	".byte 0x8d,0x04,0x20\n"
-#define K7_NOP4	".byte 0x8d,0x44,0x20,0x00\n"
-#define K7_NOP5	K7_NOP4 ASM_NOP1
-#define K7_NOP6	".byte 0x8d,0x80,0,0,0,0\n"
-#define K7_NOP7	".byte 0x8D,0x04,0x05,0,0,0,0\n"
-#define K7_NOP8	K7_NOP7 ASM_NOP1
+#define K7_NOP2	0x8b,0xc0
+#define K7_NOP3	0x8d,0x04,0x20
+#define K7_NOP4	0x8d,0x44,0x20,0x00
+#define K7_NOP5	K7_NOP4,K7_NOP1
+#define K7_NOP6	0x8d,0x80,0,0,0,0
+#define K7_NOP7	0x8D,0x04,0x05,0,0,0,0
+#define K7_NOP8	K7_NOP7,K7_NOP1
+#define K7_NOP5_ATOMIC NOP_DS_PREFIX,K7_NOP4
 
 /* P6 nops
    uses eax dependencies (Intel-recommended choice)
@@ -69,52 +78,65 @@
 	There is kernel code that depends on this.
 */
 #define P6_NOP1	GENERIC_NOP1
-#define P6_NOP2	".byte 0x66,0x90\n"
-#define P6_NOP3	".byte 0x0f,0x1f,0x00\n"
-#define P6_NOP4	".byte 0x0f,0x1f,0x40,0\n"
-#define P6_NOP5	".byte 0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP6	".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
-#define P6_NOP7	".byte 0x0f,0x1f,0x80,0,0,0,0\n"
-#define P6_NOP8	".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
+#define P6_NOP2	0x66,0x90
+#define P6_NOP3	0x0f,0x1f,0x00
+#define P6_NOP4	0x0f,0x1f,0x40,0
+#define P6_NOP5	0x0f,0x1f,0x44,0x00,0
+#define P6_NOP6	0x66,0x0f,0x1f,0x44,0x00,0
+#define P6_NOP7	0x0f,0x1f,0x80,0,0,0,0
+#define P6_NOP8	0x0f,0x1f,0x84,0x00,0,0,0,0
+#define P6_NOP5_ATOMIC P6_NOP5
+
+#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
 
 #if defined(CONFIG_MK7)
-#define ASM_NOP1 K7_NOP1
-#define ASM_NOP2 K7_NOP2
-#define ASM_NOP3 K7_NOP3
-#define ASM_NOP4 K7_NOP4
-#define ASM_NOP5 K7_NOP5
-#define ASM_NOP6 K7_NOP6
-#define ASM_NOP7 K7_NOP7
-#define ASM_NOP8 K7_NOP8
+#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(K7_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(K7_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(K7_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(K7_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(K7_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(K7_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(K7_NOP8)
+#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K7_NOP5_ATOMIC)
 #elif defined(CONFIG_X86_P6_NOP)
-#define ASM_NOP1 P6_NOP1
-#define ASM_NOP2 P6_NOP2
-#define ASM_NOP3 P6_NOP3
-#define ASM_NOP4 P6_NOP4
-#define ASM_NOP5 P6_NOP5
-#define ASM_NOP6 P6_NOP6
-#define ASM_NOP7 P6_NOP7
-#define ASM_NOP8 P6_NOP8
+#define ASM_NOP1 _ASM_MK_NOP(P6_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(P6_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(P6_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(P6_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(P6_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(P6_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(P6_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(P6_NOP8)
+#define ASM_NOP5_ATOMIC _ASM_MK_NOP(P6_NOP5_ATOMIC)
 #elif defined(CONFIG_X86_64)
-#define ASM_NOP1 K8_NOP1
-#define ASM_NOP2 K8_NOP2
-#define ASM_NOP3 K8_NOP3
-#define ASM_NOP4 K8_NOP4
-#define ASM_NOP5 K8_NOP5
-#define ASM_NOP6 K8_NOP6
-#define ASM_NOP7 K8_NOP7
-#define ASM_NOP8 K8_NOP8
+#define ASM_NOP1 _ASM_MK_NOP(K8_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(K8_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(K8_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(K8_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(K8_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(K8_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(K8_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(K8_NOP8)
+#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K8_NOP5_ATOMIC)
 #else
-#define ASM_NOP1 GENERIC_NOP1
-#define ASM_NOP2 GENERIC_NOP2
-#define ASM_NOP3 GENERIC_NOP3
-#define ASM_NOP4 GENERIC_NOP4
-#define ASM_NOP5 GENERIC_NOP5
-#define ASM_NOP6 GENERIC_NOP6
-#define ASM_NOP7 GENERIC_NOP7
-#define ASM_NOP8 GENERIC_NOP8
+#define ASM_NOP1 _ASM_MK_NOP(GENERIC_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(GENERIC_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(GENERIC_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(GENERIC_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(GENERIC_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(GENERIC_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(GENERIC_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(GENERIC_NOP8)
+#define ASM_NOP5_ATOMIC _ASM_MK_NOP(GENERIC_NOP5_ATOMIC)
 #endif
 
 #define ASM_NOP_MAX 8
+#define NOP_ATOMIC5 (ASM_NOP_MAX+1)	/* Entry for the 5-byte atomic NOP */
+
+#ifndef __ASSEMBLY__
+extern const unsigned char * const *ideal_nops;
+extern void arch_init_ideal_nops(void);
+#endif
 
 #endif /* _ASM_X86_NOPS_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4a234677e213..846f61eb89c1 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -67,17 +67,30 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt);
 #define DPRINTK(fmt, args...) if (debug_alternative) \
 	printk(KERN_DEBUG fmt, args)
 
+/*
+ * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
+ * that correspond to that nop. Getting from one nop to the next, we
+ * add to the array the offset that is equal to the sum of all sizes of
+ * nops preceding the one we are after.
+ *
+ * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
+ * nice symmetry of sizes of the previous nops.
+ */
 #if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
-/* Use inline assembly to define this because the nops are defined
-   as inline assembly strings in the include files and we cannot
-   get them easily into strings. */
-asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: "
-	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
-	GENERIC_NOP7 GENERIC_NOP8
-    "\t.previous");
-extern const unsigned char intelnops[];
-static const unsigned char *const __initconst_or_module
-intel_nops[ASM_NOP_MAX+1] = {
+static const unsigned char intelnops[] =
+{
+	GENERIC_NOP1,
+	GENERIC_NOP2,
+	GENERIC_NOP3,
+	GENERIC_NOP4,
+	GENERIC_NOP5,
+	GENERIC_NOP6,
+	GENERIC_NOP7,
+	GENERIC_NOP8,
+	GENERIC_NOP5_ATOMIC
+};
+static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
+{
 	NULL,
 	intelnops,
 	intelnops + 1,
@@ -87,17 +100,25 @@ intel_nops[ASM_NOP_MAX+1] = {
 	intelnops + 1 + 2 + 3 + 4 + 5,
 	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
 	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
 #ifdef K8_NOP1
-asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: "
-	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
-	K8_NOP7 K8_NOP8
-    "\t.previous");
-extern const unsigned char k8nops[];
-static const unsigned char *const __initconst_or_module
-k8_nops[ASM_NOP_MAX+1] = {
+static const unsigned char k8nops[] =
+{
+	K8_NOP1,
+	K8_NOP2,
+	K8_NOP3,
+	K8_NOP4,
+	K8_NOP5,
+	K8_NOP6,
+	K8_NOP7,
+	K8_NOP8,
+	K8_NOP5_ATOMIC
+};
+static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
+{
 	NULL,
 	k8nops,
 	k8nops + 1,
@@ -107,17 +128,25 @@ k8_nops[ASM_NOP_MAX+1] = {
 	k8nops + 1 + 2 + 3 + 4 + 5,
 	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
 	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
 #if defined(K7_NOP1) && !defined(CONFIG_X86_64)
-asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: "
-	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
-	K7_NOP7 K7_NOP8
-    "\t.previous");
-extern const unsigned char k7nops[];
-static const unsigned char *const __initconst_or_module
-k7_nops[ASM_NOP_MAX+1] = {
+static const unsigned char k7nops[] =
+{
+	K7_NOP1,
+	K7_NOP2,
+	K7_NOP3,
+	K7_NOP4,
+	K7_NOP5,
+	K7_NOP6,
+	K7_NOP7,
+	K7_NOP8,
+	K7_NOP5_ATOMIC
+};
+static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
+{
 	NULL,
 	k7nops,
 	k7nops + 1,
@@ -127,17 +156,25 @@ k7_nops[ASM_NOP_MAX+1] = {
 	k7nops + 1 + 2 + 3 + 4 + 5,
 	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
 	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
 #ifdef P6_NOP1
-asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: "
-	P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
-	P6_NOP7 P6_NOP8
-    "\t.previous");
-extern const unsigned char p6nops[];
-static const unsigned char *const __initconst_or_module
-p6_nops[ASM_NOP_MAX+1] = {
+static const unsigned char  __initconst_or_module p6nops[] =
+{
+	P6_NOP1,
+	P6_NOP2,
+	P6_NOP3,
+	P6_NOP4,
+	P6_NOP5,
+	P6_NOP6,
+	P6_NOP7,
+	P6_NOP8,
+	P6_NOP5_ATOMIC
+};
+static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
+{
 	NULL,
 	p6nops,
 	p6nops + 1,
@@ -147,47 +184,53 @@ p6_nops[ASM_NOP_MAX+1] = {
 	p6nops + 1 + 2 + 3 + 4 + 5,
 	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
 	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 };
 #endif
 
+/* Initialize these to a safe default */
 #ifdef CONFIG_X86_64
+const unsigned char * const *ideal_nops = p6_nops;
+#else
+const unsigned char * const *ideal_nops = intel_nops;
+#endif
 
-extern char __vsyscall_0;
-static const unsigned char *const *__init_or_module find_nop_table(void)
+void __init arch_init_ideal_nops(void)
 {
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-	    boot_cpu_has(X86_FEATURE_NOPL))
-		return p6_nops;
-	else
-		return k8_nops;
-}
-
-#else /* CONFIG_X86_64 */
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_INTEL:
+		if (boot_cpu_has(X86_FEATURE_NOPL)) {
+			   ideal_nops = p6_nops;
+		} else {
+#ifdef CONFIG_X86_64
+			ideal_nops = k8_nops;
+#else
+			ideal_nops = intel_nops;
+#endif
+		}
 
-static const unsigned char *const *__init_or_module find_nop_table(void)
-{
-	if (boot_cpu_has(X86_FEATURE_K8))
-		return k8_nops;
-	else if (boot_cpu_has(X86_FEATURE_K7))
-		return k7_nops;
-	else if (boot_cpu_has(X86_FEATURE_NOPL))
-		return p6_nops;
-	else
-		return intel_nops;
+	default:
+#ifdef CONFIG_X86_64
+		ideal_nops = k8_nops;
+#else
+		if (boot_cpu_has(X86_FEATURE_K8))
+			ideal_nops = k8_nops;
+		else if (boot_cpu_has(X86_FEATURE_K7))
+			ideal_nops = k7_nops;
+		else
+			ideal_nops = intel_nops;
+#endif
+	}
 }
 
-#endif /* CONFIG_X86_64 */
-
 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
 static void __init_or_module add_nops(void *insns, unsigned int len)
 {
-	const unsigned char *const *noptable = find_nop_table();
-
 	while (len > 0) {
 		unsigned int noplen = len;
 		if (noplen > ASM_NOP_MAX)
 			noplen = ASM_NOP_MAX;
-		memcpy(insns, noptable[noplen], noplen);
+		memcpy(insns, ideal_nops[noplen], noplen);
 		insns += noplen;
 		len -= noplen;
 	}
@@ -195,6 +238,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
+extern char __vsyscall_0;
 void *text_poke_early(void *addr, const void *opcode, size_t len);
 
 /* Replace instructions with better alternatives for this CPU type.
@@ -678,29 +722,3 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
 	wrote_text = 0;
 	__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
 }
-
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
-
-#ifdef CONFIG_X86_64
-unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
-#else
-unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
-#endif
-
-void __init arch_init_ideal_nop5(void)
-{
-	/*
-	 * There is no good nop for all x86 archs.  This selection
-	 * algorithm should be unified with the one in find_nop_table(),
-	 * but this should be good enough for now.
-	 *
-	 * For cases other than the ones below, use the safe (as in
-	 * always functional) defaults above.
-	 */
-#ifdef CONFIG_X86_64
-	/* Don't use these on 32 bits due to broken virtualizers */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		memcpy(ideal_nop5, p6_nops[5], 5);
-#endif
-}
-#endif
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index a93742a57468..0ba15a6cc57e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -260,9 +260,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	return mod_code_status;
 }
 
-static unsigned char *ftrace_nop_replace(void)
+static const unsigned char *ftrace_nop_replace(void)
 {
-	return ideal_nop5;
+	return ideal_nops[NOP_ATOMIC5];
 }
 
 static int
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 961b6b30ba90..3fee346ef545 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -34,7 +34,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 		code.offset = entry->target -
 				(entry->code + JUMP_LABEL_NOP_SIZE);
 	} else
-		memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
+		memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
 	get_online_cpus();
 	mutex_lock(&text_mutex);
 	text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
@@ -44,7 +44,8 @@ void arch_jump_label_transform(struct jump_entry *entry,
 
 void arch_jump_label_text_poke_early(jump_label_t addr)
 {
-	text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
+	text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5],
+			JUMP_LABEL_NOP_SIZE);
 }
 
 #endif
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 5a0484a95ad6..390a663894d8 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -691,8 +691,6 @@ early_param("reservelow", parse_reservelow);
 
 void __init setup_arch(char **cmdline_p)
 {
-	unsigned long flags;
-
 #ifdef CONFIG_X86_32
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
 	visws_early_detect();
@@ -1036,9 +1034,7 @@ void __init setup_arch(char **cmdline_p)
 
 	mcheck_init();
 
-	local_irq_save(flags);
-	arch_init_ideal_nop5();
-	local_irq_restore(flags);
+	arch_init_ideal_nops();
 }
 
 #ifdef CONFIG_X86_32
-- 
cgit v1.2.3


From d8d9766c8c29f71c37bc4b74cc9fcf6a192c9bfd Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Mon, 18 Apr 2011 15:31:57 -0700
Subject: x86, cpu: Change NOP selection for certain Intel CPUs

Due to a decoder implementation quirk, some specific Intel CPUs
actually perform better with the "k8_nops" than with the
SDM-recommended NOPs.  For runtime-selected NOPs, if we detect those
specific CPUs then use the k8_nops instead of the ones we would
normally use.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jason Baron <jbaron@redhat.com>
Link: http://lkml.kernel.org/r/1303166160-10315-4-git-send-email-hpa@linux.intel.com
---
 arch/x86/kernel/alternative.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 846f61eb89c1..c0501ea6b634 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -199,7 +199,19 @@ void __init arch_init_ideal_nops(void)
 {
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_INTEL:
-		if (boot_cpu_has(X86_FEATURE_NOPL)) {
+		/*
+		 * Due to a decoder implementation quirk, some
+		 * specific Intel CPUs actually perform better with
+		 * the "k8_nops" than with the SDM-recommended NOPs.
+		 */
+		if (boot_cpu_data.x86 == 6 &&
+		    boot_cpu_data.x86_model >= 0x0f &&
+		    boot_cpu_data.x86_model != 0x1c &&
+		    boot_cpu_data.x86_model != 0x26 &&
+		    boot_cpu_data.x86_model != 0x27 &&
+		    boot_cpu_data.x86_model < 0x30) {
+			ideal_nops = k8_nops;
+		} else if (boot_cpu_has(X86_FEATURE_NOPL)) {
 			   ideal_nops = p6_nops;
 		} else {
 #ifdef CONFIG_X86_64
-- 
cgit v1.2.3


From 83112e688f5f05dea1e63787db9a6c16b2887a1d Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@amd.com>
Date: Sat, 16 Apr 2011 02:27:53 +0200
Subject: perf, x86: Fix pre-defined cache-misses event for AMD family 15h cpus

With AMD cpu family 15h a unit mask was introduced for the Data Cache
Miss event (0x041/L1-dcache-load-misses). We need to enable bit 0
(first data cache miss or streaming store to a 64 B cache line) of
this mask to proper count data cache misses.

Now we set this bit for all families and models. In case a PMU does
not implement a unit mask for event 0x041 the bit is ignored.

Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1302913676-14352-2-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 461f62bbd774..4e1613845b9f 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -8,7 +8,7 @@ static __initconst const u64 amd_hw_cache_event_ids
  [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
+		[ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
-- 
cgit v1.2.3


From 855357a21744e488cbee23a47d2b124035160a87 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Sat, 16 Apr 2011 02:27:54 +0200
Subject: perf, x86: Fix AMD family 15h FPU event constraints

Depending on the unit mask settings some FPU events may be scheduled
only on cpu counter #3. This patch fixes this.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@googlemail.com>
Link: http://lkml.kernel.org/r/1302913676-14352-3-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_amd.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 4e1613845b9f..cf4e369cea67 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -427,7 +427,9 @@ static __initconst const struct x86_pmu amd_pmu = {
  *
  * Exceptions:
  *
+ * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
  * 0x003	FP	PERF_CTL[3]
+ * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
  * 0x00B	FP	PERF_CTL[3]
  * 0x00D	FP	PERF_CTL[3]
  * 0x023	DE	PERF_CTL[2:0]
@@ -448,6 +450,8 @@ static __initconst const struct x86_pmu amd_pmu = {
  * 0x0DF	LS	PERF_CTL[5:0]
  * 0x1D6	EX	PERF_CTL[5:0]
  * 0x1D8	EX	PERF_CTL[5:0]
+ *
+ * (*) depending on the umask all FPU counters may be used
  */
 
 static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
@@ -460,18 +464,28 @@ static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 static struct event_constraint *
 amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
-	unsigned int event_code = amd_get_event_code(&event->hw);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int event_code = amd_get_event_code(hwc);
 
 	switch (event_code & AMD_EVENT_TYPE_MASK) {
 	case AMD_EVENT_FP:
 		switch (event_code) {
+		case 0x000:
+			if (!(hwc->config & 0x0000F000ULL))
+				break;
+			if (!(hwc->config & 0x00000F00ULL))
+				break;
+			return &amd_f15_PMC3;
+		case 0x004:
+			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
+				break;
+			return &amd_f15_PMC3;
 		case 0x003:
 		case 0x00B:
 		case 0x00D:
 			return &amd_f15_PMC3;
-		default:
-			return &amd_f15_PMC53;
 		}
+		return &amd_f15_PMC53;
 	case AMD_EVENT_LS:
 	case AMD_EVENT_DC:
 	case AMD_EVENT_EX_LS:
-- 
cgit v1.2.3


From c8e5910edf8bbe2e5c6c35a4ef2a578cc7893b25 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Sat, 16 Apr 2011 02:27:55 +0200
Subject: perf, x86: Use ALTERNATIVE() to check for X86_FEATURE_PERFCTR_CORE

Using ALTERNATIVE() when checking for X86_FEATURE_PERFCTR_CORE avoids
an extra pointer chase and data cache hit.

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1302913676-14352-4-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index eed3673a8656..224a84f7080c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,6 +31,7 @@
 #include <asm/nmi.h>
 #include <asm/compat.h>
 #include <asm/smp.h>
+#include <asm/alternative.h>
 
 #if 0
 #undef wrmsrl
@@ -363,12 +364,18 @@ again:
 	return new_raw_count;
 }
 
-/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
 static inline int x86_pmu_addr_offset(int index)
 {
-	if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
-		return index << 1;
-	return index;
+	int offset;
+
+	/* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
+	alternative_io(ASM_NOP2,
+		       "shll $1, %%eax",
+		       X86_FEATURE_PERFCTR_CORE,
+		       "=a" (offset),
+		       "a"  (index));
+
+	return offset;
 }
 
 static inline unsigned int x86_pmu_config_addr(int index)
-- 
cgit v1.2.3


From 1286eeb2fd22ddb4c56390f957e854ec06bab9fd Mon Sep 17 00:00:00 2001
From: Benoit Cousson <b-cousson@ti.com>
Date: Tue, 19 Apr 2011 10:15:36 -0600
Subject: OMAP2+: hwmod data: Fix wrong dma_system end address

OMAP2420, 2430 and 3xxx were using the OMAP4 end address
that unfortunately is not located at the same base address.
Moreover the OMAP4 size was set to 256 instead of 4096.

Change all .pa_end to set them to .pa_start + 0xfff

Cc: "G, Manjunath Kondaiah" <manjugk@ti.com>
Cc: Benoit Cousson <b-cousson@ti.com>
Reported-by: Michael Fillinger <m-fillinger@ti.com>
Signed-off-by: Benoit Cousson <b-cousson@ti.com>
Signed-off-by: Paul Walmsley <paul@pwsan.com>
---
 arch/arm/mach-omap2/omap_hwmod_2420_data.c | 2 +-
 arch/arm/mach-omap2/omap_hwmod_2430_data.c | 2 +-
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 2 +-
 arch/arm/mach-omap2/omap_hwmod_44xx_data.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/omap_hwmod_2420_data.c b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
index 8eb3ce1bbfbe..f50e56a8115b 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2420_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
@@ -1782,7 +1782,7 @@ static struct omap_hwmod_irq_info omap2420_dma_system_irqs[] = {
 static struct omap_hwmod_addr_space omap2420_dma_system_addrs[] = {
 	{
 		.pa_start	= 0x48056000,
-		.pa_end		= 0x4a0560ff,
+		.pa_end		= 0x48056fff,
 		.flags		= ADDR_TYPE_RT
 	},
 };
diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
index e6e3810db77f..96753f79a754 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -1915,7 +1915,7 @@ static struct omap_hwmod_irq_info omap2430_dma_system_irqs[] = {
 static struct omap_hwmod_addr_space omap2430_dma_system_addrs[] = {
 	{
 		.pa_start	= 0x48056000,
-		.pa_end		= 0x4a0560ff,
+		.pa_end		= 0x48056fff,
 		.flags		= ADDR_TYPE_RT
 	},
 };
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index b98e2dfcba28..9bee15575988 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -2386,7 +2386,7 @@ static struct omap_hwmod_irq_info omap3xxx_dma_system_irqs[] = {
 static struct omap_hwmod_addr_space omap3xxx_dma_system_addrs[] = {
 	{
 		.pa_start	= 0x48056000,
-		.pa_end		= 0x4a0560ff,
+		.pa_end		= 0x48056fff,
 		.flags		= ADDR_TYPE_RT
 	},
 };
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index 3e88dd3f8ef3..abc548a0c98d 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -885,7 +885,7 @@ static struct omap_hwmod_ocp_if *omap44xx_dma_system_masters[] = {
 static struct omap_hwmod_addr_space omap44xx_dma_system_addrs[] = {
 	{
 		.pa_start	= 0x4a056000,
-		.pa_end		= 0x4a0560ff,
+		.pa_end		= 0x4a056fff,
 		.flags		= ADDR_TYPE_RT
 	},
 };
-- 
cgit v1.2.3


From 2b398bd9f8f73be706b41adcbb240ce95793049a Mon Sep 17 00:00:00 2001
From: Youquan Song <youquan.song@intel.com>
Date: Thu, 14 Apr 2011 14:36:08 +0800
Subject: x86, apic: Print verbose error interrupt reason on apic=debug

End users worry about the error interrupt printout we generate
currently:

	pr_debug("APIC error on CPU%d: %02x(%02x)\n",
	smp_processor_id(), v , v1);

... and would like to know the reason why error interrupts are generated.

This patch prints out more detailed debug information.

Another practical problem is that dynamic debug is not initialized yet
when the APIC initializes, so the pr_debug() will not output the error
interrupt debug information on bootup. In this patch, we use
apic_printk(APIC_DEBUG, ...), so the apic=debug boot option will print
verbose error interupts during bootup.

Signed-off-by: Youquan Song <youquan.song@intel.com>
Cc: Joe Perches <joe@perches.com>
Cc: hpa@linux.intel.com
Cc: suresh.b.siddha@intel.com
Cc: yong.y.wang@linux.intel.com
Cc: jbaron@redhat.com
Cc: trenn@suse.de
Cc: kent.liu@intel.com
Cc: chaohong.guo@intel.com
Link: http://lkml.kernel.org/r/1302762968-24380-2-git-send-email-youquan.song@intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/apic/apic.c | 41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index fabf01eff771..ae147126b7b7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1812,30 +1812,41 @@ void smp_spurious_interrupt(struct pt_regs *regs)
  */
 void smp_error_interrupt(struct pt_regs *regs)
 {
-	u32 v, v1;
+	u32 v0, v1;
+	u32 i = 0;
+	static const char * const error_interrupt_reason[] = {
+		"Send CS error",		/* APIC Error Bit 0 */
+		"Receive CS error",		/* APIC Error Bit 1 */
+		"Send accept error",		/* APIC Error Bit 2 */
+		"Receive accept error",		/* APIC Error Bit 3 */
+		"Redirectable IPI",		/* APIC Error Bit 4 */
+		"Send illegal vector",		/* APIC Error Bit 5 */
+		"Received illegal vector",	/* APIC Error Bit 6 */
+		"Illegal register address",	/* APIC Error Bit 7 */
+	};
 
 	exit_idle();
 	irq_enter();
 	/* First tickle the hardware, only then report what went on. -- REW */
-	v = apic_read(APIC_ESR);
+	v0 = apic_read(APIC_ESR);
 	apic_write(APIC_ESR, 0);
 	v1 = apic_read(APIC_ESR);
 	ack_APIC_irq();
 	atomic_inc(&irq_err_count);
 
-	/*
-	 * Here is what the APIC error bits mean:
-	 * 0: Send CS error
-	 * 1: Receive CS error
-	 * 2: Send accept error
-	 * 3: Receive accept error
-	 * 4: Reserved
-	 * 5: Send illegal vector
-	 * 6: Received illegal vector
-	 * 7: Illegal register address
-	 */
-	pr_debug("APIC error on CPU%d: %02x(%02x)\n",
-		smp_processor_id(), v , v1);
+	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)",
+		    smp_processor_id(), v0 , v1);
+
+	v1 = v1 & 0xff;
+	while (v1) {
+		if (v1 & 0x1)
+			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
+		i++;
+		v1 >>= 1;
+	};
+
+	apic_printk(APIC_DEBUG, KERN_CONT "\n");
+
 	irq_exit();
 }
 
-- 
cgit v1.2.3


From 7b70bd3441437b7bc04fc9d321e17c8ed0e8f958 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 18 Apr 2011 16:00:21 +0200
Subject: x86, MCE: Do not taint when handling correctable errors

Correctable errors are considered something rather normal on
modern hardware these days. Even more importantly, correctable
errors mean exactly that - they've been corrected by the
hardware - and there's no need to taint the kernel since
execution hasn't been compromised so far.

Also, drop tainting in the thermal throttling code for a similar
reason: crossing a thermal threshold does not mean corruption.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Acked-by: Nagananda Chumbalkar <Nagananda.Chumbalkar@hp.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Russ Anderson <rja@sgi.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1303135222-17118-1-git-send-email-bp@amd64.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/mcheck/mce.c         | 1 -
 arch/x86/kernel/cpu/mcheck/therm_throt.c | 3 ---
 2 files changed, 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3385ea26f684..68e230327d65 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -590,7 +590,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {
 			mce_log(&m);
 			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m);
-			add_taint(TAINT_MACHINE_CHECK);
 		}
 
 		/*
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97f..5846a797b970 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -187,8 +187,6 @@ static int therm_throt_process(bool new_event, int event, int level)
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package",
 				state->count);
-
-		add_taint(TAINT_MACHINE_CHECK);
 		return 1;
 	}
 	if (old_event) {
@@ -393,7 +391,6 @@ static void unexpected_thermal_interrupt(void)
 {
 	printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
 			smp_processor_id());
-	add_taint(TAINT_MACHINE_CHECK);
 }
 
 static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
-- 
cgit v1.2.3


From 2ea4db65be3c4027ed39da73e1b6a59c8aa6c7c9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 19 Apr 2011 22:52:58 +0200
Subject: xtensa: Fixup irq conversion fallout and nmi_count

Some unnamed moron fatfingered the arguments of the irq chip callbacks
to irq_chip instead of irq_data.

While at it remove the nmi_count() print in arch_show_interrupts()
which has been broken before the irq conversion already.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/xtensa/kernel/irq.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index d77089df412e..4340ee076bd5 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -64,47 +64,41 @@ asmlinkage void do_IRQ(int irq, struct pt_regs *regs)
 
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
-	int j;
-
-	seq_printf(p, "%*s: ", prec, "NMI");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", nmi_count(j));
-	seq_putc(p, '\n');
 	seq_printf(p, "%*s: ", prec, "ERR");
 	seq_printf(p, "%10u\n", atomic_read(&irq_err_count));
 	return 0;
 }
 
-static void xtensa_irq_mask(struct irq_chip *d)
+static void xtensa_irq_mask(struct irq_data *d)
 {
 	cached_irq_mask &= ~(1 << d->irq);
 	set_sr (cached_irq_mask, INTENABLE);
 }
 
-static void xtensa_irq_unmask(struct irq_chip *d)
+static void xtensa_irq_unmask(struct irq_data *d)
 {
 	cached_irq_mask |= 1 << d->irq;
 	set_sr (cached_irq_mask, INTENABLE);
 }
 
-static void xtensa_irq_enable(struct irq_chip *d)
+static void xtensa_irq_enable(struct irq_data *d)
 {
 	variant_irq_enable(d->irq);
 	xtensa_irq_unmask(d->irq);
 }
 
-static void xtensa_irq_disable(struct irq_chip *d)
+static void xtensa_irq_disable(struct irq_data *d)
 {
 	xtensa_irq_mask(d->irq);
 	variant_irq_disable(d->irq);
 }
 
-static void xtensa_irq_ack(struct irq_chip *d)
+static void xtensa_irq_ack(struct irq_data *d)
 {
 	set_sr(1 << d->irq, INTCLEAR);
 }
 
-static int xtensa_irq_retrigger(struct irq_chip *d)
+static int xtensa_irq_retrigger(struct irq_data *d)
 {
 	set_sr (1 << d->irq, INTSET);
 	return 1;
-- 
cgit v1.2.3


From 19234c0819da0e043a02710488dfd9b242b42eba Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 20 Apr 2011 00:36:11 +0200
Subject: PM: Add missing syscore_suspend() and syscore_resume() calls

Device suspend/resume infrastructure is used not only by the suspend
and hibernate code in kernel/power, but also by APM, Xen and the
kexec jump feature.  However, commit 40dc166cb5dddbd36aa4ad11c03915ea
(PM / Core: Introduce struct syscore_ops for core subsystems PM)
failed to add syscore_suspend() and syscore_resume() calls to that
code, which generally leads to breakage when the features in question
are used.

To fix this problem, add the missing syscore_suspend() and
syscore_resume() calls to arch/x86/kernel/apm_32.c, kernel/kexec.c
and drivers/xen/manage.c.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
---
 arch/x86/kernel/apm_32.c | 5 +++++
 drivers/base/syscore.c   | 2 ++
 drivers/xen/manage.c     | 9 ++++++++-
 kernel/kexec.c           | 7 +++++++
 4 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 0b4be431c620..adee12e0da1f 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -228,6 +228,7 @@
 #include <linux/kthread.h>
 #include <linux/jiffies.h>
 #include <linux/acpi.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -1238,6 +1239,7 @@ static int suspend(int vetoable)
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
+	syscore_suspend();
 
 	local_irq_enable();
 
@@ -1255,6 +1257,7 @@ static int suspend(int vetoable)
 		apm_error("suspend", err);
 	err = (err == APM_SUCCESS) ? 0 : -EIO;
 
+	syscore_resume();
 	sysdev_resume();
 	local_irq_enable();
 
@@ -1280,6 +1283,7 @@ static void standby(void)
 
 	local_irq_disable();
 	sysdev_suspend(PMSG_SUSPEND);
+	syscore_suspend();
 	local_irq_enable();
 
 	err = set_system_power_state(APM_STATE_STANDBY);
@@ -1287,6 +1291,7 @@ static void standby(void)
 		apm_error("standby", err);
 
 	local_irq_disable();
+	syscore_resume();
 	sysdev_resume();
 	local_irq_enable();
 
diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c
index 90af2943f9e4..c126db3cb7d1 100644
--- a/drivers/base/syscore.c
+++ b/drivers/base/syscore.c
@@ -73,6 +73,7 @@ int syscore_suspend(void)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(syscore_suspend);
 
 /**
  * syscore_resume - Execute all the registered system core resume callbacks.
@@ -95,6 +96,7 @@ void syscore_resume(void)
 				"Interrupts enabled after %pF\n", ops->resume);
 		}
 }
+EXPORT_SYMBOL_GPL(syscore_resume);
 #endif /* CONFIG_PM_SLEEP */
 
 /**
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 1ac94125bf93..a2eee574784e 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -8,6 +8,7 @@
 #include <linux/sysrq.h>
 #include <linux/stop_machine.h>
 #include <linux/freezer.h>
+#include <linux/syscore_ops.h>
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>
@@ -70,8 +71,13 @@ static int xen_suspend(void *data)
 	BUG_ON(!irqs_disabled());
 
 	err = sysdev_suspend(PMSG_FREEZE);
+	if (!err) {
+		err = syscore_suspend();
+		if (err)
+			sysdev_resume();
+	}
 	if (err) {
-		printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n",
+		printk(KERN_ERR "xen_suspend: system core suspend failed: %d\n",
 			err);
 		return err;
 	}
@@ -95,6 +101,7 @@ static int xen_suspend(void *data)
 		xen_timer_resume();
 	}
 
+	syscore_resume();
 	sysdev_resume();
 
 	return 0;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 55936f9cb251..87b77de03dd3 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -33,6 +33,7 @@
 #include <linux/vmalloc.h>
 #include <linux/swap.h>
 #include <linux/kmsg_dump.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -1532,6 +1533,11 @@ int kernel_kexec(void)
 		local_irq_disable();
 		/* Suspend system devices */
 		error = sysdev_suspend(PMSG_FREEZE);
+		if (!error) {
+			error = syscore_suspend();
+			if (error)
+				sysdev_resume();
+		}
 		if (error)
 			goto Enable_irqs;
 	} else
@@ -1546,6 +1552,7 @@ int kernel_kexec(void)
 
 #ifdef CONFIG_KEXEC_JUMP
 	if (kexec_image->preserve_context) {
+		syscore_resume();
 		sysdev_resume();
  Enable_irqs:
 		local_irq_enable();
-- 
cgit v1.2.3


From 5cf4c80a145d79db928ff55226e731de55c87644 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@ti.com>
Date: Tue, 19 Apr 2011 16:52:56 -0700
Subject: davinci: fix DEBUG_LL code for p2v changes

Fixup davinci UART low-level debug code for new ARM generic p2v changes.

Based on OMAP changes by Tony Lindgren

Cc: Tony Lindgren <tony@atomide.com>
Signed-off-by: Kevin Hilman <khilman@ti.com>
---
 arch/arm/mach-davinci/include/mach/debug-macro.S | 13 ++++++++-----
 arch/arm/mach-davinci/include/mach/serial.h      |  2 +-
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-davinci/include/mach/debug-macro.S b/arch/arm/mach-davinci/include/mach/debug-macro.S
index 9f1befc5ac38..f8b7ea4f6235 100644
--- a/arch/arm/mach-davinci/include/mach/debug-macro.S
+++ b/arch/arm/mach-davinci/include/mach/debug-macro.S
@@ -24,6 +24,9 @@
 
 #define UART_SHIFT	2
 
+#define davinci_uart_v2p(x)	((x) - PAGE_OFFSET + PLAT_PHYS_OFFSET)
+#define davinci_uart_p2v(x)	((x) - PLAT_PHYS_OFFSET + PAGE_OFFSET)
+
 		.pushsection .data
 davinci_uart_phys:	.word	0
 davinci_uart_virt:	.word	0
@@ -34,7 +37,7 @@ davinci_uart_virt:	.word	0
 		/* Use davinci_uart_phys/virt if already configured */
 10:		mrc	p15, 0, \rp, c1, c0
 		tst	\rp, #1			@ MMU enabled?
-		ldreq	\rp, =__virt_to_phys(davinci_uart_phys)
+		ldreq	\rp, =davinci_uart_v2p(davinci_uart_phys)
 		ldrne	\rp, =davinci_uart_phys
 		add	\rv, \rp, #4		@ davinci_uart_virt
 		ldr	\rp, [\rp, #0]
@@ -48,18 +51,18 @@ davinci_uart_virt:	.word	0
 		tst	\rp, #1			@ MMU enabled?
 
 		/* Copy uart phys address from decompressor uart info */
-		ldreq	\rv, =__virt_to_phys(davinci_uart_phys)
+		ldreq	\rv, =davinci_uart_v2p(davinci_uart_phys)
 		ldrne	\rv, =davinci_uart_phys
 		ldreq	\rp, =DAVINCI_UART_INFO
-		ldrne	\rp, =__phys_to_virt(DAVINCI_UART_INFO)
+		ldrne	\rp, =davinci_uart_p2v(DAVINCI_UART_INFO)
 		ldr	\rp, [\rp, #0]
 		str	\rp, [\rv]
 
 		/* Copy uart virt address from decompressor uart info */
-		ldreq	\rv, =__virt_to_phys(davinci_uart_virt)
+		ldreq	\rv, =davinci_uart_v2p(davinci_uart_virt)
 		ldrne	\rv, =davinci_uart_virt
 		ldreq	\rp, =DAVINCI_UART_INFO
-		ldrne	\rp, =__phys_to_virt(DAVINCI_UART_INFO)
+		ldrne	\rp, =davinci_uart_p2v(DAVINCI_UART_INFO)
 		ldr	\rp, [\rp, #4]
 		str	\rp, [\rv]
 
diff --git a/arch/arm/mach-davinci/include/mach/serial.h b/arch/arm/mach-davinci/include/mach/serial.h
index 8051110b8ac3..c9e6ce185a66 100644
--- a/arch/arm/mach-davinci/include/mach/serial.h
+++ b/arch/arm/mach-davinci/include/mach/serial.h
@@ -22,7 +22,7 @@
  *
  * This area sits just below the page tables (see arch/arm/kernel/head.S).
  */
-#define DAVINCI_UART_INFO	(PHYS_OFFSET + 0x3ff8)
+#define DAVINCI_UART_INFO	(PLAT_PHYS_OFFSET + 0x3ff8)
 
 #define DAVINCI_UART0_BASE	(IO_PHYS + 0x20000)
 #define DAVINCI_UART1_BASE	(IO_PHYS + 0x20400)
-- 
cgit v1.2.3


From 0b05ac6e24807f0c26f763b3a546c0bcbf84125f Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 4 Apr 2011 13:46:58 +1000
Subject: powerpc/xics: Rewrite XICS driver

This is a significant rework of the XICS driver, too significant to
conveniently break it up into a series of smaller patches to be honest.

The driver is moved to a more generic location to allow new platforms
to use it, and is broken up into separate ICP and ICS "backends". For
now we have the native and "hypervisor" ICP backends and one common
RTAS ICS backend.

The driver supports one ICP backend instanciation, and many ICS ones,
in order to accomodate future platforms with multiple possibly different
interrupt "sources" mechanisms.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/irq.h                  |   6 +
 arch/powerpc/include/asm/xics.h                 | 139 ++++
 arch/powerpc/platforms/pseries/Kconfig          |   5 +-
 arch/powerpc/platforms/pseries/Makefile         |   1 -
 arch/powerpc/platforms/pseries/hotplug-cpu.c    |   3 +-
 arch/powerpc/platforms/pseries/kexec.c          |   5 +-
 arch/powerpc/platforms/pseries/plpar_wrappers.h |  27 -
 arch/powerpc/platforms/pseries/setup.c          |   8 +-
 arch/powerpc/platforms/pseries/smp.c            |  17 +-
 arch/powerpc/platforms/pseries/xics.c           | 949 ------------------------
 arch/powerpc/platforms/pseries/xics.h           |  23 -
 arch/powerpc/sysdev/Kconfig                     |   3 +
 arch/powerpc/sysdev/Makefile                    |   4 +
 arch/powerpc/sysdev/xics/Kconfig                |  12 +
 arch/powerpc/sysdev/xics/Makefile               |   6 +
 arch/powerpc/sysdev/xics/icp-hv.c               | 184 +++++
 arch/powerpc/sysdev/xics/icp-native.c           | 312 ++++++++
 arch/powerpc/sysdev/xics/ics-rtas.c             | 229 ++++++
 arch/powerpc/sysdev/xics/xics-common.c          | 461 ++++++++++++
 19 files changed, 1377 insertions(+), 1017 deletions(-)
 create mode 100644 arch/powerpc/include/asm/xics.h
 delete mode 100644 arch/powerpc/platforms/pseries/xics.c
 delete mode 100644 arch/powerpc/platforms/pseries/xics.h
 create mode 100644 arch/powerpc/sysdev/xics/Kconfig
 create mode 100644 arch/powerpc/sysdev/xics/Makefile
 create mode 100644 arch/powerpc/sysdev/xics/icp-hv.c
 create mode 100644 arch/powerpc/sysdev/xics/icp-native.c
 create mode 100644 arch/powerpc/sysdev/xics/ics-rtas.c
 create mode 100644 arch/powerpc/sysdev/xics/xics-common.c

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 67ab5fb7d153..47b7905a6369 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -142,6 +142,12 @@ extern struct irq_map_entry irq_map[NR_IRQS];
 
 extern irq_hw_number_t virq_to_hw(unsigned int virq);
 
+/* This will eventually -replace- virq_to_hw if/when we stash the
+ * HW number in the irq_data itself. We use a macro so we can inline
+ * it as irq_data isn't defined yet
+ */
+#define irq_data_to_hw(d) (irq_map[(d)->irq].hwirq)
+
 /**
  * irq_alloc_host - Allocate a new irq_host data structure
  * @of_node: optional device-tree node of the interrupt controller
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
new file mode 100644
index 000000000000..146aad8534de
--- /dev/null
+++ b/arch/powerpc/include/asm/xics.h
@@ -0,0 +1,139 @@
+/*
+ * Common definitions accross all variants of ICP and ICS interrupt
+ * controllers.
+ */
+
+#ifndef _XICS_H
+#define _XICS_H
+
+#define XICS_IPI		2
+#define XICS_IRQ_SPURIOUS	0
+
+/* Want a priority other than 0.  Various HW issues require this. */
+#define	DEFAULT_PRIORITY	5
+
+/*
+ * Mark IPIs as higher priority so we can take them inside interrupts that
+ * arent marked IRQF_DISABLED
+ */
+#define IPI_PRIORITY		4
+
+/* The least favored priority */
+#define LOWEST_PRIORITY		0xFF
+
+/* The number of priorities defined above */
+#define MAX_NUM_PRIORITIES	3
+
+/* Native ICP */
+extern int icp_native_init(void);
+
+/* PAPR ICP */
+extern int icp_hv_init(void);
+
+/* ICP ops */
+struct icp_ops {
+	unsigned int (*get_irq)(void);
+	void (*eoi)(struct irq_data *d);
+	void (*set_priority)(unsigned char prio);
+	void (*teardown_cpu)(void);
+	void (*flush_ipi)(void);
+#ifdef CONFIG_SMP
+	void (*message_pass)(int target, int msg);
+	irq_handler_t ipi_action;
+#endif
+};
+
+extern const struct icp_ops *icp_ops;
+
+/* Native ICS */
+extern int ics_native_init(void);
+
+/* RTAS ICS */
+extern int ics_rtas_init(void);
+
+/* ICS instance, hooked up to chip_data of an irq */
+struct ics {
+	struct list_head link;
+	int (*map)(struct ics *ics, unsigned int virq);
+	void (*mask_unknown)(struct ics *ics, unsigned long vec);
+	long (*get_server)(struct ics *ics, unsigned long vec);
+	char data[];
+};
+
+/* Commons */
+extern unsigned int xics_default_server;
+extern unsigned int xics_default_distrib_server;
+extern unsigned int xics_interrupt_server_size;
+extern struct irq_host *xics_host;
+
+struct xics_cppr {
+	unsigned char stack[MAX_NUM_PRIORITIES];
+	int index;
+};
+
+DECLARE_PER_CPU(struct xics_cppr, xics_cppr);
+
+static inline void xics_push_cppr(unsigned int vec)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
+		return;
+
+	if (vec == XICS_IPI)
+		os_cppr->stack[++os_cppr->index] = IPI_PRIORITY;
+	else
+		os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY;
+}
+
+static inline unsigned char xics_pop_cppr(void)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	if (WARN_ON(os_cppr->index < 1))
+		return LOWEST_PRIORITY;
+
+	return os_cppr->stack[--os_cppr->index];
+}
+
+static inline void xics_set_base_cppr(unsigned char cppr)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	/* we only really want to set the priority when there's
+	 * just one cppr value on the stack
+	 */
+	WARN_ON(os_cppr->index != 0);
+
+	os_cppr->stack[0] = cppr;
+}
+
+static inline unsigned char xics_cppr_top(void)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+	
+	return os_cppr->stack[os_cppr->index];
+}
+
+DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message);
+
+extern void xics_init(void);
+extern void xics_setup_cpu(void);
+extern void xics_update_irq_servers(void);
+extern void xics_set_cpu_giq(unsigned int gserver, unsigned int join);
+extern void xics_mask_unknown_vec(unsigned int vec);
+extern irqreturn_t xics_ipi_dispatch(int cpu);
+extern int xics_smp_probe(void);
+extern void xics_register_ics(struct ics *ics);
+extern void xics_teardown_cpu(void);
+extern void xics_kexec_teardown_cpu(int secondary);
+extern void xics_migrate_irqs_away(void);
+#ifdef CONFIG_SMP
+extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
+			       unsigned int strict_check);
+#else
+#define xics_get_irq_server(virq, cpumask, strict_check) (xics_default_server)
+#endif
+
+
+#endif /* _XICS_H */
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 5b3da4b4ea79..b0449229836e 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -3,7 +3,10 @@ config PPC_PSERIES
 	bool "IBM pSeries & new (POWER5-based) iSeries"
 	select MPIC
 	select PCI_MSI
-	select XICS
+	select PPC_XICS
+	select PPC_ICP_NATIVE
+	select PPC_ICP_HV
+	select PPC_ICS_RTAS
 	select PPC_I8259
 	select PPC_RTAS
 	select PPC_RTAS_DAEMON
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index fc5237810ece..4cfefbaccd5f 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -5,7 +5,6 @@ obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
 			   setup.o iommu.o event_sources.o ras.o \
 			   firmware.o power.o dlpar.o mobility.o
 obj-$(CONFIG_SMP)	+= smp.o
-obj-$(CONFIG_XICS)	+= xics.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
 obj-$(CONFIG_EEH)	+= eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
 obj-$(CONFIG_KEXEC)	+= kexec.o
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index ef8c45489e20..ae6c27df4dc4 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <asm/system.h>
@@ -28,7 +29,7 @@
 #include <asm/machdep.h>
 #include <asm/vdso_datapage.h>
 #include <asm/pSeries_reconfig.h>
-#include "xics.h"
+#include <asm/xics.h>
 #include "plpar_wrappers.h"
 #include "offline_states.h"
 
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 77d38a5e2ff9..54cf3a4aa16b 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -7,15 +7,18 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
 #include <asm/machdep.h>
 #include <asm/page.h>
 #include <asm/firmware.h>
 #include <asm/kexec.h>
 #include <asm/mpic.h>
+#include <asm/xics.h>
 #include <asm/smp.h>
 
 #include "pseries.h"
-#include "xics.h"
 #include "plpar_wrappers.h"
 
 static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index d9801117124b..4bf21207d7d3 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -270,31 +270,4 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len,
 			lbuf[1]);
 }
 
-static inline long plpar_eoi(unsigned long xirr)
-{
-	return plpar_hcall_norets(H_EOI, xirr);
-}
-
-static inline long plpar_cppr(unsigned long cppr)
-{
-	return plpar_hcall_norets(H_CPPR, cppr);
-}
-
-static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr)
-{
-	return plpar_hcall_norets(H_IPI, servernum, mfrr);
-}
-
-static inline long plpar_xirr(unsigned long *xirr_ret, unsigned char cppr)
-{
-	long rc;
-	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-	rc = plpar_hcall(H_XIRR, retbuf, cppr);
-
-	*xirr_ret = retbuf[0];
-
-	return rc;
-}
-
 #endif /* _PSERIES_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 6c42cfde8415..ab73ad2ff59d 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -53,9 +53,9 @@
 #include <asm/irq.h>
 #include <asm/time.h>
 #include <asm/nvram.h>
-#include "xics.h"
 #include <asm/pmc.h>
 #include <asm/mpic.h>
+#include <asm/xics.h>
 #include <asm/ppc-pci.h>
 #include <asm/i8259.h>
 #include <asm/udbg.h>
@@ -205,6 +205,9 @@ static void __init pseries_mpic_init_IRQ(void)
 		mpic_assign_isu(mpic, n, isuaddr);
 	}
 
+	/* Setup top-level get_irq */
+	ppc_md.get_irq = mpic_get_irq;
+
 	/* All ISUs are setup, complete initialization */
 	mpic_init(mpic);
 
@@ -214,7 +217,7 @@ static void __init pseries_mpic_init_IRQ(void)
 
 static void __init pseries_xics_init_IRQ(void)
 {
-	xics_init_IRQ();
+	xics_init();
 	pseries_setup_i8259_cascade();
 }
 
@@ -238,7 +241,6 @@ static void __init pseries_discover_pic(void)
 		if (strstr(typep, "open-pic")) {
 			pSeries_mpic_node = of_node_get(np);
 			ppc_md.init_IRQ       = pseries_mpic_init_IRQ;
-			ppc_md.get_irq        = mpic_get_irq;
 			setup_kexec_cpu_down_mpic();
 			smp_init_pseries_mpic();
 			return;
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index a509c5292a67..fc72bfce7320 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -44,10 +44,11 @@
 #include <asm/mpic.h>
 #include <asm/vdso_datapage.h>
 #include <asm/cputhreads.h>
+#include <asm/mpic.h>
+#include <asm/xics.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
-#include "xics.h"
 #include "offline_states.h"
 
 
@@ -136,7 +137,6 @@ out:
 	return 1;
 }
 
-#ifdef CONFIG_XICS
 static void __devinit smp_xics_setup_cpu(int cpu)
 {
 	if (cpu != boot_cpuid)
@@ -151,7 +151,6 @@ static void __devinit smp_xics_setup_cpu(int cpu)
 	set_default_offline_state(cpu);
 #endif
 }
-#endif /* CONFIG_XICS */
 
 static void __devinit smp_pSeries_kick_cpu(int nr)
 {
@@ -197,23 +196,21 @@ static int smp_pSeries_cpu_bootable(unsigned int nr)
 
 	return 1;
 }
-#ifdef CONFIG_MPIC
+
 static struct smp_ops_t pSeries_mpic_smp_ops = {
 	.message_pass	= smp_mpic_message_pass,
 	.probe		= smp_mpic_probe,
 	.kick_cpu	= smp_pSeries_kick_cpu,
 	.setup_cpu	= smp_mpic_setup_cpu,
 };
-#endif
-#ifdef CONFIG_XICS
+
 static struct smp_ops_t pSeries_xics_smp_ops = {
-	.message_pass	= smp_xics_message_pass,
-	.probe		= smp_xics_probe,
+	.message_pass	= NULL,	/* Filled at runtime by xics_smp_probe() */
+	.probe		= xics_smp_probe,
 	.kick_cpu	= smp_pSeries_kick_cpu,
 	.setup_cpu	= smp_xics_setup_cpu,
 	.cpu_bootable	= smp_pSeries_cpu_bootable,
 };
-#endif
 
 /* This is called very early */
 static void __init smp_init_pseries(void)
@@ -245,14 +242,12 @@ static void __init smp_init_pseries(void)
 	pr_debug(" <- smp_init_pSeries()\n");
 }
 
-#ifdef CONFIG_MPIC
 void __init smp_init_pseries_mpic(void)
 {
 	smp_ops = &pSeries_mpic_smp_ops;
 
 	smp_init_pseries();
 }
-#endif
 
 void __init smp_init_pseries_xics(void)
 {
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
deleted file mode 100644
index d6901334d66e..000000000000
--- a/arch/powerpc/platforms/pseries/xics.c
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- * arch/powerpc/platforms/pseries/xics.c
- *
- * Copyright 2000 IBM Corporation.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <linux/types.h>
-#include <linux/threads.h>
-#include <linux/kernel.h>
-#include <linux/irq.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/radix-tree.h>
-#include <linux/cpu.h>
-#include <linux/msi.h>
-#include <linux/of.h>
-#include <linux/percpu.h>
-
-#include <asm/firmware.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/smp.h>
-#include <asm/rtas.h>
-#include <asm/hvcall.h>
-#include <asm/machdep.h>
-
-#include "xics.h"
-#include "plpar_wrappers.h"
-
-static struct irq_host *xics_host;
-
-#define XICS_IPI		2
-#define XICS_IRQ_SPURIOUS	0
-
-/* Want a priority other than 0.  Various HW issues require this. */
-#define	DEFAULT_PRIORITY	5
-
-/*
- * Mark IPIs as higher priority so we can take them inside interrupts that
- * arent marked IRQF_DISABLED
- */
-#define IPI_PRIORITY		4
-
-/* The least favored priority */
-#define LOWEST_PRIORITY		0xFF
-
-/* The number of priorities defined above */
-#define MAX_NUM_PRIORITIES	3
-
-static unsigned int default_server = 0xFF;
-static unsigned int default_distrib_server = 0;
-static unsigned int interrupt_server_size = 8;
-
-/* RTAS service tokens */
-static int ibm_get_xive;
-static int ibm_set_xive;
-static int ibm_int_on;
-static int ibm_int_off;
-
-struct xics_cppr {
-	unsigned char stack[MAX_NUM_PRIORITIES];
-	int index;
-};
-
-static DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
-
-/* Direct hardware low level accessors */
-
-/* The part of the interrupt presentation layer that we care about */
-struct xics_ipl {
-	union {
-		u32 word;
-		u8 bytes[4];
-	} xirr_poll;
-	union {
-		u32 word;
-		u8 bytes[4];
-	} xirr;
-	u32 dummy;
-	union {
-		u32 word;
-		u8 bytes[4];
-	} qirr;
-};
-
-static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS];
-
-static inline unsigned int direct_xirr_info_get(void)
-{
-	int cpu = smp_processor_id();
-
-	return in_be32(&xics_per_cpu[cpu]->xirr.word);
-}
-
-static inline void direct_xirr_info_set(unsigned int value)
-{
-	int cpu = smp_processor_id();
-
-	out_be32(&xics_per_cpu[cpu]->xirr.word, value);
-}
-
-static inline void direct_cppr_info(u8 value)
-{
-	int cpu = smp_processor_id();
-
-	out_8(&xics_per_cpu[cpu]->xirr.bytes[0], value);
-}
-
-static inline void direct_qirr_info(int n_cpu, u8 value)
-{
-	out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value);
-}
-
-
-/* LPAR low level accessors */
-
-static inline unsigned int lpar_xirr_info_get(unsigned char cppr)
-{
-	unsigned long lpar_rc;
-	unsigned long return_value;
-
-	lpar_rc = plpar_xirr(&return_value, cppr);
-	if (lpar_rc != H_SUCCESS)
-		panic(" bad return code xirr - rc = %lx\n", lpar_rc);
-	return (unsigned int)return_value;
-}
-
-static inline void lpar_xirr_info_set(unsigned int value)
-{
-	unsigned long lpar_rc;
-
-	lpar_rc = plpar_eoi(value);
-	if (lpar_rc != H_SUCCESS)
-		panic("bad return code EOI - rc = %ld, value=%x\n", lpar_rc,
-		      value);
-}
-
-static inline void lpar_cppr_info(u8 value)
-{
-	unsigned long lpar_rc;
-
-	lpar_rc = plpar_cppr(value);
-	if (lpar_rc != H_SUCCESS)
-		panic("bad return code cppr - rc = %lx\n", lpar_rc);
-}
-
-static inline void lpar_qirr_info(int n_cpu , u8 value)
-{
-	unsigned long lpar_rc;
-
-	lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value);
-	if (lpar_rc != H_SUCCESS)
-		panic("bad return code qirr - rc = %lx\n", lpar_rc);
-}
-
-
-/* Interface to generic irq subsystem */
-
-#ifdef CONFIG_SMP
-/*
- * For the moment we only implement delivery to all cpus or one cpu.
- *
- * If the requested affinity is cpu_all_mask, we set global affinity.
- * If not we set it to the first cpu in the mask, even if multiple cpus
- * are set. This is so things like irqbalance (which set core and package
- * wide affinities) do the right thing.
- */
-static int get_irq_server(unsigned int virq, const struct cpumask *cpumask,
-			  unsigned int strict_check)
-{
-
-	if (!distribute_irqs)
-		return default_server;
-
-	if (!cpumask_subset(cpu_possible_mask, cpumask)) {
-		int server = cpumask_first_and(cpu_online_mask, cpumask);
-
-		if (server < nr_cpu_ids)
-			return get_hard_smp_processor_id(server);
-
-		if (strict_check)
-			return -1;
-	}
-
-	/*
-	 * Workaround issue with some versions of JS20 firmware that
-	 * deliver interrupts to cpus which haven't been started. This
-	 * happens when using the maxcpus= boot option.
-	 */
-	if (cpumask_equal(cpu_online_mask, cpu_present_mask))
-		return default_distrib_server;
-
-	return default_server;
-}
-#else
-#define get_irq_server(virq, cpumask, strict_check) (default_server)
-#endif
-
-static void xics_unmask_irq(struct irq_data *d)
-{
-	unsigned int hwirq;
-	int call_status;
-	int server;
-
-	pr_devel("xics: unmask virq %d\n", d->irq);
-
-	hwirq = (unsigned int)irq_map[d->irq].hwirq;
-	pr_devel(" -> map to hwirq 0x%x\n", hwirq);
-	if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
-		return;
-
-	server = get_irq_server(d->irq, d->affinity, 0);
-
-	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq, server,
-				DEFAULT_PRIORITY);
-	if (call_status != 0) {
-		printk(KERN_ERR
-			"%s: ibm_set_xive irq %u server %x returned %d\n",
-			__func__, hwirq, server, call_status);
-		return;
-	}
-
-	/* Now unmask the interrupt (often a no-op) */
-	call_status = rtas_call(ibm_int_on, 1, 1, NULL, hwirq);
-	if (call_status != 0) {
-		printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
-			__func__, hwirq, call_status);
-		return;
-	}
-}
-
-static unsigned int xics_startup(struct irq_data *d)
-{
-	/*
-	 * The generic MSI code returns with the interrupt disabled on the
-	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
-	 * at that level, so we do it here by hand.
-	 */
-	if (d->msi_desc)
-		unmask_msi_irq(d);
-
-	/* unmask it */
-	xics_unmask_irq(d);
-	return 0;
-}
-
-static void xics_mask_real_irq(unsigned int hwirq)
-{
-	int call_status;
-
-	if (hwirq == XICS_IPI)
-		return;
-
-	call_status = rtas_call(ibm_int_off, 1, 1, NULL, hwirq);
-	if (call_status != 0) {
-		printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
-			__func__, hwirq, call_status);
-		return;
-	}
-
-	/* Have to set XIVE to 0xff to be able to remove a slot */
-	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq,
-				default_server, 0xff);
-	if (call_status != 0) {
-		printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
-			__func__, hwirq, call_status);
-		return;
-	}
-}
-
-static void xics_mask_irq(struct irq_data *d)
-{
-	unsigned int hwirq;
-
-	pr_devel("xics: mask virq %d\n", d->irq);
-
-	hwirq = (unsigned int)irq_map[d->irq].hwirq;
-	if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
-		return;
-	xics_mask_real_irq(hwirq);
-}
-
-static void xics_mask_unknown_vec(unsigned int vec)
-{
-	printk(KERN_ERR "Interrupt %u (real) is invalid, disabling it.\n", vec);
-	xics_mask_real_irq(vec);
-}
-
-static inline unsigned int xics_xirr_vector(unsigned int xirr)
-{
-	/*
-	 * The top byte is the old cppr, to be restored on EOI.
-	 * The remaining 24 bits are the vector.
-	 */
-	return xirr & 0x00ffffff;
-}
-
-static void push_cppr(unsigned int vec)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
-	if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
-		return;
-
-	if (vec == XICS_IPI)
-		os_cppr->stack[++os_cppr->index] = IPI_PRIORITY;
-	else
-		os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY;
-}
-
-static unsigned int xics_get_irq_direct(void)
-{
-	unsigned int xirr = direct_xirr_info_get();
-	unsigned int vec = xics_xirr_vector(xirr);
-	unsigned int irq;
-
-	if (vec == XICS_IRQ_SPURIOUS)
-		return NO_IRQ;
-
-	irq = irq_radix_revmap_lookup(xics_host, vec);
-	if (likely(irq != NO_IRQ)) {
-		push_cppr(vec);
-		return irq;
-	}
-
-	/* We don't have a linux mapping, so have rtas mask it. */
-	xics_mask_unknown_vec(vec);
-
-	/* We might learn about it later, so EOI it */
-	direct_xirr_info_set(xirr);
-	return NO_IRQ;
-}
-
-static unsigned int xics_get_irq_lpar(void)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-	unsigned int xirr = lpar_xirr_info_get(os_cppr->stack[os_cppr->index]);
-	unsigned int vec = xics_xirr_vector(xirr);
-	unsigned int irq;
-
-	if (vec == XICS_IRQ_SPURIOUS)
-		return NO_IRQ;
-
-	irq = irq_radix_revmap_lookup(xics_host, vec);
-	if (likely(irq != NO_IRQ)) {
-		push_cppr(vec);
-		return irq;
-	}
-
-	/* We don't have a linux mapping, so have RTAS mask it. */
-	xics_mask_unknown_vec(vec);
-
-	/* We might learn about it later, so EOI it */
-	lpar_xirr_info_set(xirr);
-	return NO_IRQ;
-}
-
-static unsigned char pop_cppr(void)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
-	if (WARN_ON(os_cppr->index < 1))
-		return LOWEST_PRIORITY;
-
-	return os_cppr->stack[--os_cppr->index];
-}
-
-static void xics_eoi_direct(struct irq_data *d)
-{
-	unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq;
-
-	iosync();
-	direct_xirr_info_set((pop_cppr() << 24) | hwirq);
-}
-
-static void xics_eoi_lpar(struct irq_data *d)
-{
-	unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq;
-
-	iosync();
-	lpar_xirr_info_set((pop_cppr() << 24) | hwirq);
-}
-
-static int
-xics_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force)
-{
-	unsigned int hwirq;
-	int status;
-	int xics_status[2];
-	int irq_server;
-
-	hwirq = (unsigned int)irq_map[d->irq].hwirq;
-	if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
-		return -1;
-
-	status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq);
-
-	if (status) {
-		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
-			__func__, hwirq, status);
-		return -1;
-	}
-
-	irq_server = get_irq_server(d->irq, cpumask, 1);
-	if (irq_server == -1) {
-		char cpulist[128];
-		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
-		printk(KERN_WARNING
-			"%s: No online cpus in the mask %s for irq %d\n",
-			__func__, cpulist, d->irq);
-		return -1;
-	}
-
-	status = rtas_call(ibm_set_xive, 3, 1, NULL,
-				hwirq, irq_server, xics_status[1]);
-
-	if (status) {
-		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
-			__func__, hwirq, status);
-		return -1;
-	}
-
-	return 0;
-}
-
-static struct irq_chip xics_pic_direct = {
-	.name = "XICS",
-	.irq_startup = xics_startup,
-	.irq_mask = xics_mask_irq,
-	.irq_unmask = xics_unmask_irq,
-	.irq_eoi = xics_eoi_direct,
-	.irq_set_affinity = xics_set_affinity
-};
-
-static struct irq_chip xics_pic_lpar = {
-	.name = "XICS",
-	.irq_startup = xics_startup,
-	.irq_mask = xics_mask_irq,
-	.irq_unmask = xics_unmask_irq,
-	.irq_eoi = xics_eoi_lpar,
-	.irq_set_affinity = xics_set_affinity
-};
-
-
-/* Interface to arch irq controller subsystem layer */
-
-/* Points to the irq_chip we're actually using */
-static struct irq_chip *xics_irq_chip;
-
-static int xics_host_match(struct irq_host *h, struct device_node *node)
-{
-	/* IBM machines have interrupt parents of various funky types for things
-	 * like vdevices, events, etc... The trick we use here is to match
-	 * everything here except the legacy 8259 which is compatible "chrp,iic"
-	 */
-	return !of_device_is_compatible(node, "chrp,iic");
-}
-
-static int xics_host_map(struct irq_host *h, unsigned int virq,
-			 irq_hw_number_t hw)
-{
-	pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
-
-	/* Insert the interrupt mapping into the radix tree for fast lookup */
-	irq_radix_revmap_insert(xics_host, virq, hw);
-
-	irq_set_status_flags(virq, IRQ_LEVEL);
-	irq_set_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq);
-	return 0;
-}
-
-static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
-			   const u32 *intspec, unsigned int intsize,
-			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
-
-{
-	/* Current xics implementation translates everything
-	 * to level. It is not technically right for MSIs but this
-	 * is irrelevant at this point. We might get smarter in the future
-	 */
-	*out_hwirq = intspec[0];
-	*out_flags = IRQ_TYPE_LEVEL_LOW;
-
-	return 0;
-}
-
-static struct irq_host_ops xics_host_ops = {
-	.match = xics_host_match,
-	.map = xics_host_map,
-	.xlate = xics_host_xlate,
-};
-
-static void __init xics_init_host(void)
-{
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		xics_irq_chip = &xics_pic_lpar;
-	else
-		xics_irq_chip = &xics_pic_direct;
-
-	xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops,
-				   XICS_IRQ_SPURIOUS);
-	BUG_ON(xics_host == NULL);
-	irq_set_default_host(xics_host);
-}
-
-
-/* Inter-processor interrupt support */
-
-#ifdef CONFIG_SMP
-/*
- * XICS only has a single IPI, so encode the messages per CPU
- */
-static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message);
-
-static inline void smp_xics_do_message(int cpu, int msg)
-{
-	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
-	set_bit(msg, tgt);
-	mb();
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		lpar_qirr_info(cpu, IPI_PRIORITY);
-	else
-		direct_qirr_info(cpu, IPI_PRIORITY);
-}
-
-void smp_xics_message_pass(int target, int msg)
-{
-	unsigned int i;
-
-	if (target < NR_CPUS) {
-		smp_xics_do_message(target, msg);
-	} else {
-		for_each_online_cpu(i) {
-			if (target == MSG_ALL_BUT_SELF
-			    && i == smp_processor_id())
-				continue;
-			smp_xics_do_message(i, msg);
-		}
-	}
-}
-
-static irqreturn_t xics_ipi_dispatch(int cpu)
-{
-	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
-	mb();	/* order mmio clearing qirr */
-	while (*tgt) {
-		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) {
-			smp_message_recv(PPC_MSG_CALL_FUNCTION);
-		}
-		if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) {
-			smp_message_recv(PPC_MSG_RESCHEDULE);
-		}
-		if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) {
-			smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE);
-		}
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
-		if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) {
-			smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
-		}
-#endif
-	}
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id)
-{
-	int cpu = smp_processor_id();
-
-	direct_qirr_info(cpu, 0xff);
-
-	return xics_ipi_dispatch(cpu);
-}
-
-static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id)
-{
-	int cpu = smp_processor_id();
-
-	lpar_qirr_info(cpu, 0xff);
-
-	return xics_ipi_dispatch(cpu);
-}
-
-static void xics_request_ipi(void)
-{
-	unsigned int ipi;
-	int rc;
-
-	ipi = irq_create_mapping(xics_host, XICS_IPI);
-	BUG_ON(ipi == NO_IRQ);
-
-	/*
-	 * IPIs are marked IRQF_DISABLED as they must run with irqs
-	 * disabled
-	 */
-	irq_set_handler(ipi, handle_percpu_irq);
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		rc = request_irq(ipi, xics_ipi_action_lpar,
-				IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
-	else
-		rc = request_irq(ipi, xics_ipi_action_direct,
-				IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
-	BUG_ON(rc);
-}
-
-int __init smp_xics_probe(void)
-{
-	xics_request_ipi();
-
-	return cpumask_weight(cpu_possible_mask);
-}
-
-#endif /* CONFIG_SMP */
-
-
-/* Initialization */
-
-static void xics_update_irq_servers(void)
-{
-	int i, j;
-	struct device_node *np;
-	u32 ilen;
-	const u32 *ireg;
-	u32 hcpuid;
-
-	/* Find the server numbers for the boot cpu. */
-	np = of_get_cpu_node(boot_cpuid, NULL);
-	BUG_ON(!np);
-
-	ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
-	if (!ireg) {
-		of_node_put(np);
-		return;
-	}
-
-	i = ilen / sizeof(int);
-	hcpuid = get_hard_smp_processor_id(boot_cpuid);
-
-	/* Global interrupt distribution server is specified in the last
-	 * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last
-	 * entry fom this property for current boot cpu id and use it as
-	 * default distribution server
-	 */
-	for (j = 0; j < i; j += 2) {
-		if (ireg[j] == hcpuid) {
-			default_server = hcpuid;
-			default_distrib_server = ireg[j+1];
-		}
-	}
-
-	of_node_put(np);
-}
-
-static void __init xics_map_one_cpu(int hw_id, unsigned long addr,
-				     unsigned long size)
-{
-	int i;
-
-	/* This may look gross but it's good enough for now, we don't quite
-	 * have a hard -> linux processor id matching.
-	 */
-	for_each_possible_cpu(i) {
-		if (!cpu_present(i))
-			continue;
-		if (hw_id == get_hard_smp_processor_id(i)) {
-			xics_per_cpu[i] = ioremap(addr, size);
-			return;
-		}
-	}
-}
-
-static void __init xics_init_one_node(struct device_node *np,
-				      unsigned int *indx)
-{
-	unsigned int ilen;
-	const u32 *ireg;
-
-	/* This code does the theorically broken assumption that the interrupt
-	 * server numbers are the same as the hard CPU numbers.
-	 * This happens to be the case so far but we are playing with fire...
-	 * should be fixed one of these days. -BenH.
-	 */
-	ireg = of_get_property(np, "ibm,interrupt-server-ranges", NULL);
-
-	/* Do that ever happen ? we'll know soon enough... but even good'old
-	 * f80 does have that property ..
-	 */
-	WARN_ON(ireg == NULL);
-	if (ireg) {
-		/*
-		 * set node starting index for this node
-		 */
-		*indx = *ireg;
-	}
-	ireg = of_get_property(np, "reg", &ilen);
-	if (!ireg)
-		panic("xics_init_IRQ: can't find interrupt reg property");
-
-	while (ilen >= (4 * sizeof(u32))) {
-		unsigned long addr, size;
-
-		/* XXX Use proper OF parsing code here !!! */
-		addr = (unsigned long)*ireg++ << 32;
-		ilen -= sizeof(u32);
-		addr |= *ireg++;
-		ilen -= sizeof(u32);
-		size = (unsigned long)*ireg++ << 32;
-		ilen -= sizeof(u32);
-		size |= *ireg++;
-		ilen -= sizeof(u32);
-		xics_map_one_cpu(*indx, addr, size);
-		(*indx)++;
-	}
-}
-
-void __init xics_init_IRQ(void)
-{
-	struct device_node *np;
-	u32 indx = 0;
-	int found = 0;
-	const u32 *isize;
-
-	ppc64_boot_msg(0x20, "XICS Init");
-
-	ibm_get_xive = rtas_token("ibm,get-xive");
-	ibm_set_xive = rtas_token("ibm,set-xive");
-	ibm_int_on  = rtas_token("ibm,int-on");
-	ibm_int_off = rtas_token("ibm,int-off");
-
-	for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") {
-		found = 1;
-		if (firmware_has_feature(FW_FEATURE_LPAR)) {
-			of_node_put(np);
-			break;
-			}
-		xics_init_one_node(np, &indx);
-	}
-	if (found == 0)
-		return;
-
-	/* get the bit size of server numbers */
-	found = 0;
-
-	for_each_compatible_node(np, NULL, "ibm,ppc-xics") {
-		isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
-
-		if (!isize)
-			continue;
-
-		if (!found) {
-			interrupt_server_size = *isize;
-			found = 1;
-		} else if (*isize != interrupt_server_size) {
-			printk(KERN_WARNING "XICS: "
-			       "mismatched ibm,interrupt-server#-size\n");
-			interrupt_server_size = max(*isize,
-						    interrupt_server_size);
-		}
-	}
-
-	xics_update_irq_servers();
-	xics_init_host();
-
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		ppc_md.get_irq = xics_get_irq_lpar;
-	else
-		ppc_md.get_irq = xics_get_irq_direct;
-
-	xics_setup_cpu();
-
-	ppc64_boot_msg(0x21, "XICS Done");
-}
-
-/* Cpu startup, shutdown, and hotplug */
-
-static void xics_set_cpu_priority(unsigned char cppr)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
-	/*
-	 * we only really want to set the priority when there's
-	 * just one cppr value on the stack
-	 */
-	WARN_ON(os_cppr->index != 0);
-
-	os_cppr->stack[0] = cppr;
-
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		lpar_cppr_info(cppr);
-	else
-		direct_cppr_info(cppr);
-	iosync();
-}
-
-/* Have the calling processor join or leave the specified global queue */
-static void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
-{
-	int index;
-	int status;
-
-	if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
-		return;
-
-	index = (1UL << interrupt_server_size) - 1 - gserver;
-
-	status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
-
-	WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
-	     GLOBAL_INTERRUPT_QUEUE, index, join, status);
-}
-
-void xics_setup_cpu(void)
-{
-	xics_set_cpu_priority(LOWEST_PRIORITY);
-
-	xics_set_cpu_giq(default_distrib_server, 1);
-}
-
-void xics_teardown_cpu(void)
-{
-	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-	int cpu = smp_processor_id();
-
-	/*
-	 * we have to reset the cppr index to 0 because we're
-	 * not going to return from the IPI
-	 */
-	os_cppr->index = 0;
-	xics_set_cpu_priority(0);
-
-	/* Clear any pending IPI request */
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		lpar_qirr_info(cpu, 0xff);
-	else
-		direct_qirr_info(cpu, 0xff);
-}
-
-void xics_kexec_teardown_cpu(int secondary)
-{
-	xics_teardown_cpu();
-
-	/*
-	 * we take the ipi irq but and never return so we
-	 * need to EOI the IPI, but want to leave our priority 0
-	 *
-	 * should we check all the other interrupts too?
-	 * should we be flagging idle loop instead?
-	 * or creating some task to be scheduled?
-	 */
-
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		lpar_xirr_info_set((0x00 << 24) | XICS_IPI);
-	else
-		direct_xirr_info_set((0x00 << 24) | XICS_IPI);
-
-	/*
-	 * Some machines need to have at least one cpu in the GIQ,
-	 * so leave the master cpu in the group.
-	 */
-	if (secondary)
-		xics_set_cpu_giq(default_distrib_server, 0);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* Interrupts are disabled. */
-void xics_migrate_irqs_away(void)
-{
-	int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
-	int virq;
-
-	/* If we used to be the default server, move to the new "boot_cpuid" */
-	if (hw_cpu == default_server)
-		xics_update_irq_servers();
-
-	/* Reject any interrupt that was queued to us... */
-	xics_set_cpu_priority(0);
-
-	/* Remove ourselves from the global interrupt queue */
-	xics_set_cpu_giq(default_distrib_server, 0);
-
-	/* Allow IPIs again... */
-	xics_set_cpu_priority(DEFAULT_PRIORITY);
-
-	for_each_irq(virq) {
-		struct irq_desc *desc;
-		struct irq_chip *chip;
-		unsigned int hwirq;
-		int xics_status[2];
-		int status;
-		unsigned long flags;
-
-		/* We can't set affinity on ISA interrupts */
-		if (virq < NUM_ISA_INTERRUPTS)
-			continue;
-		if (irq_map[virq].host != xics_host)
-			continue;
-		hwirq = (unsigned int)irq_map[virq].hwirq;
-		/* We need to get IPIs still. */
-		if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
-			continue;
-
-		desc = irq_to_desc(virq);
-
-		/* We only need to migrate enabled IRQS */
-		if (desc == NULL || desc->action == NULL)
-			continue;
-
-		chip = irq_desc_get_chip(desc);
-		if (chip == NULL || chip->irq_set_affinity == NULL)
-			continue;
-
-		raw_spin_lock_irqsave(&desc->lock, flags);
-
-		status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq);
-		if (status) {
-			printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
-					__func__, hwirq, status);
-			goto unlock;
-		}
-
-		/*
-		 * We only support delivery to all cpus or to one cpu.
-		 * The irq has to be migrated only in the single cpu
-		 * case.
-		 */
-		if (xics_status[0] != hw_cpu)
-			goto unlock;
-
-		/* This is expected during cpu offline. */
-		if (cpu_online(cpu))
-			printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n",
-			       virq, cpu);
-
-		/* Reset affinity to all cpus */
-		cpumask_setall(desc->irq_data.affinity);
-		chip->irq_set_affinity(&desc->irq_data, cpu_all_mask, true);
-unlock:
-		raw_spin_unlock_irqrestore(&desc->lock, flags);
-	}
-}
-#endif
diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h
deleted file mode 100644
index d1d5a83039ae..000000000000
--- a/arch/powerpc/platforms/pseries/xics.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * arch/powerpc/platforms/pseries/xics.h
- *
- * Copyright 2000 IBM Corporation.
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#ifndef _POWERPC_KERNEL_XICS_H
-#define _POWERPC_KERNEL_XICS_H
-
-extern void xics_init_IRQ(void);
-extern void xics_setup_cpu(void);
-extern void xics_teardown_cpu(void);
-extern void xics_kexec_teardown_cpu(int secondary);
-extern void xics_migrate_irqs_away(void);
-extern int smp_xics_probe(void);
-extern void smp_xics_message_pass(int target, int msg);
-
-#endif /* _POWERPC_KERNEL_XICS_H */
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index 396582835cb5..cfc18770af79 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -12,3 +12,6 @@ config PPC_MSI_BITMAP
 	depends on PCI_MSI
 	default y if MPIC
 	default y if FSL_PCI
+
+source "arch/powerpc/sysdev/xics/Kconfig"
+
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 1e0c933ef772..9516e7598573 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -57,3 +57,7 @@ obj-$(CONFIG_PPC_MPC52xx)	+= mpc5xxx_clocks.o
 ifeq ($(CONFIG_SUSPEND),y)
 obj-$(CONFIG_6xx)		+= 6xx-suspend.o
 endif
+
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-$(CONFIG_PPC_XICS)		+= xics/
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
new file mode 100644
index 000000000000..123b8ddf2816
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -0,0 +1,12 @@
+config PPC_XICS
+       def_bool n
+
+config PPC_ICP_NATIVE
+       def_bool n
+
+config PPC_ICP_HV
+       def_bool n
+
+config PPC_ICS_RTAS
+       def_bool n
+
diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile
new file mode 100644
index 000000000000..b75a6059337f
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/Makefile
@@ -0,0 +1,6 @@
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-y				+= xics-common.o
+obj-$(CONFIG_PPC_ICP_NATIVE)	+= icp-native.o
+obj-$(CONFIG_PPC_ICP_HV)	+= icp-hv.o
+obj-$(CONFIG_PPC_ICS_RTAS)	+= ics-rtas.o
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
new file mode 100644
index 000000000000..b03d348b19a5
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/io.h>
+#include <asm/hvcall.h>
+
+static inline unsigned int icp_hv_get_xirr(unsigned char cppr)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	rc = plpar_hcall(H_XIRR, retbuf, cppr);
+	if (rc != H_SUCCESS)
+		panic(" bad return code xirr - rc = %lx\n", rc);
+	return (unsigned int)retbuf[0];
+}
+
+static inline void icp_hv_set_xirr(unsigned int value)
+{
+	long rc = plpar_hcall_norets(H_EOI, value);
+	if (rc != H_SUCCESS)
+		panic("bad return code EOI - rc = %ld, value=%x\n", rc, value);
+}
+
+static inline void icp_hv_set_cppr(u8 value)
+{
+	long rc = plpar_hcall_norets(H_CPPR, value);
+	if (rc != H_SUCCESS)
+		panic("bad return code cppr - rc = %lx\n", rc);
+}
+
+static inline void icp_hv_set_qirr(int n_cpu , u8 value)
+{
+	long rc = plpar_hcall_norets(H_IPI, get_hard_smp_processor_id(n_cpu),
+				     value);
+	if (rc != H_SUCCESS)
+		panic("bad return code qirr - rc = %lx\n", rc);
+}
+
+static void icp_hv_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irq_data_to_hw(d);
+
+	iosync();
+	icp_hv_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_hv_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Clear any pending IPI */
+	icp_hv_set_qirr(cpu, 0xff);
+}
+
+static void icp_hv_flush_ipi(void)
+{
+	/* We take the ipi irq but and never return so we
+	 * need to EOI the IPI, but want to leave our priority 0
+	 *
+	 * should we check all the other interrupts too?
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+
+	icp_hv_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_hv_get_irq(void)
+{
+	unsigned int xirr = icp_hv_get_xirr(xics_cppr_top());
+	unsigned int vec = xirr & 0x00ffffff;
+	unsigned int irq;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return NO_IRQ;
+
+	irq = irq_radix_revmap_lookup(xics_host, vec);
+	if (likely(irq != NO_IRQ)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	icp_hv_set_xirr(xirr);
+
+	return NO_IRQ;
+}
+
+static void icp_hv_set_cpu_priority(unsigned char cppr)
+{
+	xics_set_base_cppr(cppr);
+	icp_hv_set_cppr(cppr);
+	iosync();
+}
+
+#ifdef CONFIG_SMP
+
+static inline void icp_hv_do_message(int cpu, int msg)
+{
+	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
+
+	set_bit(msg, tgt);
+	mb();
+	icp_hv_set_qirr(cpu, IPI_PRIORITY);
+}
+
+static void icp_hv_message_pass(int target, int msg)
+{
+	unsigned int i;
+
+	if (target < NR_CPUS) {
+		icp_hv_do_message(target, msg);
+	} else {
+		for_each_online_cpu(i) {
+			if (target == MSG_ALL_BUT_SELF
+			    && i == smp_processor_id())
+				continue;
+			icp_hv_do_message(i, msg);
+		}
+	}
+}
+
+static irqreturn_t icp_hv_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	icp_hv_set_qirr(cpu, 0xff);
+
+	return xics_ipi_dispatch(cpu);
+}
+
+#endif /* CONFIG_SMP */
+
+static const struct icp_ops icp_hv_ops = {
+	.get_irq	= icp_hv_get_irq,
+	.eoi		= icp_hv_eoi,
+	.set_priority	= icp_hv_set_cpu_priority,
+	.teardown_cpu	= icp_hv_teardown_cpu,
+	.flush_ipi	= icp_hv_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_hv_ipi_action,
+	.message_pass	= icp_hv_message_pass,
+#endif
+};
+
+int icp_hv_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xicp");
+	if (!np)
+		np = of_find_node_by_type(NULL,
+				    "PowerPC-External-Interrupt-Presentation");
+	if (!np)
+		return -ENODEV;
+
+	icp_ops = &icp_hv_ops;
+
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
new file mode 100644
index 000000000000..be5e3d748edb
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+
+struct icp_ipl {
+	union {
+		u32 word;
+		u8 bytes[4];
+	} xirr_poll;
+	union {
+		u32 word;
+		u8 bytes[4];
+	} xirr;
+	u32 dummy;
+	union {
+		u32 word;
+		u8 bytes[4];
+	} qirr;
+	u32 link_a;
+	u32 link_b;
+	u32 link_c;
+};
+
+static struct icp_ipl __iomem *icp_native_regs[NR_CPUS];
+
+static inline unsigned int icp_native_get_xirr(void)
+{
+	int cpu = smp_processor_id();
+
+	return in_be32(&icp_native_regs[cpu]->xirr.word);
+}
+
+static inline void icp_native_set_xirr(unsigned int value)
+{
+	int cpu = smp_processor_id();
+
+	out_be32(&icp_native_regs[cpu]->xirr.word, value);
+}
+
+static inline void icp_native_set_cppr(u8 value)
+{
+	int cpu = smp_processor_id();
+
+	out_8(&icp_native_regs[cpu]->xirr.bytes[0], value);
+}
+
+static inline void icp_native_set_qirr(int n_cpu, u8 value)
+{
+	out_8(&icp_native_regs[n_cpu]->qirr.bytes[0], value);
+}
+
+static void icp_native_set_cpu_priority(unsigned char cppr)
+{
+	xics_set_base_cppr(cppr);
+	icp_native_set_cppr(cppr);
+	iosync();
+}
+
+static void icp_native_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irq_data_to_hw(d);
+
+	iosync();
+	icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
+static void icp_native_teardown_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Clear any pending IPI */
+	icp_native_set_qirr(cpu, 0xff);
+}
+
+static void icp_native_flush_ipi(void)
+{
+	/* We take the ipi irq but and never return so we
+	 * need to EOI the IPI, but want to leave our priority 0
+	 *
+	 * should we check all the other interrupts too?
+	 * should we be flagging idle loop instead?
+	 * or creating some task to be scheduled?
+	 */
+
+	icp_native_set_xirr((0x00 << 24) | XICS_IPI);
+}
+
+static unsigned int icp_native_get_irq(void)
+{
+	unsigned int xirr = icp_native_get_xirr();
+	unsigned int vec = xirr & 0x00ffffff;
+	unsigned int irq;
+
+	if (vec == XICS_IRQ_SPURIOUS)
+		return NO_IRQ;
+
+	irq = irq_radix_revmap_lookup(xics_host, vec);
+	if (likely(irq != NO_IRQ)) {
+		xics_push_cppr(vec);
+		return irq;
+	}
+
+	/* We don't have a linux mapping, so have rtas mask it. */
+	xics_mask_unknown_vec(vec);
+
+	/* We might learn about it later, so EOI it */
+	icp_native_set_xirr(xirr);
+
+	return NO_IRQ;
+}
+
+#ifdef CONFIG_SMP
+
+static inline void icp_native_do_message(int cpu, int msg)
+{
+	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
+
+	set_bit(msg, tgt);
+	mb();
+	icp_native_set_qirr(cpu, IPI_PRIORITY);
+}
+
+static void icp_native_message_pass(int target, int msg)
+{
+	unsigned int i;
+
+	if (target < NR_CPUS) {
+		icp_native_do_message(target, msg);
+	} else {
+		for_each_online_cpu(i) {
+			if (target == MSG_ALL_BUT_SELF
+			    && i == smp_processor_id())
+				continue;
+			icp_native_do_message(i, msg);
+		}
+	}
+}
+
+static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+
+	icp_native_set_qirr(cpu, 0xff);
+
+	return xics_ipi_dispatch(cpu);
+}
+
+#endif /* CONFIG_SMP */
+
+static int __init icp_native_map_one_cpu(int hw_id, unsigned long addr,
+					 unsigned long size)
+{
+	char *rname;
+	int i, cpu = -1;
+
+	/* This may look gross but it's good enough for now, we don't quite
+	 * have a hard -> linux processor id matching.
+	 */
+	for_each_possible_cpu(i) {
+		if (!cpu_present(i))
+			continue;
+		if (hw_id == get_hard_smp_processor_id(i)) {
+			cpu = i;
+			break;
+		}
+	}
+
+	/* Fail, skip that CPU. Don't print, it's normal, some XICS come up
+	 * with way more entries in there than you have CPUs
+	 */
+	if (cpu == -1)
+		return 0;
+
+	rname = kasprintf(GFP_KERNEL, "CPU %d [0x%x] Interrupt Presentation",
+			  cpu, hw_id);
+
+	if (!request_mem_region(addr, size, rname)) {
+		pr_warning("icp_native: Could not reserve ICP MMIO"
+			   " for CPU %d, interrupt server #0x%x\n",
+			   cpu, hw_id);
+		return -EBUSY;
+	}
+
+	icp_native_regs[cpu] = ioremap(addr, size);
+	if (!icp_native_regs[cpu]) {
+		pr_warning("icp_native: Failed ioremap for CPU %d, "
+			   "interrupt server #0x%x, addr %#lx\n",
+			   cpu, hw_id, addr);
+		release_mem_region(addr, size);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static int __init icp_native_init_one_node(struct device_node *np,
+					   unsigned int *indx)
+{
+	unsigned int ilen;
+	const u32 *ireg;
+	int i;
+	int reg_tuple_size;
+	int num_servers = 0;
+
+	/* This code does the theorically broken assumption that the interrupt
+	 * server numbers are the same as the hard CPU numbers.
+	 * This happens to be the case so far but we are playing with fire...
+	 * should be fixed one of these days. -BenH.
+	 */
+	ireg = of_get_property(np, "ibm,interrupt-server-ranges", &ilen);
+
+	/* Do that ever happen ? we'll know soon enough... but even good'old
+	 * f80 does have that property ..
+	 */
+	WARN_ON((ireg == NULL) || (ilen != 2*sizeof(u32)));
+
+	if (ireg) {
+		*indx = of_read_number(ireg, 1);
+		if (ilen >= 2*sizeof(u32))
+			num_servers = of_read_number(ireg + 1, 1);
+	}
+
+	ireg = of_get_property(np, "reg", &ilen);
+	if (!ireg) {
+		pr_err("icp_native: Can't find interrupt reg property");
+		return -1;
+	}
+
+	reg_tuple_size = (of_n_addr_cells(np) + of_n_size_cells(np)) * 4;
+	if (((ilen % reg_tuple_size) != 0)
+	    || (num_servers && (num_servers != (ilen / reg_tuple_size)))) {
+		pr_err("icp_native: ICP reg len (%d) != num servers (%d)",
+		       ilen / reg_tuple_size, num_servers);
+		return -1;
+	}
+
+	for (i = 0; i < (ilen / reg_tuple_size); i++) {
+		struct resource r;
+		int err;
+
+		err = of_address_to_resource(np, i, &r);
+		if (err) {
+			pr_err("icp_native: Could not translate ICP MMIO"
+			       " for interrupt server 0x%x (%d)\n", *indx, err);
+			return -1;
+		}
+
+		if (icp_native_map_one_cpu(*indx, r.start, r.end - r.start))
+			return -1;
+
+		(*indx)++;
+	}
+	return 0;
+}
+
+static const struct icp_ops icp_native_ops = {
+	.get_irq	= icp_native_get_irq,
+	.eoi		= icp_native_eoi,
+	.set_priority	= icp_native_set_cpu_priority,
+	.teardown_cpu	= icp_native_teardown_cpu,
+	.flush_ipi	= icp_native_flush_ipi,
+#ifdef CONFIG_SMP
+	.ipi_action	= icp_native_ipi_action,
+	.message_pass	= icp_native_message_pass,
+#endif
+};
+
+int icp_native_init(void)
+{
+	struct device_node *np;
+	u32 indx = 0;
+	int found = 0;
+
+	for_each_compatible_node(np, NULL, "ibm,ppc-xicp")
+		if (icp_native_init_one_node(np, &indx) == 0)
+			found = 1;
+	if (!found) {
+		for_each_node_by_type(np,
+			"PowerPC-External-Interrupt-Presentation") {
+				if (icp_native_init_one_node(np, &indx) == 0)
+					found = 1;
+		}
+	}
+
+	if (found == 0)
+		return -ENODEV;
+
+	icp_ops = &icp_native_ops;
+
+	return 0;
+}
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
new file mode 100644
index 000000000000..5b3ee387e89d
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -0,0 +1,229 @@
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/msi.h>
+
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/xics.h>
+#include <asm/rtas.h>
+
+/* RTAS service tokens */
+static int ibm_get_xive;
+static int ibm_set_xive;
+static int ibm_int_on;
+static int ibm_int_off;
+
+static int ics_rtas_map(struct ics *ics, unsigned int virq);
+static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec);
+static long ics_rtas_get_server(struct ics *ics, unsigned long vec);
+
+/* Only one global & state struct ics */
+static struct ics ics_rtas = {
+	.map		= ics_rtas_map,
+	.mask_unknown	= ics_rtas_mask_unknown,
+	.get_server	= ics_rtas_get_server,
+};
+
+static void ics_rtas_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irq_data_to_hw(d);
+	int call_status;
+	int server;
+
+	pr_devel("xics: unmask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	server = xics_get_irq_server(d->irq, d->affinity, 0);
+
+	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server,
+				DEFAULT_PRIORITY);
+	if (call_status != 0) {
+		printk(KERN_ERR
+			"%s: ibm_set_xive irq %u server %x returned %d\n",
+			__func__, hw_irq, server, call_status);
+		return;
+	}
+
+	/* Now unmask the interrupt (often a no-op) */
+	call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+}
+
+static unsigned int ics_rtas_startup(struct irq_data *d)
+{
+#ifdef CONFIG_PCI_MSI
+	/*
+	 * The generic MSI code returns with the interrupt disabled on the
+	 * card, using the MSI mask bits. Firmware doesn't appear to unmask
+	 * at that level, so we do it here by hand.
+	 */
+	if (d->msi_desc)
+		unmask_msi_irq(d);
+#endif
+	/* unmask it */
+	ics_rtas_unmask_irq(d);
+	return 0;
+}
+
+static void ics_rtas_mask_real_irq(unsigned int hw_irq)
+{
+	int call_status;
+
+	if (hw_irq == XICS_IPI)
+		return;
+
+	call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+
+	/* Have to set XIVE to 0xff to be able to remove a slot */
+	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq,
+				xics_default_server, 0xff);
+	if (call_status != 0) {
+		printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
+			__func__, hw_irq, call_status);
+		return;
+	}
+}
+
+static void ics_rtas_mask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irq_data_to_hw(d);
+
+	pr_devel("xics: mask virq %d [hw 0x%x]\n", d->irq, hw_irq);
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+	ics_rtas_mask_real_irq(hw_irq);
+}
+
+static int ics_rtas_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask,
+				 bool force)
+{
+	unsigned int hw_irq = (unsigned int)irq_data_to_hw(d);
+	int status;
+	int xics_status[2];
+	int irq_server;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return -1;
+
+	status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq);
+
+	if (status) {
+		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
+			__func__, hw_irq, status);
+		return -1;
+	}
+
+	irq_server = xics_get_irq_server(d->irq, cpumask, 1);
+	if (irq_server == -1) {
+		char cpulist[128];
+		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
+		printk(KERN_WARNING
+			"%s: No online cpus in the mask %s for irq %d\n",
+			__func__, cpulist, d->irq);
+		return -1;
+	}
+
+	status = rtas_call(ibm_set_xive, 3, 1, NULL,
+			   hw_irq, irq_server, xics_status[1]);
+
+	if (status) {
+		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
+			__func__, hw_irq, status);
+		return -1;
+	}
+
+	return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip ics_rtas_irq_chip = {
+	.name = "XICS",
+	.irq_startup = ics_rtas_startup,
+	.irq_mask = ics_rtas_mask_irq,
+	.irq_unmask = ics_rtas_unmask_irq,
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_set_affinity = ics_rtas_set_affinity
+};
+
+static int ics_rtas_map(struct ics *ics, unsigned int virq)
+{
+	unsigned int hw_irq = (unsigned int)irq_map[virq].hwirq;
+	int status[2];
+	int rc;
+
+	if (WARN_ON(hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS))
+		return -EINVAL;
+
+	/* Check if RTAS knows about this interrupt */
+	rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq);
+	if (rc)
+		return -ENXIO;
+
+	irq_set_chip_and_handler(virq, &ics_rtas_irq_chip, handle_fasteoi_irq);
+	irq_set_chip_data(virq, &ics_rtas);
+
+	return 0;
+}
+
+static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec)
+{
+	ics_rtas_mask_real_irq(vec);
+}
+
+static long ics_rtas_get_server(struct ics *ics, unsigned long vec)
+{
+	int rc, status[2];
+
+	rc = rtas_call(ibm_get_xive, 1, 3, status, vec);
+	if (rc)
+		return -1;
+	return status[0];
+}
+
+int ics_rtas_init(void)
+{
+	ibm_get_xive = rtas_token("ibm,get-xive");
+	ibm_set_xive = rtas_token("ibm,set-xive");
+	ibm_int_on  = rtas_token("ibm,int-on");
+	ibm_int_off = rtas_token("ibm,int-off");
+
+	/* We enable the RTAS "ICS" if RTAS is present with the
+	 * appropriate tokens
+	 */
+	if (ibm_get_xive == RTAS_UNKNOWN_SERVICE ||
+	    ibm_set_xive == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	/* We need to patch our irq chip's EOI to point to the
+	 * right ICP
+	 */
+	ics_rtas_irq_chip.irq_eoi = icp_ops->eoi;
+
+	/* Register ourselves */
+	xics_register_ics(&ics_rtas);
+
+	return 0;
+}
+
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
new file mode 100644
index 000000000000..a2be84de5237
--- /dev/null
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright 2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/debugfs.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/errno.h>
+#include <asm/rtas.h>
+#include <asm/xics.h>
+#include <asm/firmware.h>
+
+/* Globals common to all ICP/ICS implementations */
+const struct icp_ops	*icp_ops;
+
+unsigned int xics_default_server		= 0xff;
+unsigned int xics_default_distrib_server	= 0;
+unsigned int xics_interrupt_server_size		= 8;
+
+DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
+
+struct irq_host *xics_host;
+
+static LIST_HEAD(ics_list);
+
+void xics_update_irq_servers(void)
+{
+	int i, j;
+	struct device_node *np;
+	u32 ilen;
+	const u32 *ireg;
+	u32 hcpuid;
+
+	/* Find the server numbers for the boot cpu. */
+	np = of_get_cpu_node(boot_cpuid, NULL);
+	BUG_ON(!np);
+
+	hcpuid = get_hard_smp_processor_id(boot_cpuid);
+	xics_default_server = hcpuid;
+
+	ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
+	if (!ireg) {
+		of_node_put(np);
+		return;
+	}
+
+	i = ilen / sizeof(int);
+
+	/* Global interrupt distribution server is specified in the last
+	 * entry of "ibm,ppc-interrupt-gserver#s" property. Get the last
+	 * entry fom this property for current boot cpu id and use it as
+	 * default distribution server
+	 */
+	for (j = 0; j < i; j += 2) {
+		if (ireg[j] == hcpuid) {
+			xics_default_distrib_server = ireg[j+1];
+		}
+	}
+
+	of_node_put(np);
+}
+
+/* GIQ stuff, currently only supported on RTAS setups, will have
+ * to be sorted properly for bare metal
+ */
+void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
+{
+#ifdef CONFIG_PPC_RTAS
+	int index;
+	int status;
+
+	if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
+		return;
+
+	index = (1UL << xics_interrupt_server_size) - 1 - gserver;
+
+	status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
+
+	WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
+	     GLOBAL_INTERRUPT_QUEUE, index, join, status);
+#endif
+}
+
+void xics_setup_cpu(void)
+{
+	icp_ops->set_priority(LOWEST_PRIORITY);
+
+	xics_set_cpu_giq(xics_default_distrib_server, 1);
+}
+
+void xics_mask_unknown_vec(unsigned int vec)
+{
+	struct ics *ics;
+
+	pr_err("Interrupt %u (real) is invalid, disabling it.\n", vec);
+
+	list_for_each_entry(ics, &ics_list, link)
+		ics->mask_unknown(ics, vec);
+}
+
+
+#ifdef CONFIG_SMP
+
+DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message);
+
+irqreturn_t xics_ipi_dispatch(int cpu)
+{
+	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
+
+	mb();	/* order mmio clearing qirr */
+	while (*tgt) {
+		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) {
+			smp_message_recv(PPC_MSG_CALL_FUNCTION);
+		}
+		if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) {
+			smp_message_recv(PPC_MSG_RESCHEDULE);
+		}
+		if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) {
+			smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE);
+		}
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+		if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) {
+			smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
+		}
+#endif
+	}
+	return IRQ_HANDLED;
+}
+
+static void xics_request_ipi(void)
+{
+	unsigned int ipi;
+
+	ipi = irq_create_mapping(xics_host, XICS_IPI);
+	BUG_ON(ipi == NO_IRQ);
+
+	/*
+	 * IPIs are marked IRQF_DISABLED as they must run with irqs
+	 * disabled
+	 */
+	irq_set_handler(ipi, handle_percpu_irq);
+	BUG_ON(request_irq(ipi, icp_ops->ipi_action,
+			   IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL));
+}
+
+int __init xics_smp_probe(void)
+{
+	/* Setup message_pass callback  based on which ICP is used */
+	smp_ops->message_pass = icp_ops->message_pass;
+
+	/* Register all the IPIs */
+	xics_request_ipi();
+
+	return cpumask_weight(cpu_possible_mask);
+}
+
+#endif /* CONFIG_SMP */
+
+void xics_teardown_cpu(void)
+{
+	struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+
+	/*
+	 * we have to reset the cppr index to 0 because we're
+	 * not going to return from the IPI
+	 */
+	os_cppr->index = 0;
+	icp_ops->set_priority(0);
+	icp_ops->teardown_cpu();
+}
+
+void xics_kexec_teardown_cpu(int secondary)
+{
+	xics_teardown_cpu();
+
+	icp_ops->flush_ipi();
+
+	/*
+	 * Some machines need to have at least one cpu in the GIQ,
+	 * so leave the master cpu in the group.
+	 */
+	if (secondary)
+		xics_set_cpu_giq(xics_default_distrib_server, 0);
+}
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Interrupts are disabled. */
+void xics_migrate_irqs_away(void)
+{
+	int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
+	unsigned int irq, virq;
+
+	/* If we used to be the default server, move to the new "boot_cpuid" */
+	if (hw_cpu == xics_default_server)
+		xics_update_irq_servers();
+
+	/* Reject any interrupt that was queued to us... */
+	icp_ops->set_priority(0);
+
+	/* Remove ourselves from the global interrupt queue */
+	xics_set_cpu_giq(xics_default_distrib_server, 0);
+
+	/* Allow IPIs again... */
+	icp_ops->set_priority(DEFAULT_PRIORITY);
+
+	for_each_irq(virq) {
+		struct irq_desc *desc;
+		struct irq_chip *chip;
+		long server;
+		unsigned long flags;
+		struct ics *ics;
+
+		/* We can't set affinity on ISA interrupts */
+		if (virq < NUM_ISA_INTERRUPTS)
+			continue;
+		if (irq_map[virq].host != xics_host)
+			continue;
+		irq = (unsigned int)irq_map[virq].hwirq;
+		/* We need to get IPIs still. */
+		if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+			continue;
+		desc = irq_to_desc(virq);
+		/* We only need to migrate enabled IRQS */
+		if (!desc || !desc->action)
+			continue;
+		chip = irq_desc_get_chip(desc);
+		if (!chip || !chip->irq_set_affinity)
+			continue;
+
+		raw_spin_lock_irqsave(&desc->lock, flags);
+
+		/* Locate interrupt server */
+		server = -1;
+		ics = irq_get_chip_data(virq);
+		if (ics)
+			server = ics->get_server(ics, irq);
+		if (server < 0) {
+			printk(KERN_ERR "%s: Can't find server for irq %d\n",
+			       __func__, irq);
+			goto unlock;
+		}
+
+		/* We only support delivery to all cpus or to one cpu.
+		 * The irq has to be migrated only in the single cpu
+		 * case.
+		 */
+		if (server != hw_cpu)
+			goto unlock;
+
+		/* This is expected during cpu offline. */
+		if (cpu_online(cpu))
+			pr_warning("IRQ %u affinity broken off cpu %u\n",
+			       virq, cpu);
+
+		/* Reset affinity to all cpus */
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+		irq_set_affinity(virq, cpu_all_mask);
+		continue;
+unlock:
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+	}
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#ifdef CONFIG_SMP
+/*
+ * For the moment we only implement delivery to all cpus or one cpu.
+ *
+ * If the requested affinity is cpu_all_mask, we set global affinity.
+ * If not we set it to the first cpu in the mask, even if multiple cpus
+ * are set. This is so things like irqbalance (which set core and package
+ * wide affinities) do the right thing.
+ */
+int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
+			unsigned int strict_check)
+{
+
+	if (!distribute_irqs)
+		return xics_default_server;
+
+	if (!cpumask_subset(cpu_possible_mask, cpumask)) {
+		int server = cpumask_first_and(cpu_online_mask, cpumask);
+
+		if (server < nr_cpu_ids)
+			return get_hard_smp_processor_id(server);
+
+		if (strict_check)
+			return -1;
+	}
+
+	/*
+	 * Workaround issue with some versions of JS20 firmware that
+	 * deliver interrupts to cpus which haven't been started. This
+	 * happens when using the maxcpus= boot option.
+	 */
+	if (cpumask_equal(cpu_online_mask, cpu_present_mask))
+		return xics_default_distrib_server;
+
+	return xics_default_server;
+}
+#endif /* CONFIG_SMP */
+
+static int xics_host_match(struct irq_host *h, struct device_node *node)
+{
+	/* IBM machines have interrupt parents of various funky types for things
+	 * like vdevices, events, etc... The trick we use here is to match
+	 * everything here except the legacy 8259 which is compatible "chrp,iic"
+	 */
+	return !of_device_is_compatible(node, "chrp,iic");
+}
+
+/* Dummies */
+static void xics_ipi_unmask(struct irq_data *d) { }
+static void xics_ipi_mask(struct irq_data *d) { }
+
+static struct irq_chip xics_ipi_chip = {
+	.name = "XICS",
+	.irq_eoi = NULL, /* Patched at init time */
+	.irq_mask = xics_ipi_mask,
+	.irq_unmask = xics_ipi_unmask,
+};
+
+static int xics_host_map(struct irq_host *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	struct ics *ics;
+
+	pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
+
+	/* Insert the interrupt mapping into the radix tree for fast lookup */
+	irq_radix_revmap_insert(xics_host, virq, hw);
+
+	/* They aren't all level sensitive but we just don't really know */
+	irq_set_status_flags(virq, IRQ_LEVEL);
+
+	/* Don't call into ICS for IPIs */
+	if (hw == XICS_IPI) {
+		irq_set_chip_and_handler(virq, &xics_ipi_chip,
+					 handle_fasteoi_irq);
+		return 0;
+	}
+
+	/* Let the ICS setup the chip data */
+	list_for_each_entry(ics, &ics_list, link)
+		if (ics->map(ics, virq) == 0)
+			break;
+	return 0;
+}
+
+static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
+			   const u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	/* Current xics implementation translates everything
+	 * to level. It is not technically right for MSIs but this
+	 * is irrelevant at this point. We might get smarter in the future
+	 */
+	*out_hwirq = intspec[0];
+	*out_flags = IRQ_TYPE_LEVEL_LOW;
+
+	return 0;
+}
+
+static struct irq_host_ops xics_host_ops = {
+	.match = xics_host_match,
+	.map = xics_host_map,
+	.xlate = xics_host_xlate,
+};
+
+static void __init xics_init_host(void)
+{
+	xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops,
+				   XICS_IRQ_SPURIOUS);
+	BUG_ON(xics_host == NULL);
+	irq_set_default_host(xics_host);
+}
+
+void __init xics_register_ics(struct ics *ics)
+{
+	list_add(&ics->link, &ics_list);
+}
+
+static void __init xics_get_server_size(void)
+{
+	struct device_node *np;
+	const u32 *isize;
+
+	/* We fetch the interrupt server size from the first ICS node
+	 * we find if any
+	 */
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc-xics");
+	if (!np)
+		return;
+	isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
+	if (!isize)
+		return;
+	xics_interrupt_server_size = *isize;
+	of_node_put(np);
+}
+
+void __init xics_init(void)
+{
+	int rc = -1;
+
+	/* Fist locate ICP */
+#ifdef CONFIG_PPC_ICP_HV
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		rc = icp_hv_init();
+#endif
+#ifdef CONFIG_PPC_ICP_NATIVE
+	if (rc < 0)
+		rc = icp_native_init();
+#endif
+	if (rc < 0) {
+		pr_warning("XICS: Cannot find a Presentation Controller !\n");
+		return;
+	}
+
+	/* Copy get_irq callback over to ppc_md */
+	ppc_md.get_irq = icp_ops->get_irq;
+
+	/* Patch up IPI chip EOI */
+	xics_ipi_chip.irq_eoi = icp_ops->eoi;
+
+	/* Now locate ICS */
+#ifdef CONFIG_PPC_ICS_RTAS
+	rc = ics_rtas_init();
+#endif
+	if (rc < 0)
+		pr_warning("XICS: Cannot find a Source Controller !\n");
+
+	/* Initialize common bits */
+	xics_get_server_size();
+	xics_update_irq_servers();
+	xics_init_host();
+	xics_setup_cpu();
+}
-- 
cgit v1.2.3


From 50fb8ebe7c4ad60d147700d253f78bd1e615a526 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 12 Jan 2011 17:41:28 +1100
Subject: powerpc: Add more Power7 specific definitions

This adds more SPR definitions used on newer processors when running
in hypervisor mode. Along with some other P7 specific bits and pieces

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/ppc_asm.h |  1 +
 arch/powerpc/include/asm/reg.h     | 46 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 98210067c1cc..1b422381fc16 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -170,6 +170,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 #define HMT_MEDIUM	or	2,2,2
 #define HMT_MEDIUM_HIGH or	5,5,5		# medium high priority
 #define HMT_HIGH	or	3,3,3
+#define HMT_EXTRA_HIGH	or	7,7,7		# power7 only
 
 #ifdef __KERNEL__
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7e4abebe76c0..6eb1d77edb4b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -210,8 +210,43 @@
 #define SPRN_TBWL	0x11C	/* Time Base Lower Register (super, R/W) */
 #define SPRN_TBWU	0x11D	/* Time Base Upper Register (super, R/W) */
 #define SPRN_SPURR	0x134	/* Scaled PURR */
+#define SPRN_HSPRG0	0x130	/* Hypervisor Scratch 0 */
+#define SPRN_HSPRG1	0x131	/* Hypervisor Scratch 1 */
+#define SPRN_HDSISR     0x132
+#define SPRN_HDAR       0x133
+#define SPRN_HDEC	0x136	/* Hypervisor Decrementer */
 #define SPRN_HIOR	0x137	/* 970 Hypervisor interrupt offset */
+#define SPRN_RMOR	0x138	/* Real mode offset register */
+#define SPRN_HRMOR	0x139	/* Real mode offset register */
+#define SPRN_HSRR0	0x13A	/* Hypervisor Save/Restore 0 */
+#define SPRN_HSRR1	0x13B	/* Hypervisor Save/Restore 1 */
 #define SPRN_LPCR	0x13E	/* LPAR Control Register */
+#define   LPCR_VPM0	(1ul << (63-0))
+#define   LPCR_VPM1	(1ul << (63-1))
+#define   LPCR_ISL	(1ul << (63-2))
+#define   LPCR_DPFD_SH	(63-11)
+#define   LPCR_VRMA_L	(1ul << (63-12))
+#define   LPCR_VRMA_LP0	(1ul << (63-15))
+#define   LPCR_VRMA_LP1	(1ul << (63-16))
+#define   LPCR_RMLS    0x1C000000      /* impl dependent rmo limit sel */
+#define   LPCR_ILE     0x02000000      /* !HV irqs set MSR:LE */
+#define   LPCR_PECE	0x00007000	/* powersave exit cause enable */
+#define     LPCR_PECE0	0x00004000	/* ext. exceptions can cause exit */
+#define     LPCR_PECE1	0x00002000	/* decrementer can cause exit */
+#define     LPCR_PECE2	0x00001000	/* machine check etc can cause exit */
+#define   LPCR_MER	0x00000800	/* Mediated External Exception */
+#define   LPCR_LPES0   0x00000008      /* LPAR Env selector 0 */
+#define   LPCR_LPES1   0x00000004      /* LPAR Env selector 1 */
+#define   LPCR_RMI     0x00000002      /* real mode is cache inhibit */
+#define   LPCR_HDICE   0x00000001      /* Hyp Decr enable (HV,PR,EE) */
+#define SPRN_LPID	0x13F	/* Logical Partition Identifier */
+#define	SPRN_HMER	0x150	/* Hardware m? error recovery */
+#define	SPRN_HMEER	0x151	/* Hardware m? enable error recovery */
+#define	SPRN_HEIR	0x153	/* Hypervisor Emulated Instruction Register */
+#define SPRN_TLBINDEXR	0x154	/* P7 TLB control register */
+#define SPRN_TLBVPNR	0x155	/* P7 TLB control register */
+#define SPRN_TLBRPNR	0x156	/* P7 TLB control register */
+#define SPRN_TLBLPIDR	0x157	/* P7 TLB control register */
 #define SPRN_DBAT0L	0x219	/* Data BAT 0 Lower Register */
 #define SPRN_DBAT0U	0x218	/* Data BAT 0 Upper Register */
 #define SPRN_DBAT1L	0x21B	/* Data BAT 1 Lower Register */
@@ -434,16 +469,23 @@
 #define SPRN_SRR0	0x01A	/* Save/Restore Register 0 */
 #define SPRN_SRR1	0x01B	/* Save/Restore Register 1 */
 #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
-#define   SRR1_WAKERESET	0x00380000 /* System reset */
 #define   SRR1_WAKESYSERR	0x00300000 /* System error */
 #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
 #define   SRR1_WAKEMT		0x00280000 /* mtctrl */
+#define	  SRR1_WAKEHMI		0x00280000 /* Hypervisor maintenance */
 #define   SRR1_WAKEDEC		0x00180000 /* Decrementer interrupt */
 #define   SRR1_WAKETHERM	0x00100000 /* Thermal management interrupt */
+#define	  SRR1_WAKERESET	0x00100000 /* System reset */
+#define	  SRR1_WAKESTATE	0x00030000 /* Powersave exit mask [46:47] */
+#define	  SRR1_WS_DEEPEST	0x00030000 /* Some resources not maintained,
+					  * may not be recoverable */
+#define	  SRR1_WS_DEEPER	0x00020000 /* Some resources not maintained */
+#define	  SRR1_WS_DEEP		0x00010000 /* All resources maintained */
 #define   SRR1_PROGFPE		0x00100000 /* Floating Point Enabled */
 #define   SRR1_PROGPRIV		0x00040000 /* Privileged instruction */
 #define   SRR1_PROGTRAP		0x00020000 /* Trap */
 #define   SRR1_PROGADDR		0x00010000 /* SRR0 contains subsequent addr */
+
 #define SPRN_HSRR0	0x13A	/* Save/Restore Register 0 */
 #define SPRN_HSRR1	0x13B	/* Save/Restore Register 1 */
 
@@ -894,6 +936,8 @@
 #define PV_POWER5p	0x003B
 #define PV_POWER7	0x003F
 #define PV_970FX	0x003C
+#define PV_POWER6	0x003E
+#define PV_POWER7	0x003F
 #define PV_630		0x0040
 #define PV_630p	0x0041
 #define PV_970MP	0x0044
-- 
cgit v1.2.3


From f6e17f9b0bf172a5813dfef0c03d0a25ba83b0de Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 4 Mar 2011 18:25:55 +1100
Subject: powerpc/xics: Make sure we have a sensible default distribution
 server

Even when nothing is specified in the device tree, and despite the
fact that we don't setup links properly yet, we still need a reasonable
value in there or some interrupts won't be setup properly to point to
an existing processor.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/sysdev/xics/xics-common.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index a2be84de5237..e70175dfe322 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -57,7 +57,9 @@ void xics_update_irq_servers(void)
 	BUG_ON(!np);
 
 	hcpuid = get_hard_smp_processor_id(boot_cpuid);
-	xics_default_server = hcpuid;
+	xics_default_server = xics_default_distrib_server = hcpuid;
+
+	pr_devel("xics: xics_default_server = 0x%x\n", xics_default_server);
 
 	ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
 	if (!ireg) {
@@ -75,9 +77,11 @@ void xics_update_irq_servers(void)
 	for (j = 0; j < i; j += 2) {
 		if (ireg[j] == hcpuid) {
 			xics_default_distrib_server = ireg[j+1];
+			break;
 		}
 	}
-
+	pr_devel("xics: xics_default_distrib_server = 0x%x\n",
+		 xics_default_distrib_server);
 	of_node_put(np);
 }
 
@@ -113,7 +117,7 @@ void xics_mask_unknown_vec(unsigned int vec)
 {
 	struct ics *ics;
 
-	pr_err("Interrupt %u (real) is invalid, disabling it.\n", vec);
+	pr_err("Interrupt 0x%x (real) is invalid, disabling it.\n", vec);
 
 	list_for_each_entry(ics, &ics_list, link)
 		ics->mask_unknown(ics, vec);
@@ -293,6 +297,8 @@ unlock:
  * If not we set it to the first cpu in the mask, even if multiple cpus
  * are set. This is so things like irqbalance (which set core and package
  * wide affinities) do the right thing.
+ *
+ * We need to fix this to implement support for the links
  */
 int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
 			unsigned int strict_check)
-- 
cgit v1.2.3


From 24cc67de62eebbda3ce0c46bdd56582c00dccd03 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 20 Jan 2011 18:50:55 +1100
Subject: powerpc: Define CPU feature for Architected 2.06 HV mode

This bit indicates that we are operating in hypervisor mode on a CPU
compliant to architecture 2.06 or later (currently server only).

We set it on POWER7 and have a boot-time CPU setup function that
clears it if MSR:HV isn't set (booting under a hypervisor).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/cputable.h    |  3 +-
 arch/powerpc/kernel/Makefile           |  1 +
 arch/powerpc/kernel/cpu_setup_power7.S | 65 ++++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/cputable.c         |  6 ++++
 4 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/kernel/cpu_setup_power7.S

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 1833d1a07e79..2fe37d781933 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -181,6 +181,7 @@ extern const char *powerpc_base_platform;
 #define CPU_FTR_SLB			LONG_ASM_CONST(0x0000000100000000)
 #define CPU_FTR_16M_PAGE		LONG_ASM_CONST(0x0000000200000000)
 #define CPU_FTR_TLBIEL			LONG_ASM_CONST(0x0000000400000000)
+#define CPU_FTR_HVMODE_206		LONG_ASM_CONST(0x0000000800000000)
 #define CPU_FTR_IABR			LONG_ASM_CONST(0x0000002000000000)
 #define CPU_FTR_MMCRA			LONG_ASM_CONST(0x0000004000000000)
 #define CPU_FTR_CTRL			LONG_ASM_CONST(0x0000008000000000)
@@ -418,7 +419,7 @@ extern const char *powerpc_base_platform;
 	    CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
 #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
-	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_HVMODE_206 |\
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 3bb2a3e6a337..7c6eb4974f25 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   paca.o nvram_64.o firmware.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power7.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC64)		+= vdso64/
diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S
new file mode 100644
index 000000000000..f2b317817c4e
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power7.S
@@ -0,0 +1,65 @@
+/*
+ * This file contains low level CPU setup functions.
+ *    Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+/* Entry: r3 = crap, r4 = ptr to cputable entry
+ *
+ * Note that we can be called twice for pseudo-PVRs
+ */
+_GLOBAL(__setup_cpu_power7)
+	mflr	r11
+	bl	__init_hvmode_206
+	mtlr	r11
+	beqlr
+	bl	__init_LPCR
+	mtlr	r11
+	blr
+
+_GLOBAL(__restore_cpu_power7)
+	mflr	r11
+	mfmsr	r3
+	rldicl.	r0,r3,4,63
+	beqlr
+	bl	__init_LPCR
+	mtlr	r11
+	blr
+
+__init_hvmode_206:
+	/* Disable CPU_FTR_HVMODE_206 and exit if MSR:HV is not set */
+	mfmsr	r3
+	rldicl.	r0,r3,4,63
+	bnelr
+	ld	r5,CPU_SPEC_FEATURES(r4)
+	LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE_206)
+	xor	r5,r5,r6
+	std	r5,CPU_SPEC_FEATURES(r4)
+	blr
+
+__init_LPCR:
+	/* Setup a sane LPCR:
+	 *
+	 *   LPES = 0b11 (SRR0/1 used for 0x500)
+	 *   PECE = 0b111
+	 *
+	 * Other bits untouched for now
+	 */
+	mfspr	r3,SPRN_LPCR
+	ori	r3,r3,(LPCR_LPES0|LPCR_LPES1)
+	ori	r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
+	mtspr	SPRN_LPCR,r3
+	isync
+	blr
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b9602ee06deb..b65b4908d3c7 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -423,6 +423,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.dcache_bsize		= 128,
 		.oprofile_type		= PPC_OPROFILE_POWER4,
 		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7",
 	},
 	{	/* Power7 */
@@ -439,6 +441,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.pmc_type		= PPC_PMC_IBM,
 		.oprofile_cpu_type	= "ppc64/power7",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7",
 	},
 	{	/* Power7+ */
@@ -455,6 +459,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.pmc_type		= PPC_PMC_IBM,
 		.oprofile_cpu_type	= "ppc64/power7",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7+",
 	},
 	{	/* Cell Broadband Engine */
-- 
cgit v1.2.3


From 2dd60d79e0202628a47af9812a84d502cc63628c Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 20 Jan 2011 17:50:21 +1100
Subject: powerpc: In HV mode, use HSPRG0 for PACA

When running in Hypervisor mode (arch 2.06 or later), we store the PACA
in HSPRG0 instead of SPRG1. The architecture specifies that SPRGs may be
lost during a "nap" power management operation (though they aren't
currently on POWER7) and this enables use of SPRG1 by KVM guests.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/exception-64s.h |  6 +++---
 arch/powerpc/include/asm/reg.h           | 27 ++++++++++++++++++++++++++-
 arch/powerpc/kernel/entry_64.S           |  4 ++--
 arch/powerpc/kernel/exceptions-64s.S     |  8 ++++----
 arch/powerpc/kernel/head_64.S            |  4 ++--
 arch/powerpc/kernel/paca.c               | 13 ++++++++++++-
 arch/powerpc/kvm/book3s_rmhandlers.S     |  4 +---
 7 files changed, 50 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 7778d6f0c878..337b6fa2f8cd 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -56,8 +56,8 @@
 #define LOAD_HANDLER(reg, label)					\
 	addi	reg,reg,(label)-_stext;	/* virt addr of handler ... */
 
-#define EXCEPTION_PROLOG_1(area)				\
-	mfspr	r13,SPRN_SPRG_PACA;	/* get paca address into r13 */	\
+#define EXCEPTION_PROLOG_1(area)					\
+	GET_PACA(r13);							\
 	std	r9,area+EX_R9(r13);	/* save r9 - r12 */		\
 	std	r10,area+EX_R10(r13);					\
 	std	r11,area+EX_R11(r13);					\
@@ -174,7 +174,7 @@ label##_pSeries:							\
 	HMT_MEDIUM;							\
 	DO_KVM	n;							\
 	mtspr	SPRN_SPRG_SCRATCH0,r13;	/* save r13 */			\
-	mfspr	r13,SPRN_SPRG_PACA;	/* get paca address into r13 */	\
+	GET_PACA(r13);							\
 	std	r9,PACA_EXGEN+EX_R9(r13);	/* save r9, r10 */	\
 	std	r10,PACA_EXGEN+EX_R10(r13);				\
 	lbz	r10,PACASOFTIRQEN(r13);					\
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 6eb1d77edb4b..13429a0eba09 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -715,12 +715,15 @@
  * SPRG usage:
  *
  * All 64-bit:
- *	- SPRG1 stores PACA pointer
+ *	- SPRG1 stores PACA pointer except 64-bit server in
+ *        HV mode in which case it is HSPRG0
  *
  * 64-bit server:
  *	- SPRG0 unused (reserved for HV on Power4)
  *	- SPRG2 scratch for exception vectors
  *	- SPRG3 unused (user visible)
+ *      - HSPRG0 stores PACA in HV mode
+ *      - HSPRG1 scratch for "HV" exceptions
  *
  * 64-bit embedded
  *	- SPRG0 generic exception scratch
@@ -783,6 +786,22 @@
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #define SPRN_SPRG_SCRATCH0	SPRN_SPRG2
+#define SPRN_SPRG_HPACA		SPRN_HSPRG0
+#define SPRN_SPRG_HSCRATCH0	SPRN_HSPRG1
+
+#define GET_PACA(rX)					\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_PACA;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_HPACA;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#define SET_PACA(rX)					\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mtspr	SPRN_SPRG_PACA,rX;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mtspr	SPRN_SPRG_HPACA,rX;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
 #endif
 
 #ifdef CONFIG_PPC_BOOK3E_64
@@ -792,6 +811,10 @@
 #define SPRN_SPRG_TLB_EXFRAME	SPRN_SPRG2
 #define SPRN_SPRG_TLB_SCRATCH	SPRN_SPRG6
 #define SPRN_SPRG_GEN_SCRATCH	SPRN_SPRG0
+
+#define SET_PACA(rX)	mtspr	SPRN_SPRG_PACA,rX
+#define GET_PACA(rX)	mfspr	rX,SPRN_SPRG_PACA
+
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -842,6 +865,8 @@
 #define SPRN_SPRG_SCRATCH1	SPRN_SPRG1
 #endif
 
+
+
 /*
  * An mtfsf instruction with the L bit set. On CPUs that support this a
  * full 64bits of FPSCR is restored and on other CPUs the L bit is ignored.
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d82878c4daa6..dbf5bfafd7bc 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -838,7 +838,7 @@ _GLOBAL(enter_rtas)
 
 _STATIC(rtas_return_loc)
 	/* relocation is off at this point */
-	mfspr	r4,SPRN_SPRG_PACA	/* Get PACA */
+	GET_PACA(r4)
 	clrldi	r4,r4,2			/* convert to realmode address */
 
 	bcl	20,31,$+4
@@ -869,7 +869,7 @@ _STATIC(rtas_restore_regs)
 	REST_8GPRS(14, r1)		/* Restore the non-volatiles */
 	REST_10GPRS(22, r1)		/* ditto */
 
-	mfspr	r13,SPRN_SPRG_PACA
+	GET_PACA(r13)
 
 	ld	r4,_CCR(r1)
 	mtcr	r4
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index aeb739e18769..6784bf7090f6 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -53,7 +53,7 @@ data_access_pSeries:
 	DO_KVM	0x300
 	mtspr	SPRN_SPRG_SCRATCH0,r13
 BEGIN_FTR_SECTION
-	mfspr	r13,SPRN_SPRG_PACA
+	GET_PACA(r13)
 	std	r9,PACA_EXSLB+EX_R9(r13)
 	std	r10,PACA_EXSLB+EX_R10(r13)
 	mfspr	r10,SPRN_DAR
@@ -82,7 +82,7 @@ data_access_slb_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x380
 	mtspr	SPRN_SPRG_SCRATCH0,r13
-	mfspr	r13,SPRN_SPRG_PACA		/* get paca address into r13 */
+	GET_PACA(r13)
 	std	r3,PACA_EXSLB+EX_R3(r13)
 	mfspr	r3,SPRN_DAR
 	std	r9,PACA_EXSLB+EX_R9(r13)	/* save r9 - r12 */
@@ -121,7 +121,7 @@ instruction_access_slb_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x480
 	mtspr	SPRN_SPRG_SCRATCH0,r13
-	mfspr	r13,SPRN_SPRG_PACA		/* get paca address into r13 */
+	GET_PACA(r13)
 	std	r3,PACA_EXSLB+EX_R3(r13)
 	mfspr	r3,SPRN_SRR0		/* SRR0 is faulting address */
 	std	r9,PACA_EXSLB+EX_R9(r13)	/* save r9 - r12 */
@@ -165,7 +165,7 @@ BEGIN_FTR_SECTION
 	beq-	1f
 END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 	mr	r9,r13
-	mfspr	r13,SPRN_SPRG_PACA
+	GET_PACA(r13)
 	mfspr	r11,SPRN_SRR0
 	ld	r12,PACAKBASE(r13)
 	ld	r10,PACAKMSR(r13)
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 3a319f9c9d3e..39a40400f3f2 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -228,7 +228,7 @@ generic_secondary_common_init:
 	mr	r3,r24			/* not found, copy phys to r3	 */
 	b	.kexec_wait		/* next kernel might do better	 */
 
-2:	mtspr	SPRN_SPRG_PACA,r13	/* Save vaddr of paca in an SPRG */
+2:	SET_PACA(r13)
 #ifdef CONFIG_PPC_BOOK3E
 	addi	r12,r13,PACA_EXTLB	/* and TLB exc frame in another  */
 	mtspr	SPRN_SPRG_TLB_EXFRAME,r12
@@ -534,7 +534,7 @@ _GLOBAL(pmac_secondary_start)
 	ld	r4,0(r4)		/* Get base vaddr of paca array	*/
 	mulli	r13,r24,PACA_SIZE	/* Calculate vaddr of right paca */
 	add	r13,r13,r4		/* for this processor.		*/
-	mtspr	SPRN_SPRG_PACA,r13	/* Save vaddr of paca in an SPRG*/
+	SET_PACA(r13)			/* Save vaddr of paca in an SPRG*/
 
 	/* Mark interrupts soft and hard disabled (they might be enabled
 	 * in the PACA when doing hotplug)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 10f0aadee95b..102244edecf0 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -156,11 +156,22 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
 /* Put the paca pointer into r13 and SPRG_PACA */
 void setup_paca(struct paca_struct *new_paca)
 {
+	/* Setup r13 */
 	local_paca = new_paca;
-	mtspr(SPRN_SPRG_PACA, local_paca);
+
 #ifdef CONFIG_PPC_BOOK3E
+	/* On Book3E, initialize the TLB miss exception frames */
 	mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
+#else
+	/* In HV mode, we setup both HPACA and PACA to avoid problems
+	 * if we do a GET_PACA() before the feature fixups have been
+	 * applied
+	 */
+	if (cpu_has_feature(CPU_FTR_HVMODE_206))
+		mtspr(SPRN_SPRG_HPACA, local_paca);
 #endif
+	mtspr(SPRN_SPRG_PACA, local_paca);
+
 }
 
 static int __initdata paca_size;
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 2b9c9088d00e..b0ff5ff76e25 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -35,9 +35,7 @@
 
 #if defined(CONFIG_PPC_BOOK3S_64)
 
-#define LOAD_SHADOW_VCPU(reg)				\
-	mfspr	reg, SPRN_SPRG_PACA
-
+#define LOAD_SHADOW_VCPU(reg)	GET_PACA(reg)					
 #define SHADOW_VCPU_OFF		PACA_KVM_SVCPU
 #define MSR_NOIRQ		MSR_KERNEL & ~(MSR_IR | MSR_DR)
 #define FUNC(name) 		GLUE(.,name)
-- 
cgit v1.2.3


From a5d4f3ad3a28cf046836b9bfae61d532b8f77036 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 5 Apr 2011 14:20:31 +1000
Subject: powerpc: Base support for exceptions using HSRR0/1

Pass the register type to the prolog, also provides alternate "HV"
version of hardware interrupt (0x500) and adjust LPES accordingly

We tag those interrupts by setting bit 0x2 in the trap number

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/exception-64s.h   | 65 +++++++++++++++---------------
 arch/powerpc/include/asm/kvm_asm.h         |  1 +
 arch/powerpc/include/asm/kvm_book3s_asm.h  |  1 +
 arch/powerpc/kernel/cpu_setup_power7.S     |  3 +-
 arch/powerpc/kernel/exceptions-64s.S       | 48 +++++++++++++++++-----
 arch/powerpc/kvm/book3s_rmhandlers.S       |  1 +
 arch/powerpc/kvm/book3s_segment.S          | 10 ++++-
 arch/powerpc/platforms/iseries/exception.S |  2 +-
 arch/powerpc/platforms/iseries/exception.h |  4 +-
 9 files changed, 86 insertions(+), 49 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 337b6fa2f8cd..1d98e05be511 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -56,30 +56,37 @@
 #define LOAD_HANDLER(reg, label)					\
 	addi	reg,reg,(label)-_stext;	/* virt addr of handler ... */
 
-#define EXCEPTION_PROLOG_1(area)					\
+/* Exception register prefixes */
+#define EXC_HV	H
+#define EXC_STD
+
+#define __EXCEPTION_PROLOG_1(area, h)					\
 	GET_PACA(r13);							\
 	std	r9,area+EX_R9(r13);	/* save r9 - r12 */		\
 	std	r10,area+EX_R10(r13);					\
 	std	r11,area+EX_R11(r13);					\
 	std	r12,area+EX_R12(r13);					\
-	mfspr	r9,SPRN_SPRG_SCRATCH0;					\
+	mfspr	r9,SPRN_SPRG_##h##SCRATCH0;				\
 	std	r9,area+EX_R13(r13);					\
 	mfcr	r9
+#define EXCEPTION_PROLOG_1(area, h) __EXCEPTION_PROLOG_1(area, h)
 
-#define EXCEPTION_PROLOG_PSERIES_1(label)				\
+#define __EXCEPTION_PROLOG_PSERIES_1(label, h)				\
 	ld	r12,PACAKBASE(r13);	/* get high part of &label */	\
 	ld	r10,PACAKMSR(r13);	/* get MSR value for kernel */	\
-	mfspr	r11,SPRN_SRR0;		/* save SRR0 */			\
+	mfspr	r11,SPRN_##h##SRR0;	/* save SRR0 */			\
 	LOAD_HANDLER(r12,label)						\
-	mtspr	SPRN_SRR0,r12;						\
-	mfspr	r12,SPRN_SRR1;		/* and SRR1 */			\
-	mtspr	SPRN_SRR1,r10;						\
-	rfid;								\
+	mtspr	SPRN_##h##SRR0,r12;					\
+	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
+	mtspr	SPRN_##h##SRR1,r10;					\
+	h##rfid;							\
 	b	.	/* prevent speculative execution */
+#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
+	__EXCEPTION_PROLOG_PSERIES_1(label, h)
 
-#define EXCEPTION_PROLOG_PSERIES(area, label)				\
-	EXCEPTION_PROLOG_1(area);					\
-	EXCEPTION_PROLOG_PSERIES_1(label);
+#define EXCEPTION_PROLOG_PSERIES(area, label, h)			\
+	EXCEPTION_PROLOG_1(area, h);					\
+	EXCEPTION_PROLOG_PSERIES_1(label, h);
 
 /*
  * The common exception prolog is used for all except a few exceptions
@@ -150,50 +157,44 @@ label##_pSeries:					\
 	HMT_MEDIUM;					\
 	DO_KVM	n;					\
 	mtspr	SPRN_SPRG_SCRATCH0,r13;		/* save r13 */	\
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_STD)
 
 #define HSTD_EXCEPTION_PSERIES(n, label)		\
 	. = n;						\
 	.globl label##_pSeries;				\
 label##_pSeries:					\
 	HMT_MEDIUM;					\
-	mtspr	SPRN_SPRG_SCRATCH0,r20;	/* save r20 */	\
-	mfspr	r20,SPRN_HSRR0;		/* copy HSRR0 to SRR0 */ \
-	mtspr	SPRN_SRR0,r20;				\
-	mfspr	r20,SPRN_HSRR1;		/* copy HSRR0 to SRR0 */ \
-	mtspr	SPRN_SRR1,r20;				\
-	mfspr	r20,SPRN_SPRG_SCRATCH0;	/* restore r20 */ \
-	mtspr	SPRN_SPRG_SCRATCH0,r13;		/* save r13 */	\
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
+	DO_KVM	n;					\
+	mtspr	SPRN_SPRG_HSCRATCH0,r13;/* save r13 */	\
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_HV)
 
 
-#define MASKABLE_EXCEPTION_PSERIES(n, label)				\
-	. = n;								\
-	.globl label##_pSeries;						\
-label##_pSeries:							\
+#define __MASKABLE_EXCEPTION_PSERIES(n, label, h)			\
 	HMT_MEDIUM;							\
 	DO_KVM	n;							\
-	mtspr	SPRN_SPRG_SCRATCH0,r13;	/* save r13 */			\
+	mtspr	SPRN_SPRG_##h##SCRATCH0,r13;    /* save r13 */		\
 	GET_PACA(r13);							\
 	std	r9,PACA_EXGEN+EX_R9(r13);	/* save r9, r10 */	\
 	std	r10,PACA_EXGEN+EX_R10(r13);				\
 	lbz	r10,PACASOFTIRQEN(r13);					\
 	mfcr	r9;							\
 	cmpwi	r10,0;							\
-	beq	masked_interrupt;					\
-	mfspr	r10,SPRN_SPRG_SCRATCH0;					\
+	beq	masked_##h##interrupt;					\
+	mfspr	r10,SPRN_SPRG_##h##SCRATCH0;				\
 	std	r10,PACA_EXGEN+EX_R13(r13);				\
 	std	r11,PACA_EXGEN+EX_R11(r13);				\
 	std	r12,PACA_EXGEN+EX_R12(r13);				\
 	ld	r12,PACAKBASE(r13);	/* get high part of &label */	\
 	ld	r10,PACAKMSR(r13);	/* get MSR value for kernel */	\
-	mfspr	r11,SPRN_SRR0;		/* save SRR0 */			\
+	mfspr	r11,SPRN_##h##SRR0;	/* save SRR0 */			\
 	LOAD_HANDLER(r12,label##_common)				\
-	mtspr	SPRN_SRR0,r12;						\
-	mfspr	r12,SPRN_SRR1;		/* and SRR1 */			\
-	mtspr	SPRN_SRR1,r10;						\
-	rfid;								\
+	mtspr	SPRN_##h##SRR0,r12;					\
+	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\
+	mtspr	SPRN_##h##SRR1,r10;					\
+	h##rfid;							\
 	b	.	/* prevent speculative execution */
+#define MASKABLE_EXCEPTION_PSERIES(n, label, h)				\
+	__MASKABLE_EXCEPTION_PSERIES(n, label, h)
 
 #ifdef CONFIG_PPC_ISERIES
 #define DISABLE_INTS				\
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 5b7504674397..0951b17f4eb5 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -59,6 +59,7 @@
 #define BOOK3S_INTERRUPT_INST_SEGMENT	0x480
 #define BOOK3S_INTERRUPT_EXTERNAL	0x500
 #define BOOK3S_INTERRUPT_EXTERNAL_LEVEL	0x501
+#define BOOK3S_INTERRUPT_EXTERNAL_HV	0x502
 #define BOOK3S_INTERRUPT_ALIGNMENT	0x600
 #define BOOK3S_INTERRUPT_PROGRAM	0x700
 #define BOOK3S_INTERRUPT_FP_UNAVAIL	0x800
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 36fdb3aff30b..d5a8a3861635 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -34,6 +34,7 @@
 	    (\intno == BOOK3S_INTERRUPT_DATA_SEGMENT) || \
 	    (\intno == BOOK3S_INTERRUPT_INST_SEGMENT) || \
 	    (\intno == BOOK3S_INTERRUPT_EXTERNAL) || \
+	    (\intno == BOOK3S_INTERRUPT_EXTERNAL_HV) || \
 	    (\intno == BOOK3S_INTERRUPT_ALIGNMENT) || \
 	    (\intno == BOOK3S_INTERRUPT_PROGRAM) || \
 	    (\intno == BOOK3S_INTERRUPT_FP_UNAVAIL) || \
diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S
index f2b317817c4e..e801ef15d6d0 100644
--- a/arch/powerpc/kernel/cpu_setup_power7.S
+++ b/arch/powerpc/kernel/cpu_setup_power7.S
@@ -52,13 +52,14 @@ __init_hvmode_206:
 __init_LPCR:
 	/* Setup a sane LPCR:
 	 *
-	 *   LPES = 0b11 (SRR0/1 used for 0x500)
+	 *   LPES = 0b01 (HSRR0/1 used for 0x500)
 	 *   PECE = 0b111
 	 *
 	 * Other bits untouched for now
 	 */
 	mfspr	r3,SPRN_LPCR
 	ori	r3,r3,(LPCR_LPES0|LPCR_LPES1)
+	xori	r3,r3, LPCR_LPES0
 	ori	r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
 	mtspr	SPRN_LPCR,r3
 	isync
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6784bf7090f6..17f1d6670635 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -44,7 +44,7 @@ _machine_check_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x200
 	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
-	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
+	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD)
 
 	. = 0x300
 	.globl data_access_pSeries
@@ -71,9 +71,9 @@ BEGIN_FTR_SECTION
 	std	r10,PACA_EXGEN+EX_R10(r13)
 	std	r11,PACA_EXGEN+EX_R9(r13)
 	std	r12,PACA_EXGEN+EX_R13(r13)
-	EXCEPTION_PROLOG_PSERIES_1(data_access_common)
+	EXCEPTION_PROLOG_PSERIES_1(data_access_common, EXC_STD)
 FTR_SECTION_ELSE
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD)
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB)
 
 	. = 0x380
@@ -147,11 +147,24 @@ instruction_access_slb_pSeries:
 	bctr
 #endif
 
-	MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt)
+	. = 0x500;
+	.globl hardware_interrupt_pSeries
+hardware_interrupt_pSeries:
+	BEGIN_FTR_SECTION
+	MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD)
+	FTR_SECTION_ELSE
+	MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV)
+	ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206)
+
 	STD_EXCEPTION_PSERIES(0x600, alignment)
 	STD_EXCEPTION_PSERIES(0x700, program_check)
 	STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
-	MASKABLE_EXCEPTION_PSERIES(0x900, decrementer)
+
+	. = 0x900;
+	.globl decrementer_pSeries
+decrementer_pSeries:
+	MASKABLE_EXCEPTION_PSERIES(0x900, decrementer, EXC_STD)
+
 	STD_EXCEPTION_PSERIES(0xa00, trap_0a)
 	STD_EXCEPTION_PSERIES(0xb00, trap_0b)
 
@@ -207,15 +220,15 @@ vsx_unavailable_pSeries_1:
 	b	vsx_unavailable_pSeries
 
 #ifdef CONFIG_CBE_RAS
-	HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error)
+	HSTD_EXCEPTION_PSERIES(0x1202, cbe_system_error)
 #endif /* CONFIG_CBE_RAS */
 	STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
 #ifdef CONFIG_CBE_RAS
-	HSTD_EXCEPTION_PSERIES(0x1600, cbe_maintenance)
+	HSTD_EXCEPTION_PSERIES(0x1602, cbe_maintenance)
 #endif /* CONFIG_CBE_RAS */
 	STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
 #ifdef CONFIG_CBE_RAS
-	HSTD_EXCEPTION_PSERIES(0x1800, cbe_thermal)
+	HSTD_EXCEPTION_PSERIES(0x1802, cbe_thermal)
 #endif /* CONFIG_CBE_RAS */
 
 	. = 0x3000
@@ -244,13 +257,26 @@ masked_interrupt:
 	rfid
 	b	.
 
+masked_Hinterrupt:
+	stb	r10,PACAHARDIRQEN(r13)
+	mtcrf	0x80,r9
+	ld	r9,PACA_EXGEN+EX_R9(r13)
+	mfspr	r10,SPRN_HSRR1
+	rldicl	r10,r10,48,1		/* clear MSR_EE */
+	rotldi	r10,r10,16
+	mtspr	SPRN_HSRR1,r10
+	ld	r10,PACA_EXGEN+EX_R10(r13)
+	mfspr	r13,SPRN_SPRG_HSCRATCH0
+	hrfid
+	b	.
+
 	.align	7
 do_stab_bolted_pSeries:
 	std	r11,PACA_EXSLB+EX_R11(r13)
 	std	r12,PACA_EXSLB+EX_R12(r13)
 	mfspr	r10,SPRN_SPRG_SCRATCH0
 	std	r10,PACA_EXSLB+EX_R13(r13)
-	EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted)
+	EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
 
 #ifdef CONFIG_PPC_PSERIES
 /*
@@ -261,14 +287,14 @@ do_stab_bolted_pSeries:
 system_reset_fwnmi:
 	HMT_MEDIUM
 	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD)
 
 	.globl machine_check_fwnmi
       .align 7
 machine_check_fwnmi:
 	HMT_MEDIUM
 	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
-	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
+	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD)
 
 #endif /* CONFIG_PPC_PSERIES */
 
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index b0ff5ff76e25..046e1f3d4432 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -112,6 +112,7 @@ INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_MACHINE_CHECK
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_DATA_STORAGE
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_INST_STORAGE
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_EXTERNAL
+INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_EXTERNAL_HV
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_ALIGNMENT
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_PROGRAM
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_FP_UNAVAIL
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 7c52ed0b7051..d842795d0f23 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -155,9 +155,15 @@ kvmppc_handler_trampoline_exit:
 	PPC_LL	r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13)
 
 	/* Save guest PC and MSR */
-	mfsrr0	r3
+	andi.	r0,r12,0x2
+	beq	1f
+	mfspr	r3,SPRN_HSRR0
+	mfspr	r4,SPRN_HSRR1
+	andi.	r12,r12,0x3ffd
+	b	2f
+1:	mfsrr0	r3
 	mfsrr1	r4
-
+2:
 	PPC_STL	r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13)
 	PPC_STL	r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13)
 
diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S
index 32a56c6dfa72..f7a487231a11 100644
--- a/arch/powerpc/platforms/iseries/exception.S
+++ b/arch/powerpc/platforms/iseries/exception.S
@@ -155,7 +155,7 @@ BEGIN_FTR_SECTION
 	std	r12,PACA_EXGEN+EX_R13(r13)
 	EXCEPTION_PROLOG_ISERIES_1
 FTR_SECTION_ELSE
-	EXCEPTION_PROLOG_1(PACA_EXGEN)
+	EXCEPTION_PROLOG_1(PACA_EXGEN, EXC_STD)
 	EXCEPTION_PROLOG_ISERIES_1
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB)
 	b	data_access_common
diff --git a/arch/powerpc/platforms/iseries/exception.h b/arch/powerpc/platforms/iseries/exception.h
index bae3fba5ad8e..57127d805fe3 100644
--- a/arch/powerpc/platforms/iseries/exception.h
+++ b/arch/powerpc/platforms/iseries/exception.h
@@ -39,7 +39,7 @@
 label##_iSeries:							\
 	HMT_MEDIUM;							\
 	mtspr	SPRN_SPRG_SCRATCH0,r13;	/* save r13 */			\
-	EXCEPTION_PROLOG_1(area);					\
+	EXCEPTION_PROLOG_1(area, EXC_STD);				\
 	EXCEPTION_PROLOG_ISERIES_1;					\
 	b	label##_common
 
@@ -48,7 +48,7 @@ label##_iSeries:							\
 label##_iSeries:							\
 	HMT_MEDIUM;							\
 	mtspr	SPRN_SPRG_SCRATCH0,r13;	/* save r13 */			\
-	EXCEPTION_PROLOG_1(PACA_EXGEN);					\
+	EXCEPTION_PROLOG_1(PACA_EXGEN, EXC_STD);			\
 	lbz	r10,PACASOFTIRQEN(r13);					\
 	cmpwi	0,r10,0;						\
 	beq-	label##_iSeries_masked;					\
-- 
cgit v1.2.3


From b3e6b5dfcf0974069a8ddcce7dd071120d20d79c Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 5 Apr 2011 14:27:11 +1000
Subject: powerpc: More work to support HV exceptions

Rework exception macros a bit to split offset from vector and add
some basic support for HDEC, HDSI, HISI and a few more.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/exception-64s.h  | 37 ++++++++-----
 arch/powerpc/include/asm/feature-fixups.h |  2 +-
 arch/powerpc/kernel/exceptions-64s.S      | 92 +++++++++++++++++++++----------
 3 files changed, 89 insertions(+), 42 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 1d98e05be511..fb5b0af30fcf 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -150,28 +150,27 @@
 /*
  * Exception vectors.
  */
-#define STD_EXCEPTION_PSERIES(n, label)			\
-	. = n;						\
+#define STD_EXCEPTION_PSERIES(loc, vec, label)		\
+	. = loc;					\
 	.globl label##_pSeries;				\
 label##_pSeries:					\
 	HMT_MEDIUM;					\
-	DO_KVM	n;					\
+	DO_KVM	vec;					\
 	mtspr	SPRN_SPRG_SCRATCH0,r13;		/* save r13 */	\
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_STD)
 
-#define HSTD_EXCEPTION_PSERIES(n, label)		\
-	. = n;						\
-	.globl label##_pSeries;				\
-label##_pSeries:					\
+#define STD_EXCEPTION_HV(loc, vec, label)		\
+	. = loc;					\
+	.globl label##_hv;				\
+label##_hv:						\
 	HMT_MEDIUM;					\
-	DO_KVM	n;					\
+	DO_KVM	vec;					\
 	mtspr	SPRN_SPRG_HSCRATCH0,r13;/* save r13 */	\
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_HV)
 
-
-#define __MASKABLE_EXCEPTION_PSERIES(n, label, h)			\
+#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h)			\
 	HMT_MEDIUM;							\
-	DO_KVM	n;							\
+	DO_KVM	vec;							\
 	mtspr	SPRN_SPRG_##h##SCRATCH0,r13;    /* save r13 */		\
 	GET_PACA(r13);							\
 	std	r9,PACA_EXGEN+EX_R9(r13);	/* save r9, r10 */	\
@@ -193,8 +192,20 @@ label##_pSeries:					\
 	mtspr	SPRN_##h##SRR1,r10;					\
 	h##rfid;							\
 	b	.	/* prevent speculative execution */
-#define MASKABLE_EXCEPTION_PSERIES(n, label, h)				\
-	__MASKABLE_EXCEPTION_PSERIES(n, label, h)
+#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h)			\
+	__MASKABLE_EXCEPTION_PSERIES(vec, label, h)
+
+#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label)			\
+	. = loc;							\
+	.globl label##_pSeries;						\
+label##_pSeries:							\
+	_MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_STD)
+
+#define MASKABLE_EXCEPTION_HV(loc, vec, label)				\
+	. = loc;							\
+	.globl label##_hv;						\
+label##_hv:								\
+	_MASKABLE_EXCEPTION_PSERIES(vec, label, EXC_HV)
 
 #ifdef CONFIG_PPC_ISERIES
 #define DISABLE_INTS				\
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 921a8470e18a..bdc0d6877bce 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -49,7 +49,7 @@ label##5:							\
 	FTR_ENTRY_OFFSET label##2b-label##5b;			\
 	FTR_ENTRY_OFFSET label##3b-label##5b;			\
 	FTR_ENTRY_OFFSET label##4b-label##5b;			\
-	.ifgt (label##4b-label##3b)-(label##2b-label##1b);	\
+	.ifgt (label##4b- label##3b)-(label##2b- label##1b);	\
 	.error "Feature section else case larger than body";	\
 	.endif;							\
 	.popsection;
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 17f1d6670635..805e20657868 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -37,7 +37,7 @@
 	.globl __start_interrupts
 __start_interrupts:
 
-	STD_EXCEPTION_PSERIES(0x100, system_reset)
+	STD_EXCEPTION_PSERIES(0x100, 0x100, system_reset)
 
 	. = 0x200
 _machine_check_pSeries:
@@ -113,7 +113,7 @@ data_access_slb_pSeries:
 	bctr
 #endif
 
-	STD_EXCEPTION_PSERIES(0x400, instruction_access)
+	STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access)
 
 	. = 0x480
 	.globl instruction_access_slb_pSeries
@@ -147,26 +147,29 @@ instruction_access_slb_pSeries:
 	bctr
 #endif
 
+	/* We open code these as we can't have a ". = x" (even with
+	 * x = "." within a feature section
+	 */
 	. = 0x500;
-	.globl hardware_interrupt_pSeries
+	.globl hardware_interrupt_pSeries;
+	.globl hardware_interrupt_hv;
 hardware_interrupt_pSeries:
+hardware_interrupt_hv:
 	BEGIN_FTR_SECTION
-	MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD)
+		_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD)
 	FTR_SECTION_ELSE
-	MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV)
+		_MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV)
 	ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206)
 
-	STD_EXCEPTION_PSERIES(0x600, alignment)
-	STD_EXCEPTION_PSERIES(0x700, program_check)
-	STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
+	STD_EXCEPTION_PSERIES(0x600, 0x600, alignment)
+	STD_EXCEPTION_PSERIES(0x700, 0x700, program_check)
+	STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable)
 
-	. = 0x900;
-	.globl decrementer_pSeries
-decrementer_pSeries:
-	MASKABLE_EXCEPTION_PSERIES(0x900, decrementer, EXC_STD)
+	MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer)
+	MASKABLE_EXCEPTION_HV(0x980, 0x980, decrementer)
 
-	STD_EXCEPTION_PSERIES(0xa00, trap_0a)
-	STD_EXCEPTION_PSERIES(0xb00, trap_0b)
+	STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a)
+	STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b)
 
 	. = 0xc00
 	.globl	system_call_pSeries
@@ -196,8 +199,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 	rfid		/* return to userspace */
 	b	.
 
-	STD_EXCEPTION_PSERIES(0xd00, single_step)
-	STD_EXCEPTION_PSERIES(0xe00, trap_0e)
+	STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
+
+	/* At 0xe??? we have a bunch of hypervisor exceptions, we branch
+	 * out of line to handle them
+	 */
+	. = 0xe00
+	b	h_data_storage_hv
+	. = 0xe20
+	b	h_instr_storage_hv
+	. = 0xe40
+	b	emulation_assist_hv
+	. = 0xe50
+	b	hmi_exception_hv
+	. = 0xe60
+	b	hmi_exception_hv
 
 	/* We need to deal with the Altivec unavailable exception
 	 * here which is at 0xf20, thus in the middle of the
@@ -206,39 +222,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 	 */
 performance_monitor_pSeries_1:
 	. = 0xf00
-	DO_KVM	0xf00
 	b	performance_monitor_pSeries
 
 altivec_unavailable_pSeries_1:
 	. = 0xf20
-	DO_KVM	0xf20
 	b	altivec_unavailable_pSeries
 
 vsx_unavailable_pSeries_1:
 	. = 0xf40
-	DO_KVM	0xf40
 	b	vsx_unavailable_pSeries
 
 #ifdef CONFIG_CBE_RAS
-	HSTD_EXCEPTION_PSERIES(0x1202, cbe_system_error)
+	STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
 #endif /* CONFIG_CBE_RAS */
-	STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
+	STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
 #ifdef CONFIG_CBE_RAS
-	HSTD_EXCEPTION_PSERIES(0x1602, cbe_maintenance)
+	STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance)
 #endif /* CONFIG_CBE_RAS */
-	STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
+	STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist)
 #ifdef CONFIG_CBE_RAS
-	HSTD_EXCEPTION_PSERIES(0x1802, cbe_thermal)
+	STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
 #endif /* CONFIG_CBE_RAS */
 
 	. = 0x3000
 
-/*** pSeries interrupt support ***/
+/*** Out of line interrupts support ***/
+
+	/* moved from 0xe00 */
+	STD_EXCEPTION_HV(., 0xe00, h_data_storage)
+	STD_EXCEPTION_HV(., 0xe20, h_instr_storage)
+	STD_EXCEPTION_HV(., 0xe40, emulation_assist)
+	STD_EXCEPTION_HV(., 0xe60, hmi_exception) /* need to flush cache ? */
 
 	/* moved from 0xf00 */
-	STD_EXCEPTION_PSERIES(., performance_monitor)
-	STD_EXCEPTION_PSERIES(., altivec_unavailable)
-	STD_EXCEPTION_PSERIES(., vsx_unavailable)
+	STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor)
+	STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable)
+	STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable)
 
 /*
  * An interrupt came in while soft-disabled; clear EE in SRR1,
@@ -368,6 +387,8 @@ machine_check_common:
 	STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
 	STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
 	STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
+        STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception)
+        STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception)
 	STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception)
 	STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
 #ifdef CONFIG_ALTIVEC
@@ -445,6 +466,19 @@ data_access_common:
 	li	r5,0x300
 	b	.do_hash_page	 	/* Try to handle as hpte fault */
 
+	.align  7
+        .globl  h_data_storage_common
+h_data_storage_common:
+        mfspr   r10,SPRN_HDAR
+        std     r10,PACA_EXGEN+EX_DAR(r13)
+        mfspr   r10,SPRN_HDSISR
+        stw     r10,PACA_EXGEN+EX_DSISR(r13)
+        EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
+        bl      .save_nvgprs
+        addi    r3,r1,STACK_FRAME_OVERHEAD
+        bl      .unknown_exception
+        b       .ret_from_except
+
 	.align	7
 	.globl instruction_access_common
 instruction_access_common:
@@ -454,6 +488,8 @@ instruction_access_common:
 	li	r5,0x400
 	b	.do_hash_page		/* Try to handle as hpte fault */
 
+        STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception)
+
 /*
  * Here is the common SLB miss user that is used when going to virtual
  * mode for SLB misses, that is currently not used
-- 
cgit v1.2.3


From 673b189a2e3353061fa8c49515d1014dab6ad9b9 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 5 Apr 2011 13:59:58 +1000
Subject: powerpc: Always use SPRN_SPRG_HSCRATCH0 when running in HV mode

This uses feature sections to arrange that we always use HSPRG1
as the scratch register in the interrupt entry code rather than
SPRG2 when we're running in hypervisor mode on POWER7.  This will
ensure that we don't trash the guest's SPRG2 when we are running
KVM guests.  To simplify the code, we define GET_SCRATCH0() and
SET_SCRATCH0() macros like the GET_PACA/SET_PACA macros.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/exception-64s.h   | 15 +++++++--------
 arch/powerpc/include/asm/reg.h             | 14 ++++++++++++++
 arch/powerpc/kernel/exceptions-64s.S       | 26 +++++++++++++-------------
 arch/powerpc/kvm/book3s_rmhandlers.S       |  6 +++---
 arch/powerpc/kvm/book3s_segment.S          |  2 +-
 arch/powerpc/platforms/iseries/exception.S |  2 +-
 arch/powerpc/platforms/iseries/exception.h |  4 ++--
 7 files changed, 41 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index fb5b0af30fcf..d6b4849df9b1 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -60,16 +60,15 @@
 #define EXC_HV	H
 #define EXC_STD
 
-#define __EXCEPTION_PROLOG_1(area, h)					\
+#define EXCEPTION_PROLOG_1(area)					\
 	GET_PACA(r13);							\
 	std	r9,area+EX_R9(r13);	/* save r9 - r12 */		\
 	std	r10,area+EX_R10(r13);					\
 	std	r11,area+EX_R11(r13);					\
 	std	r12,area+EX_R12(r13);					\
-	mfspr	r9,SPRN_SPRG_##h##SCRATCH0;				\
+	GET_SCRATCH0(r9);						\
 	std	r9,area+EX_R13(r13);					\
 	mfcr	r9
-#define EXCEPTION_PROLOG_1(area, h) __EXCEPTION_PROLOG_1(area, h)
 
 #define __EXCEPTION_PROLOG_PSERIES_1(label, h)				\
 	ld	r12,PACAKBASE(r13);	/* get high part of &label */	\
@@ -85,7 +84,7 @@
 	__EXCEPTION_PROLOG_PSERIES_1(label, h)
 
 #define EXCEPTION_PROLOG_PSERIES(area, label, h)			\
-	EXCEPTION_PROLOG_1(area, h);					\
+	EXCEPTION_PROLOG_1(area);					\
 	EXCEPTION_PROLOG_PSERIES_1(label, h);
 
 /*
@@ -156,7 +155,7 @@
 label##_pSeries:					\
 	HMT_MEDIUM;					\
 	DO_KVM	vec;					\
-	mtspr	SPRN_SPRG_SCRATCH0,r13;		/* save r13 */	\
+	SET_SCRATCH0(r13);		/* save r13 */		\
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_STD)
 
 #define STD_EXCEPTION_HV(loc, vec, label)		\
@@ -165,13 +164,13 @@ label##_pSeries:					\
 label##_hv:						\
 	HMT_MEDIUM;					\
 	DO_KVM	vec;					\
-	mtspr	SPRN_SPRG_HSCRATCH0,r13;/* save r13 */	\
+	SET_SCRATCH0(r13);	/* save r13 */		\
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, EXC_HV)
 
 #define __MASKABLE_EXCEPTION_PSERIES(vec, label, h)			\
 	HMT_MEDIUM;							\
 	DO_KVM	vec;							\
-	mtspr	SPRN_SPRG_##h##SCRATCH0,r13;    /* save r13 */		\
+	SET_SCRATCH0(r13);    /* save r13 */				\
 	GET_PACA(r13);							\
 	std	r9,PACA_EXGEN+EX_R9(r13);	/* save r9, r10 */	\
 	std	r10,PACA_EXGEN+EX_R10(r13);				\
@@ -179,7 +178,7 @@ label##_hv:						\
 	mfcr	r9;							\
 	cmpwi	r10,0;							\
 	beq	masked_##h##interrupt;					\
-	mfspr	r10,SPRN_SPRG_##h##SCRATCH0;				\
+	GET_SCRATCH0(r10);						\
 	std	r10,PACA_EXGEN+EX_R13(r13);				\
 	std	r11,PACA_EXGEN+EX_R11(r13);				\
 	std	r12,PACA_EXGEN+EX_R12(r13);				\
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 13429a0eba09..76d7d5fea5be 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -802,6 +802,20 @@
 	FTR_SECTION_ELSE_NESTED(66);			\
 	mtspr	SPRN_SPRG_HPACA,rX;			\
 	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#define GET_SCRATCH0(rX)				\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_SCRATCH0;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mfspr	rX,SPRN_SPRG_HSCRATCH0;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#define SET_SCRATCH0(rX)				\
+	BEGIN_FTR_SECTION_NESTED(66);			\
+	mtspr	SPRN_SPRG_SCRATCH0,rX;			\
+	FTR_SECTION_ELSE_NESTED(66);			\
+	mtspr	SPRN_SPRG_HSCRATCH0,rX;			\
+	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
 #endif
 
 #ifdef CONFIG_PPC_BOOK3E_64
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 805e20657868..e513c1d35b2a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -43,7 +43,7 @@ __start_interrupts:
 _machine_check_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x200
-	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
+	SET_SCRATCH0(r13)
 	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD)
 
 	. = 0x300
@@ -51,7 +51,7 @@ _machine_check_pSeries:
 data_access_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x300
-	mtspr	SPRN_SPRG_SCRATCH0,r13
+	SET_SCRATCH0(r13)
 BEGIN_FTR_SECTION
 	GET_PACA(r13)
 	std	r9,PACA_EXSLB+EX_R9(r13)
@@ -67,7 +67,7 @@ BEGIN_FTR_SECTION
 	std	r11,PACA_EXGEN+EX_R11(r13)
 	ld	r11,PACA_EXSLB+EX_R9(r13)
 	std	r12,PACA_EXGEN+EX_R12(r13)
-	mfspr	r12,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r12)
 	std	r10,PACA_EXGEN+EX_R10(r13)
 	std	r11,PACA_EXGEN+EX_R9(r13)
 	std	r12,PACA_EXGEN+EX_R13(r13)
@@ -81,7 +81,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB)
 data_access_slb_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x380
-	mtspr	SPRN_SPRG_SCRATCH0,r13
+	SET_SCRATCH0(r13)
 	GET_PACA(r13)
 	std	r3,PACA_EXSLB+EX_R3(r13)
 	mfspr	r3,SPRN_DAR
@@ -95,7 +95,7 @@ data_access_slb_pSeries:
 	std	r10,PACA_EXSLB+EX_R10(r13)
 	std	r11,PACA_EXSLB+EX_R11(r13)
 	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r10)
 	std	r10,PACA_EXSLB+EX_R13(r13)
 	mfspr	r12,SPRN_SRR1		/* and SRR1 */
 #ifndef CONFIG_RELOCATABLE
@@ -120,7 +120,7 @@ data_access_slb_pSeries:
 instruction_access_slb_pSeries:
 	HMT_MEDIUM
 	DO_KVM	0x480
-	mtspr	SPRN_SPRG_SCRATCH0,r13
+	SET_SCRATCH0(r13)
 	GET_PACA(r13)
 	std	r3,PACA_EXSLB+EX_R3(r13)
 	mfspr	r3,SPRN_SRR0		/* SRR0 is faulting address */
@@ -134,7 +134,7 @@ instruction_access_slb_pSeries:
 	std	r10,PACA_EXSLB+EX_R10(r13)
 	std	r11,PACA_EXSLB+EX_R11(r13)
 	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r10)
 	std	r10,PACA_EXSLB+EX_R13(r13)
 	mfspr	r12,SPRN_SRR1		/* and SRR1 */
 #ifndef CONFIG_RELOCATABLE
@@ -272,7 +272,7 @@ masked_interrupt:
 	rotldi	r10,r10,16
 	mtspr	SPRN_SRR1,r10
 	ld	r10,PACA_EXGEN+EX_R10(r13)
-	mfspr	r13,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r13)
 	rfid
 	b	.
 
@@ -285,7 +285,7 @@ masked_Hinterrupt:
 	rotldi	r10,r10,16
 	mtspr	SPRN_HSRR1,r10
 	ld	r10,PACA_EXGEN+EX_R10(r13)
-	mfspr	r13,SPRN_SPRG_HSCRATCH0
+	GET_SCRATCH0(r13)
 	hrfid
 	b	.
 
@@ -293,7 +293,7 @@ masked_Hinterrupt:
 do_stab_bolted_pSeries:
 	std	r11,PACA_EXSLB+EX_R11(r13)
 	std	r12,PACA_EXSLB+EX_R12(r13)
-	mfspr	r10,SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r10)
 	std	r10,PACA_EXSLB+EX_R13(r13)
 	EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD)
 
@@ -305,14 +305,14 @@ do_stab_bolted_pSeries:
       .align 7
 system_reset_fwnmi:
 	HMT_MEDIUM
-	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
+	SET_SCRATCH0(r13)		/* save r13 */
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD)
 
 	.globl machine_check_fwnmi
       .align 7
 machine_check_fwnmi:
 	HMT_MEDIUM
-	mtspr	SPRN_SPRG_SCRATCH0,r13		/* save r13 */
+	SET_SCRATCH0(r13)		/* save r13 */
 	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD)
 
 #endif /* CONFIG_PPC_PSERIES */
@@ -327,7 +327,7 @@ slb_miss_user_pseries:
 	std	r10,PACA_EXGEN+EX_R10(r13)
 	std	r11,PACA_EXGEN+EX_R11(r13)
 	std	r12,PACA_EXGEN+EX_R12(r13)
-	mfspr	r10,SPRG_SCRATCH0
+	GET_SCRATCH0(r10)
 	ld	r11,PACA_EXSLB+EX_R9(r13)
 	ld	r12,PACA_EXSLB+EX_R3(r13)
 	std	r10,PACA_EXGEN+EX_R13(r13)
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 046e1f3d4432..ae99af66ca34 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -70,7 +70,7 @@
 .global kvmppc_trampoline_\intno
 kvmppc_trampoline_\intno:
 
-	mtspr	SPRN_SPRG_SCRATCH0, r13		/* Save r13 */
+	SET_SCRATCH0(r13)		/* Save r13 */
 
 	/*
 	 * First thing to do is to find out if we're coming
@@ -89,7 +89,7 @@ kvmppc_trampoline_\intno:
 	lwz	r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
 	mtcr	r12
 	PPC_LL	r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
-	mfspr	r13, SPRN_SPRG_SCRATCH0		/* r13 = original r13 */
+	GET_SCRATCH0(r13)			/* r13 = original r13 */
 	b	kvmppc_resume_\intno		/* Get back original handler */
 
 	/* Now we know we're handling a KVM guest */
@@ -157,7 +157,7 @@ kvmppc_handler_skip_ins:
 	lwz	r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
 	mtcr	r12
 	PPC_LL	r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
-	mfspr	r13, SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r13)
 
 	/* And get back into the code */
 	RFI
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index d842795d0f23..451264274b8c 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -168,7 +168,7 @@ kvmppc_handler_trampoline_exit:
 	PPC_STL	r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13)
 
 	/* Get scratch'ed off registers */
-	mfspr	r9, SPRN_SPRG_SCRATCH0
+	GET_SCRATCH0(r9)
 	PPC_LL	r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
 	lwz	r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
 
diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S
index f7a487231a11..32a56c6dfa72 100644
--- a/arch/powerpc/platforms/iseries/exception.S
+++ b/arch/powerpc/platforms/iseries/exception.S
@@ -155,7 +155,7 @@ BEGIN_FTR_SECTION
 	std	r12,PACA_EXGEN+EX_R13(r13)
 	EXCEPTION_PROLOG_ISERIES_1
 FTR_SECTION_ELSE
-	EXCEPTION_PROLOG_1(PACA_EXGEN, EXC_STD)
+	EXCEPTION_PROLOG_1(PACA_EXGEN)
 	EXCEPTION_PROLOG_ISERIES_1
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB)
 	b	data_access_common
diff --git a/arch/powerpc/platforms/iseries/exception.h b/arch/powerpc/platforms/iseries/exception.h
index 57127d805fe3..bae3fba5ad8e 100644
--- a/arch/powerpc/platforms/iseries/exception.h
+++ b/arch/powerpc/platforms/iseries/exception.h
@@ -39,7 +39,7 @@
 label##_iSeries:							\
 	HMT_MEDIUM;							\
 	mtspr	SPRN_SPRG_SCRATCH0,r13;	/* save r13 */			\
-	EXCEPTION_PROLOG_1(area, EXC_STD);				\
+	EXCEPTION_PROLOG_1(area);					\
 	EXCEPTION_PROLOG_ISERIES_1;					\
 	b	label##_common
 
@@ -48,7 +48,7 @@ label##_iSeries:							\
 label##_iSeries:							\
 	HMT_MEDIUM;							\
 	mtspr	SPRN_SPRG_SCRATCH0,r13;	/* save r13 */			\
-	EXCEPTION_PROLOG_1(PACA_EXGEN, EXC_STD);			\
+	EXCEPTION_PROLOG_1(PACA_EXGEN);					\
 	lbz	r10,PACASOFTIRQEN(r13);					\
 	cmpwi	0,r10,0;						\
 	beq-	label##_iSeries_masked;					\
-- 
cgit v1.2.3


From 895796a8ab548fe03b6fea410dcb1b86e1913708 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 24 Jan 2011 13:25:55 +1100
Subject: powerpc: Initialize LPCR:DPFD on power7 to a sane default

This sets the default data stream prefetch size for operating
systems that don't set their own value in DSCR. We use 4 which
is "medium".

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/cpu_setup_power7.S | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S
index e801ef15d6d0..2390f6f7c478 100644
--- a/arch/powerpc/kernel/cpu_setup_power7.S
+++ b/arch/powerpc/kernel/cpu_setup_power7.S
@@ -54,6 +54,7 @@ __init_LPCR:
 	 *
 	 *   LPES = 0b01 (HSRR0/1 used for 0x500)
 	 *   PECE = 0b111
+	 *   DPFD = 4
 	 *
 	 * Other bits untouched for now
 	 */
@@ -61,6 +62,12 @@ __init_LPCR:
 	ori	r3,r3,(LPCR_LPES0|LPCR_LPES1)
 	xori	r3,r3, LPCR_LPES0
 	ori	r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
+	li	r5,7
+	sldi	r5,r5,LPCR_DPFD_SH
+	andc	r3,r3,r5
+	li	r5,4
+	sldi	r5,r5,LPCR_DPFD_SH
+	or	r3,r3,r5
 	mtspr	SPRN_LPCR,r3
 	isync
 	blr
-- 
cgit v1.2.3


From b144871cb5f2c268e94258ae8f1ec810db2e1120 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 1 Mar 2011 15:46:09 +1100
Subject: powerpc: Initialize TLB and LPID register on HV mode Power7

In case entry from the bootloader isn't "clean"

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/cpu_setup_power7.S | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S
index 2390f6f7c478..4f9a93fcfe07 100644
--- a/arch/powerpc/kernel/cpu_setup_power7.S
+++ b/arch/powerpc/kernel/cpu_setup_power7.S
@@ -25,7 +25,10 @@ _GLOBAL(__setup_cpu_power7)
 	bl	__init_hvmode_206
 	mtlr	r11
 	beqlr
+	li	r0,0
+	mtspr	SPRN_LPID,r0
 	bl	__init_LPCR
+	bl	__init_TLB
 	mtlr	r11
 	blr
 
@@ -34,7 +37,10 @@ _GLOBAL(__restore_cpu_power7)
 	mfmsr	r3
 	rldicl.	r0,r3,4,63
 	beqlr
+	li	r0,0
+	mtspr	SPRN_LPID,r0
 	bl	__init_LPCR
+	bl	__init_TLB
 	mtlr	r11
 	blr
 
@@ -71,3 +77,15 @@ __init_LPCR:
 	mtspr	SPRN_LPCR,r3
 	isync
 	blr
+
+__init_TLB:
+	/* Clear the TLB */
+	li	r6,128
+	mtctr	r6
+	li	r7,0xc00	/* IS field = 0b11 */
+	ptesync
+2:	tlbiel	r7
+	addi	r7,r7,0x1000
+	bdnz	2b
+	ptesync
+1:	blr
-- 
cgit v1.2.3


From ad0693ee722b93b63a89c845e99513f242e43aa6 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 1 Feb 2011 12:13:09 +1100
Subject: powerpc: Call CPU ->restore callback earlier on secondary CPUs

We do it before we loop on the PACA start flag. This way, we get a
chance to set critical SPRs on all CPUs before Linux tries to start
them up, which avoids problems when changing some bits such as LPCR
bits that need to be identical on all threads of a core or similar
things like that. Ideally, some of that should also be done before
the MMU is enabled, but that's a separate issue which would require
moving some of the SMP startup code earlier, let's not get there
for now, it works with that change alone.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/head_64.S | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 39a40400f3f2..95944278380c 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -236,17 +236,6 @@ generic_secondary_common_init:
 
 	/* From now on, r24 is expected to be logical cpuid */
 	mr	r24,r5
-3:	HMT_LOW
-	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
-					/* start.			 */
-
-#ifndef CONFIG_SMP
-	b	3b			/* Never go on non-SMP		 */
-#else
-	cmpwi	0,r23,0
-	beq	3b			/* Loop until told to go	 */
-
-	sync				/* order paca.run and cur_cpu_spec */
 
 	/* See if we need to call a cpu state restore handler */
 	LOAD_REG_ADDR(r23, cur_cpu_spec)
@@ -258,6 +247,17 @@ generic_secondary_common_init:
 	mtctr	r23
 	bctrl
 
+3:	HMT_LOW
+	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
+					/* start.			 */
+#ifndef CONFIG_SMP
+	b	3b			/* Never go on non-SMP		 */
+#else
+	cmpwi	0,r23,0
+	beq	3b			/* Loop until told to go	 */
+
+	sync				/* order paca.run and cur_cpu_spec */
+
 4:	/* Create a temp kernel stack for use before relocation is on.	*/
 	ld	r1,PACAEMERGSP(r13)
 	subi	r1,r1,STACK_FRAME_OVERHEAD
-- 
cgit v1.2.3


From 9d07bc841c9779b4d7902e417f4e509996ce805d Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 16 Mar 2011 14:54:35 +1100
Subject: powerpc: Properly handshake CPUs going out of boot spin loop

We need to wait a bit for them to have done their CPU setup
or we might end up with translation and EE on with different
LPCR values between threads

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/smp.h |  1 +
 arch/powerpc/kernel/head_64.S  | 18 +++++++++++++-----
 arch/powerpc/kernel/prom.c     | 27 ++++++++++-----------------
 arch/powerpc/kernel/setup_32.c |  1 +
 arch/powerpc/kernel/setup_64.c | 13 ++++++++++++-
 5 files changed, 37 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index a902a0d3ae0d..bb4c033a8fb0 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -29,6 +29,7 @@
 #include <asm/percpu.h>
 
 extern int boot_cpuid;
+extern int boot_cpu_count;
 
 extern void cpu_die(void);
 
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 95944278380c..0700e1135c91 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -242,23 +242,31 @@ generic_secondary_common_init:
 	ld	r23,0(r23)
 	ld	r23,CPU_SPEC_RESTORE(r23)
 	cmpdi	0,r23,0
-	beq	4f
+	beq	3f
 	ld	r23,0(r23)
 	mtctr	r23
 	bctrl
 
-3:	HMT_LOW
+3:	LOAD_REG_ADDR(r3, boot_cpu_count) /* Decrement boot_cpu_count */
+	lwarx	r4,0,r3
+	subi	r4,r4,1
+	stwcx.	r4,0,r3
+	bne	3b
+	isync
+
+4:	HMT_LOW
 	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
 					/* start.			 */
 #ifndef CONFIG_SMP
-	b	3b			/* Never go on non-SMP		 */
+	b	4b			/* Never go on non-SMP		 */
 #else
 	cmpwi	0,r23,0
-	beq	3b			/* Loop until told to go	 */
+	beq	4b			/* Loop until told to go	 */
 
 	sync				/* order paca.run and cur_cpu_spec */
+	isync				/* In case code patching happened */
 
-4:	/* Create a temp kernel stack for use before relocation is on.	*/
+	/* Create a temp kernel stack for use before relocation is on.	*/
 	ld	r1,PACAEMERGSP(r13)
 	subi	r1,r1,STACK_FRAME_OVERHEAD
 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index e74fa12afc82..c391dc4c8bad 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -268,13 +268,12 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 					  const char *uname, int depth,
 					  void *data)
 {
-	static int logical_cpuid = 0;
 	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 	const u32 *prop;
 	const u32 *intserv;
 	int i, nthreads;
 	unsigned long len;
-	int found = 0;
+	int found = -1;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
@@ -299,11 +298,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 		 * booted proc.
 		 */
 		if (initial_boot_params && initial_boot_params->version >= 2) {
-			if (intserv[i] ==
-					initial_boot_params->boot_cpuid_phys) {
-				found = 1;
-				break;
-			}
+			if (intserv[i] == initial_boot_params->boot_cpuid_phys)
+				found = boot_cpu_count;
 		} else {
 			/*
 			 * Check if it's the boot-cpu, set it's hw index now,
@@ -311,23 +307,20 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 			 * off secondary threads.
 			 */
 			if (of_get_flat_dt_prop(node,
-					"linux,boot-cpu", NULL) != NULL) {
-				found = 1;
-				break;
-			}
+					"linux,boot-cpu", NULL) != NULL)
+				found = boot_cpu_count;
 		}
-
 #ifdef CONFIG_SMP
 		/* logical cpu id is always 0 on UP kernels */
-		logical_cpuid++;
+		boot_cpu_count++;
 #endif
 	}
 
-	if (found) {
-		DBG("boot cpu: logical %d physical %d\n", logical_cpuid,
+	if (found >= 0) {
+		DBG("boot cpu: logical %d physical %d\n", found,
 			intserv[i]);
-		boot_cpuid = logical_cpuid;
-		set_hard_smp_processor_id(boot_cpuid, intserv[i]);
+		boot_cpuid = found;
+		set_hard_smp_processor_id(found, intserv[i]);
 
 		/*
 		 * PAPR defines "logical" PVR values for cpus that
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 1d2fbc905303..620d792b52e4 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -48,6 +48,7 @@ extern void bootx_init(unsigned long r4, unsigned long phys);
 
 int boot_cpuid = -1;
 EXPORT_SYMBOL_GPL(boot_cpuid);
+int __initdata boot_cpu_count;
 int boot_cpuid_phys;
 
 int smp_hw_index[NR_CPUS];
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5a0401fcaebd..91a5cc5f0d02 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -72,6 +72,7 @@
 #endif
 
 int boot_cpuid = 0;
+int __initdata boot_cpu_count;
 u64 ppc64_pft_size;
 
 /* Pick defaults since we might want to patch instructions
@@ -233,6 +234,7 @@ void early_setup_secondary(void)
 void smp_release_cpus(void)
 {
 	unsigned long *ptr;
+	int i;
 
 	DBG(" -> smp_release_cpus()\n");
 
@@ -245,7 +247,16 @@ void smp_release_cpus(void)
 	ptr  = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
 			- PHYSICAL_START);
 	*ptr = __pa(generic_secondary_smp_init);
-	mb();
+
+	/* And wait a bit for them to catch up */
+	for (i = 0; i < 100000; i++) {
+		mb();
+		HMT_low();
+		if (boot_cpu_count == 0)
+			break;
+		udelay(1);
+	}
+	DBG("boot_cpu_count = %d\n", boot_cpu_count);
 
 	DBG(" <- smp_release_cpus()\n");
 }
-- 
cgit v1.2.3


From 948cf67c4726cca2fc57533dccadfb54d890689d Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 24 Jan 2011 18:42:41 +1100
Subject: powerpc: Add NAP mode support on Power7 in HV mode

Wakeup comes from the system reset handler with a potential loss of
the non-hypervisor CPU state. We save the non-volatile state on the
stack and a pointer to it in the PACA, which the system reset handler
uses to restore things

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/machdep.h    |  1 +
 arch/powerpc/include/asm/paca.h       |  2 +-
 arch/powerpc/include/asm/ppc-opcode.h |  6 +++
 arch/powerpc/kernel/Makefile          |  1 +
 arch/powerpc/kernel/exceptions-64s.S  | 30 ++++++++++-
 arch/powerpc/kernel/idle_power7.S     | 97 +++++++++++++++++++++++++++++++++++
 arch/powerpc/platforms/Kconfig        |  4 ++
 7 files changed, 139 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/kernel/idle_power7.S

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index e4f01915fbb0..493dbb38e1ba 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -267,6 +267,7 @@ struct machdep_calls {
 
 extern void e500_idle(void);
 extern void power4_idle(void);
+extern void power7_idle(void);
 extern void ppc6xx_idle(void);
 extern void book3e_idle(void);
 
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index ec57540cd7af..f6da4f517fca 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -125,7 +125,7 @@ struct paca_struct {
 	struct task_struct *__current;	/* Pointer to current */
 	u64 kstack;			/* Saved Kernel stack addr */
 	u64 stab_rr;			/* stab/slb round-robin counter */
-	u64 saved_r1;			/* r1 save for RTAS calls */
+	u64 saved_r1;			/* r1 save for RTAS calls or PM */
 	u64 saved_msr;			/* MSR saved here by enter_rtas */
 	u16 trap_save;			/* Used when bad stack is encountered */
 	u8 soft_enabled;		/* irq soft-enable flag */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1255569387b6..384a96db794a 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -56,6 +56,9 @@
 #define PPC_INST_TLBSRX_DOT		0x7c0006a5
 #define PPC_INST_XXLOR			0xf0000510
 
+#define PPC_INST_NAP			0x4c000364
+#define PPC_INST_SLEEP			0x4c0003a4
+
 /* macros to insert fields into opcodes */
 #define __PPC_RA(a)	(((a) & 0x1f) << 16)
 #define __PPC_RB(b)	(((b) & 0x1f) << 11)
@@ -126,4 +129,7 @@
 #define XXLOR(t, a, b)		stringify_in_c(.long PPC_INST_XXLOR | \
 					       VSX_XX3((t), (a), (b)))
 
+#define PPC_NAP			stringify_in_c(.long PPC_INST_NAP)
+#define PPC_SLEEP		stringify_in_c(.long PPC_INST_SLEEP)
+
 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 7c6eb4974f25..0fd6273bb8a9 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
 obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o
+obj-$(CONFIG_PPC_P7_NAP)	+= idle_power7.o
 obj-$(CONFIG_PPC_OF)		+= of_platform.o prom_parse.o
 obj-$(CONFIG_PPC_CLOCK)		+= clock.o
 procfs-y			:= proc_powerpc.o
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e513c1d35b2a..ad06333631ac 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -37,7 +37,35 @@
 	.globl __start_interrupts
 __start_interrupts:
 
-	STD_EXCEPTION_PSERIES(0x100, 0x100, system_reset)
+	.globl system_reset_pSeries;
+system_reset_pSeries:
+	HMT_MEDIUM;
+	DO_KVM	0x100;
+	SET_SCRATCH0(r13)
+#ifdef CONFIG_PPC_P7_NAP
+BEGIN_FTR_SECTION
+	/* Running native on arch 2.06 or later, check if we are
+	 * waking up from nap. We only handle no state loss and
+	 * supervisor state loss. We do -not- handle hypervisor
+	 * state loss at this time.
+	 */
+	mfspr	r13,SPRN_SRR1
+	rlwinm	r13,r13,47-31,30,31
+	cmpwi	cr0,r13,1
+	bne	1f
+	b	.power7_wakeup_noloss
+1:	cmpwi	cr0,r13,2
+	bne	1f
+	b	.power7_wakeup_loss
+	/* Total loss of HV state is fatal, we could try to use the
+	 * PIR to locate a PACA, then use an emergency stack etc...
+	 * but for now, let's just stay stuck here
+	 */
+1:	cmpwi	cr0,r13,3
+	beq	.
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206)
+#endif /* CONFIG_PPC_P7_NAP */
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD)
 
 	. = 0x200
 _machine_check_pSeries:
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
new file mode 100644
index 000000000000..f8f0bc7f1d4f
--- /dev/null
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -0,0 +1,97 @@
+/*
+ *  This file contains the power_save function for 970-family CPUs.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc-opcode.h>
+
+#undef DEBUG
+
+	.text
+
+_GLOBAL(power7_idle)
+	/* Now check if user or arch enabled NAP mode */
+	LOAD_REG_ADDRBASE(r3,powersave_nap)
+	lwz	r4,ADDROFF(powersave_nap)(r3)
+	cmpwi	0,r4,0
+	beqlr
+
+	/* NAP is a state loss, we create a regs frame on the
+	 * stack, fill it up with the state we care about and
+	 * stick a pointer to it in PACAR1. We really only
+	 * need to save PC, some CR bits and the NV GPRs,
+	 * but for now an interrupt frame will do.
+	 */
+	mflr	r0
+	std	r0,16(r1)
+	stdu	r1,-INT_FRAME_SIZE(r1)
+	std	r0,_LINK(r1)
+	std	r0,_NIP(r1)
+
+#ifndef CONFIG_SMP
+	/* Make sure FPU, VSX etc... are flushed as we may lose
+	 * state when going to nap mode
+	 */
+	bl	.discard_lazy_cpu_state
+#endif /* CONFIG_SMP */
+
+	/* Hard disable interrupts */
+	mfmsr	r9
+	rldicl	r9,r9,48,1
+	rotldi	r9,r9,16
+	mtmsrd	r9,1			/* hard-disable interrupts */
+	li	r0,0
+	stb	r0,PACASOFTIRQEN(r13)	/* we'll hard-enable shortly */
+	stb	r0,PACAHARDIRQEN(r13)
+
+	/* Continue saving state */
+	SAVE_GPR(2, r1)
+	SAVE_NVGPRS(r1)
+	mfcr	r3
+	std	r3,_CCR(r1)
+	std	r9,_MSR(r1)
+	std	r1,PACAR1(r13)
+
+	/* Magic NAP mode enter sequence */
+	std	r0,0(r1)
+	ptesync
+	ld	r0,0(r1)
+1:	cmp	cr0,r0,r0
+	bne	1b
+	PPC_NAP
+	b	.
+
+_GLOBAL(power7_wakeup_loss)
+	GET_PACA(r13)
+	ld	r1,PACAR1(r13)
+	REST_NVGPRS(r1)
+	REST_GPR(2, r1)
+	ld	r3,_CCR(r1)
+	ld	r4,_MSR(r1)
+	ld	r5,_NIP(r1)
+	addi	r1,r1,INT_FRAME_SIZE
+	mtcr	r3
+	mtspr	SPRN_SRR1,r4
+	mtspr	SPRN_SRR0,r5
+	rfid
+
+_GLOBAL(power7_wakeup_noloss)
+	GET_PACA(r13)
+	ld	r1,PACAR1(r13)
+	ld	r4,_MSR(r1)
+	ld	r5,_NIP(r1)
+	addi	r1,r1,INT_FRAME_SIZE
+	mtspr	SPRN_SRR1,r4
+	mtspr	SPRN_SRR0,r5
+	rfid
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f7b07720aa30..658ffc50493d 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -147,6 +147,10 @@ config PPC_970_NAP
 	bool
 	default n
 
+config PPC_P7_NAP
+	bool
+	default n
+
 config PPC_INDIRECT_IO
 	bool
 	select GENERIC_IOMAP
-- 
cgit v1.2.3


From dd797738643cd3c2dd9cdff7e4c3a04d318ab23a Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 5 Apr 2011 14:34:58 +1000
Subject: powerpc: Perform an isync to synchronize CPUs coming out of
 secondary_hold

We need to do that to guarantee they see any code change done by
dynamic patching during boot.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/head_64.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 0700e1135c91..6d17c37f22a1 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -147,6 +147,8 @@ __secondary_hold:
 	mtctr	r4
 	mr	r3,r24
 	li	r4,0
+	/* Make sure that patched code is visible */
+	isync
 	bctr
 #else
 	BUG_OPCODE
-- 
cgit v1.2.3


From af2771493a1bf79cd9a1ab4f30327c428b5bd67c Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 6 Apr 2011 10:51:17 +1000
Subject: powerpc: Improve prom_printf()

Adds the ability to print decimal numbers and adds some more
format string variants

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/prom_init.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 941ff4dbc567..7839bd7bfd15 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -335,6 +335,7 @@ static void __init prom_printf(const char *format, ...)
 	const char *p, *q, *s;
 	va_list args;
 	unsigned long v;
+	long vs;
 	struct prom_t *_prom = &RELOC(prom);
 
 	va_start(args, format);
@@ -368,12 +369,35 @@ static void __init prom_printf(const char *format, ...)
 			v = va_arg(args, unsigned long);
 			prom_print_hex(v);
 			break;
+		case 'd':
+			++q;
+			vs = va_arg(args, int);
+			if (vs < 0) {
+				prom_print(RELOC("-"));
+				vs = -vs;
+			}
+			prom_print_dec(vs);
+			break;
 		case 'l':
 			++q;
-			if (*q == 'u') { /* '%lu' */
+			if (*q == 0)
+				break;
+			else if (*q == 'x') {
+				++q;
+				v = va_arg(args, unsigned long);
+				prom_print_hex(v);
+			} else if (*q == 'u') { /* '%lu' */
 				++q;
 				v = va_arg(args, unsigned long);
 				prom_print_dec(v);
+			} else if (*q == 'd') { /* %ld */
+				++q;
+				vs = va_arg(args, long);
+				if (vs < 0) {
+					prom_print(RELOC("-"));
+					vs = -vs;
+				}
+				prom_print_dec(vs);
 			}
 			break;
 		}
-- 
cgit v1.2.3


From 5e8e7b404ac965be45e25d5538676151de89aefb Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ozlabs.org>
Date: Tue, 12 Apr 2011 19:00:04 +0000
Subject: powerpc/mm: Standardise on MMU_NO_CONTEXT

Use MMU_NO_CONTEXT as the initialiser for mm_context.id on
nohash and hash64.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/tlbflush.h  | 2 ++
 arch/powerpc/mm/mmu_context_hash64.c | 3 +--
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index d50a380b2b6f..81143fcbd113 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -79,6 +79,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 
 #elif defined(CONFIG_PPC_STD_MMU_64)
 
+#define MMU_NO_CONTEXT		0
+
 /*
  * TLB flushing for 64-bit hash-MMU CPUs
  */
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 2535828aa84b..c5859448a0ab 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -31,7 +31,6 @@ static DEFINE_IDA(mmu_context_ida);
  * Each segment contains 2^28 bytes.  Each context maps 2^44 bytes,
  * so we can support 2^19-1 contexts (19 == 35 + 28 - 44).
  */
-#define NO_CONTEXT	0
 #define MAX_CONTEXT	((1UL << 19) - 1)
 
 int __init_new_context(void)
@@ -95,5 +94,5 @@ void destroy_context(struct mm_struct *mm)
 {
 	__destroy_context(mm->context.id);
 	subpage_prot_free(mm);
-	mm->context.id = NO_CONTEXT;
+	mm->context.id = MMU_NO_CONTEXT;
 }
-- 
cgit v1.2.3


From ee7a2aa3d3fd10a7157dd19f737b2bafdea0458f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ozlabs.org>
Date: Tue, 12 Apr 2011 19:00:05 +0000
Subject: powerpc/mm: Fix slice state initialization for Book3E

On Book3E, MMU_NO_CONTEXT != 0, but the slice_mm_new_context()
macro assumes that it is.  This means that the map of the
page sizes for each slice is always initialized to zeroes
(which happens to be 4k pages), rather than to the correct
default base page size value - which might be 64k.

This patch corrects the problem.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/page_64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 812b2cd80aed..488c52eb64cb 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -130,7 +130,7 @@ extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
 extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 				  unsigned long len, unsigned int psize);
 
-#define slice_mm_new_context(mm)	((mm)->context.id == 0)
+#define slice_mm_new_context(mm)	((mm)->context.id == MMU_NO_CONTEXT)
 
 #endif /* __ASSEMBLY__ */
 #else
-- 
cgit v1.2.3


From 6975a783d7b40c79be4b7a7ea450e023ff7e5e02 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ozlabs.org>
Date: Tue, 12 Apr 2011 20:38:55 +0000
Subject: powerpc/boot: Allow building the zImage wrapper as a relocatable
 ET_DYN

This patch adds code, linker script and makefile support to allow
building the zImage wrapper around the kernel as a position independent
executable.  This results in an ET_DYN instead of an ET_EXEC ELF output
file, which can be loaded at any location by the firmware and will
process its own relocations to work correctly at the loaded address.

This is of interest particularly since the standard ePAPR image format
must be an ET_DYN (although this patch alone is not sufficient to
produce a fully ePAPR compliant boot image).

Note for now we don't enable building with -pie for anything.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/boot/crt0.S            | 116 +++++++++++++++++++++++-------------
 arch/powerpc/boot/wrapper           |   9 ++-
 arch/powerpc/boot/zImage.coff.lds.S |   6 +-
 arch/powerpc/boot/zImage.lds.S      |  57 +++++++++++-------
 4 files changed, 118 insertions(+), 70 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index f1c4dfc635be..0f7428a37efb 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -6,16 +6,28 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * NOTE: this code runs in 32 bit mode and is packaged as ELF32.
+ * NOTE: this code runs in 32 bit mode, is position-independent,
+ * and is packaged as ELF32.
  */
 
 #include "ppc_asm.h"
 
 	.text
-	/* a procedure descriptor used when booting this as a COFF file */
+	/* A procedure descriptor used when booting this as a COFF file.
+	 * When making COFF, this comes first in the link and we're
+	 * linked at 0x500000.
+	 */
 	.globl	_zimage_start_opd
 _zimage_start_opd:
-	.long	_zimage_start, 0, 0, 0
+	.long	0x500000, 0, 0, 0
+
+p_start:	.long	_start
+p_etext:	.long	_etext
+p_bss_start:	.long	__bss_start
+p_end:		.long	_end
+
+	.weak	_platform_stack_top
+p_pstack:	.long	_platform_stack_top
 
 	.weak	_zimage_start
 	.globl	_zimage_start
@@ -24,37 +36,65 @@ _zimage_start:
 _zimage_start_lib:
 	/* Work out the offset between the address we were linked at
 	   and the address where we're running. */
-	bl	1f
-1:	mflr	r0
-	lis	r9,1b@ha
-	addi	r9,r9,1b@l
-	subf.	r0,r9,r0
-	beq	3f		/* if running at same address as linked */
+	bl	.+4
+p_base:	mflr	r10		/* r10 now points to runtime addr of p_base */
+	/* grab the link address of the dynamic section in r11 */
+	addis	r11,r10,(_GLOBAL_OFFSET_TABLE_-p_base)@ha
+	lwz	r11,(_GLOBAL_OFFSET_TABLE_-p_base)@l(r11)
+	cmpwi	r11,0
+	beq	3f		/* if not linked -pie */
+	/* get the runtime address of the dynamic section in r12 */
+	.weak	__dynamic_start
+	addis	r12,r10,(__dynamic_start-p_base)@ha
+	addi	r12,r12,(__dynamic_start-p_base)@l
+	subf	r11,r11,r12	/* runtime - linktime offset */
+
+	/* The dynamic section contains a series of tagged entries.
+	 * We need the RELA and RELACOUNT entries. */
+RELA = 7
+RELACOUNT = 0x6ffffff9
+	li	r9,0
+	li	r0,0
+9:	lwz	r8,0(r12)	/* get tag */
+	cmpwi	r8,0
+	beq	10f		/* end of list */
+	cmpwi	r8,RELA
+	bne	11f
+	lwz	r9,4(r12)	/* get RELA pointer in r9 */
+	b	12f
+11:	addis	r8,r8,(-RELACOUNT)@ha
+	cmpwi	r8,RELACOUNT@l
+	bne	12f
+	lwz	r0,4(r12)	/* get RELACOUNT value in r0 */
+12:	addi	r12,r12,8
+	b	9b
 
-	/* The .got2 section contains a list of addresses, so add
-	   the address offset onto each entry. */
-	lis	r9,__got2_start@ha
-	addi	r9,r9,__got2_start@l
-	lis	r8,__got2_end@ha
-	addi	r8,r8,__got2_end@l
-	subf.	r8,r9,r8
+	/* The relocation section contains a list of relocations.
+	 * We now do the R_PPC_RELATIVE ones, which point to words
+	 * which need to be initialized with addend + offset.
+	 * The R_PPC_RELATIVE ones come first and there are RELACOUNT
+	 * of them. */
+10:	/* skip relocation if we don't have both */
+	cmpwi	r0,0
 	beq	3f
-	srwi.	r8,r8,2
-	mtctr	r8
-	add	r9,r0,r9
-2:	lwz	r8,0(r9)
-	add	r8,r8,r0
-	stw	r8,0(r9)
-	addi	r9,r9,4
+	cmpwi	r9,0
+	beq	3f
+
+	add	r9,r9,r11	/* Relocate RELA pointer */
+	mtctr	r0
+2:	lbz	r0,4+3(r9)	/* ELF32_R_INFO(reloc->r_info) */
+	cmpwi	r0,22		/* R_PPC_RELATIVE */
+	bne	3f
+	lwz	r12,0(r9)	/* reloc->r_offset */
+	lwz	r0,8(r9)	/* reloc->r_addend */
+	add	r0,r0,r11
+	stwx	r0,r11,r12
+	addi	r9,r9,12
 	bdnz	2b
 
 	/* Do a cache flush for our text, in case the loader didn't */
-3:	lis	r9,_start@ha
-	addi	r9,r9,_start@l
-	add	r9,r0,r9
-	lis	r8,_etext@ha
-	addi	r8,r8,_etext@l
-	add	r8,r0,r8
+3:	lwz	r9,p_start-p_base(r10)	/* note: these are relocated now */
+	lwz	r8,p_etext-p_base(r10)
 4:	dcbf	r0,r9
 	icbi	r0,r9
 	addi	r9,r9,0x20
@@ -64,27 +104,19 @@ _zimage_start_lib:
 	isync
 
 	/* Clear the BSS */
-	lis	r9,__bss_start@ha
-	addi	r9,r9,__bss_start@l
-	add	r9,r0,r9
-	lis	r8,_end@ha
-	addi	r8,r8,_end@l
-	add	r8,r0,r8
-	li	r10,0
-5:	stw	r10,0(r9)
+	lwz	r9,p_bss_start-p_base(r10)
+	lwz	r8,p_end-p_base(r10)
+	li	r0,0
+5:	stw	r0,0(r9)
 	addi	r9,r9,4
 	cmplw	cr0,r9,r8
 	blt	5b
 
 	/* Possibly set up a custom stack */
-.weak	_platform_stack_top
-	lis	r8,_platform_stack_top@ha
-	addi	r8,r8,_platform_stack_top@l
+	lwz	r8,p_pstack-p_base(r10)
 	cmpwi	r8,0
 	beq	6f
-	add	r8,r0,r8
 	lwz	r1,0(r8)
-	add	r1,r0,r1
 	li	r0,0
 	stwu	r0,-16(r1)	/* establish a stack frame */
 6:
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index cb97e7511d7e..fef527867811 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -39,6 +39,7 @@ dts=
 cacheit=
 binary=
 gzip=.gz
+pie=
 
 # cross-compilation prefix
 CROSS=
@@ -157,9 +158,10 @@ pmac|chrp)
     platformo=$object/of.o
     ;;
 coff)
-    platformo=$object/of.o
+    platformo="$object/crt0.o $object/of.o"
     lds=$object/zImage.coff.lds
     link_address='0x500000'
+    pie=
     ;;
 miboot|uboot)
     # miboot and U-boot want just the bare bits, not an ELF binary
@@ -208,6 +210,7 @@ ps3)
     ksection=.kernel:vmlinux.bin
     isection=.kernel:initrd
     link_address=''
+    pie=
     ;;
 ep88xc|ep405|ep8248e)
     platformo="$object/fixed-head.o $object/$platform.o"
@@ -310,9 +313,9 @@ fi
 
 if [ "$platform" != "miboot" ]; then
     if [ -n "$link_address" ] ; then
-        text_start="-Ttext $link_address --defsym _start=$link_address"
+        text_start="-Ttext $link_address"
     fi
-    ${CROSS}ld -m elf32ppc -T $lds $text_start -o "$ofile" \
+    ${CROSS}ld -m elf32ppc -T $lds $text_start $pie -o "$ofile" \
 	$platformo $tmp $object/wrapper.a
     rm $tmp
 fi
diff --git a/arch/powerpc/boot/zImage.coff.lds.S b/arch/powerpc/boot/zImage.coff.lds.S
index 856dc78b14ef..de4c9e3c9344 100644
--- a/arch/powerpc/boot/zImage.coff.lds.S
+++ b/arch/powerpc/boot/zImage.coff.lds.S
@@ -3,13 +3,13 @@ ENTRY(_zimage_start_opd)
 EXTERN(_zimage_start_opd)
 SECTIONS
 {
-  _start = .;
   .text      :
   {
+    _start = .;
     *(.text)
     *(.fixup)
+    _etext = .;
   }
-  _etext = .;
   . = ALIGN(4096);
   .data    :
   {
@@ -17,9 +17,7 @@ SECTIONS
     *(.data*)
     *(__builtin_*)
     *(.sdata*)
-    __got2_start = .;
     *(.got2)
-    __got2_end = .;
 
     _dtb_start = .;
     *(.kernel:dtb)
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 0962d62bdb50..2bd8731f1365 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -3,49 +3,64 @@ ENTRY(_zimage_start)
 EXTERN(_zimage_start)
 SECTIONS
 {
-  _start = .;
   .text      :
   {
+    _start = .;
     *(.text)
     *(.fixup)
+    _etext = .;
   }
-  _etext = .;
   . = ALIGN(4096);
   .data    :
   {
     *(.rodata*)
     *(.data*)
     *(.sdata*)
-    __got2_start = .;
     *(.got2)
-    __got2_end = .;
   }
+  .dynsym : { *(.dynsym) }
+  .dynstr : { *(.dynstr) }
+  .dynamic :
+  {
+    __dynamic_start = .;
+    *(.dynamic)
+  }
+  .hash : { *(.hash) }
+  .interp : { *(.interp) }
+  .rela.dyn : { *(.rela*) }
 
   . = ALIGN(8);
-  _dtb_start = .;
-  .kernel:dtb : { *(.kernel:dtb) }
-  _dtb_end = .;
-
-  . = ALIGN(4096);
-  _vmlinux_start =  .;
-  .kernel:vmlinux.strip : { *(.kernel:vmlinux.strip) }
-  _vmlinux_end =  .;
+  .kernel:dtb :
+  {
+    _dtb_start = .;
+    *(.kernel:dtb)
+    _dtb_end = .;
+  }
 
   . = ALIGN(4096);
-  _initrd_start =  .;
-  .kernel:initrd : { *(.kernel:initrd) }
-  _initrd_end =  .;
+  .kernel:vmlinux.strip :
+  {
+    _vmlinux_start =  .;
+    *(.kernel:vmlinux.strip)
+    _vmlinux_end =  .;
+  }
 
   . = ALIGN(4096);
-  _edata  =  .;
+  .kernel:initrd :
+  {
+    _initrd_start =  .;
+    *(.kernel:initrd)
+    _initrd_end =  .;
+  }
 
   . = ALIGN(4096);
-  __bss_start = .;
   .bss       :
   {
-   *(.sbss)
-   *(.bss)
+    _edata  =  .;
+    __bss_start = .;
+    *(.sbss)
+    *(.bss)
+    *(COMMON)
+    _end = . ;
   }
-  . = ALIGN(4096);
-  _end = . ;
 }
-- 
cgit v1.2.3


From 6c5b59b913874cae535a324a671b7ed4f17e6397 Mon Sep 17 00:00:00 2001
From: David Gibson <dwg@au1.ibm.com>
Date: Thu, 14 Apr 2011 18:29:16 +0000
Subject: powerpc/boot: Add an ePAPR compliant boot wrapper

This is a first cut at making bootwrapper code which will
produce a zImage compliant with the requirements set down
by ePAPR.

This is a very simple bootwrapper, taking the device tree
blob supplied by the ePAPR boot program and passing it on
to the kernel. It builds on the earlier patch to build a
relocatable ET_DYN zImage to meet the other ePAPR image
requirements.

For good measure we have some paranoid checks which will
generate warnings if some of the ePAPR entry condition
guarantees are not met.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/Kconfig       |  6 +++++
 arch/powerpc/boot/Makefile |  4 ++-
 arch/powerpc/boot/epapr.c  | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/powerpc/boot/wrapper  |  4 +++
 4 files changed, 79 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/boot/epapr.c

(limited to 'arch')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8f4d50b0adfa..a3128ca0fe11 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -193,6 +193,12 @@ config SYS_SUPPORTS_APM_EMULATION
 	default y if PMAC_APM_EMU
 	bool
 
+config EPAPR_BOOT
+	bool
+	help
+	  Used to allow a board to specify it wants an ePAPR compliant wrapper.
+	default n
+
 config DEFAULT_UIMAGE
 	bool
 	help
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 89178164af5e..0e2a152c3aa5 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -69,7 +69,8 @@ src-wlib := string.S crt0.S crtsavres.S stdio.c main.c \
 		cpm-serial.c stdlib.c mpc52xx-psc.c planetcore.c uartlite.c \
 		fsl-soc.c mpc8xx.c pq2.c ugecon.c
 src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c \
-		cuboot-ebony.c cuboot-hotfoot.c treeboot-ebony.c prpmc2800.c \
+		cuboot-ebony.c cuboot-hotfoot.c epapr.c treeboot-ebony.c \
+		prpmc2800.c \
 		ps3-head.S ps3-hvcall.S ps3.c treeboot-bamboo.c cuboot-8xx.c \
 		cuboot-pq2.c cuboot-sequoia.c treeboot-walnut.c \
 		cuboot-bamboo.c cuboot-mpc7448hpc2.c cuboot-taishan.c \
@@ -182,6 +183,7 @@ image-$(CONFIG_PPC_HOLLY)		+= dtbImage.holly
 image-$(CONFIG_PPC_PRPMC2800)		+= dtbImage.prpmc2800
 image-$(CONFIG_PPC_ISERIES)		+= zImage.iseries
 image-$(CONFIG_DEFAULT_UIMAGE)		+= uImage
+image-$(CONFIG_EPAPR_BOOT)		+= zImage.epapr
 
 #
 # Targets which embed a device tree blob
diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c
new file mode 100644
index 000000000000..06c1961bd124
--- /dev/null
+++ b/arch/powerpc/boot/epapr.c
@@ -0,0 +1,66 @@
+/*
+ * Bootwrapper for ePAPR compliant firmwares
+ *
+ * Copyright 2010 David Gibson <david@gibson.dropbear.id.au>, IBM Corporation.
+ *
+ * Based on earlier bootwrappers by:
+ * (c) Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp,\
+ *   and
+ * Scott Wood <scottwood@freescale.com>
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "io.h"
+#include <libfdt.h>
+
+BSS_STACK(4096);
+
+#define EPAPR_SMAGIC	0x65504150
+#define EPAPR_EMAGIC	0x45504150
+
+static unsigned epapr_magic;
+static unsigned long ima_size;
+static unsigned long fdt_addr;
+
+static void platform_fixups(void)
+{
+	if ((epapr_magic != EPAPR_EMAGIC)
+	    && (epapr_magic != EPAPR_SMAGIC))
+		fatal("r6 contained 0x%08x instead of ePAPR magic number\n",
+		      epapr_magic);
+
+	if (ima_size < (unsigned long)_end)
+		printf("WARNING: Image loaded outside IMA!"
+		       " (_end=%p, ima_size=0x%lx)\n", _end, ima_size);
+	if (ima_size < fdt_addr)
+		printf("WARNING: Device tree address is outside IMA!"
+		       "(fdt_addr=0x%lx, ima_size=0x%lx)\n", fdt_addr,
+		       ima_size);
+	if (ima_size < fdt_addr + fdt_totalsize((void *)fdt_addr))
+		printf("WARNING: Device tree extends outside IMA!"
+		       " (fdt_addr=0x%lx, size=0x%x, ima_size=0x%lx\n",
+		       fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size);
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{
+	epapr_magic = r6;
+	ima_size = r7;
+	fdt_addr = r3;
+
+	/* FIXME: we should process reserve entries */
+
+	simple_alloc_init(_end, ima_size - (unsigned long)_end, 32, 64);
+
+	fdt_init((void *)fdt_addr);
+
+	serial_console_init();
+	platform_ops.fixups = platform_fixups;
+}
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index fef527867811..dfa29cb0f475 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -247,6 +247,10 @@ gamecube|wii)
 treeboot-iss4xx-mpic)
     platformo="$object/treeboot-iss4xx.o"
     ;;
+epapr)
+    link_address='0x20000000'
+    pie=-pie
+    ;;
 esac
 
 vmz="$tmpdir/`basename \"$kernel\"`.$ext"
-- 
cgit v1.2.3


From de300974761d92f71cb583730ac9e1d4eb1b7156 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ozlabs.org>
Date: Mon, 11 Apr 2011 21:46:19 +0000
Subject: powerpc/smp: smp_ops->kick_cpu() should be able to fail

When we start a cpu we use smp_ops->kick_cpu(), which currently
returns void, it should be able to fail. Convert it to return
int, and update all uses.

Convert all the current error cases to return -ENOENT, which is
what would eventually be returned by __cpu_up() currently when
it doesn't detect the cpu as coming up in time.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/machdep.h        |  2 +-
 arch/powerpc/include/asm/smp.h            |  2 +-
 arch/powerpc/kernel/smp.c                 | 10 ++++++++--
 arch/powerpc/platforms/44x/iss4xx.c       |  6 ++++--
 arch/powerpc/platforms/85xx/smp.c         |  6 ++++--
 arch/powerpc/platforms/86xx/mpc86xx_smp.c |  6 ++++--
 arch/powerpc/platforms/cell/beat_smp.c    |  5 ++---
 arch/powerpc/platforms/cell/smp.c         |  6 ++++--
 arch/powerpc/platforms/chrp/smp.c         |  4 +++-
 arch/powerpc/platforms/iseries/smp.c      |  6 ++++--
 arch/powerpc/platforms/powermac/smp.c     | 10 +++++++---
 arch/powerpc/platforms/pseries/smp.c      |  6 ++++--
 12 files changed, 46 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 493dbb38e1ba..c6345acf166f 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -33,7 +33,7 @@ struct kimage;
 struct smp_ops_t {
 	void  (*message_pass)(int target, int msg);
 	int   (*probe)(void);
-	void  (*kick_cpu)(int nr);
+	int   (*kick_cpu)(int nr);
 	void  (*setup_cpu)(int nr);
 	void  (*bringup_done)(void);
 	void  (*take_timebase)(void);
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index bb4c033a8fb0..50873493a97c 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -150,7 +150,7 @@ extern int smt_enabled_at_boot;
 
 extern int smp_mpic_probe(void);
 extern void smp_mpic_setup_cpu(int cpu);
-extern void smp_generic_kick_cpu(int nr);
+extern int smp_generic_kick_cpu(int nr);
 
 extern void smp_generic_give_timebase(void);
 extern void smp_generic_take_timebase(void);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index cbdbb14be4b0..b6083f4f39b1 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -95,7 +95,7 @@ int smt_enabled_at_boot = 1;
 static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
 
 #ifdef CONFIG_PPC64
-void __devinit smp_generic_kick_cpu(int nr)
+int __devinit smp_generic_kick_cpu(int nr)
 {
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
@@ -106,6 +106,8 @@ void __devinit smp_generic_kick_cpu(int nr)
 	 */
 	paca[nr].cpu_start = 1;
 	smp_mb();
+
+	return 0;
 }
 #endif
 
@@ -434,7 +436,11 @@ int __cpuinit __cpu_up(unsigned int cpu)
 
 	/* wake up cpus */
 	DBG("smp: kicking cpu %d\n", cpu);
-	smp_ops->kick_cpu(cpu);
+	rc = smp_ops->kick_cpu(cpu);
+	if (rc) {
+		pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
+		return rc;
+	}
 
 	/*
 	 * wait to see if the cpu made a callin (is actually up).
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c
index aa46e9d1e771..19395f18b1db 100644
--- a/arch/powerpc/platforms/44x/iss4xx.c
+++ b/arch/powerpc/platforms/44x/iss4xx.c
@@ -87,7 +87,7 @@ static void __cpuinit smp_iss4xx_setup_cpu(int cpu)
 	mpic_setup_this_cpu();
 }
 
-static void __cpuinit smp_iss4xx_kick_cpu(int cpu)
+static int __cpuinit smp_iss4xx_kick_cpu(int cpu)
 {
 	struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
 	const u64 *spin_table_addr_prop;
@@ -104,7 +104,7 @@ static void __cpuinit smp_iss4xx_kick_cpu(int cpu)
 					       NULL);
 	if (spin_table_addr_prop == NULL) {
 		pr_err("CPU%d: Can't start, missing cpu-release-addr !\n", cpu);
-		return;
+		return -ENOENT;
 	}
 
 	/* Assume it's mapped as part of the linear mapping. This is a bit
@@ -117,6 +117,8 @@ static void __cpuinit smp_iss4xx_kick_cpu(int cpu)
 	smp_wmb();
 	spin_table[1] = __pa(start_secondary_47x);
 	mb();
+
+	return 0;
 }
 
 static struct smp_ops_t iss_smp_ops = {
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 0d00ff9d05a0..fe3f6a3a5307 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -41,7 +41,7 @@ extern void __early_start(void);
 #define NUM_BOOT_ENTRY		8
 #define SIZE_BOOT_ENTRY		(NUM_BOOT_ENTRY * sizeof(u32))
 
-static void __init
+static int __init
 smp_85xx_kick_cpu(int nr)
 {
 	unsigned long flags;
@@ -60,7 +60,7 @@ smp_85xx_kick_cpu(int nr)
 
 	if (cpu_rel_addr == NULL) {
 		printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr);
-		return;
+		return -ENOENT;
 	}
 
 	/*
@@ -107,6 +107,8 @@ smp_85xx_kick_cpu(int nr)
 		iounmap(bptr_vaddr);
 
 	pr_debug("waited %d msecs for CPU #%d.\n", n, nr);
+
+	return 0;
 }
 
 static void __init
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
index eacea0e3fcc8..af09baee22cb 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -56,7 +56,7 @@ smp_86xx_release_core(int nr)
 }
 
 
-static void __init
+static int __init
 smp_86xx_kick_cpu(int nr)
 {
 	unsigned int save_vector;
@@ -65,7 +65,7 @@ smp_86xx_kick_cpu(int nr)
 	unsigned int *vector = (unsigned int *)(KERNELBASE + 0x100);
 
 	if (nr < 0 || nr >= NR_CPUS)
-		return;
+		return -ENOENT;
 
 	pr_debug("smp_86xx_kick_cpu: kick CPU #%d\n", nr);
 
@@ -92,6 +92,8 @@ smp_86xx_kick_cpu(int nr)
 	local_irq_restore(flags);
 
 	pr_debug("wait CPU #%d for %d msecs.\n", nr, n);
+
+	return 0;
 }
 
 
diff --git a/arch/powerpc/platforms/cell/beat_smp.c b/arch/powerpc/platforms/cell/beat_smp.c
index 26efc204c47f..996df33165f1 100644
--- a/arch/powerpc/platforms/cell/beat_smp.c
+++ b/arch/powerpc/platforms/cell/beat_smp.c
@@ -93,12 +93,11 @@ static void __devinit smp_beatic_setup_cpu(int cpu)
 	beatic_setup_cpu(cpu);
 }
 
-static void __devinit smp_celleb_kick_cpu(int nr)
+static int __devinit smp_celleb_kick_cpu(int nr)
 {
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
-	if (!smp_startup_cpu(nr))
-		return;
+	return smp_startup_cpu(nr);
 }
 
 static int smp_celleb_cpu_bootable(unsigned int nr)
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index f774530075b7..03d638e2f44f 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -137,12 +137,12 @@ static void __devinit smp_cell_setup_cpu(int cpu)
 	mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER);
 }
 
-static void __devinit smp_cell_kick_cpu(int nr)
+static int __devinit smp_cell_kick_cpu(int nr)
 {
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
 	if (!smp_startup_cpu(nr))
-		return;
+		return -ENOENT;
 
 	/*
 	 * The processor is currently spinning, waiting for the
@@ -150,6 +150,8 @@ static void __devinit smp_cell_kick_cpu(int nr)
 	 * the processor will continue on to secondary_start
 	 */
 	paca[nr].cpu_start = 1;
+
+	return 0;
 }
 
 static int smp_cell_cpu_bootable(unsigned int nr)
diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c
index 02cafecc90e3..a800122e4dda 100644
--- a/arch/powerpc/platforms/chrp/smp.c
+++ b/arch/powerpc/platforms/chrp/smp.c
@@ -30,10 +30,12 @@
 #include <asm/mpic.h>
 #include <asm/rtas.h>
 
-static void __devinit smp_chrp_kick_cpu(int nr)
+static int __devinit smp_chrp_kick_cpu(int nr)
 {
 	*(unsigned long *)KERNELBASE = nr;
 	asm volatile("dcbf 0,%0"::"r"(KERNELBASE):"memory");
+
+	return 0;
 }
 
 static void __devinit smp_chrp_setup_cpu(int cpu_nr)
diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c
index 6c6029914dbc..02a677a1f912 100644
--- a/arch/powerpc/platforms/iseries/smp.c
+++ b/arch/powerpc/platforms/iseries/smp.c
@@ -86,13 +86,13 @@ static int smp_iSeries_probe(void)
 	return cpumask_weight(cpu_possible_mask);
 }
 
-static void smp_iSeries_kick_cpu(int nr)
+static int smp_iSeries_kick_cpu(int nr)
 {
 	BUG_ON((nr < 0) || (nr >= NR_CPUS));
 
 	/* Verify that our partition has a processor nr */
 	if (lppaca_of(nr).dyn_proc_status >= 2)
-		return;
+		return -ENOENT;
 
 	/* The processor is currently spinning, waiting
 	 * for the cpu_start field to become non-zero
@@ -100,6 +100,8 @@ static void smp_iSeries_kick_cpu(int nr)
 	 * continue on to secondary_start in iSeries_head.S
 	 */
 	paca[nr].cpu_start = 1;
+
+	return 0;
 }
 
 static void __devinit smp_iSeries_setup_cpu(int nr)
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index bc5f0dc6ae1e..621d4b7755f2 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -329,7 +329,7 @@ static int __init smp_psurge_probe(void)
 	return ncpus;
 }
 
-static void __init smp_psurge_kick_cpu(int nr)
+static int __init smp_psurge_kick_cpu(int nr)
 {
 	unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8;
 	unsigned long a, flags;
@@ -394,6 +394,8 @@ static void __init smp_psurge_kick_cpu(int nr)
 		psurge_set_ipi(1);
 
 	if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354);
+
+	return 0;
 }
 
 static struct irqaction psurge_irqaction = {
@@ -791,14 +793,14 @@ static int __init smp_core99_probe(void)
 	return ncpus;
 }
 
-static void __devinit smp_core99_kick_cpu(int nr)
+static int __devinit smp_core99_kick_cpu(int nr)
 {
 	unsigned int save_vector;
 	unsigned long target, flags;
 	unsigned int *vector = (unsigned int *)(PAGE_OFFSET+0x100);
 
 	if (nr < 0 || nr > 3)
-		return;
+		return -ENOENT;
 
 	if (ppc_md.progress)
 		ppc_md.progress("smp_core99_kick_cpu", 0x346);
@@ -830,6 +832,8 @@ static void __devinit smp_core99_kick_cpu(int nr)
 
 	local_irq_restore(flags);
 	if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
+
+	return 0;
 }
 
 static void __devinit smp_core99_setup_cpu(int cpu_nr)
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index fc72bfce7320..95f578158ff0 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -152,12 +152,12 @@ static void __devinit smp_xics_setup_cpu(int cpu)
 #endif
 }
 
-static void __devinit smp_pSeries_kick_cpu(int nr)
+static int __devinit smp_pSeries_kick_cpu(int nr)
 {
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
 	if (!smp_startup_cpu(nr))
-		return;
+		return -ENOENT;
 
 	/*
 	 * The processor is currently spinning, waiting for the
@@ -179,6 +179,8 @@ static void __devinit smp_pSeries_kick_cpu(int nr)
 						"Ret= %ld\n", nr, rc);
 	}
 #endif
+
+	return 0;
 }
 
 static int smp_pSeries_cpu_bootable(unsigned int nr)
-- 
cgit v1.2.3


From f5be2dc0bd8d27a39d84a89e4ff90ba38cd2b285 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Mon, 4 Apr 2011 20:57:27 +0000
Subject: powerpc/nohash: Allocate stale_map[cpu] on CPU_UP_PREPARE not
 CPU_ONLINE

Currently we allocate the stale_map for a cpu when it comes online,
this leaves open a small window where a process can be scheduled
on the cpu before the stale_map is allocated. Instead allocate
the stale_map at CPU_UP_PREPARE time, that way it will be always
available before tasks start running.

It is possible the cpu fails to come up, in which case we should free
the stale_map, so add a CPU_UP_CANCELED case to do that.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/mm/mmu_context_nohash.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index c0aab52da3a5..4d8fa911c73d 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -338,12 +338,14 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self,
 		return NOTIFY_OK;
 
 	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
 		pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
 		stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
-- 
cgit v1.2.3


From b68a70c49686db0bff4637995d91b4db8abe5281 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 4 Apr 2011 23:56:18 +0000
Subject: powerpc: Replace open coded instruction patching with
 patch_instruction/patch_branch

There are a few places we patch instructions without using
patch_instruction and patch_branch, probably because they
predated it. Fix it.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/mm/hash_utils_64.c | 44 +++++++++++++++++++++++++----------------
 arch/powerpc/mm/slb.c           |  6 +++---
 2 files changed, 30 insertions(+), 20 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 58a022d0f463..d95d8f484d2f 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -53,6 +53,7 @@
 #include <asm/sections.h>
 #include <asm/spu.h>
 #include <asm/udbg.h>
+#include <asm/code-patching.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -547,15 +548,7 @@ int remove_section_mapping(unsigned long start, unsigned long end)
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-static inline void make_bl(unsigned int *insn_addr, void *func)
-{
-	unsigned long funcp = *((unsigned long *)func);
-	int offset = funcp - (unsigned long)insn_addr;
-
-	*insn_addr = (unsigned int)(0x48000001 | (offset & 0x03fffffc));
-	flush_icache_range((unsigned long)insn_addr, 4+
-			   (unsigned long)insn_addr);
-}
+#define FUNCTION_TEXT(A)	((*(unsigned long *)(A)))
 
 static void __init htab_finish_init(void)
 {
@@ -570,16 +563,33 @@ static void __init htab_finish_init(void)
 	extern unsigned int *ht64_call_hpte_remove;
 	extern unsigned int *ht64_call_hpte_updatepp;
 
-	make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
-	make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
-	make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
-	make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
+	patch_branch(ht64_call_hpte_insert1,
+		FUNCTION_TEXT(ppc_md.hpte_insert),
+		BRANCH_SET_LINK);
+	patch_branch(ht64_call_hpte_insert2,
+		FUNCTION_TEXT(ppc_md.hpte_insert),
+		BRANCH_SET_LINK);
+	patch_branch(ht64_call_hpte_remove,
+		FUNCTION_TEXT(ppc_md.hpte_remove),
+		BRANCH_SET_LINK);
+	patch_branch(ht64_call_hpte_updatepp,
+		FUNCTION_TEXT(ppc_md.hpte_updatepp),
+		BRANCH_SET_LINK);
+
 #endif /* CONFIG_PPC_HAS_HASH_64K */
 
-	make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
-	make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
-	make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
-	make_bl(htab_call_hpte_updatepp, ppc_md.hpte_updatepp);
+	patch_branch(htab_call_hpte_insert1,
+		FUNCTION_TEXT(ppc_md.hpte_insert),
+		BRANCH_SET_LINK);
+	patch_branch(htab_call_hpte_insert2,
+		FUNCTION_TEXT(ppc_md.hpte_insert),
+		BRANCH_SET_LINK);
+	patch_branch(htab_call_hpte_remove,
+		FUNCTION_TEXT(ppc_md.hpte_remove),
+		BRANCH_SET_LINK);
+	patch_branch(htab_call_hpte_updatepp,
+		FUNCTION_TEXT(ppc_md.hpte_updatepp),
+		BRANCH_SET_LINK);
 }
 
 static void __init htab_initialize(void)
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 1d98ecc8eecd..5500712781d4 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -24,6 +24,7 @@
 #include <asm/firmware.h>
 #include <linux/compiler.h>
 #include <asm/udbg.h>
+#include <asm/code-patching.h>
 
 
 extern void slb_allocate_realmode(unsigned long ea);
@@ -249,9 +250,8 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 static inline void patch_slb_encoding(unsigned int *insn_addr,
 				      unsigned int immed)
 {
-	*insn_addr = (*insn_addr & 0xffff0000) | immed;
-	flush_icache_range((unsigned long)insn_addr, 4+
-			   (unsigned long)insn_addr);
+	int insn = (*insn_addr & 0xffff0000) | immed;
+	patch_instruction(insn_addr, insn);
 }
 
 void slb_set_size(u16 size)
-- 
cgit v1.2.3


From 931e1241a266e701157d3478d0d44fc58d6e84b4 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 14 Apr 2011 22:31:56 +0000
Subject: powerpc/a2: Add some #defines for A2 specific instructions

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/ppc-opcode.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 384a96db794a..3e25b258568e 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -59,6 +59,14 @@
 #define PPC_INST_NAP			0x4c000364
 #define PPC_INST_SLEEP			0x4c0003a4
 
+/* A2 specific instructions */
+#define PPC_INST_ERATWE			0x7c0001a6
+#define PPC_INST_ERATRE			0x7c000166
+#define PPC_INST_ERATILX		0x7c000066
+#define PPC_INST_ERATIVAX		0x7c000666
+#define PPC_INST_ERATSX			0x7c000126
+#define PPC_INST_ERATSX_DOT		0x7c000127
+
 /* macros to insert fields into opcodes */
 #define __PPC_RA(a)	(((a) & 0x1f) << 16)
 #define __PPC_RB(b)	(((b) & 0x1f) << 11)
@@ -70,6 +78,8 @@
 #define __PPC_XT(s)	__PPC_XS(s)
 #define __PPC_T_TLB(t)	(((t) & 0x3) << 21)
 #define __PPC_WC(w)	(((w) & 0x3) << 21)
+#define __PPC_WS(w)	(((w) & 0x1f) << 11)
+
 /*
  * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
  * larx with EH set as an illegal instruction.
@@ -116,6 +126,21 @@
 #define PPC_TLBIVAX(a,b)	stringify_in_c(.long PPC_INST_TLBIVAX | \
 					__PPC_RA(a) | __PPC_RB(b))
 
+#define PPC_ERATWE(s, a, w)	stringify_in_c(.long PPC_INST_ERATWE | \
+					__PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_ERATRE(s, a, w)	stringify_in_c(.long PPC_INST_ERATRE | \
+					__PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_ERATILX(t, a, b)	stringify_in_c(.long PPC_INST_ERATILX | \
+					__PPC_T_TLB(t) | __PPC_RA(a) | \
+					__PPC_RB(b))
+#define PPC_ERATIVAX(s, a, b)	stringify_in_c(.long PPC_INST_ERATIVAX | \
+					__PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_ERATSX(t, a, w)	stringify_in_c(.long PPC_INST_ERATSX | \
+					__PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_ERATSX_DOT(t, a, w)	stringify_in_c(.long PPC_INST_ERATSX_DOT | \
+					__PPC_RS(t) | __PPC_RA(a) | __PPC_RB(b))
+
+
 /*
  * Define what the VSX XX1 form instructions will look like, then add
  * the 128 bit load store instructions based on that.
-- 
cgit v1.2.3


From cd852579055bd8ad848415aaabb78b65d522fce0 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 14 Apr 2011 22:31:58 +0000
Subject: powerpc/xics: xics.h relies on linux/interrupt.h

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/xics.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 146aad8534de..c4ed4c5b6464 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -6,6 +6,8 @@
 #ifndef _XICS_H
 #define _XICS_H
 
+#include <linux/interrupt.h>
+
 #define XICS_IPI		2
 #define XICS_IRQ_SPURIOUS	0
 
-- 
cgit v1.2.3


From ab814b938d1d372bd2ac6268c15d4e0e6a5245c4 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 14 Apr 2011 22:31:58 +0000
Subject: powerpc: Add SCOM infrastructure

SCOM is a side-band configuration bus implemented on some processors.
This code provides a way for code to map and operate on devices via
SCOM, while the details of how that is implemented is left up to a
SCOM "controller" in the platform code.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/scom.h | 156 ++++++++++++++++++++++++++++++++
 arch/powerpc/sysdev/Kconfig     |   7 ++
 arch/powerpc/sysdev/Makefile    |   2 +
 arch/powerpc/sysdev/scom.c      | 192 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 357 insertions(+)
 create mode 100644 arch/powerpc/include/asm/scom.h
 create mode 100644 arch/powerpc/sysdev/scom.c

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h
new file mode 100644
index 000000000000..0cabfd7bc2d1
--- /dev/null
+++ b/arch/powerpc/include/asm/scom.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
+ *                <benh@kernel.crashing.org>
+ *     and        David Gibson, IBM Corporation.
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _ASM_POWERPC_SCOM_H
+#define _ASM_POWERPC_SCOM_H
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_PPC_SCOM
+
+/*
+ * The SCOM bus is a sideband bus used for accessing various internal
+ * registers of the processor or the chipset. The implementation details
+ * differ between processors and platforms, and the access method as
+ * well.
+ *
+ * This API allows to "map" ranges of SCOM register numbers associated
+ * with a given SCOM controller. The later must be represented by a
+ * device node, though some implementations might support NULL if there
+ * is no possible ambiguity
+ *
+ * Then, scom_read/scom_write can be used to accesses registers inside
+ * that range. The argument passed is a register number relative to
+ * the beginning of the range mapped.
+ */
+
+typedef void *scom_map_t;
+
+/* Value for an invalid SCOM map */
+#define SCOM_MAP_INVALID	(NULL)
+
+/* The scom_controller data structure is what the platform passes
+ * to the core code in scom_init, it provides the actual implementation
+ * of all the SCOM functions
+ */
+struct scom_controller {
+	scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count);
+	void (*unmap)(scom_map_t map);
+
+	u64 (*read)(scom_map_t map, u32 reg);
+	void (*write)(scom_map_t map, u32 reg, u64 value);
+};
+
+extern const struct scom_controller *scom_controller;
+
+/**
+ * scom_init - Initialize the SCOM backend, called by the platform
+ * @controller: The platform SCOM controller
+ */
+static inline void scom_init(const struct scom_controller *controller)
+{
+	scom_controller = controller;
+}
+
+/**
+ * scom_map_ok - Test is a SCOM mapping is successful
+ * @map: The result of scom_map to test
+ */
+static inline int scom_map_ok(scom_map_t map)
+{
+	return map != SCOM_MAP_INVALID;
+}
+
+/**
+ * scom_map - Map a block of SCOM registers
+ * @ctrl_dev: Device node of the SCOM controller
+ *            some implementations allow NULL here
+ * @reg: first SCOM register to map
+ * @count: Number of SCOM registers to map
+ */
+
+static inline scom_map_t scom_map(struct device_node *ctrl_dev,
+				  u64 reg, u64 count)
+{
+	return scom_controller->map(ctrl_dev, reg, count);
+}
+
+/**
+ * scom_find_parent - Find the SCOM controller for a device
+ * @dev: OF node of the device
+ *
+ * This is not meant for general usage, but in combination with
+ * scom_map() allows to map registers not represented by the
+ * device own scom-reg property. Useful for applying HW workarounds
+ * on things not properly represented in the device-tree for example.
+ */
+struct device_node *scom_find_parent(struct device_node *dev);
+
+
+/**
+ * scom_map_device - Map a device's block of SCOM registers
+ * @dev: OF node of the device
+ * @index: Register bank index (index in "scom-reg" property)
+ *
+ * This function will use the device-tree binding for SCOM which
+ * is to follow "scom-parent" properties until it finds a node with
+ * a "scom-controller" property to find the controller. It will then
+ * use the "scom-reg" property which is made of reg/count pairs,
+ * each of them having a size defined by the controller's #scom-cells
+ * property
+ */
+extern scom_map_t scom_map_device(struct device_node *dev, int index);
+
+
+/**
+ * scom_unmap - Unmap a block of SCOM registers
+ * @map: Result of scom_map is to be unmapped
+ */
+static inline void scom_unmap(scom_map_t map)
+{
+	if (scom_map_ok(map))
+		scom_controller->unmap(map);
+}
+
+/**
+ * scom_read - Read a SCOM register
+ * @map: Result of scom_map
+ * @reg: Register index within that map
+ */
+static inline u64 scom_read(scom_map_t map, u32 reg)
+{
+	return scom_controller->read(map, reg);
+}
+
+/**
+ * scom_write - Write to a SCOM register
+ * @map: Result of scom_map
+ * @reg: Register index within that map
+ * @value: Value to write
+ */
+static inline void scom_write(scom_map_t map, u32 reg, u64 value)
+{
+	scom_controller->write(map, reg, value);
+}
+
+#endif /* CONFIG_PPC_SCOM */
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_SCOM_H */
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index cfc18770af79..d775fd148d13 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -15,3 +15,10 @@ config PPC_MSI_BITMAP
 
 source "arch/powerpc/sysdev/xics/Kconfig"
 
+config PPC_SCOM
+	bool
+
+config SCOM_DEBUGFS
+	bool "Expose SCOM controllers via debugfs"
+	depends on PPC_SCOM
+	default n
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 9516e7598573..6076e0074a87 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -58,6 +58,8 @@ ifeq ($(CONFIG_SUSPEND),y)
 obj-$(CONFIG_6xx)		+= 6xx-suspend.o
 endif
 
+obj-$(CONFIG_PPC_SCOM)		+= scom.o
+
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 obj-$(CONFIG_PPC_XICS)		+= xics/
diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c
new file mode 100644
index 000000000000..b2593ce30c9b
--- /dev/null
+++ b/arch/powerpc/sysdev/scom.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
+ *                <benh@kernel.crashing.org>
+ *     and        David Gibson, IBM Corporation.
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <asm/prom.h>
+#include <asm/scom.h>
+
+const struct scom_controller *scom_controller;
+EXPORT_SYMBOL_GPL(scom_controller);
+
+struct device_node *scom_find_parent(struct device_node *node)
+{
+	struct device_node *par, *tmp;
+	const u32 *p;
+
+	for (par = of_node_get(node); par;) {
+		if (of_get_property(par, "scom-controller", NULL))
+			break;
+		p = of_get_property(par, "scom-parent", NULL);
+		tmp = par;
+		if (p == NULL)
+			par = of_get_parent(par);
+		else
+			par = of_find_node_by_phandle(*p);
+		of_node_put(tmp);
+	}
+	return par;
+}
+EXPORT_SYMBOL_GPL(scom_find_parent);
+
+scom_map_t scom_map_device(struct device_node *dev, int index)
+{
+	struct device_node *parent;
+	unsigned int cells, size;
+	const u32 *prop;
+	u64 reg, cnt;
+	scom_map_t ret;
+
+	parent = scom_find_parent(dev);
+
+	if (parent == NULL)
+		return 0;
+
+	prop = of_get_property(parent, "#scom-cells", NULL);
+	cells = prop ? *prop : 1;
+
+	prop = of_get_property(dev, "scom-reg", &size);
+	if (!prop)
+		return 0;
+	size >>= 2;
+
+	if (index >= (size / (2*cells)))
+		return 0;
+
+	reg = of_read_number(&prop[index * cells * 2], cells);
+	cnt = of_read_number(&prop[index * cells * 2 + cells], cells);
+
+	ret = scom_map(parent, reg, cnt);
+	of_node_put(parent);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(scom_map_device);
+
+#ifdef CONFIG_SCOM_DEBUGFS
+struct scom_debug_entry {
+	struct device_node *dn;
+	unsigned long addr;
+	scom_map_t map;
+	spinlock_t lock;
+	char name[8];
+	struct debugfs_blob_wrapper blob;
+};
+
+static int scom_addr_set(void *data, u64 val)
+{
+	struct scom_debug_entry *ent = data;
+
+	ent->addr = 0;
+	scom_unmap(ent->map);
+
+	ent->map = scom_map(ent->dn, val, 1);
+	if (scom_map_ok(ent->map))
+		ent->addr = val;
+	else
+		return -EFAULT;
+
+	return 0;
+}
+
+static int scom_addr_get(void *data, u64 *val)
+{
+	struct scom_debug_entry *ent = data;
+	*val = ent->addr;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(scom_addr_fops, scom_addr_get, scom_addr_set,
+			"0x%llx\n");
+
+static int scom_val_set(void *data, u64 val)
+{
+	struct scom_debug_entry *ent = data;
+
+	if (!scom_map_ok(ent->map))
+		return -EFAULT;
+
+	scom_write(ent->map, 0, val);
+
+	return 0;
+}
+
+static int scom_val_get(void *data, u64 *val)
+{
+	struct scom_debug_entry *ent = data;
+
+	if (!scom_map_ok(ent->map))
+		return -EFAULT;
+
+	*val = scom_read(ent->map, 0);
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(scom_val_fops, scom_val_get, scom_val_set,
+			"0x%llx\n");
+
+static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
+			       int i)
+{
+	struct scom_debug_entry *ent;
+	struct dentry *dir;
+
+	ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+	if (!ent)
+		return -ENOMEM;
+
+	ent->dn = of_node_get(dn);
+	ent->map = SCOM_MAP_INVALID;
+	spin_lock_init(&ent->lock);
+	snprintf(ent->name, 8, "scom%d", i);
+	ent->blob.data = dn->full_name;
+	ent->blob.size = strlen(dn->full_name);
+
+	dir = debugfs_create_dir(ent->name, root);
+	if (!dir) {
+		of_node_put(dn);
+		kfree(ent);
+		return -1;
+	}
+
+	debugfs_create_file("addr", 0600, dir, ent, &scom_addr_fops);
+	debugfs_create_file("value", 0600, dir, ent, &scom_val_fops);
+	debugfs_create_blob("path", 0400, dir, &ent->blob);
+
+	return 0;
+}
+
+static int scom_debug_init(void)
+{
+	struct device_node *dn;
+	struct dentry *root;
+	int i, rc;
+
+	root = debugfs_create_dir("scom", powerpc_debugfs_root);
+	if (!root)
+		return -1;
+
+	i = rc = 0;
+	for_each_node_with_property(dn, "scom-controller")
+		rc |= scom_debug_init_one(root, dn, i++);
+
+	return rc;
+}
+device_initcall(scom_debug_init);
+#endif /* CONFIG_SCOM_DEBUGFS */
-- 
cgit v1.2.3


From 5ca123760177ed16cbd9bab609bff69eb8fc45bd Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 14 Apr 2011 22:31:59 +0000
Subject: powerpc/xics: Move irq_host matching into the ics backend

An upcoming new ics backend will need to implement different matching
semantics to the current ones, which are essentially the RTAS ics
backends. So move the current match into the RTAS backend, and allow
other ics backends to override.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/xics.h        |  1 +
 arch/powerpc/sysdev/xics/ics-rtas.c    | 11 +++++++++++
 arch/powerpc/sysdev/xics/xics-common.c | 12 +++++++-----
 3 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index c4ed4c5b6464..6c06306c4100 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -59,6 +59,7 @@ struct ics {
 	int (*map)(struct ics *ics, unsigned int virq);
 	void (*mask_unknown)(struct ics *ics, unsigned long vec);
 	long (*get_server)(struct ics *ics, unsigned long vec);
+	int (*host_match)(struct ics *ics, struct device_node *node);
 	char data[];
 };
 
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
index 5b3ee387e89d..610c148fedcc 100644
--- a/arch/powerpc/sysdev/xics/ics-rtas.c
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -26,12 +26,14 @@ static int ibm_int_off;
 static int ics_rtas_map(struct ics *ics, unsigned int virq);
 static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec);
 static long ics_rtas_get_server(struct ics *ics, unsigned long vec);
+static int ics_rtas_host_match(struct ics *ics, struct device_node *node);
 
 /* Only one global & state struct ics */
 static struct ics ics_rtas = {
 	.map		= ics_rtas_map,
 	.mask_unknown	= ics_rtas_mask_unknown,
 	.get_server	= ics_rtas_get_server,
+	.host_match	= ics_rtas_host_match,
 };
 
 static void ics_rtas_unmask_irq(struct irq_data *d)
@@ -202,6 +204,15 @@ static long ics_rtas_get_server(struct ics *ics, unsigned long vec)
 	return status[0];
 }
 
+static int ics_rtas_host_match(struct ics *ics, struct device_node *node)
+{
+	/* IBM machines have interrupt parents of various funky types for things
+	 * like vdevices, events, etc... The trick we use here is to match
+	 * everything here except the legacy 8259 which is compatible "chrp,iic"
+	 */
+	return !of_device_is_compatible(node, "chrp,iic");
+}
+
 int ics_rtas_init(void)
 {
 	ibm_get_xive = rtas_token("ibm,get-xive");
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index e70175dfe322..c58844d72426 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -331,11 +331,13 @@ int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
 
 static int xics_host_match(struct irq_host *h, struct device_node *node)
 {
-	/* IBM machines have interrupt parents of various funky types for things
-	 * like vdevices, events, etc... The trick we use here is to match
-	 * everything here except the legacy 8259 which is compatible "chrp,iic"
-	 */
-	return !of_device_is_compatible(node, "chrp,iic");
+	struct ics *ics;
+
+	list_for_each_entry(ics, &ics_list, link)
+		if (ics->host_match(ics, node))
+			return 1;
+
+	return 0;
 }
 
 /* Dummies */
-- 
cgit v1.2.3


From 411e689d929d5fc2e9066e30de55e8bcdbd573ad Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 14 Apr 2011 22:32:00 +0000
Subject: powerpc/nvram: Search for nvram using compatible

As well as searching for nodes with type = "nvram", search for nodes
that have compatible = "nvram". This can't be converted into a single
call to of_find_compatible_node() with a non-NULL type, because that
searches for a node that has _both_ type & compatible = "nvram".

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/sysdev/mmio_nvram.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c
index 207324209065..ddc877a3a23a 100644
--- a/arch/powerpc/sysdev/mmio_nvram.c
+++ b/arch/powerpc/sysdev/mmio_nvram.c
@@ -115,6 +115,8 @@ int __init mmio_nvram_init(void)
 	int ret;
 
 	nvram_node = of_find_node_by_type(NULL, "nvram");
+	if (!nvram_node)
+		nvram_node = of_find_compatible_node(NULL, NULL, "nvram");
 	if (!nvram_node) {
 		printk(KERN_WARNING "nvram: no node found in device-tree\n");
 		return -ENODEV;
-- 
cgit v1.2.3


From c708c57e247775928b9a6bce7b4d8d14883bf39b Mon Sep 17 00:00:00 2001
From: Jan Glauber <jang@linux.vnet.ibm.com>
Date: Wed, 20 Apr 2011 10:15:31 +0200
Subject: [S390] prng: prevent access beyond end of stack

While initializing the state of the prng only the first 8 bytes of
random data where used, the second 8 bytes were read from the memory
after the stack. If only 64 bytes of the kernel stack are used and
CONFIG_DEBUG_PAGEALLOC is enabled a kernel panic may occur because of
the invalid page access. Use the correct multiplicator to stay within
the random data buffer.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/crypto/prng.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 975e3ab13cb5..44bca3f994b0 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -76,7 +76,7 @@ static void prng_seed(int nbytes)
 
 	/* Add the entropy */
 	while (nbytes >= 8) {
-		*((__u64 *)parm_block) ^= *((__u64 *)buf+i*8);
+		*((__u64 *)parm_block) ^= *((__u64 *)buf+i);
 		prng_add_entropy();
 		i += 8;
 		nbytes -= 8;
-- 
cgit v1.2.3


From e4c031b4f2515e9531d71c8aa779799231dbcd0c Mon Sep 17 00:00:00 2001
From: Jan Glauber <jang@linux.vnet.ibm.com>
Date: Wed, 20 Apr 2011 10:15:32 +0200
Subject: [S390] fix page table walk for changing page attributes

The page table walk for changing page attributes used the wrong
address for pgd/pud/pmd lookups if the range was bigger than
a pmd entry. Fix the lookup by using the correct address.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/pageattr.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 122ffbd08ce0..0607e4b14b27 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -24,12 +24,13 @@ static void change_page_attr(unsigned long addr, int numpages,
 			WARN_ON_ONCE(1);
 			continue;
 		}
-		ptep = pte_offset_kernel(pmdp, addr + i * PAGE_SIZE);
+		ptep = pte_offset_kernel(pmdp, addr);
 
 		pte = *ptep;
 		pte = set(pte);
-		ptep_invalidate(&init_mm, addr + i * PAGE_SIZE, ptep);
+		ptep_invalidate(&init_mm, addr, ptep);
 		*ptep = pte;
+		addr += PAGE_SIZE;
 	}
 }
 
-- 
cgit v1.2.3


From e35c76cd47c244eaa7a74adaabde4d0a1cadb907 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 20 Apr 2011 10:15:34 +0200
Subject: [S390] pfault: fix token handling

f6649a7e "[S390] cleanup lowcore access from external interrupts" changed
handling of external interrupts. Instead of letting the external interrupt
handlers accessing the per cpu lowcore the entry code of the kernel reads
already all fields that are necessary and passes them to the handlers.
The pfault interrupt handler was incorrectly converted. It tries to
dereference a value which used to be a pointer to a lowcore field. After
the conversion however it is not anymore the pointer to the field but its
content. So instead of a dereference only a cast is needed to get the
task pointer that caused the pfault.

Fixes a NULL pointer dereference and a subsequent kernel crash:

Unable to handle kernel pointer dereference at virtual kernel address (null)
Oops: 0004 [#1] SMP
Modules linked in: nfsd exportfs nfs lockd fscache nfs_acl auth_rpcgss sunrpc
                   loop qeth_l3 qeth vmur ccwgroup ext3 jbd mbcache dm_mod
                   dasd_eckd_mod dasd_diag_mod dasd_mod
CPU: 0 Not tainted 2.6.38-2-s390x #1
Process cron (pid: 1106, task: 000000001f962f78, ksp: 000000001fa0f9d0)
Krnl PSW : 0404200180000000 000000000002c03e (pfault_interrupt+0xa2/0x138)
           R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:2 PM:0 EA:3
Krnl GPRS: 0000000000000000 0000000000000001 0000000000000000 0000000000000001
           000000001f962f78 0000000000518968 0000000090000002 000000001ff03280
           0000000000000000 000000000064f000 000000001f962f78 0000000000002603
           0000000006002603 0000000000000000 000000001ff7fe68 000000001ff7fe48
Krnl Code: 000000000002c036: 5820d010            l       %r2,16(%r13)
           000000000002c03a: 1832                lr      %r3,%r2
           000000000002c03c: 1a31                ar      %r3,%r1
          >000000000002c03e: ba23d010            cs      %r2,%r3,16(%r13)
           000000000002c042: a744fffc            brc     4,2c03a
           000000000002c046: a7290002            lghi    %r2,2
           000000000002c04a: e320d0000024        stg     %r2,0(%r13)
           000000000002c050: 07f0                bcr     15,%r0
Call Trace:
 ([<000000001f962f78>] 0x1f962f78)
  [<000000000001acda>] do_extint+0xf6/0x138
  [<000000000039b6ca>] ext_no_vtime+0x30/0x34
  [<000000007d706e04>] 0x7d706e04
Last Breaking-Event-Address:
  [<0000000000000000>] 0x0

For stable maintainers:
the first kernel which contains this bug is 2.6.37.

Reported-by: Stephen Powell <zlinuxman@wowway.com>
Cc: Jonathan Nieder <jrnieder@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/fault.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 9217e332b118..4cf85fef407c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -558,9 +558,9 @@ static void pfault_interrupt(unsigned int ext_int_code,
 	 * Get the token (= address of the task structure of the affected task).
 	 */
 #ifdef CONFIG_64BIT
-	tsk = *(struct task_struct **) param64;
+	tsk = (struct task_struct *) param64;
 #else
-	tsk = *(struct task_struct **) param32;
+	tsk = (struct task_struct *) param32;
 #endif
 
 	if (subcode & 0x0080) {
-- 
cgit v1.2.3


From 9ff4cfb3fcfd48b49fdd9be7381b3be340853aa4 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Wed, 20 Apr 2011 10:15:36 +0200
Subject: [S390] kvm-390: Let kernel exit SIE instruction on work

From: Christian Borntraeger <borntraeger@de.ibm.com>

This patch fixes the sie exit on interrupts. The low level
interrupt handler returns to the PSW address in pt_regs and not
to the PSW address in the lowcore.
Without this fix a cpu bound guest might never leave guest state
since the host interrupt handler would blindly return to the
SIE instruction, even on need_resched and friends.

Cc: stable@kernel.org
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kvm/sie64a.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
index 7e9d30d567b0..ab0e041ac54c 100644
--- a/arch/s390/kvm/sie64a.S
+++ b/arch/s390/kvm/sie64a.S
@@ -48,10 +48,10 @@ sie_irq_handler:
 	tm	__TI_flags+7(%r2),_TIF_EXIT_SIE
 	jz	0f
 	larl	%r2,sie_exit			# work pending, leave sie
-	stg	%r2,__LC_RETURN_PSW+8
+	stg	%r2,SPI_PSW+8(0,%r15)
 	br	%r14
 0:	larl	%r2,sie_reenter			# re-enter with guest id
-	stg	%r2,__LC_RETURN_PSW+8
+	stg	%r2,SPI_PSW+8(0,%r15)
 1:	br	%r14
 
 /*
-- 
cgit v1.2.3


From 24bdb0b62cc82120924762ae6bc85afc8c3f2b26 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Tue, 12 Apr 2011 12:19:52 +0100
Subject: xen: do not create the extra e820 region at an addr lower than 4G

Do not add the extra e820 region at a physical address lower than 4G
because it breaks e820_end_of_low_ram_pfn().

It is OK for us to move the xen_extra_mem_start up and down because this
is the index of the memory that can be ballooned in/out - it is memory
not available to the kernel during bootup.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index fa0269a99377..90bac0aac3a5 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -227,7 +227,7 @@ char * __init xen_memory_setup(void)
 
 	memcpy(map_raw, map, sizeof(map));
 	e820.nr_map = 0;
-	xen_extra_mem_start = mem_end;
+	xen_extra_mem_start = max((1ULL << 32), mem_end);
 	for (i = 0; i < memmap.nr_entries; i++) {
 		unsigned long long end;
 
-- 
cgit v1.2.3


From ee176455e28469e2420032aab3db11ac2ae3eaa8 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Tue, 19 Apr 2011 14:47:31 +0100
Subject: xen: mask_rw_pte: do not apply the early_ioremap checks on x86_32

The two "is_early_ioremap_ptep" checks in mask_rw_pte are only used on
x86_64, in fact early_ioremap is not used at all to setup the initial
pagetable on x86_32.
Moreover on x86_32 the two checks are wrong because the range
pgt_buf_start..pgt_buf_end initially should be mapped RW because
the pages in the range are not pagetable pages yet and haven't been
cleared yet. Afterwards considering the pgt_buf_start..pgt_buf_end is
part of the initial mapping, xen_alloc_pte is capable of turning
the ptes RO when they become pagetable pages.

Fix the issue and improve the readability of the code providing two
different implementation of mask_rw_pte for x86_32 and x86_64.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index a991b57f91fe..aef7af92b28b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1473,16 +1473,20 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 #endif
 }
 
+#ifdef CONFIG_X86_32
 static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 {
-	unsigned long pfn = pte_pfn(pte);
-
-#ifdef CONFIG_X86_32
 	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
 	if (pte_val_ma(*ptep) & _PAGE_PRESENT)
 		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
 			       pte_val_ma(pte));
-#endif
+
+	return pte;
+}
+#else /* CONFIG_X86_64 */
+static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
+{
+	unsigned long pfn = pte_pfn(pte);
 
 	/*
 	 * If the new pfn is within the range of the newly allocated
@@ -1497,6 +1501,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 
 	return pte;
 }
+#endif /* CONFIG_X86_64 */
 
 /* Init-time set_pte while constructing initial pagetables, which
    doesn't allow RO pagetable pages to be remapped RW */
-- 
cgit v1.2.3


From 8a91707d0a1a49193e23cb2d243632f2289feb24 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 20 Apr 2011 11:54:10 -0400
Subject: xen/p2m: Add EXPORT_SYMBOL_GPL to the M2P override functions.

If the backends, which use these two functions, are compiled as
a module we need these two functions to be exported.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/p2m.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 2d2b32af3a1d..c851397e657c 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -682,7 +682,7 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
 
 	return 0;
 }
-
+EXPORT_SYMBOL_GPL(m2p_add_override);
 int m2p_remove_override(struct page *page, bool clear_pte)
 {
 	unsigned long flags;
@@ -719,6 +719,7 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(m2p_remove_override);
 
 struct page *m2p_find_override(unsigned long mfn)
 {
-- 
cgit v1.2.3


From cf568c58eb192368f5e796df935704535b54f451 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 30 Mar 2011 14:31:42 +0200
Subject: mach-ux500: fix i2c0 device setup regression

Adding two sets of I2C devices to the same bus doesn't quite work,
atleast not anymore. Stash one array and determine how much of it
shall be added instead.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-ux500/board-mop500.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index af913741e6ec..6e1907fa94f0 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -178,16 +178,15 @@ static struct i2c_board_info __initdata mop500_i2c0_devices[] = {
 		.irq		= NOMADIK_GPIO_TO_IRQ(217),
 		.platform_data  = &mop500_tc35892_data,
 	},
-};
-
-/* I2C0 devices only available prior to HREFv60 */
-static struct i2c_board_info __initdata mop500_i2c0_old_devices[] = {
+	/* I2C0 devices only available prior to HREFv60 */
 	{
 		I2C_BOARD_INFO("tps61052", 0x33),
 		.platform_data  = &mop500_tps61052_data,
 	},
 };
 
+#define NUM_PRE_V60_I2C0_DEVICES 1
+
 static struct i2c_board_info __initdata mop500_i2c2_devices[] = {
 	{
 		/* lp5521 LED driver, 1st device */
@@ -425,6 +424,8 @@ static void __init mop500_uart_init(void)
 
 static void __init mop500_init_machine(void)
 {
+	int i2c0_devs;
+
 	/*
 	 * The HREFv60 board removed a GPIO expander and routed
 	 * all these GPIO pins to the internal GPIO controller
@@ -448,11 +449,11 @@ static void __init mop500_init_machine(void)
 
 	platform_device_register(&ab8500_device);
 
-	i2c_register_board_info(0, mop500_i2c0_devices,
-				ARRAY_SIZE(mop500_i2c0_devices));
-	if (!machine_is_hrefv60())
-		i2c_register_board_info(0, mop500_i2c0_old_devices,
-					ARRAY_SIZE(mop500_i2c0_old_devices));
+	i2c0_devs = ARRAY_SIZE(mop500_i2c0_devices);
+	if (machine_is_hrefv60())
+		i2c0_devs -= NUM_PRE_V60_I2C0_DEVICES;
+
+	i2c_register_board_info(0, mop500_i2c0_devices, i2c0_devs);
 	i2c_register_board_info(2, mop500_i2c2_devices,
 				ARRAY_SIZE(mop500_i2c2_devices));
 }
-- 
cgit v1.2.3


From 2df122f52fd31c327e0aa05caf6017ecd7867d5f Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Mon, 4 Apr 2011 09:26:19 +0300
Subject: OMAP4: clock data: Change DSS clock aliases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DSS driver has used fck and ick clocks on OMAP2/3 to get DSS HW up and
running, and also to get the pixel clock's source clock rate from the
fck.

On OMAP4 the clock data is set up in a different way, as there's no ick,
dss_fck points to a fake clock which just affects DSS's MODULEMODE, and
dss_dss_clk if the DSS_FCK.

>From DSS driver's point of view the dss_fck sounds like an ick, and
dss_dss_clk is the fck. While this is not entirely correct from HW point
of view, especially for the ick, configuring the clock aliases that way
makes DSS "just work" with OMAP4's clock setup.

In the (hopefully near) future DSS driver will be reworked to use
pm_runtime support which should clean up the clock code.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Benoît Cousson <b-cousson@ti.com>
Signed-off-by: Paul Walmsley <paul@pwsan.com>
---
 arch/arm/mach-omap2/clock44xx_data.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c
index 276992d3b7fb..8c965671b4d4 100644
--- a/arch/arm/mach-omap2/clock44xx_data.c
+++ b/arch/arm/mach-omap2/clock44xx_data.c
@@ -3116,14 +3116,9 @@ static struct omap_clk omap44xx_clks[] = {
 	CLK(NULL,	"dsp_fck",			&dsp_fck,	CK_443X),
 	CLK("omapdss_dss",	"sys_clk",			&dss_sys_clk,	CK_443X),
 	CLK("omapdss_dss",	"tv_clk",			&dss_tv_clk,	CK_443X),
-	CLK("omapdss_dss",	"dss_clk",			&dss_dss_clk,	CK_443X),
 	CLK("omapdss_dss",	"video_clk",			&dss_48mhz_clk,	CK_443X),
-	CLK("omapdss_dss",	"fck",				&dss_fck,	CK_443X),
-	/*
-	 * On OMAP4, DSS ick is a dummy clock; this is needed for compatibility
-	 * with OMAP2/3.
-	 */
-	CLK("omapdss_dss",	"ick",				&dummy_ck,	CK_443X),
+	CLK("omapdss_dss",	"fck",				&dss_dss_clk,	CK_443X),
+	CLK("omapdss_dss",	"ick",				&dss_fck,	CK_443X),
 	CLK(NULL,	"efuse_ctrl_cust_fck",		&efuse_ctrl_cust_fck,	CK_443X),
 	CLK(NULL,	"emif1_fck",			&emif1_fck,	CK_443X),
 	CLK(NULL,	"emif2_fck",			&emif2_fck,	CK_443X),
-- 
cgit v1.2.3


From 8bc2e98bcb280009cb0f85ce64e5f79b1669f9ff Mon Sep 17 00:00:00 2001
From: Eduardo Valentin <eduardo.valentin@ti.com>
Date: Wed, 13 Apr 2011 18:21:06 +0300
Subject: OMAP2+: PM: Fix the saving of CM_AUTOIDLE_PLL register on scratchpad
 area

The saving of CCR.CM_AUTOIDLE_PLL is done in scratchpad area.

However, in current code, the saving is done for CM_AUTOIDLE2_PLL
(offset 0x34) instead of CM_AUTOIDLE_PLL (offset 0x30).

This patch changes the code to save the correct register.

Signed-off-by: Eduardo Valentin <eduardo.valentin@ti.com>
Signed-off-by: Paul Walmsley <paul@pwsan.com>
---
 arch/arm/mach-omap2/control.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/control.c b/arch/arm/mach-omap2/control.c
index 695279419020..df0c75c31998 100644
--- a/arch/arm/mach-omap2/control.c
+++ b/arch/arm/mach-omap2/control.c
@@ -317,7 +317,7 @@ void omap3_save_scratchpad_contents(void)
 	prcm_block_contents.cm_clken_pll =
 			omap2_cm_read_mod_reg(PLL_MOD, CM_CLKEN);
 	prcm_block_contents.cm_autoidle_pll =
-			omap2_cm_read_mod_reg(PLL_MOD, OMAP3430_CM_AUTOIDLE_PLL);
+			omap2_cm_read_mod_reg(PLL_MOD, CM_AUTOIDLE);
 	prcm_block_contents.cm_clksel1_pll =
 			omap2_cm_read_mod_reg(PLL_MOD, OMAP3430_CM_CLKSEL1_PLL);
 	prcm_block_contents.cm_clksel2_pll =
-- 
cgit v1.2.3


From a8ae645c014bc01090367de84f7601ad11628971 Mon Sep 17 00:00:00 2001
From: Eduardo Valentin <eduardo.valentin@ti.com>
Date: Wed, 13 Apr 2011 18:21:07 +0300
Subject: OMAP3: PM: Do not rely on ROM code to restore
 CM_AUTOIDLE_PLL.AUTO_PERIPH_DPLL

As per OMAP3 erratum (i671), ROM code adds extra latencies while
restoring CM_AUTOIDLE_PLL register, if AUTO_PERIPH_DPLL is equal to 1.

This patch stores 0's in scratchpad content area corresponding to
AUTO_PERIPH_DPLL, to prevent ROM code to try to lock per DPLL, since
it won't respect proper programing scheme.

This register is then stored in prcm context. The saving and restore
is now done by kernel side.

Here follow the erratum description

DESCRIPTION

After OFF mode transition, among many restorations, the ROM Code restores the
CM_AUTOIDLE_PLL register, and after that, it tries to relock the PER DPLL.

In case the restoration data stored in scratchpad memory contains a field
CM_AUTOIDLE_PLL.AUTO_PERIPH_DPLL = 1, then the way the ROM Code restores and
locks the PER DPLL does not respect the PER DPLL programming scheme.

In that case, the DPLL might not lock. Meanwhile, when trying to lock the PER
DPLL, the ROM Code does not hang. Only extra latencies are introduced at
wake-up.

WORKAROUND

When saving the context-restore structure in scratchpad memory, in order to
respect the PER DPLL programming scheme, it is advised to store 0 in the
CM_AUTOIDLE_PLL.AUTO_PERIPH_DPLL field of the saved structure.

After wake-up, the application should store in CM_AUTOIDLE_PLL register the
right desired value.

Signed-off-by: Eduardo Valentin <eduardo.valentin@ti.com>
Signed-off-by: Paul Walmsley <paul@pwsan.com>
---
 arch/arm/mach-omap2/cm2xxx_3xxx.c | 17 +++++++++++++++++
 arch/arm/mach-omap2/control.c     |  8 +++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/cm2xxx_3xxx.c b/arch/arm/mach-omap2/cm2xxx_3xxx.c
index 9d0dec806e92..38830d8d4783 100644
--- a/arch/arm/mach-omap2/cm2xxx_3xxx.c
+++ b/arch/arm/mach-omap2/cm2xxx_3xxx.c
@@ -247,6 +247,7 @@ struct omap3_cm_regs {
 	u32 per_cm_clksel;
 	u32 emu_cm_clksel;
 	u32 emu_cm_clkstctrl;
+	u32 pll_cm_autoidle;
 	u32 pll_cm_autoidle2;
 	u32 pll_cm_clksel4;
 	u32 pll_cm_clksel5;
@@ -319,6 +320,15 @@ void omap3_cm_save_context(void)
 		omap2_cm_read_mod_reg(OMAP3430_EMU_MOD, CM_CLKSEL1);
 	cm_context.emu_cm_clkstctrl =
 		omap2_cm_read_mod_reg(OMAP3430_EMU_MOD, OMAP2_CM_CLKSTCTRL);
+	/*
+	 * As per erratum i671, ROM code does not respect the PER DPLL
+	 * programming scheme if CM_AUTOIDLE_PLL.AUTO_PERIPH_DPLL == 1.
+	 * In this case, even though this register has been saved in
+	 * scratchpad contents, we need to restore AUTO_PERIPH_DPLL
+	 * by ourselves. So, we need to save it anyway.
+	 */
+	cm_context.pll_cm_autoidle =
+		omap2_cm_read_mod_reg(PLL_MOD, CM_AUTOIDLE);
 	cm_context.pll_cm_autoidle2 =
 		omap2_cm_read_mod_reg(PLL_MOD, CM_AUTOIDLE2);
 	cm_context.pll_cm_clksel4 =
@@ -441,6 +451,13 @@ void omap3_cm_restore_context(void)
 			       CM_CLKSEL1);
 	omap2_cm_write_mod_reg(cm_context.emu_cm_clkstctrl, OMAP3430_EMU_MOD,
 			       OMAP2_CM_CLKSTCTRL);
+	/*
+	 * As per erratum i671, ROM code does not respect the PER DPLL
+	 * programming scheme if CM_AUTOIDLE_PLL.AUTO_PERIPH_DPLL == 1.
+	 * In this case, we need to restore AUTO_PERIPH_DPLL by ourselves.
+	 */
+	omap2_cm_write_mod_reg(cm_context.pll_cm_autoidle, PLL_MOD,
+			       CM_AUTOIDLE);
 	omap2_cm_write_mod_reg(cm_context.pll_cm_autoidle2, PLL_MOD,
 			       CM_AUTOIDLE2);
 	omap2_cm_write_mod_reg(cm_context.pll_cm_clksel4, PLL_MOD,
diff --git a/arch/arm/mach-omap2/control.c b/arch/arm/mach-omap2/control.c
index df0c75c31998..da53ba3917ca 100644
--- a/arch/arm/mach-omap2/control.c
+++ b/arch/arm/mach-omap2/control.c
@@ -316,8 +316,14 @@ void omap3_save_scratchpad_contents(void)
 			omap2_cm_read_mod_reg(WKUP_MOD, CM_CLKSEL);
 	prcm_block_contents.cm_clken_pll =
 			omap2_cm_read_mod_reg(PLL_MOD, CM_CLKEN);
+	/*
+	 * As per erratum i671, ROM code does not respect the PER DPLL
+	 * programming scheme if CM_AUTOIDLE_PLL..AUTO_PERIPH_DPLL == 1.
+	 * Then,  in anycase, clear these bits to avoid extra latencies.
+	 */
 	prcm_block_contents.cm_autoidle_pll =
-			omap2_cm_read_mod_reg(PLL_MOD, CM_AUTOIDLE);
+			omap2_cm_read_mod_reg(PLL_MOD, CM_AUTOIDLE) &
+			~OMAP3430_AUTO_PERIPH_DPLL_MASK;
 	prcm_block_contents.cm_clksel1_pll =
 			omap2_cm_read_mod_reg(PLL_MOD, OMAP3430_CM_CLKSEL1_PLL);
 	prcm_block_contents.cm_clksel2_pll =
-- 
cgit v1.2.3


From f95440ca5bdd3ed3e31c2fbad07b9056b31ad18c Mon Sep 17 00:00:00 2001
From: "Avinash.H.M" <avinashhm@ti.com>
Date: Tue, 5 Apr 2011 21:10:15 +0530
Subject: OMAP2/3: hwmod: fix gpio-reset timeouts seen during bootup.

GPIO module expects the debounce clocks to be enabled during reset. It doesn't
reset properly and timeouts are seen, if this clock isn't enabled during
reset. Add the HWMOD_CONTROL_OPT_CLKS_IN_RESET flags to the GPIO HWMODs, with
which the debounce clocks are enabled during reset.

Cc: Rajendra Nayak <rnayak@ti.com>
Cc: Paul Walmsley <paul@pwsan.com>
Cc: Benoit Cousson <b-cousson@ti.com>
Cc: Kevin Hilman <khilman@ti.com>
Signed-off-by: Avinash.H.M <avinashhm@ti.com>
Signed-off-by: Paul Walmsley <paul@pwsan.com>
---
 arch/arm/mach-omap2/omap_hwmod_2420_data.c | 4 ++++
 arch/arm/mach-omap2/omap_hwmod_2430_data.c | 5 +++++
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 6 ++++++
 3 files changed, 15 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/omap_hwmod_2420_data.c b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
index f50e56a8115b..c4d0ae87d62a 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2420_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2420_data.c
@@ -1639,6 +1639,7 @@ static struct omap_hwmod_ocp_if *omap2420_gpio1_slaves[] = {
 
 static struct omap_hwmod omap2420_gpio1_hwmod = {
 	.name		= "gpio1",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap242x_gpio1_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap242x_gpio1_irqs),
 	.main_clk	= "gpios_fck",
@@ -1669,6 +1670,7 @@ static struct omap_hwmod_ocp_if *omap2420_gpio2_slaves[] = {
 
 static struct omap_hwmod omap2420_gpio2_hwmod = {
 	.name		= "gpio2",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap242x_gpio2_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap242x_gpio2_irqs),
 	.main_clk	= "gpios_fck",
@@ -1699,6 +1701,7 @@ static struct omap_hwmod_ocp_if *omap2420_gpio3_slaves[] = {
 
 static struct omap_hwmod omap2420_gpio3_hwmod = {
 	.name		= "gpio3",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap242x_gpio3_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap242x_gpio3_irqs),
 	.main_clk	= "gpios_fck",
@@ -1729,6 +1732,7 @@ static struct omap_hwmod_ocp_if *omap2420_gpio4_slaves[] = {
 
 static struct omap_hwmod omap2420_gpio4_hwmod = {
 	.name		= "gpio4",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap242x_gpio4_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap242x_gpio4_irqs),
 	.main_clk	= "gpios_fck",
diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
index 96753f79a754..9682dd519f8d 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -1742,6 +1742,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio1_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio1_hwmod = {
 	.name		= "gpio1",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio1_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio1_irqs),
 	.main_clk	= "gpios_fck",
@@ -1772,6 +1773,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio2_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio2_hwmod = {
 	.name		= "gpio2",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio2_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio2_irqs),
 	.main_clk	= "gpios_fck",
@@ -1802,6 +1804,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio3_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio3_hwmod = {
 	.name		= "gpio3",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio3_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio3_irqs),
 	.main_clk	= "gpios_fck",
@@ -1832,6 +1835,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio4_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio4_hwmod = {
 	.name		= "gpio4",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio4_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio4_irqs),
 	.main_clk	= "gpios_fck",
@@ -1862,6 +1866,7 @@ static struct omap_hwmod_ocp_if *omap2430_gpio5_slaves[] = {
 
 static struct omap_hwmod omap2430_gpio5_hwmod = {
 	.name		= "gpio5",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap243x_gpio5_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap243x_gpio5_irqs),
 	.main_clk	= "gpio5_fck",
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index 9bee15575988..909a84de6682 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -2141,6 +2141,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio1_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio1_hwmod = {
 	.name		= "gpio1",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio1_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio1_irqs),
 	.main_clk	= "gpio1_ick",
@@ -2177,6 +2178,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio2_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio2_hwmod = {
 	.name		= "gpio2",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio2_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio2_irqs),
 	.main_clk	= "gpio2_ick",
@@ -2213,6 +2215,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio3_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio3_hwmod = {
 	.name		= "gpio3",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio3_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio3_irqs),
 	.main_clk	= "gpio3_ick",
@@ -2249,6 +2252,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio4_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio4_hwmod = {
 	.name		= "gpio4",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio4_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio4_irqs),
 	.main_clk	= "gpio4_ick",
@@ -2285,6 +2289,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio5_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio5_hwmod = {
 	.name		= "gpio5",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio5_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio5_irqs),
 	.main_clk	= "gpio5_ick",
@@ -2321,6 +2326,7 @@ static struct omap_hwmod_ocp_if *omap3xxx_gpio6_slaves[] = {
 
 static struct omap_hwmod omap3xxx_gpio6_hwmod = {
 	.name		= "gpio6",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
 	.mpu_irqs	= omap3xxx_gpio6_irqs,
 	.mpu_irqs_cnt	= ARRAY_SIZE(omap3xxx_gpio6_irqs),
 	.main_clk	= "gpio6_ick",
-- 
cgit v1.2.3


From 37f8527dbfd05af0f670aa02370d0c4cca7fbda6 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 20 Apr 2011 19:19:10 -0700
Subject: Revert "x86, NUMA: Fix fakenuma boot failure"

Andreas Herrmann reported that 7d6b46707f24 ("x86, NUMA: Fix fakenuma
boot failure") causes certain physical NUMA topologies (for example
AMD Magny-Cours) to move sibling cpus to a single node when in reality
they are in separate domains.

This may result in some nodes being completely void of cpus, which
doesn't accurately represent the correct topology. The system will
boot, but will have suboptimal NUMA performance.

This commit was intended as a fix for NUMA emulation, but should
not cause a regression for real NUMA machines as a side effect.

( There will be a separate fix for the numa-debug code, which
  will not affect physical topologies. )

Reported-by: Andreas Herrmann <herrmann.der.user@googlemail.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1104201918110.12634@chino.kir.corp.google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/smpboot.c | 23 -----------------------
 1 file changed, 23 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8ed8908cc9f7..c2871d3c71b6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -312,26 +312,6 @@ void __cpuinit smp_store_cpu_info(int id)
 		identify_secondary_cpu(c);
 }
 
-static void __cpuinit check_cpu_siblings_on_same_node(int cpu1, int cpu2)
-{
-	int node1 = early_cpu_to_node(cpu1);
-	int node2 = early_cpu_to_node(cpu2);
-
-	/*
-	 * Our CPU scheduler assumes all logical cpus in the same physical cpu
-	 * share the same node. But, buggy ACPI or NUMA emulation might assign
-	 * them to different node. Fix it.
-	 */
-	if (node1 != node2) {
-		pr_warning("CPU %d in node %d and CPU %d in node %d are in the same physical CPU. forcing same node %d\n",
-			   cpu1, node1, cpu2, node2, node2);
-
-		numa_remove_cpu(cpu1);
-		numa_set_node(cpu1, node2);
-		numa_add_cpu(cpu1);
-	}
-}
-
 static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
 {
 	cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
@@ -340,7 +320,6 @@ static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
 	cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
 	cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
 	cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
-	check_cpu_siblings_on_same_node(cpu1, cpu2);
 }
 
 
@@ -382,12 +361,10 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 		    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
 			cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
 			cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
-			check_cpu_siblings_on_same_node(cpu, i);
 		}
 		if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
 			cpumask_set_cpu(i, cpu_core_mask(cpu));
 			cpumask_set_cpu(cpu, cpu_core_mask(i));
-			check_cpu_siblings_on_same_node(cpu, i);
 			/*
 			 *  Does this new cpu bringup a new core?
 			 */
-- 
cgit v1.2.3


From 7a6c6547825a2324faa76cff856db11d78de075e Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 20 Apr 2011 19:19:13 -0700
Subject: x86, numa: Fix cpu nodemasks for NUMA emulation and
 CONFIG_DEBUG_PER_CPU_MAPS

The cpu<->node mappings under CONFIG_DEBUG_PER_CPU_MAPS=y
when NUMA emulation is enabled is currently broken because it does
not iterate through every emulated node and bind cpus that have
affinity to it.

NUMA emulation should bind each cpu to every local node to
accurately represent the true NUMA topology of the underlying
machine.

debug_cpumask_set_cpu() needs to be fixed at the same time so
that the debugging information that it emits shows the new
cpumask of the node being assigned when the cpu is being added
or removed.

It can now take responsibility of setting or clearing the cpu
itself to remove the need for duplicate code.

Also change its last parameter, "enable", to have the correct bool
type since it can only be true or false.

 -v2: Fix the return statements, by Kosaki Motohiro

Acked-and-Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Andreas Herrmann <herrmann.der.user@googlemail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1104201918470.12634@chino.kir.corp.google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/numa.h  |  2 +-
 arch/x86/mm/numa.c           | 31 +++++++++++++------------------
 arch/x86/mm/numa_emulation.c | 20 ++++++--------------
 3 files changed, 20 insertions(+), 33 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 3d4dab43c994..a50fc9f493b3 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -51,7 +51,7 @@ static inline void numa_remove_cpu(int cpu)		{ }
 #endif	/* CONFIG_NUMA */
 
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
-struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable);
+void debug_cpumask_set_cpu(int cpu, int node, bool enable);
 #endif
 
 #endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 9559d360fde7..745258dfc4dc 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -213,53 +213,48 @@ int early_cpu_to_node(int cpu)
 	return per_cpu(x86_cpu_to_node_map, cpu);
 }
 
-struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
+void debug_cpumask_set_cpu(int cpu, int node, bool enable)
 {
-	int node = early_cpu_to_node(cpu);
 	struct cpumask *mask;
 	char buf[64];
 
 	if (node == NUMA_NO_NODE) {
 		/* early_cpu_to_node() already emits a warning and trace */
-		return NULL;
+		return;
 	}
 	mask = node_to_cpumask_map[node];
 	if (!mask) {
 		pr_err("node_to_cpumask_map[%i] NULL\n", node);
 		dump_stack();
-		return NULL;
+		return;
 	}
 
+	if (enable)
+		cpumask_set_cpu(cpu, mask);
+	else
+		cpumask_clear_cpu(cpu, mask);
+
 	cpulist_scnprintf(buf, sizeof(buf), mask);
 	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
 		enable ? "numa_add_cpu" : "numa_remove_cpu",
 		cpu, node, buf);
-	return mask;
+	return;
 }
 
 # ifndef CONFIG_NUMA_EMU
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
+static void __cpuinit numa_set_cpumask(int cpu, bool enable)
 {
-	struct cpumask *mask;
-
-	mask = debug_cpumask_set_cpu(cpu, enable);
-	if (!mask)
-		return;
-
-	if (enable)
-		cpumask_set_cpu(cpu, mask);
-	else
-		cpumask_clear_cpu(cpu, mask);
+	debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable);
 }
 
 void __cpuinit numa_add_cpu(int cpu)
 {
-	numa_set_cpumask(cpu, 1);
+	numa_set_cpumask(cpu, true);
 }
 
 void __cpuinit numa_remove_cpu(int cpu)
 {
-	numa_set_cpumask(cpu, 0);
+	numa_set_cpumask(cpu, false);
 }
 # endif	/* !CONFIG_NUMA_EMU */
 
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index ad091e4cff17..de84cc140379 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -454,10 +454,9 @@ void __cpuinit numa_remove_cpu(int cpu)
 		cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
 }
 #else	/* !CONFIG_DEBUG_PER_CPU_MAPS */
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
+static void __cpuinit numa_set_cpumask(int cpu, bool enable)
 {
-	struct cpumask *mask;
-	int nid, physnid, i;
+	int nid, physnid;
 
 	nid = early_cpu_to_node(cpu);
 	if (nid == NUMA_NO_NODE) {
@@ -467,28 +466,21 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
 
 	physnid = emu_nid_to_phys[nid];
 
-	for_each_online_node(i) {
+	for_each_online_node(nid) {
 		if (emu_nid_to_phys[nid] != physnid)
 			continue;
 
-		mask = debug_cpumask_set_cpu(cpu, enable);
-		if (!mask)
-			return;
-
-		if (enable)
-			cpumask_set_cpu(cpu, mask);
-		else
-			cpumask_clear_cpu(cpu, mask);
+		debug_cpumask_set_cpu(cpu, nid, enable);
 	}
 }
 
 void __cpuinit numa_add_cpu(int cpu)
 {
-	numa_set_cpumask(cpu, 1);
+	numa_set_cpumask(cpu, true);
 }
 
 void __cpuinit numa_remove_cpu(int cpu)
 {
-	numa_set_cpumask(cpu, 0);
+	numa_set_cpumask(cpu, false);
 }
 #endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */
-- 
cgit v1.2.3


From dffa4b2f62ff28c982144c7033001b1ece4d3532 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 20 Apr 2011 12:23:49 +0200
Subject: x86, mce: Drop the default decoding notifier

The default notifier doesn't make a lot of sense to call in the
correctable errors case. Drop it and emit the mcelog decoding
hint only in the uncorrectable errors case and when no notifier
is registered. Also, limit issuing the "mcelog --ascii" message
in the rare case when we dump unreported CEs before panicking.

While at it, remove unused old x86_mce_decode_callback from the
header.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Nagananda Chumbalkar <Nagananda.Chumbalkar@hp.com>
Cc: Russ Anderson <rja@sgi.com>
Link: http://lkml.kernel.org/r/20110420102349.GB1361@aftab
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mce.h       |  2 --
 arch/x86/kernel/cpu/mcheck/mce.c | 24 +++++++-----------------
 2 files changed, 7 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index eb16e94ae04f..021979a6e23f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -142,8 +142,6 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
 static inline void enable_p5_mce(void) {}
 #endif
 
-extern void (*x86_mce_decode_callback)(struct mce *m);
-
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, mce_dev);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 68e230327d65..ff1ae9b6464d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -105,20 +105,6 @@ static int			cpu_missing;
 ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
 EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
 
-static int default_decode_mce(struct notifier_block *nb, unsigned long val,
-			       void *data)
-{
-	pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n");
-	pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n");
-
-	return NOTIFY_STOP;
-}
-
-static struct notifier_block mce_dec_nb = {
-	.notifier_call = default_decode_mce,
-	.priority      = -1,
-};
-
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
@@ -212,6 +198,8 @@ void mce_log(struct mce *mce)
 
 static void print_mce(struct mce *m)
 {
+	int ret = 0;
+
 	pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
 	       m->extcpu, m->mcgstatus, m->bank, m->status);
 
@@ -239,7 +227,11 @@ static void print_mce(struct mce *m)
 	 * Print out human-readable details about the MCE error,
 	 * (if the CPU has an implementation for that)
 	 */
-	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
+	ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
+	if (ret == NOTIFY_STOP)
+		return;
+
+	pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
 }
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -1721,8 +1713,6 @@ __setup("mce", mcheck_enable);
 
 int __init mcheck_init(void)
 {
-	atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
-
 	mcheck_intel_therm_init();
 
 	return 0;
-- 
cgit v1.2.3


From d9b41e0b54fd7e164daf1e9c539c1070398aa02e Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 20 Apr 2011 19:27:13 -0700
Subject: [PARISC] set memory ranges in N_NORMAL_MEMORY when onlined

When a DISCONTIGMEM memory range is brought online as a NUMA node, it
also needs to have its bet set in N_NORMAL_MEMORY.  This is necessary for
generic kernel code that utilizes N_NORMAL_MEMORY as a subset of N_ONLINE
for memory savings.

These types of hacks can hopefully be removed once DISCONTIGMEM is either
removed or abstracted away from CONFIG_NUMA.

Fixes a panic in the slub code which only initializes structures for
N_NORMAL_MEMORY to save memory:

	Backtrace:
	 [<000000004021c938>] add_partial+0x28/0x98
	 [<000000004021faa0>] __slab_free+0x1d0/0x1d8
	 [<000000004021fd04>] kmem_cache_free+0xc4/0x128
	 [<000000004033bf9c>] ida_get_new_above+0x21c/0x2c0
	 [<00000000402a8980>] sysfs_new_dirent+0xd0/0x238
	 [<00000000402a974c>] create_dir+0x5c/0x168
	 [<00000000402a9ab0>] sysfs_create_dir+0x98/0x128
	 [<000000004033d6c4>] kobject_add_internal+0x114/0x258
	 [<000000004033d9ac>] kobject_add_varg+0x7c/0xa0
	 [<000000004033df20>] kobject_add+0x50/0x90
	 [<000000004033dfb4>] kobject_create_and_add+0x54/0xc8
	 [<00000000407862a0>] cgroup_init+0x138/0x1f0
	 [<000000004077ce50>] start_kernel+0x5a0/0x840
	 [<000000004011fa3c>] start_parisc+0xa4/0xb8
	 [<00000000404bb034>] packet_ioctl+0x16c/0x208
	 [<000000004049ac30>] ip_mroute_setsockopt+0x260/0xf20

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: stable@kernel.org
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 arch/parisc/mm/init.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index b7ed8d7a9b33..b1d126258dee 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -266,8 +266,10 @@ static void __init setup_bootmem(void)
 	}
 	memset(pfnnid_map, 0xff, sizeof(pfnnid_map));
 
-	for (i = 0; i < npmem_ranges; i++)
+	for (i = 0; i < npmem_ranges; i++) {
+		node_set_state(i, N_NORMAL_MEMORY);
 		node_set_online(i);
+	}
 #endif
 
 	/*
-- 
cgit v1.2.3


From 505d9147a72d4e14323af9581dde066bd5fc439c Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Thu, 21 Apr 2011 15:37:20 -0700
Subject: sparc32: fix section mismatch warnings in apc, pmc and time_32

In all cases there were a struct of_device_id variable defined __initdata.
But it was referenced from struct platform_driver.of_match_table
which is not guaranteed to be used during init only.

So drop the __initdata annotation.

This fixes following warnings:

WARNING: arch/sparc/kernel/built-in.o(.data+0x810): Section mismatch in reference from the variable clock_driver to the variable .init.data:clock_match
The variable clock_driver references
the variable __initdata clock_match
If the reference is valid then annotate the
variable with __init* or __refdata (see linux/init.h) or name the variable:
*_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console

WARNING: arch/sparc/kernel/built-in.o(.data+0xcec): Section mismatch in reference from the variable apc_driver to the variable .init.data:apc_match
The variable apc_driver references
the variable __initdata apc_match
If the reference is valid then annotate the
variable with __init* or __refdata (see linux/init.h) or name the variable:
*_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console

WARNING: arch/sparc/kernel/built-in.o(.data+0xd60): Section mismatch in reference from the variable pmc_driver to the variable .init.data:pmc_match
The variable pmc_driver references
the variable __initdata pmc_match
If the reference is valid then annotate the
variable with __init* or __refdata (see linux/init.h) or name the variable:
*_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/apc.c     | 2 +-
 arch/sparc/kernel/pmc.c     | 2 +-
 arch/sparc/kernel/time_32.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c
index f679c57644d5..1e34f29e58bb 100644
--- a/arch/sparc/kernel/apc.c
+++ b/arch/sparc/kernel/apc.c
@@ -165,7 +165,7 @@ static int __devinit apc_probe(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id __initdata apc_match[] = {
+static struct of_device_id apc_match[] = {
 	{
 		.name = APC_OBPNAME,
 	},
diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c
index 93d7b4465f8d..6a585d393580 100644
--- a/arch/sparc/kernel/pmc.c
+++ b/arch/sparc/kernel/pmc.c
@@ -69,7 +69,7 @@ static int __devinit pmc_probe(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id __initdata pmc_match[] = {
+static struct of_device_id pmc_match[] = {
 	{
 		.name = PMC_OBPNAME,
 	},
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 4e236391b635..96046a4024c2 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -168,7 +168,7 @@ static int __devinit clock_probe(struct platform_device *op)
 	return 0;
 }
 
-static struct of_device_id __initdata clock_match[] = {
+static struct of_device_id clock_match[] = {
 	{
 		.name = "eeprom",
 	},
-- 
cgit v1.2.3


From f486b3dc2d048e7309a733f97eb9f9f83d586df2 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Thu, 21 Apr 2011 16:35:46 -0700
Subject: sparc32: fix sparcstation 5 boot

The sparcstation 5 I have available has no MID property for the CPU.
This resulted in a panic when booting a SMP kernel on this box.

The assigned field in cpu_data is never used, so if we fail
to read the MID property then inform user and continue booting.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/smp_32.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 91c10fb70858..850a1360c0d6 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -53,6 +53,7 @@ cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 void __cpuinit smp_store_cpu_info(int id)
 {
 	int cpu_node;
+	int mid;
 
 	cpu_data(id).udelay_val = loops_per_jiffy;
 
@@ -60,10 +61,13 @@ void __cpuinit smp_store_cpu_info(int id)
 	cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
 						     "clock-frequency", 0);
 	cpu_data(id).prom_node = cpu_node;
-	cpu_data(id).mid = cpu_get_hwmid(cpu_node);
+	mid = cpu_get_hwmid(cpu_node);
 
-	if (cpu_data(id).mid < 0)
-		panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
+	if (mid < 0) {
+		printk(KERN_NOTICE "No MID found for CPU%d at node 0x%08d", id, cpu_node);
+		mid = 0;
+	}
+	cpu_data(id).mid = mid;
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
-- 
cgit v1.2.3


From e2a85aecebc03d165bc2dcd233deadd5dd97ea9f Mon Sep 17 00:00:00 2001
From: Andrea Galbusera <gizero@gmail.com>
Date: Thu, 21 Apr 2011 02:21:21 +0000
Subject: powerpc: Fix multicast problem in fs_enet driver

mac-fec.c was setting individual UDP address registers instead of multicast
group address registers when joining a multicast group.
This prevented from correctly receiving UDP multicast packets.
According to datasheet, replaced hash_table_high and hash_table_low
with grp_hash_table_high and grp_hash_table_low respectively.
Also renamed hash_table_* with grp_hash_table_* in struct fec declaration
for 8xx: these registers are used only for multicast there.

Tested on a MPC5121 based board.
Build tested also against mpc866_ads_defconfig.

Signed-off-by: Andrea Galbusera <gizero@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/powerpc/include/asm/8xx_immap.h | 4 ++--
 drivers/net/fs_enet/mac-fec.c        | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/8xx_immap.h b/arch/powerpc/include/asm/8xx_immap.h
index 6b6dc20b0beb..bdf0563ba423 100644
--- a/arch/powerpc/include/asm/8xx_immap.h
+++ b/arch/powerpc/include/asm/8xx_immap.h
@@ -393,8 +393,8 @@ typedef struct fec {
 	uint	fec_addr_low;		/* lower 32 bits of station address	*/
 	ushort	fec_addr_high;		/* upper 16 bits of station address	*/
 	ushort	res1;			/* reserved				*/
-	uint	fec_hash_table_high;	/* upper 32-bits of hash table		*/
-	uint	fec_hash_table_low;	/* lower 32-bits of hash table		*/
+	uint	fec_grp_hash_table_high;	/* upper 32-bits of hash table		*/
+	uint	fec_grp_hash_table_low;	/* lower 32-bits of hash table		*/
 	uint	fec_r_des_start;	/* beginning of Rx descriptor ring	*/
 	uint	fec_x_des_start;	/* beginning of Tx descriptor ring	*/
 	uint	fec_r_buff_size;	/* Rx buffer size			*/
diff --git a/drivers/net/fs_enet/mac-fec.c b/drivers/net/fs_enet/mac-fec.c
index 61035fc5599b..b9fbc83d64a7 100644
--- a/drivers/net/fs_enet/mac-fec.c
+++ b/drivers/net/fs_enet/mac-fec.c
@@ -226,8 +226,8 @@ static void set_multicast_finish(struct net_device *dev)
 	}
 
 	FC(fecp, r_cntrl, FEC_RCNTRL_PROM);
-	FW(fecp, hash_table_high, fep->fec.hthi);
-	FW(fecp, hash_table_low, fep->fec.htlo);
+	FW(fecp, grp_hash_table_high, fep->fec.hthi);
+	FW(fecp, grp_hash_table_low, fep->fec.htlo);
 }
 
 static void set_multicast_list(struct net_device *dev)
@@ -273,8 +273,8 @@ static void restart(struct net_device *dev)
 	/*
 	 * Reset all multicast.
 	 */
-	FW(fecp, hash_table_high, fep->fec.hthi);
-	FW(fecp, hash_table_low, fep->fec.htlo);
+	FW(fecp, grp_hash_table_high, fep->fec.hthi);
+	FW(fecp, grp_hash_table_low, fep->fec.htlo);
 
 	/*
 	 * Set maximum receive buffer size.
-- 
cgit v1.2.3


From b2508e828d71baacd9a743dd48dcbf85d96affdd Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 21 Apr 2011 16:48:35 -0700
Subject: perf: Support Xeon E7's via the Westmere PMU driver

There's a new model number public, 47, for Xeon E7 (aka Westmere EX).

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: a.p.zijlstra@chello.nl
Link: http://lkml.kernel.org/r/1303429715-10202-1-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 8fc2b2cee1da..586cced12d1f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1425,6 +1425,7 @@ static __init int intel_pmu_init(void)
 
 	case 37: /* 32 nm nehalem, "Clarkdale" */
 	case 44: /* 32 nm nehalem, "Gulftown" */
+	case 47: /* 32 nm Xeon E7 */
 		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
-- 
cgit v1.2.3


From b52c55c6a25e4515b5e075a989ff346fc251ed09 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 22 Apr 2011 08:44:38 +0200
Subject: x86, perf event: Turn off unstructured raw event access to offcore
 registers

Andi Kleen pointed out that the Intel offcore support patches were merged
without user-space tool support to the functionality:

 |
 | The offcore_msr perf kernel code was merged into 2.6.39-rc*, but the
 | user space bits were not. This made it impossible to set the extra mask
 | and actually do the OFFCORE profiling
 |

Andi submitted a preliminary patch for user-space support, as an
extension to perf's raw event syntax:

 |
 | Some raw events -- like the Intel OFFCORE events -- support additional
 | parameters. These can be appended after a ':'.
 |
 | For example on a multi socket Intel Nehalem:
 |
 |    perf stat -e r1b7:20ff -a sleep 1
 |
 | Profile the OFFCORE_RESPONSE.ANY_REQUEST with event mask REMOTE_DRAM_0
 | that measures any access to DRAM on another socket.
 |

But this kind of usability is absolutely unacceptable - users should not
be expected to type in magic, CPU and model specific incantations to get
access to useful hardware functionality.

The proper solution is to expose useful offcore functionality via
generalized events - that way users do not have to care which specific
CPU model they are using, they can use the conceptual event and not some
model specific quirky hexa number.

We already have such generalization in place for CPU cache events,
and it's all very extensible.

"Offcore" events measure general DRAM access patters along various
parameters. They are particularly useful in NUMA systems.

We want to support them via generalized DRAM events: either as the
fourth level of cache (after the last-level cache), or as a separate
generalization category.

That way user-space support would be very obvious, memory access
profiling could be done via self-explanatory commands like:

  perf record -e dram ./myapp
  perf record -e dram-remote ./myapp

... to measure DRAM accesses or more expensive cross-node NUMA DRAM
accesses.

These generalized events would work on all CPUs and architectures that
have comparable PMU features.

( Note, these are just examples: actual implementation could have more
  sophistication and more parameter - as long as they center around
  similarly simple usecases. )

Now we do not want to revert *all* of the current offcore bits, as they
are still somewhat useful for generic last-level-cache events, implemented
in this commit:

  e994d7d23a0b: perf: Fix LLC-* events on Intel Nehalem/Westmere

But we definitely do not yet want to expose the unstructured raw events
to user-space, until better generalization and usability is implemented
for these hardware event features.

( Note: after generalization has been implemented raw offcore events can be
  supported as well: there can always be an odd event that is marginally
  useful but not useful enough to generalize. DRAM profiling is definitely
  *not* such a category so generalization must be done first. )

Furthermore, PERF_TYPE_RAW access to these registers was not intended
to go upstream without proper support - it was a side-effect of the above
e994d7d23a0b commit, not mentioned in the changelog.

As v2.6.39 is nearing release we go for the simplest approach: disable
the PERF_TYPE_RAW offcore hack for now, before it escapes into a released
kernel and becomes an ABI.

Once proper structure is implemented for these hardware events and users
are offered usable solutions we can revisit this issue.

Reported-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1302658203-4239-1-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index eed3673a8656..632e5dc9c9c0 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -586,8 +586,12 @@ static int x86_setup_perfctr(struct perf_event *event)
 			return -EOPNOTSUPP;
 	}
 
+	/*
+	 * Do not allow config1 (extended registers) to propagate,
+	 * there's no sane user-space generalization yet:
+	 */
 	if (attr->type == PERF_TYPE_RAW)
-		return x86_pmu_extra_regs(event->attr.config, event);
+		return 0;
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, event);
-- 
cgit v1.2.3


From 103b3934817a7c42fba6e1ef76ecb390a2837d40 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Thu, 21 Apr 2011 11:03:20 -0400
Subject: perf, x86: P4 PMU -- Use perf_sample_data_init helper

Instead of opencoded assignments better to use
perf_sample_data_init helper.

Tested-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Link: http://lkml.kernel.org/r/1303398203-2918-2-git-send-email-dzickus@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_p4.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 8ff882fdb1c0..ae31e9698d92 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -912,8 +912,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	data.addr = 0;
-	data.raw = NULL;
+	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
-- 
cgit v1.2.3


From 1ea5a6afd95a4803900c97ed63a47a883ebe7b3e Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Thu, 21 Apr 2011 11:03:21 -0400
Subject: perf, x86: P4 PMU - Don't forget to clear cpuc->active_mask on
 overflow

It's not enough to simply disable event on overflow the
cpuc->active_mask should be cleared as well otherwise counter
may stall in "active" even in real being already disabled (which
potentially may lead to the situation that user may not use this
counter further).

Don pointed out that:

 " I also noticed this patch fixed some unknown NMIs
   on a P4 when I stressed the box".

Tested-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Link: http://lkml.kernel.org/r/1303398203-2918-3-git-send-email-dzickus@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_p4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index c2520e178d32..d1f77e2934a1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -947,7 +947,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_event_set_period(event))
 			continue;
 		if (perf_event_overflow(event, 1, &data, regs))
-			p4_pmu_disable_event(event);
+			x86_pmu_stop(event, 0);
 	}
 
 	if (handled) {
-- 
cgit v1.2.3


From f4929bd37208540c2c6f416e9035ff1938f2dbc6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 Apr 2011 13:39:56 +0200
Subject: perf, x86: Update/fix Intel Nehalem cache events

Change the Nehalem cache events to use retired memory instruction counters
(similar to Westmere), this greatly improves the provided stats.

Using:

main ()
{
        int i;

        for (i = 0; i < 1000000000; i++) {
                asm("mov (%%rsp), %%rbx;"
                    "mov %%rbx, (%%rsp);" : : : "rbx");
        }
}

We find:

 $ perf stat --repeat 10 -e instructions:u -e l1-dcache-loads:u -e l1-dcache-stores:u ./loop_1b_loads+stores
  Performance counter stats for './loop_1b_loads+stores' (10 runs):
      4,000,081,056 instructions:u           #      0.000 IPC ( +-   0.000% )
      4,999,502,846 l1-dcache-loads:u          ( +-   0.008% )
      1,000,034,832 l1-dcache-stores:u         ( +-   0.000% )
         1.565184942  seconds time elapsed   ( +-   0.005% )

The 5b is surprising - we'd expect 1b:

 $ perf stat --repeat 10 -e instructions:u -e r10b:u -e l1-dcache-stores:u ./loop_1b_loads+stores
  Performance counter stats for './loop_1b_loads+stores' (10 runs):
      4,000,081,054 instructions:u           #      0.000 IPC ( +-   0.000% )
      1,000,021,961 r10b:u                     ( +-   0.000% )
      1,000,030,951 l1-dcache-stores:u         ( +-   0.000% )
         1.565055422  seconds time elapsed   ( +-   0.003% )

Which this patch thus fixes.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Link: http://lkml.kernel.org/n/tip-q9rtru7b7840tws75xzboapv@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 586cced12d1f..43fa20b13817 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -391,12 +391,12 @@ static __initconst const u64 nehalem_hw_cache_event_ids
 {
  [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
 	},
 	[ C(OP_PREFETCH) ] = {
 		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
-- 
cgit v1.2.3


From 568aa7508c99952ef18e4bfed87545e66f9e042f Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Tue, 19 Apr 2011 10:21:20 +0200
Subject: ARM: at91: AT91CAP9 has a macb device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit

	ee621dd (net: atmel_macb Kconfig: remove long dependency line)

replaced a list of several explicit machines in the dependencies of MACB by
a single symbol that is selected by the respective machines. ee621dd missed
to let ARCH_AT91CAP9 select HAVE_NET_MACB though which is fixed here.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Andrew Victor <linux@maxim.org.za>
Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
---
 arch/arm/mach-at91/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index 19390231a0e9..2d299bf5d72f 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -83,6 +83,7 @@ config ARCH_AT91CAP9
 	select CPU_ARM926T
 	select GENERIC_CLOCKEVENTS
 	select HAVE_FB_ATMEL
+	select HAVE_NET_MACB
 
 config ARCH_AT572D940HF
 	bool "AT572D940HF"
-- 
cgit v1.2.3


From 0312e826a48168761dbbe10a1e8cbc22ebc99767 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Wed, 30 Mar 2011 22:31:15 +1000
Subject: arm: at91: minimal defconfig for at91x40 SoC

A minimal defconfig for build testing the AT91x40 SoC.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
---
 arch/arm/configs/at91x40_defconfig | 48 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 arch/arm/configs/at91x40_defconfig

(limited to 'arch')

diff --git a/arch/arm/configs/at91x40_defconfig b/arch/arm/configs/at91x40_defconfig
new file mode 100644
index 000000000000..c55e9212fcbb
--- /dev/null
+++ b/arch/arm/configs/at91x40_defconfig
@@ -0,0 +1,48 @@
+CONFIG_EXPERIMENTAL=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_EMBEDDED=y
+# CONFIG_HOTPLUG is not set
+# CONFIG_ELF_CORE is not set
+# CONFIG_FUTEX is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_MMU is not set
+CONFIG_ARCH_AT91=y
+CONFIG_ARCH_AT91X40=y
+CONFIG_MACH_AT91EB01=y
+CONFIG_AT91_EARLY_USART0=y
+CONFIG_CPU_ARM7TDMI=y
+CONFIG_SET_MEM_PARAM=y
+CONFIG_DRAM_BASE=0x01000000
+CONFIG_DRAM_SIZE=0x00400000
+CONFIG_FLASH_MEM_BASE=0x01400000
+CONFIG_PROCESSOR_ID=0x14000040
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_BINFMT_FLAT=y
+# CONFIG_SUSPEND is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_MTD=y
+CONFIG_MTD_PARTITIONS=y
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_RAM=y
+CONFIG_MTD_ROM=y
+CONFIG_BLK_DEV_RAM=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_EXT2_FS=y
+# CONFIG_DNOTIFY is not set
+CONFIG_ROMFS_FS=y
+# CONFIG_ENABLE_MUST_CHECK is not set
-- 
cgit v1.2.3


From 91a2f4d3cd2a2f1a2c6830896bd2403ca0130137 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@uclinux.org>
Date: Wed, 30 Mar 2011 14:43:32 +1000
Subject: arm: at91: fix compiler warning for eb01 board build

Fix compiler warning when building for AT91EB01 board:

arch/arm/mach-at91/board-eb01.c:41: warning: initialisation from incompatible pointer type

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
---
 arch/arm/mach-at91/board-eb01.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-at91/board-eb01.c b/arch/arm/mach-at91/board-eb01.c
index 1f9d3cb64c50..d8df59a3426d 100644
--- a/arch/arm/mach-at91/board-eb01.c
+++ b/arch/arm/mach-at91/board-eb01.c
@@ -30,6 +30,11 @@
 #include <mach/board.h>
 #include "generic.h"
 
+static void __init at91eb01_init_irq(void)
+{
+	at91x40_init_interrupts(NULL);
+}
+
 static void __init at91eb01_map_io(void)
 {
 	at91x40_initialize(40000000);
@@ -38,7 +43,7 @@ static void __init at91eb01_map_io(void)
 MACHINE_START(AT91EB01, "Atmel AT91 EB01")
 	/* Maintainer: Greg Ungerer <gerg@snapgear.com> */
 	.timer		= &at91x40_timer,
-	.init_irq	= at91x40_init_interrupts,
+	.init_irq	= at91eb01_init_irq,
 	.map_io		= at91eb01_map_io,
 MACHINE_END
 
-- 
cgit v1.2.3


From 9baeb7e47aed8e399d15d2ea8c032efe3680f20b Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Date: Sat, 23 Apr 2011 10:52:16 +0800
Subject: at91: Add ARCH_ID and basic cpu macros definition for 5series chips
 family.

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 arch/arm/mach-at91/include/mach/cpu.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-at91/include/mach/cpu.h b/arch/arm/mach-at91/include/mach/cpu.h
index 3bef931d0b1c..0700f2125305 100644
--- a/arch/arm/mach-at91/include/mach/cpu.h
+++ b/arch/arm/mach-at91/include/mach/cpu.h
@@ -27,6 +27,7 @@
 #define ARCH_ID_AT91SAM9G45	0x819b05a0
 #define ARCH_ID_AT91SAM9G45MRL	0x819b05a2	/* aka 9G45-ES2 & non ES lots */
 #define ARCH_ID_AT91SAM9G45ES	0x819b05a1	/* 9G45-ES (Engineering Sample) */
+#define ARCH_ID_AT91SAM9X5	0x819a05a0
 #define ARCH_ID_AT91CAP9	0x039A03A0
 
 #define ARCH_ID_AT91SAM9XE128	0x329973a0
@@ -55,6 +56,12 @@ static inline unsigned long at91_cpu_fully_identify(void)
 #define ARCH_EXID_AT91SAM9G46	0x00000003
 #define ARCH_EXID_AT91SAM9G45	0x00000004
 
+#define ARCH_EXID_AT91SAM9G15	0x00000000
+#define ARCH_EXID_AT91SAM9G35	0x00000001
+#define ARCH_EXID_AT91SAM9X35	0x00000002
+#define ARCH_EXID_AT91SAM9G25	0x00000003
+#define ARCH_EXID_AT91SAM9X25	0x00000004
+
 static inline unsigned long at91_exid_identify(void)
 {
 	return at91_sys_read(AT91_DBGU_EXID);
@@ -143,6 +150,27 @@ static inline unsigned long at91cap9_rev_identify(void)
 #define cpu_is_at91sam9m11()	(0)
 #endif
 
+#ifdef CONFIG_ARCH_AT91SAM9X5
+#define cpu_is_at91sam9x5()	(at91_cpu_identify() == ARCH_ID_AT91SAM9X5)
+#define cpu_is_at91sam9g15()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9G15))
+#define cpu_is_at91sam9g35()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9G35))
+#define cpu_is_at91sam9x35()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9X35))
+#define cpu_is_at91sam9g25()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9G25))
+#define cpu_is_at91sam9x25()	(cpu_is_at91sam9x5() && \
+				(at91_exid_identify() == ARCH_EXID_AT91SAM9X25))
+#else
+#define cpu_is_at91sam9x5()	(0)
+#define cpu_is_at91sam9g15()	(0)
+#define cpu_is_at91sam9g35()	(0)
+#define cpu_is_at91sam9x35()	(0)
+#define cpu_is_at91sam9g25()	(0)
+#define cpu_is_at91sam9x25()	(0)
+#endif
+
 #ifdef CONFIG_ARCH_AT91CAP9
 #define cpu_is_at91cap9()	(at91_cpu_identify() == ARCH_ID_AT91CAP9)
 #define cpu_is_at91cap9_revB()	(at91cap9_rev_identify() == ARCH_REVISION_CAP9_B)
-- 
cgit v1.2.3


From fa7b69475a6c192853949ba496dd9c37b497b548 Mon Sep 17 00:00:00 2001
From: "Justin P. Mattock" <justinmattock@gmail.com>
Date: Fri, 22 Apr 2011 10:08:52 -0700
Subject: perf events, x86, P4: Fix typo in comment

Signed-off-by: Justin P. Mattock <justinmattock@gmail.com>
Acked-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: trivial@kernel.org
Link: http://lkml.kernel.org/r/1303492132-3004-1-git-send-email-justinmattock@gmail.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_p4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ae31e9698d92..f4c1da2f9352 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1187,7 +1187,7 @@ static __init int p4_pmu_init(void)
 {
 	unsigned int low, high;
 
-	/* If we get stripped -- indexig fails */
+	/* If we get stripped -- indexing fails */
 	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
 
 	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
-- 
cgit v1.2.3


From 15d6aba24d88231415f4e7e091c0f1e60c3e6fd5 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 24 Apr 2011 11:11:51 +0300
Subject: x86: Demacro CONFIG_PARAVIRT cpu accessors

Recently, we had a build failure on !CONFIG_PARAVIRT due to a
callback ->wbinvd() clashing with a macro wbinvd().

While we worked around the issue, avoid it in the future by
changing the macro (and a few surrounding ones) to an inline
function.

Signed-off-by: Avi Kivity <avi@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: http://lkml.kernel.org/r/1303632711-21662-1-git-send-email-avi@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/system.h | 85 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 71 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 12569e691ce3..c2ff2a1d845e 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -303,24 +303,81 @@ static inline void native_wbinvd(void)
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
-#define read_cr0()	(native_read_cr0())
-#define write_cr0(x)	(native_write_cr0(x))
-#define read_cr2()	(native_read_cr2())
-#define write_cr2(x)	(native_write_cr2(x))
-#define read_cr3()	(native_read_cr3())
-#define write_cr3(x)	(native_write_cr3(x))
-#define read_cr4()	(native_read_cr4())
-#define read_cr4_safe()	(native_read_cr4_safe())
-#define write_cr4(x)	(native_write_cr4(x))
-#define wbinvd()	(native_wbinvd())
+
+static inline unsigned long read_cr0(void)
+{
+	return native_read_cr0();
+}
+
+static inline void write_cr0(unsigned long x)
+{
+	native_write_cr0(x);
+}
+
+static inline unsigned long read_cr2(void)
+{
+	return native_read_cr2();
+}
+
+static inline void write_cr2(unsigned long x)
+{
+	native_write_cr2(x);
+}
+
+static inline unsigned long read_cr3(void)
+{
+	return native_read_cr3();
+}
+
+static inline void write_cr3(unsigned long x)
+{
+	native_write_cr3(x);
+}
+
+static inline unsigned long read_cr4(void)
+{
+	return native_read_cr4();
+}
+
+static inline unsigned long read_cr4_safe(void)
+{
+	return native_read_cr4_safe();
+}
+
+static inline void write_cr4(unsigned long x)
+{
+	native_write_cr4(x);
+}
+
+static inline void wbinvd(void)
+{
+	native_wbinvd();
+}
+
 #ifdef CONFIG_X86_64
-#define read_cr8()	(native_read_cr8())
-#define write_cr8(x)	(native_write_cr8(x))
-#define load_gs_index   native_load_gs_index
+
+static inline unsigned long read_cr8(void)
+{
+	return native_read_cr8();
+}
+
+static inline void write_cr8(unsigned long x)
+{
+	native_write_cr8(x);
+}
+
+static inline void load_gs_index(unsigned selector)
+{
+	native_load_gs_index(selector);
+}
+
 #endif
 
 /* Clear the 'TS' bit */
-#define clts()		(native_clts())
+static inline void clts(void)
+{
+	native_clts();
+}
 
 #endif/* CONFIG_PARAVIRT */
 
-- 
cgit v1.2.3


From 328f5cc30290a92ea3ca62b2a63d2b9ebcb0d334 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 22 Apr 2011 22:02:33 +0200
Subject: ARM: Use struct syscore_ops instead of sysdevs for PM in common code

Convert some ARM architecture's common code to using
struct syscore_ops objects for power management instead of sysdev
classes and sysdevs.

This simplifies the code and reduces the kernel's memory footprint.
It also is necessary for removing sysdevs from the kernel entirely in
the future.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/common/vic.c            | 69 +++++++++++++++-------------------------
 arch/arm/include/asm/mach/time.h |  1 -
 arch/arm/kernel/leds.c           | 28 +++++++++-------
 arch/arm/kernel/time.c           | 35 +++++++-------------
 arch/arm/vfp/vfpmodule.c         | 19 +++--------
 5 files changed, 58 insertions(+), 94 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c
index 113085a77123..7aa4262ada7a 100644
--- a/arch/arm/common/vic.c
+++ b/arch/arm/common/vic.c
@@ -22,17 +22,16 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/device.h>
 #include <linux/amba/bus.h>
 
 #include <asm/mach/irq.h>
 #include <asm/hardware/vic.h>
 
-#if defined(CONFIG_PM)
+#ifdef CONFIG_PM
 /**
  * struct vic_device - VIC PM device
- * @sysdev: The system device which is registered.
  * @irq: The IRQ number for the base of the VIC.
  * @base: The register base for the VIC.
  * @resume_sources: A bitmask of interrupts for resume.
@@ -43,8 +42,6 @@
  * @protect: Save for VIC_PROTECT.
  */
 struct vic_device {
-	struct sys_device sysdev;
-
 	void __iomem	*base;
 	int		irq;
 	u32		resume_sources;
@@ -59,11 +56,6 @@ struct vic_device {
 static struct vic_device vic_devices[CONFIG_ARM_VIC_NR];
 
 static int vic_id;
-
-static inline struct vic_device *to_vic(struct sys_device *sys)
-{
-	return container_of(sys, struct vic_device, sysdev);
-}
 #endif /* CONFIG_PM */
 
 /**
@@ -85,10 +77,9 @@ static void vic_init2(void __iomem *base)
 	writel(32, base + VIC_PL190_DEF_VECT_ADDR);
 }
 
-#if defined(CONFIG_PM)
-static int vic_class_resume(struct sys_device *dev)
+#ifdef CONFIG_PM
+static void resume_one_vic(struct vic_device *vic)
 {
-	struct vic_device *vic = to_vic(dev);
 	void __iomem *base = vic->base;
 
 	printk(KERN_DEBUG "%s: resuming vic at %p\n", __func__, base);
@@ -107,13 +98,18 @@ static int vic_class_resume(struct sys_device *dev)
 
 	writel(vic->soft_int, base + VIC_INT_SOFT);
 	writel(~vic->soft_int, base + VIC_INT_SOFT_CLEAR);
+}
 
-	return 0;
+static void vic_resume(void)
+{
+	int id;
+
+	for (id = vic_id - 1; id >= 0; id--)
+		resume_one_vic(vic_devices + id);
 }
 
-static int vic_class_suspend(struct sys_device *dev, pm_message_t state)
+static void suspend_one_vic(struct vic_device *vic)
 {
-	struct vic_device *vic = to_vic(dev);
 	void __iomem *base = vic->base;
 
 	printk(KERN_DEBUG "%s: suspending vic at %p\n", __func__, base);
@@ -128,14 +124,21 @@ static int vic_class_suspend(struct sys_device *dev, pm_message_t state)
 
 	writel(vic->resume_irqs, base + VIC_INT_ENABLE);
 	writel(~vic->resume_irqs, base + VIC_INT_ENABLE_CLEAR);
+}
+
+static int vic_suspend(void)
+{
+	int id;
+
+	for (id = 0; id < vic_id; id++)
+		suspend_one_vic(vic_devices + id);
 
 	return 0;
 }
 
-struct sysdev_class vic_class = {
-	.name		= "vic",
-	.suspend	= vic_class_suspend,
-	.resume		= vic_class_resume,
+struct syscore_ops vic_syscore_ops = {
+	.suspend	= vic_suspend,
+	.resume		= vic_resume,
 };
 
 /**
@@ -147,30 +150,8 @@ struct sysdev_class vic_class = {
 */
 static int __init vic_pm_init(void)
 {
-	struct vic_device *dev = vic_devices;
-	int err;
-	int id;
-
-	if (vic_id == 0)
-		return 0;
-
-	err = sysdev_class_register(&vic_class);
-	if (err) {
-		printk(KERN_ERR "%s: cannot register class\n", __func__);
-		return err;
-	}
-
-	for (id = 0; id < vic_id; id++, dev++) {
-		dev->sysdev.id = id;
-		dev->sysdev.cls = &vic_class;
-
-		err = sysdev_register(&dev->sysdev);
-		if (err) {
-			printk(KERN_ERR "%s: failed to register device\n",
-			       __func__);
-			return err;
-		}
-	}
+	if (vic_id > 0)
+		register_syscore_ops(&vic_syscore_ops);
 
 	return 0;
 }
diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h
index 883f6be5117a..d5adaae5ee2c 100644
--- a/arch/arm/include/asm/mach/time.h
+++ b/arch/arm/include/asm/mach/time.h
@@ -34,7 +34,6 @@
  *   timer interrupt which may be pending.
  */
 struct sys_timer {
-	struct sys_device	dev;
 	void			(*init)(void);
 	void			(*suspend)(void);
 	void			(*resume)(void);
diff --git a/arch/arm/kernel/leds.c b/arch/arm/kernel/leds.c
index 31a316c1777b..0f107dcb0347 100644
--- a/arch/arm/kernel/leds.c
+++ b/arch/arm/kernel/leds.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/leds.h>
 
@@ -69,36 +70,37 @@ static ssize_t leds_store(struct sys_device *dev,
 
 static SYSDEV_ATTR(event, 0200, NULL, leds_store);
 
-static int leds_suspend(struct sys_device *dev, pm_message_t state)
+static struct sysdev_class leds_sysclass = {
+	.name		= "leds",
+};
+
+static struct sys_device leds_device = {
+	.id		= 0,
+	.cls		= &leds_sysclass,
+};
+
+static int leds_suspend(void)
 {
 	leds_event(led_stop);
 	return 0;
 }
 
-static int leds_resume(struct sys_device *dev)
+static void leds_resume(void)
 {
 	leds_event(led_start);
-	return 0;
 }
 
-static int leds_shutdown(struct sys_device *dev)
+static void leds_shutdown(void)
 {
 	leds_event(led_halted);
-	return 0;
 }
 
-static struct sysdev_class leds_sysclass = {
-	.name		= "leds",
+static struct syscore_ops leds_syscore_ops = {
 	.shutdown	= leds_shutdown,
 	.suspend	= leds_suspend,
 	.resume		= leds_resume,
 };
 
-static struct sys_device leds_device = {
-	.id		= 0,
-	.cls		= &leds_sysclass,
-};
-
 static int __init leds_init(void)
 {
 	int ret;
@@ -107,6 +109,8 @@ static int __init leds_init(void)
 		ret = sysdev_register(&leds_device);
 	if (ret == 0)
 		ret = sysdev_create_file(&leds_device, &attr_event);
+	if (ret == 0)
+		register_syscore_ops(&leds_syscore_ops);
 	return ret;
 }
 
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 1ff46cabc7ef..cb634c3e28e9 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -21,7 +21,7 @@
 #include <linux/timex.h>
 #include <linux/errno.h>
 #include <linux/profile.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/timer.h>
 #include <linux/irq.h>
 
@@ -115,48 +115,37 @@ void timer_tick(void)
 #endif
 
 #if defined(CONFIG_PM) && !defined(CONFIG_GENERIC_CLOCKEVENTS)
-static int timer_suspend(struct sys_device *dev, pm_message_t state)
+static int timer_suspend(void)
 {
-	struct sys_timer *timer = container_of(dev, struct sys_timer, dev);
-
-	if (timer->suspend != NULL)
-		timer->suspend();
+	if (system_timer->suspend)
+		system_timer->suspend();
 
 	return 0;
 }
 
-static int timer_resume(struct sys_device *dev)
+static void timer_resume(void)
 {
-	struct sys_timer *timer = container_of(dev, struct sys_timer, dev);
-
-	if (timer->resume != NULL)
-		timer->resume();
-
-	return 0;
+	if (system_timer->resume)
+		system_timer->resume();
 }
 #else
 #define timer_suspend NULL
 #define timer_resume NULL
 #endif
 
-static struct sysdev_class timer_sysclass = {
-	.name		= "timer",
+static struct syscore_ops timer_syscore_ops = {
 	.suspend	= timer_suspend,
 	.resume		= timer_resume,
 };
 
-static int __init timer_init_sysfs(void)
+static int __init timer_init_syscore_ops(void)
 {
-	int ret = sysdev_class_register(&timer_sysclass);
-	if (ret == 0) {
-		system_timer->dev.cls = &timer_sysclass;
-		ret = sysdev_register(&system_timer->dev);
-	}
+	register_syscore_ops(&timer_syscore_ops);
 
-	return ret;
+	return 0;
 }
 
-device_initcall(timer_init_sysfs);
+device_initcall(timer_init_syscore_ops);
 
 void __init time_init(void)
 {
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index f74695075e64..f25e7ec89416 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -398,9 +398,9 @@ static void vfp_enable(void *unused)
 }
 
 #ifdef CONFIG_PM
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
-static int vfp_pm_suspend(struct sys_device *dev, pm_message_t state)
+static int vfp_pm_suspend(void)
 {
 	struct thread_info *ti = current_thread_info();
 	u32 fpexc = fmrx(FPEXC);
@@ -420,34 +420,25 @@ static int vfp_pm_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int vfp_pm_resume(struct sys_device *dev)
+static void vfp_pm_resume(void)
 {
 	/* ensure we have access to the vfp */
 	vfp_enable(NULL);
 
 	/* and disable it to ensure the next usage restores the state */
 	fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
-
-	return 0;
 }
 
-static struct sysdev_class vfp_pm_sysclass = {
-	.name		= "vfp",
+static struct syscore_ops vfp_pm_syscore_ops = {
 	.suspend	= vfp_pm_suspend,
 	.resume		= vfp_pm_resume,
 };
 
-static struct sys_device vfp_pm_sysdev = {
-	.cls	= &vfp_pm_sysclass,
-};
-
 static void vfp_pm_init(void)
 {
-	sysdev_class_register(&vfp_pm_sysclass);
-	sysdev_register(&vfp_pm_sysdev);
+	register_syscore_ops(&vfp_pm_syscore_ops);
 }
 
-
 #else
 static inline void vfp_pm_init(void) { }
 #endif /* CONFIG_PM */
-- 
cgit v1.2.3


From 3c437ffd20329619672b12a97bee944bccdd4ec9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 22 Apr 2011 22:02:46 +0200
Subject: ARM / OMAP: Use struct syscore_ops for "core" power management

Replace the sysdev class and struct sys_device used for power
management in the OMAP's GPIO code with a struct syscore_ops object
which is simpler.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Kevin Hilman <khilman@ti.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/plat-omap/gpio.c | 35 +++++++++--------------------------
 1 file changed, 9 insertions(+), 26 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c
index d2adcdda23cf..bd9e32187eab 100644
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -17,7 +17,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
@@ -1372,9 +1372,7 @@ static const struct dev_pm_ops omap_mpuio_dev_pm_ops = {
 	.resume_noirq = omap_mpuio_resume_noirq,
 };
 
-/* use platform_driver for this, now that there's no longer any
- * point to sys_device (other than not disturbing old code).
- */
+/* use platform_driver for this. */
 static struct platform_driver omap_mpuio_driver = {
 	.driver		= {
 		.name	= "mpuio",
@@ -1745,7 +1743,7 @@ static int __devinit omap_gpio_probe(struct platform_device *pdev)
 }
 
 #if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP2PLUS)
-static int omap_gpio_suspend(struct sys_device *dev, pm_message_t mesg)
+static int omap_gpio_suspend(void)
 {
 	int i;
 
@@ -1795,12 +1793,12 @@ static int omap_gpio_suspend(struct sys_device *dev, pm_message_t mesg)
 	return 0;
 }
 
-static int omap_gpio_resume(struct sys_device *dev)
+static void omap_gpio_resume(void)
 {
 	int i;
 
 	if (!cpu_class_is_omap2() && !cpu_is_omap16xx())
-		return 0;
+		return;
 
 	for (i = 0; i < gpio_bank_count; i++) {
 		struct gpio_bank *bank = &gpio_bank[i];
@@ -1836,21 +1834,13 @@ static int omap_gpio_resume(struct sys_device *dev)
 		__raw_writel(bank->saved_wakeup, wake_set);
 		spin_unlock_irqrestore(&bank->lock, flags);
 	}
-
-	return 0;
 }
 
-static struct sysdev_class omap_gpio_sysclass = {
-	.name		= "gpio",
+static struct syscore_ops omap_gpio_syscore_ops = {
 	.suspend	= omap_gpio_suspend,
 	.resume		= omap_gpio_resume,
 };
 
-static struct sys_device omap_gpio_device = {
-	.id		= 0,
-	.cls		= &omap_gpio_sysclass,
-};
-
 #endif
 
 #ifdef CONFIG_ARCH_OMAP2PLUS
@@ -2108,21 +2098,14 @@ postcore_initcall(omap_gpio_drv_reg);
 
 static int __init omap_gpio_sysinit(void)
 {
-	int ret = 0;
-
 	mpuio_init();
 
 #if defined(CONFIG_ARCH_OMAP16XX) || defined(CONFIG_ARCH_OMAP2PLUS)
-	if (cpu_is_omap16xx() || cpu_class_is_omap2()) {
-		if (ret == 0) {
-			ret = sysdev_class_register(&omap_gpio_sysclass);
-			if (ret == 0)
-				ret = sysdev_register(&omap_gpio_device);
-		}
-	}
+	if (cpu_is_omap16xx() || cpu_class_is_omap2())
+		register_syscore_ops(&omap_gpio_syscore_ops);
 #endif
 
-	return ret;
+	return 0;
 }
 
 arch_initcall(omap_gpio_sysinit);
-- 
cgit v1.2.3


From b7808056141bc4d67213036921a5a685ebec0274 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 22 Apr 2011 22:02:55 +0200
Subject: ARM / Integrator: Use struct syscore_ops for core PM

Replace the sysdev class and struct sys_device used for power
management by the Integrator interrupt-handling code with a
struct syscore_ops object which is simpler.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/mach-integrator/integrator_ap.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index 980803ff348c..d3e96451529c 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -24,7 +24,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/string.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/kmi.h>
 #include <linux/clocksource.h>
@@ -180,13 +180,13 @@ static void __init ap_init_irq(void)
 #ifdef CONFIG_PM
 static unsigned long ic_irq_enable;
 
-static int irq_suspend(struct sys_device *dev, pm_message_t state)
+static int irq_suspend(void)
 {
 	ic_irq_enable = readl(VA_IC_BASE + IRQ_ENABLE);
 	return 0;
 }
 
-static int irq_resume(struct sys_device *dev)
+static void irq_resume(void)
 {
 	/* disable all irq sources */
 	writel(-1, VA_CMIC_BASE + IRQ_ENABLE_CLEAR);
@@ -194,33 +194,25 @@ static int irq_resume(struct sys_device *dev)
 	writel(-1, VA_IC_BASE + FIQ_ENABLE_CLEAR);
 
 	writel(ic_irq_enable, VA_IC_BASE + IRQ_ENABLE_SET);
-	return 0;
 }
 #else
 #define irq_suspend NULL
 #define irq_resume NULL
 #endif
 
-static struct sysdev_class irq_class = {
-	.name		= "irq",
+static struct syscore_ops irq_syscore_ops = {
 	.suspend	= irq_suspend,
 	.resume		= irq_resume,
 };
 
-static struct sys_device irq_device = {
-	.id	= 0,
-	.cls	= &irq_class,
-};
-
-static int __init irq_init_sysfs(void)
+static int __init irq_syscore_init(void)
 {
-	int ret = sysdev_class_register(&irq_class);
-	if (ret == 0)
-		ret = sysdev_register(&irq_device);
-	return ret;
+	register_syscore_ops(&irq_syscore_ops);
+
+	return 0;
 }
 
-device_initcall(irq_init_sysfs);
+device_initcall(irq_syscore_init);
 
 /*
  * Flash handling.
-- 
cgit v1.2.3


From 905339807bde7bb726001b69fbdf69ab0cf69a9e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 22 Apr 2011 22:03:03 +0200
Subject: ARM / SA1100: Use struct syscore_ops for "core" power management

Replace the sysdev class and struct sys_device used for power
management by the SA1100 interrupt-handling code with a
struct syscore_ops object which is simpler.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/mach-sa1100/irq.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
index 423ddb3d65e9..dfbf824a69fa 100644
--- a/arch/arm/mach-sa1100/irq.c
+++ b/arch/arm/mach-sa1100/irq.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/ioport.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <asm/mach/irq.h>
@@ -234,7 +234,7 @@ static struct sa1100irq_state {
 	unsigned int	iccr;
 } sa1100irq_state;
 
-static int sa1100irq_suspend(struct sys_device *dev, pm_message_t state)
+static int sa1100irq_suspend(void)
 {
 	struct sa1100irq_state *st = &sa1100irq_state;
 
@@ -264,7 +264,7 @@ static int sa1100irq_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int sa1100irq_resume(struct sys_device *dev)
+static void sa1100irq_resume(void)
 {
 	struct sa1100irq_state *st = &sa1100irq_state;
 
@@ -277,24 +277,17 @@ static int sa1100irq_resume(struct sys_device *dev)
 
 		ICMR = st->icmr;
 	}
-	return 0;
 }
 
-static struct sysdev_class sa1100irq_sysclass = {
-	.name		= "sa11x0-irq",
+static struct syscore_ops sa1100irq_syscore_ops = {
 	.suspend	= sa1100irq_suspend,
 	.resume		= sa1100irq_resume,
 };
 
-static struct sys_device sa1100irq_device = {
-	.id		= 0,
-	.cls		= &sa1100irq_sysclass,
-};
-
 static int __init sa1100irq_init_devicefs(void)
 {
-	sysdev_class_register(&sa1100irq_sysclass);
-	return sysdev_register(&sa1100irq_device);
+	register_syscore_ops(&sa1100irq_syscore_ops);
+	return 0;
 }
 
 device_initcall(sa1100irq_init_devicefs);
-- 
cgit v1.2.3


From 2eaa03b5bebd1e80014f780d7bf27c3e66daefd6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 22 Apr 2011 22:03:11 +0200
Subject: ARM / PXA: Use struct syscore_ops for "core" power management

Replace sysdev classes and struct sys_device objects used for "core"
power management by the PXA platform code with struct syscore_ops
objects that are simpler.

This reduces the code size and the kernel memory footprint.  It also
is necessary for removing sysdevs entirely from the kernel in the
future.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/mach-pxa/balloon3.c              |  1 -
 arch/arm/mach-pxa/clock-pxa2xx.c          | 18 +++----------
 arch/arm/mach-pxa/clock-pxa3xx.c          | 17 +++---------
 arch/arm/mach-pxa/clock.h                 |  7 ++---
 arch/arm/mach-pxa/cm-x270.c               |  1 -
 arch/arm/mach-pxa/cm-x2xx.c               | 23 +++++------------
 arch/arm/mach-pxa/colibri-evalboard.c     |  1 -
 arch/arm/mach-pxa/colibri-pxa270-income.c |  1 -
 arch/arm/mach-pxa/colibri-pxa270.c        |  1 -
 arch/arm/mach-pxa/generic.h               |  8 +++---
 arch/arm/mach-pxa/irq.c                   | 17 +++---------
 arch/arm/mach-pxa/lpd270.c                | 20 +++++---------
 arch/arm/mach-pxa/lubbock.c               | 21 +++++----------
 arch/arm/mach-pxa/mainstone.c             | 22 +++++-----------
 arch/arm/mach-pxa/mfp-pxa2xx.c            | 12 ++++-----
 arch/arm/mach-pxa/mfp-pxa3xx.c            | 21 ++++-----------
 arch/arm/mach-pxa/mioa701.c               | 43 ++++++-------------------------
 arch/arm/mach-pxa/palmld.c                |  1 -
 arch/arm/mach-pxa/palmtreo.c              |  1 -
 arch/arm/mach-pxa/palmz72.c               | 24 ++++++-----------
 arch/arm/mach-pxa/pxa25x.c                | 25 +++++-------------
 arch/arm/mach-pxa/pxa27x.c                | 25 +++++-------------
 arch/arm/mach-pxa/pxa3xx.c                | 25 +++++-------------
 arch/arm/mach-pxa/pxa95x.c                | 20 +++-----------
 arch/arm/mach-pxa/raumfeld.c              |  1 -
 arch/arm/mach-pxa/smemc.c                 | 29 +++++----------------
 arch/arm/mach-pxa/trizeps4.c              |  1 -
 arch/arm/mach-pxa/viper.c                 | 12 ++++-----
 arch/arm/mach-pxa/vpac270.c               |  1 -
 arch/arm/plat-pxa/gpio.c                  | 17 +++---------
 arch/arm/plat-pxa/mfp.c                   |  1 -
 31 files changed, 110 insertions(+), 307 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index bfbecec6d05f..810a982a66f8 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -15,7 +15,6 @@
 
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
diff --git a/arch/arm/mach-pxa/clock-pxa2xx.c b/arch/arm/mach-pxa/clock-pxa2xx.c
index 1ce090448493..1d5859d9a0e3 100644
--- a/arch/arm/mach-pxa/clock-pxa2xx.c
+++ b/arch/arm/mach-pxa/clock-pxa2xx.c
@@ -9,7 +9,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/pxa2xx-regs.h>
 
@@ -33,32 +33,22 @@ const struct clkops clk_pxa2xx_cken_ops = {
 #ifdef CONFIG_PM
 static uint32_t saved_cken;
 
-static int pxa2xx_clock_suspend(struct sys_device *d, pm_message_t state)
+static int pxa2xx_clock_suspend(void)
 {
 	saved_cken = CKEN;
 	return 0;
 }
 
-static int pxa2xx_clock_resume(struct sys_device *d)
+static void pxa2xx_clock_resume(void)
 {
 	CKEN = saved_cken;
-	return 0;
 }
 #else
 #define pxa2xx_clock_suspend	NULL
 #define pxa2xx_clock_resume	NULL
 #endif
 
-struct sysdev_class pxa2xx_clock_sysclass = {
-	.name		= "pxa2xx-clock",
+struct syscore_ops pxa2xx_clock_syscore_ops = {
 	.suspend	= pxa2xx_clock_suspend,
 	.resume		= pxa2xx_clock_resume,
 };
-
-static int __init pxa2xx_clock_init(void)
-{
-	if (cpu_is_pxa2xx())
-		return sysdev_class_register(&pxa2xx_clock_sysclass);
-	return 0;
-}
-postcore_initcall(pxa2xx_clock_init);
diff --git a/arch/arm/mach-pxa/clock-pxa3xx.c b/arch/arm/mach-pxa/clock-pxa3xx.c
index 3f864cd0bd28..2a37a9a8f621 100644
--- a/arch/arm/mach-pxa/clock-pxa3xx.c
+++ b/arch/arm/mach-pxa/clock-pxa3xx.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/smemc.h>
 #include <mach/pxa3xx-regs.h>
@@ -182,7 +183,7 @@ const struct clkops clk_pxa3xx_pout_ops = {
 static uint32_t cken[2];
 static uint32_t accr;
 
-static int pxa3xx_clock_suspend(struct sys_device *d, pm_message_t state)
+static int pxa3xx_clock_suspend(void)
 {
 	cken[0] = CKENA;
 	cken[1] = CKENB;
@@ -190,28 +191,18 @@ static int pxa3xx_clock_suspend(struct sys_device *d, pm_message_t state)
 	return 0;
 }
 
-static int pxa3xx_clock_resume(struct sys_device *d)
+static void pxa3xx_clock_resume(void)
 {
 	ACCR = accr;
 	CKENA = cken[0];
 	CKENB = cken[1];
-	return 0;
 }
 #else
 #define pxa3xx_clock_suspend	NULL
 #define pxa3xx_clock_resume	NULL
 #endif
 
-struct sysdev_class pxa3xx_clock_sysclass = {
-	.name		= "pxa3xx-clock",
+struct syscore_ops pxa3xx_clock_syscore_ops = {
 	.suspend	= pxa3xx_clock_suspend,
 	.resume		= pxa3xx_clock_resume,
 };
-
-static int __init pxa3xx_clock_init(void)
-{
-	if (cpu_is_pxa3xx() || cpu_is_pxa95x())
-		return sysdev_class_register(&pxa3xx_clock_sysclass);
-	return 0;
-}
-postcore_initcall(pxa3xx_clock_init);
diff --git a/arch/arm/mach-pxa/clock.h b/arch/arm/mach-pxa/clock.h
index f9f349a21b54..1f2fb9c43f06 100644
--- a/arch/arm/mach-pxa/clock.h
+++ b/arch/arm/mach-pxa/clock.h
@@ -1,5 +1,5 @@
 #include <linux/clkdev.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 struct clkops {
 	void			(*enable)(struct clk *);
@@ -54,7 +54,7 @@ extern const struct clkops clk_pxa2xx_cken_ops;
 void clk_pxa2xx_cken_enable(struct clk *clk);
 void clk_pxa2xx_cken_disable(struct clk *clk);
 
-extern struct sysdev_class pxa2xx_clock_sysclass;
+extern struct syscore_ops pxa2xx_clock_syscore_ops;
 
 #if defined(CONFIG_PXA3xx) || defined(CONFIG_PXA95x)
 #define DEFINE_PXA3_CKEN(_name, _cken, _rate, _delay)	\
@@ -74,5 +74,6 @@ extern const struct clkops clk_pxa3xx_smemc_ops;
 extern void clk_pxa3xx_cken_enable(struct clk *);
 extern void clk_pxa3xx_cken_disable(struct clk *);
 
-extern struct sysdev_class pxa3xx_clock_sysclass;
+extern struct syscore_ops pxa3xx_clock_syscore_ops;
+
 #endif
diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c
index b88d601a8090..13518a705399 100644
--- a/arch/arm/mach-pxa/cm-x270.c
+++ b/arch/arm/mach-pxa/cm-x270.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
diff --git a/arch/arm/mach-pxa/cm-x2xx.c b/arch/arm/mach-pxa/cm-x2xx.c
index 8225e2e58c6e..a10996782476 100644
--- a/arch/arm/mach-pxa/cm-x2xx.c
+++ b/arch/arm/mach-pxa/cm-x2xx.c
@@ -10,7 +10,7 @@
  */
 
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
 
@@ -388,7 +388,7 @@ static inline void cmx2xx_init_display(void) {}
 #ifdef CONFIG_PM
 static unsigned long sleep_save_msc[10];
 
-static int cmx2xx_suspend(struct sys_device *dev, pm_message_t state)
+static int cmx2xx_suspend(void)
 {
 	cmx2xx_pci_suspend();
 
@@ -412,7 +412,7 @@ static int cmx2xx_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int cmx2xx_resume(struct sys_device *dev)
+static void cmx2xx_resume(void)
 {
 	cmx2xx_pci_resume();
 
@@ -420,27 +420,18 @@ static int cmx2xx_resume(struct sys_device *dev)
 	__raw_writel(sleep_save_msc[0], MSC0);
 	__raw_writel(sleep_save_msc[1], MSC1);
 	__raw_writel(sleep_save_msc[2], MSC2);
-
-	return 0;
 }
 
-static struct sysdev_class cmx2xx_pm_sysclass = {
-	.name = "pm",
+static struct syscore_ops cmx2xx_pm_syscore_ops = {
 	.resume = cmx2xx_resume,
 	.suspend = cmx2xx_suspend,
 };
 
-static struct sys_device cmx2xx_pm_device = {
-	.cls = &cmx2xx_pm_sysclass,
-};
-
 static int __init cmx2xx_pm_init(void)
 {
-	int error;
-	error = sysdev_class_register(&cmx2xx_pm_sysclass);
-	if (error == 0)
-		error = sysdev_register(&cmx2xx_pm_device);
-	return error;
+	register_syscore_ops(&cmx2xx_pm_syscore_ops);
+
+	return 0;
 }
 #else
 static int __init cmx2xx_pm_init(void) { return 0; }
diff --git a/arch/arm/mach-pxa/colibri-evalboard.c b/arch/arm/mach-pxa/colibri-evalboard.c
index 81c3c433e2d6..d28e802e2448 100644
--- a/arch/arm/mach-pxa/colibri-evalboard.c
+++ b/arch/arm/mach-pxa/colibri-evalboard.c
@@ -13,7 +13,6 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/interrupt.h>
 #include <linux/gpio.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-pxa/colibri-pxa270-income.c b/arch/arm/mach-pxa/colibri-pxa270-income.c
index 44c1b77ece67..80538b8806ed 100644
--- a/arch/arm/mach-pxa/colibri-pxa270-income.c
+++ b/arch/arm/mach-pxa/colibri-pxa270-income.c
@@ -22,7 +22,6 @@
 #include <linux/platform_device.h>
 #include <linux/pwm_backlight.h>
 #include <linux/i2c/pxa-i2c.h>
-#include <linux/sysdev.h>
 
 #include <asm/irq.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c
index 6fc5d328ba7f..7545a48ed88b 100644
--- a/arch/arm/mach-pxa/colibri-pxa270.c
+++ b/arch/arm/mach-pxa/colibri-pxa270.c
@@ -17,7 +17,6 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/ucb1400.h>
 
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-pxa/generic.h b/arch/arm/mach-pxa/generic.h
index a079d8baa45a..e6c9344a95ae 100644
--- a/arch/arm/mach-pxa/generic.h
+++ b/arch/arm/mach-pxa/generic.h
@@ -61,10 +61,10 @@ extern unsigned pxa3xx_get_clk_frequency_khz(int);
 #define pxa3xx_get_clk_frequency_khz(x)		(0)
 #endif
 
-extern struct sysdev_class pxa_irq_sysclass;
-extern struct sysdev_class pxa_gpio_sysclass;
-extern struct sysdev_class pxa2xx_mfp_sysclass;
-extern struct sysdev_class pxa3xx_mfp_sysclass;
+extern struct syscore_ops pxa_irq_syscore_ops;
+extern struct syscore_ops pxa_gpio_syscore_ops;
+extern struct syscore_ops pxa2xx_mfp_syscore_ops;
+extern struct syscore_ops pxa3xx_mfp_syscore_ops;
 
 void __init pxa_set_ffuart_info(void *info);
 void __init pxa_set_btuart_info(void *info);
diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c
index 6251e3f5c62c..32ed551bf9c5 100644
--- a/arch/arm/mach-pxa/irq.c
+++ b/arch/arm/mach-pxa/irq.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 #include <linux/irq.h>
 
@@ -183,7 +183,7 @@ void __init pxa_init_irq(int irq_nr, set_wake_t fn)
 static unsigned long saved_icmr[MAX_INTERNAL_IRQS/32];
 static unsigned long saved_ipr[MAX_INTERNAL_IRQS];
 
-static int pxa_irq_suspend(struct sys_device *dev, pm_message_t state)
+static int pxa_irq_suspend(void)
 {
 	int i;
 
@@ -202,7 +202,7 @@ static int pxa_irq_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int pxa_irq_resume(struct sys_device *dev)
+static void pxa_irq_resume(void)
 {
 	int i;
 
@@ -218,22 +218,13 @@ static int pxa_irq_resume(struct sys_device *dev)
 			__raw_writel(saved_ipr[i], IRQ_BASE + IPR(i));
 
 	__raw_writel(1, IRQ_BASE + ICCR);
-	return 0;
 }
 #else
 #define pxa_irq_suspend		NULL
 #define pxa_irq_resume		NULL
 #endif
 
-struct sysdev_class pxa_irq_sysclass = {
-	.name		= "irq",
+struct syscore_ops pxa_irq_syscore_ops = {
 	.suspend	= pxa_irq_suspend,
 	.resume		= pxa_irq_resume,
 };
-
-static int __init pxa_irq_init(void)
-{
-	return sysdev_class_register(&pxa_irq_sysclass);
-}
-
-core_initcall(pxa_irq_init);
diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c
index f5de541725b1..6cf8180bf5bd 100644
--- a/arch/arm/mach-pxa/lpd270.c
+++ b/arch/arm/mach-pxa/lpd270.c
@@ -15,7 +15,7 @@
 
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
@@ -159,30 +159,22 @@ static void __init lpd270_init_irq(void)
 
 
 #ifdef CONFIG_PM
-static int lpd270_irq_resume(struct sys_device *dev)
+static void lpd270_irq_resume(void)
 {
 	__raw_writew(lpd270_irq_enabled, LPD270_INT_MASK);
-	return 0;
 }
 
-static struct sysdev_class lpd270_irq_sysclass = {
-	.name = "cpld_irq",
+static struct syscore_ops lpd270_irq_syscore_ops = {
 	.resume = lpd270_irq_resume,
 };
 
-static struct sys_device lpd270_irq_device = {
-	.cls = &lpd270_irq_sysclass,
-};
-
 static int __init lpd270_irq_device_init(void)
 {
-	int ret = -ENODEV;
 	if (machine_is_logicpd_pxa270()) {
-		ret = sysdev_class_register(&lpd270_irq_sysclass);
-		if (ret == 0)
-			ret = sysdev_register(&lpd270_irq_device);
+		register_syscore_ops(&lpd270_irq_syscore_ops);
+		return 0;
 	}
-	return ret;
+	return -ENODEV;
 }
 
 device_initcall(lpd270_irq_device_init);
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index 3ede978c83d9..e10ddb827147 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/major.h>
 #include <linux/fb.h>
 #include <linux/interrupt.h>
@@ -176,31 +176,22 @@ static void __init lubbock_init_irq(void)
 
 #ifdef CONFIG_PM
 
-static int lubbock_irq_resume(struct sys_device *dev)
+static void lubbock_irq_resume(void)
 {
 	LUB_IRQ_MASK_EN = lubbock_irq_enabled;
-	return 0;
 }
 
-static struct sysdev_class lubbock_irq_sysclass = {
-	.name = "cpld_irq",
+static struct syscore_ops lubbock_irq_syscore_ops = {
 	.resume = lubbock_irq_resume,
 };
 
-static struct sys_device lubbock_irq_device = {
-	.cls = &lubbock_irq_sysclass,
-};
-
 static int __init lubbock_irq_device_init(void)
 {
-	int ret = -ENODEV;
-
 	if (machine_is_lubbock()) {
-		ret = sysdev_class_register(&lubbock_irq_sysclass);
-		if (ret == 0)
-			ret = sysdev_register(&lubbock_irq_device);
+		register_syscore_ops(&lubbock_irq_syscore_ops);
+		return 0;
 	}
-	return ret;
+	return -ENODEV;
 }
 
 device_initcall(lubbock_irq_device_init);
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index 95163baca29e..3479e2b3b511 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -15,7 +15,7 @@
 
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
@@ -185,31 +185,21 @@ static void __init mainstone_init_irq(void)
 
 #ifdef CONFIG_PM
 
-static int mainstone_irq_resume(struct sys_device *dev)
+static void mainstone_irq_resume(void)
 {
 	MST_INTMSKENA = mainstone_irq_enabled;
-	return 0;
 }
 
-static struct sysdev_class mainstone_irq_sysclass = {
-	.name = "cpld_irq",
+static struct syscore_ops mainstone_irq_syscore_ops = {
 	.resume = mainstone_irq_resume,
 };
 
-static struct sys_device mainstone_irq_device = {
-	.cls = &mainstone_irq_sysclass,
-};
-
 static int __init mainstone_irq_device_init(void)
 {
-	int ret = -ENODEV;
+	if (machine_is_mainstone())
+		register_syscore_ops(&mainstone_irq_syscore_ops);
 
-	if (machine_is_mainstone()) {
-		ret = sysdev_class_register(&mainstone_irq_sysclass);
-		if (ret == 0)
-			ret = sysdev_register(&mainstone_irq_device);
-	}
-	return ret;
+	return 0;
 }
 
 device_initcall(mainstone_irq_device_init);
diff --git a/arch/arm/mach-pxa/mfp-pxa2xx.c b/arch/arm/mach-pxa/mfp-pxa2xx.c
index 1d1419b73457..87ae3129f4f7 100644
--- a/arch/arm/mach-pxa/mfp-pxa2xx.c
+++ b/arch/arm/mach-pxa/mfp-pxa2xx.c
@@ -16,7 +16,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/gpio.h>
 #include <mach/pxa2xx-regs.h>
@@ -338,7 +338,7 @@ static unsigned long saved_gafr[2][4];
 static unsigned long saved_gpdr[4];
 static unsigned long saved_pgsr[4];
 
-static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state)
+static int pxa2xx_mfp_suspend(void)
 {
 	int i;
 
@@ -365,7 +365,7 @@ static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state)
 	return 0;
 }
 
-static int pxa2xx_mfp_resume(struct sys_device *d)
+static void pxa2xx_mfp_resume(void)
 {
 	int i;
 
@@ -376,15 +376,13 @@ static int pxa2xx_mfp_resume(struct sys_device *d)
 		PGSR(i) = saved_pgsr[i];
 	}
 	PSSR = PSSR_RDH | PSSR_PH;
-	return 0;
 }
 #else
 #define pxa2xx_mfp_suspend	NULL
 #define pxa2xx_mfp_resume	NULL
 #endif
 
-struct sysdev_class pxa2xx_mfp_sysclass = {
-	.name		= "mfp",
+struct syscore_ops pxa2xx_mfp_syscore_ops = {
 	.suspend	= pxa2xx_mfp_suspend,
 	.resume		= pxa2xx_mfp_resume,
 };
@@ -409,6 +407,6 @@ static int __init pxa2xx_mfp_init(void)
 	for (i = 0; i <= gpio_to_bank(pxa_last_gpio); i++)
 		gpdr_lpm[i] = GPDR(i * 32);
 
-	return sysdev_class_register(&pxa2xx_mfp_sysclass);
+	return 0;
 }
 postcore_initcall(pxa2xx_mfp_init);
diff --git a/arch/arm/mach-pxa/mfp-pxa3xx.c b/arch/arm/mach-pxa/mfp-pxa3xx.c
index 7a270eecd480..89863a01ecd7 100644
--- a/arch/arm/mach-pxa/mfp-pxa3xx.c
+++ b/arch/arm/mach-pxa/mfp-pxa3xx.c
@@ -17,7 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <mach/mfp-pxa3xx.h>
@@ -31,13 +31,13 @@
  * a pull-down mode if they're an active low chip select, and we're
  * just entering standby.
  */
-static int pxa3xx_mfp_suspend(struct sys_device *d, pm_message_t state)
+static int pxa3xx_mfp_suspend(void)
 {
 	mfp_config_lpm();
 	return 0;
 }
 
-static int pxa3xx_mfp_resume(struct sys_device *d)
+static void pxa3xx_mfp_resume(void)
 {
 	mfp_config_run();
 
@@ -47,24 +47,13 @@ static int pxa3xx_mfp_resume(struct sys_device *d)
 	 * preserve them here in case they will be referenced later
 	 */
 	ASCR &= ~(ASCR_RDH | ASCR_D1S | ASCR_D2S | ASCR_D3S);
-	return 0;
 }
 #else
 #define pxa3xx_mfp_suspend	NULL
 #define pxa3xx_mfp_resume	NULL
 #endif
 
-struct sysdev_class pxa3xx_mfp_sysclass = {
-	.name		= "mfp",
+struct syscore_ops pxa3xx_mfp_syscore_ops = {
 	.suspend	= pxa3xx_mfp_suspend,
-	.resume 	= pxa3xx_mfp_resume,
+	.resume		= pxa3xx_mfp_resume,
 };
-
-static int __init mfp_init_devicefs(void)
-{
-	if (cpu_is_pxa3xx())
-		return sysdev_class_register(&pxa3xx_mfp_sysclass);
-
-	return 0;
-}
-postcore_initcall(mfp_init_devicefs);
diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c
index 23925db8ff74..e3470137c934 100644
--- a/arch/arm/mach-pxa/mioa701.c
+++ b/arch/arm/mach-pxa/mioa701.c
@@ -22,7 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/input.h>
 #include <linux/delay.h>
 #include <linux/gpio_keys.h>
@@ -488,7 +488,7 @@ static void install_bootstrap(void)
 }
 
 
-static int mioa701_sys_suspend(struct sys_device *sysdev, pm_message_t state)
+static int mioa701_sys_suspend(void)
 {
 	int i = 0, is_bt_on;
 	u32 *mem_resume_vector	= phys_to_virt(RESUME_VECTOR_ADDR);
@@ -514,7 +514,7 @@ static int mioa701_sys_suspend(struct sys_device *sysdev, pm_message_t state)
 	return 0;
 }
 
-static int mioa701_sys_resume(struct sys_device *sysdev)
+static void mioa701_sys_resume(void)
 {
 	int i = 0;
 	u32 *mem_resume_vector	= phys_to_virt(RESUME_VECTOR_ADDR);
@@ -527,43 +527,18 @@ static int mioa701_sys_resume(struct sys_device *sysdev)
 	*mem_resume_enabler = save_buffer[i++];
 	*mem_resume_bt	    = save_buffer[i++];
 	*mem_resume_unknown = save_buffer[i++];
-
-	return 0;
 }
 
-static struct sysdev_class mioa701_sysclass = {
-	.name = "mioa701",
-};
-
-static struct sys_device sysdev_bootstrap = {
-	.cls		= &mioa701_sysclass,
-};
-
-static struct sysdev_driver driver_bootstrap = {
-	.suspend	= &mioa701_sys_suspend,
-	.resume		= &mioa701_sys_resume,
+static struct syscore_ops mioa701_syscore_ops = {
+	.suspend	= mioa701_sys_suspend,
+	.resume		= mioa701_sys_resume,
 };
 
 static int __init bootstrap_init(void)
 {
-	int rc;
 	int save_size = mioa701_bootstrap_lg + (sizeof(u32) * 3);
 
-	rc = sysdev_class_register(&mioa701_sysclass);
-	if (rc) {
-		printk(KERN_ERR "Failed registering mioa701 sys class\n");
-		return -ENODEV;
-	}
-	rc = sysdev_register(&sysdev_bootstrap);
-	if (rc) {
-		printk(KERN_ERR "Failed registering mioa701 sys device\n");
-		return -ENODEV;
-	}
-	rc = sysdev_driver_register(&mioa701_sysclass, &driver_bootstrap);
-	if (rc) {
-		printk(KERN_ERR "Failed registering PMU sys driver\n");
-		return -ENODEV;
-	}
+	register_syscore_ops(&mioa701_syscore_ops);
 
 	save_buffer = kmalloc(save_size, GFP_KERNEL);
 	if (!save_buffer)
@@ -576,9 +551,7 @@ static int __init bootstrap_init(void)
 static void bootstrap_exit(void)
 {
 	kfree(save_buffer);
-	sysdev_driver_unregister(&mioa701_sysclass, &driver_bootstrap);
-	sysdev_unregister(&sysdev_bootstrap);
-	sysdev_class_unregister(&mioa701_sysclass);
+	unregister_syscore_ops(&mioa701_syscore_ops);
 
 	printk(KERN_CRIT "Unregistering mioa701 suspend will hang next"
 	       "resume !!!\n");
diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c
index a6f898cbfac9..4061ecddee70 100644
--- a/arch/arm/mach-pxa/palmld.c
+++ b/arch/arm/mach-pxa/palmld.c
@@ -24,7 +24,6 @@
 #include <linux/gpio.h>
 #include <linux/wm97xx.h>
 #include <linux/power_supply.h>
-#include <linux/sysdev.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
diff --git a/arch/arm/mach-pxa/palmtreo.c b/arch/arm/mach-pxa/palmtreo.c
index 8aadad55fbe4..20d1b18b1733 100644
--- a/arch/arm/mach-pxa/palmtreo.c
+++ b/arch/arm/mach-pxa/palmtreo.c
@@ -25,7 +25,6 @@
 #include <linux/pwm_backlight.h>
 #include <linux/gpio.h>
 #include <linux/power_supply.h>
-#include <linux/sysdev.h>
 #include <linux/w1-gpio.h>
 
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c
index 3b8a4f37dbbe..65f24f0b77e8 100644
--- a/arch/arm/mach-pxa/palmz72.c
+++ b/arch/arm/mach-pxa/palmz72.c
@@ -19,7 +19,7 @@
  */
 
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
 #include <linux/gpio_keys.h>
@@ -233,9 +233,9 @@ static struct palmz72_resume_info palmz72_resume_info = {
 
 static unsigned long store_ptr;
 
-/* sys_device for Palm Zire 72 PM */
+/* syscore_ops for Palm Zire 72 PM */
 
-static int palmz72_pm_suspend(struct sys_device *dev, pm_message_t msg)
+static int palmz72_pm_suspend(void)
 {
 	/* setup the resume_info struct for the original bootloader */
 	palmz72_resume_info.resume_addr = (u32) cpu_resume;
@@ -249,31 +249,23 @@ static int palmz72_pm_suspend(struct sys_device *dev, pm_message_t msg)
 	return 0;
 }
 
-static int palmz72_pm_resume(struct sys_device *dev)
+static void palmz72_pm_resume(void)
 {
 	*PALMZ72_SAVE_DWORD = store_ptr;
-	return 0;
 }
 
-static struct sysdev_class palmz72_pm_sysclass = {
-	.name = "palmz72_pm",
+static struct syscore_ops palmz72_pm_syscore_ops = {
 	.suspend = palmz72_pm_suspend,
 	.resume = palmz72_pm_resume,
 };
 
-static struct sys_device palmz72_pm_device = {
-	.cls = &palmz72_pm_sysclass,
-};
-
 static int __init palmz72_pm_init(void)
 {
-	int ret = -ENODEV;
 	if (machine_is_palmz72()) {
-		ret = sysdev_class_register(&palmz72_pm_sysclass);
-		if (ret == 0)
-			ret = sysdev_register(&palmz72_pm_device);
+		register_syscore_ops(&palmz72_pm_syscore_ops);
+		return 0;
 	}
-	return ret;
+	return -ENODEV;
 }
 
 device_initcall(palmz72_pm_init);
diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c
index a4af8c52d7ee..fed363cec9c6 100644
--- a/arch/arm/mach-pxa/pxa25x.c
+++ b/arch/arm/mach-pxa/pxa25x.c
@@ -21,7 +21,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/suspend.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/irq.h>
 
 #include <asm/mach/map.h>
@@ -350,21 +350,9 @@ static struct platform_device *pxa25x_devices[] __initdata = {
 	&pxa_device_asoc_platform,
 };
 
-static struct sys_device pxa25x_sysdev[] = {
-	{
-		.cls	= &pxa_irq_sysclass,
-	}, {
-		.cls	= &pxa2xx_mfp_sysclass,
-	}, {
-		.cls	= &pxa_gpio_sysclass,
-	}, {
-		.cls	= &pxa2xx_clock_sysclass,
-	}
-};
-
 static int __init pxa25x_init(void)
 {
-	int i, ret = 0;
+	int ret = 0;
 
 	if (cpu_is_pxa25x()) {
 
@@ -377,11 +365,10 @@ static int __init pxa25x_init(void)
 
 		pxa25x_init_pm();
 
-		for (i = 0; i < ARRAY_SIZE(pxa25x_sysdev); i++) {
-			ret = sysdev_register(&pxa25x_sysdev[i]);
-			if (ret)
-				pr_err("failed to register sysdev[%d]\n", i);
-		}
+		register_syscore_ops(&pxa_irq_syscore_ops);
+		register_syscore_ops(&pxa2xx_mfp_syscore_ops);
+		register_syscore_ops(&pxa_gpio_syscore_ops);
+		register_syscore_ops(&pxa2xx_clock_syscore_ops);
 
 		ret = platform_add_devices(pxa25x_devices,
 					   ARRAY_SIZE(pxa25x_devices));
diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index 909756eaf4b7..2fecbec58d88 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -16,7 +16,7 @@
 #include <linux/init.h>
 #include <linux/suspend.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/i2c/pxa-i2c.h>
@@ -428,21 +428,9 @@ static struct platform_device *devices[] __initdata = {
 	&pxa27x_device_pwm1,
 };
 
-static struct sys_device pxa27x_sysdev[] = {
-	{
-		.cls	= &pxa_irq_sysclass,
-	}, {
-		.cls	= &pxa2xx_mfp_sysclass,
-	}, {
-		.cls	= &pxa_gpio_sysclass,
-	}, {
-		.cls	= &pxa2xx_clock_sysclass,
-	}
-};
-
 static int __init pxa27x_init(void)
 {
-	int i, ret = 0;
+	int ret = 0;
 
 	if (cpu_is_pxa27x()) {
 
@@ -455,11 +443,10 @@ static int __init pxa27x_init(void)
 
 		pxa27x_init_pm();
 
-		for (i = 0; i < ARRAY_SIZE(pxa27x_sysdev); i++) {
-			ret = sysdev_register(&pxa27x_sysdev[i]);
-			if (ret)
-				pr_err("failed to register sysdev[%d]\n", i);
-		}
+		register_syscore_ops(&pxa_irq_syscore_ops);
+		register_syscore_ops(&pxa2xx_mfp_syscore_ops);
+		register_syscore_ops(&pxa_gpio_syscore_ops);
+		register_syscore_ops(&pxa2xx_clock_syscore_ops);
 
 		ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	}
diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c
index 8dd107391157..8521d7d6f1da 100644
--- a/arch/arm/mach-pxa/pxa3xx.c
+++ b/arch/arm/mach-pxa/pxa3xx.c
@@ -20,7 +20,7 @@
 #include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/i2c/pxa-i2c.h>
 
 #include <asm/mach/map.h>
@@ -427,21 +427,9 @@ static struct platform_device *devices[] __initdata = {
 	&pxa27x_device_pwm1,
 };
 
-static struct sys_device pxa3xx_sysdev[] = {
-	{
-		.cls	= &pxa_irq_sysclass,
-	}, {
-		.cls	= &pxa3xx_mfp_sysclass,
-	}, {
-		.cls	= &pxa_gpio_sysclass,
-	}, {
-		.cls	= &pxa3xx_clock_sysclass,
-	}
-};
-
 static int __init pxa3xx_init(void)
 {
-	int i, ret = 0;
+	int ret = 0;
 
 	if (cpu_is_pxa3xx()) {
 
@@ -462,11 +450,10 @@ static int __init pxa3xx_init(void)
 
 		pxa3xx_init_pm();
 
-		for (i = 0; i < ARRAY_SIZE(pxa3xx_sysdev); i++) {
-			ret = sysdev_register(&pxa3xx_sysdev[i]);
-			if (ret)
-				pr_err("failed to register sysdev[%d]\n", i);
-		}
+		register_syscore_ops(&pxa_irq_syscore_ops);
+		register_syscore_ops(&pxa3xx_mfp_syscore_ops);
+		register_syscore_ops(&pxa_gpio_syscore_ops);
+		register_syscore_ops(&pxa3xx_clock_syscore_ops);
 
 		ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	}
diff --git a/arch/arm/mach-pxa/pxa95x.c b/arch/arm/mach-pxa/pxa95x.c
index 23b229bd06e9..ecc82a330fad 100644
--- a/arch/arm/mach-pxa/pxa95x.c
+++ b/arch/arm/mach-pxa/pxa95x.c
@@ -18,7 +18,7 @@
 #include <linux/i2c/pxa-i2c.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <mach/gpio.h>
@@ -260,16 +260,6 @@ static struct platform_device *devices[] __initdata = {
 	&pxa27x_device_pwm1,
 };
 
-static struct sys_device pxa95x_sysdev[] = {
-	{
-		.cls	= &pxa_irq_sysclass,
-	}, {
-		.cls	= &pxa_gpio_sysclass,
-	}, {
-		.cls	= &pxa3xx_clock_sysclass,
-	}
-};
-
 static int __init pxa95x_init(void)
 {
 	int ret = 0, i;
@@ -293,11 +283,9 @@ static int __init pxa95x_init(void)
 		if ((ret = pxa_init_dma(IRQ_DMA, 32)))
 			return ret;
 
-		for (i = 0; i < ARRAY_SIZE(pxa95x_sysdev); i++) {
-			ret = sysdev_register(&pxa95x_sysdev[i]);
-			if (ret)
-				pr_err("failed to register sysdev[%d]\n", i);
-		}
+		register_syscore_ops(&pxa_irq_syscore_ops);
+		register_syscore_ops(&pxa_gpio_syscore_ops);
+		register_syscore_ops(&pxa3xx_clock_syscore_ops);
 
 		ret = platform_add_devices(devices, ARRAY_SIZE(devices));
 	}
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index cd1861351f75..d130f77b6d11 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -18,7 +18,6 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/sysdev.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/gpio.h>
diff --git a/arch/arm/mach-pxa/smemc.c b/arch/arm/mach-pxa/smemc.c
index 232b7316ec08..79923058d10f 100644
--- a/arch/arm/mach-pxa/smemc.c
+++ b/arch/arm/mach-pxa/smemc.c
@@ -6,7 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/hardware.h>
 #include <mach/smemc.h>
@@ -16,7 +16,7 @@ static unsigned long msc[2];
 static unsigned long sxcnfg, memclkcfg;
 static unsigned long csadrcfg[4];
 
-static int pxa3xx_smemc_suspend(struct sys_device *dev, pm_message_t state)
+static int pxa3xx_smemc_suspend(void)
 {
 	msc[0] = __raw_readl(MSC0);
 	msc[1] = __raw_readl(MSC1);
@@ -30,7 +30,7 @@ static int pxa3xx_smemc_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int pxa3xx_smemc_resume(struct sys_device *dev)
+static void pxa3xx_smemc_resume(void)
 {
 	__raw_writel(msc[0], MSC0);
 	__raw_writel(msc[1], MSC1);
@@ -40,34 +40,19 @@ static int pxa3xx_smemc_resume(struct sys_device *dev)
 	__raw_writel(csadrcfg[1], CSADRCFG1);
 	__raw_writel(csadrcfg[2], CSADRCFG2);
 	__raw_writel(csadrcfg[3], CSADRCFG3);
-
-	return 0;
 }
 
-static struct sysdev_class smemc_sysclass = {
-	.name		= "smemc",
+static struct syscore_ops smemc_syscore_ops = {
 	.suspend	= pxa3xx_smemc_suspend,
 	.resume		= pxa3xx_smemc_resume,
 };
 
-static struct sys_device smemc_sysdev = {
-	.id		= 0,
-	.cls		= &smemc_sysclass,
-};
-
 static int __init smemc_init(void)
 {
-	int ret = 0;
+	if (cpu_is_pxa3xx())
+		register_syscore_ops(&smemc_syscore_ops);
 
-	if (cpu_is_pxa3xx()) {
-		ret = sysdev_class_register(&smemc_sysclass);
-		if (ret)
-			return ret;
-
-		ret = sysdev_register(&smemc_sysdev);
-	}
-
-	return ret;
+	return 0;
 }
 subsys_initcall(smemc_init);
 #endif
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index b9cfbebdfe9c..687417a93698 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -15,7 +15,6 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index b523f119e0f0..903218eab56d 100644
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -44,6 +44,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
+#include <linux/syscore_ops.h>
 
 #include <mach/pxa25x.h>
 #include <mach/audio.h>
@@ -130,20 +131,19 @@ static u8 viper_hw_version(void)
 	return v1;
 }
 
-/* CPU sysdev */
-static int viper_cpu_suspend(struct sys_device *sysdev, pm_message_t state)
+/* CPU system core operations. */
+static int viper_cpu_suspend(void)
 {
 	viper_icr_set_bit(VIPER_ICR_R_DIS);
 	return 0;
 }
 
-static int viper_cpu_resume(struct sys_device *sysdev)
+static void viper_cpu_resume(void)
 {
 	viper_icr_clear_bit(VIPER_ICR_R_DIS);
-	return 0;
 }
 
-static struct sysdev_driver viper_cpu_sysdev_driver = {
+static struct syscore_ops viper_cpu_syscore_ops = {
 	.suspend	= viper_cpu_suspend,
 	.resume		= viper_cpu_resume,
 };
@@ -945,7 +945,7 @@ static void __init viper_init(void)
 	viper_init_vcore_gpios();
 	viper_init_cpufreq();
 
-	sysdev_driver_register(&cpu_sysdev_class, &viper_cpu_sysdev_driver);
+	register_syscore_ops(&viper_cpu_syscore_ops);
 
 	if (version) {
 		pr_info("viper: hardware v%di%d detected. "
diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c
index f71d377c8640..67bd41488bf8 100644
--- a/arch/arm/mach-pxa/vpac270.c
+++ b/arch/arm/mach-pxa/vpac270.c
@@ -16,7 +16,6 @@
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
 #include <linux/gpio.h>
-#include <linux/sysdev.h>
 #include <linux/usb/gpio_vbus.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
diff --git a/arch/arm/plat-pxa/gpio.c b/arch/arm/plat-pxa/gpio.c
index dce088f45678..48ebb9479b61 100644
--- a/arch/arm/plat-pxa/gpio.c
+++ b/arch/arm/plat-pxa/gpio.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/slab.h>
 
 #include <mach/gpio.h>
@@ -295,7 +295,7 @@ void __init pxa_init_gpio(int mux_irq, int start, int end, set_wake_t fn)
 }
 
 #ifdef CONFIG_PM
-static int pxa_gpio_suspend(struct sys_device *dev, pm_message_t state)
+static int pxa_gpio_suspend(void)
 {
 	struct pxa_gpio_chip *c;
 	int gpio;
@@ -312,7 +312,7 @@ static int pxa_gpio_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int pxa_gpio_resume(struct sys_device *dev)
+static void pxa_gpio_resume(void)
 {
 	struct pxa_gpio_chip *c;
 	int gpio;
@@ -326,22 +326,13 @@ static int pxa_gpio_resume(struct sys_device *dev)
 		__raw_writel(c->saved_gfer, c->regbase + GFER_OFFSET);
 		__raw_writel(c->saved_gpdr, c->regbase + GPDR_OFFSET);
 	}
-	return 0;
 }
 #else
 #define pxa_gpio_suspend	NULL
 #define pxa_gpio_resume		NULL
 #endif
 
-struct sysdev_class pxa_gpio_sysclass = {
-	.name		= "gpio",
+struct syscore_ops pxa_gpio_syscore_ops = {
 	.suspend	= pxa_gpio_suspend,
 	.resume		= pxa_gpio_resume,
 };
-
-static int __init pxa_gpio_init(void)
-{
-	return sysdev_class_register(&pxa_gpio_sysclass);
-}
-
-core_initcall(pxa_gpio_init);
diff --git a/arch/arm/plat-pxa/mfp.c b/arch/arm/plat-pxa/mfp.c
index a9aa5ad3f4eb..be12eadcce20 100644
--- a/arch/arm/plat-pxa/mfp.c
+++ b/arch/arm/plat-pxa/mfp.c
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/sysdev.h>
 
 #include <plat/mfp.h>
 
-- 
cgit v1.2.3


From bb072c3cf21d1c9a5a2eeb5a00679ee7bf39675b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 22 Apr 2011 22:03:21 +0200
Subject: ARM / Samsung: Use struct syscore_ops for "core" power management

Replace sysdev classes and struct sys_device objects used for "core"
power management by Samsung platforms with struct syscore_ops objects
that are simpler.

This generally reduces the code size and the kernel memory footprint.
It also is necessary for removing sysdevs entirely from the kernel in
the future.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/mach-exynos4/pm.c               | 45 ++++++++++++---------
 arch/arm/mach-s3c2410/irq.c              | 30 +-------------
 arch/arm/mach-s3c2410/mach-bast.c        | 17 +++-----
 arch/arm/mach-s3c2410/pm.c               | 13 +++---
 arch/arm/mach-s3c2410/s3c2410.c          |  5 +++
 arch/arm/mach-s3c2412/irq.c              |  2 -
 arch/arm/mach-s3c2412/mach-jive.c        | 19 +++------
 arch/arm/mach-s3c2412/pm.c               | 27 +++++++------
 arch/arm/mach-s3c2412/s3c2412.c          |  4 ++
 arch/arm/mach-s3c2416/irq.c              |  2 -
 arch/arm/mach-s3c2416/pm.c               | 27 ++++++-------
 arch/arm/mach-s3c2416/s3c2416.c          |  5 +++
 arch/arm/mach-s3c2440/mach-osiris.c      | 18 +++------
 arch/arm/mach-s3c2440/s3c2440.c          |  8 ++++
 arch/arm/mach-s3c2440/s3c2442.c          |  6 +++
 arch/arm/mach-s3c2440/s3c244x-irq.c      |  4 --
 arch/arm/mach-s3c2440/s3c244x.c          | 62 ++++++++++++++---------------
 arch/arm/mach-s3c64xx/irq-pm.c           | 18 ++++-----
 arch/arm/mach-s5pv210/pm.c               | 25 ++++++++----
 arch/arm/plat-s3c24xx/dma.c              | 68 ++++++++++++--------------------
 arch/arm/plat-s3c24xx/irq-pm.c           |  7 +---
 arch/arm/plat-s5p/irq-pm.c               |  7 +---
 arch/arm/plat-samsung/include/plat/cpu.h |  6 +++
 arch/arm/plat-samsung/include/plat/pm.h  |  6 ++-
 24 files changed, 203 insertions(+), 228 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-exynos4/pm.c b/arch/arm/mach-exynos4/pm.c
index 10d917d9e3ad..8755ca8dd48d 100644
--- a/arch/arm/mach-exynos4/pm.c
+++ b/arch/arm/mach-exynos4/pm.c
@@ -16,6 +16,7 @@
 
 #include <linux/init.h>
 #include <linux/suspend.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
@@ -372,7 +373,27 @@ void exynos4_scu_enable(void __iomem *scu_base)
 	flush_cache_all();
 }
 
-static int exynos4_pm_resume(struct sys_device *dev)
+static struct sysdev_driver exynos4_pm_driver = {
+	.add		= exynos4_pm_add,
+};
+
+static __init int exynos4_pm_drvinit(void)
+{
+	unsigned int tmp;
+
+	s3c_pm_init();
+
+	/* All wakeup disable */
+
+	tmp = __raw_readl(S5P_WAKEUP_MASK);
+	tmp |= ((0xFF << 8) | (0x1F << 1));
+	__raw_writel(tmp, S5P_WAKEUP_MASK);
+
+	return sysdev_driver_register(&exynos4_sysclass, &exynos4_pm_driver);
+}
+arch_initcall(exynos4_pm_drvinit);
+
+static void exynos4_pm_resume(void)
 {
 	/* For release retention */
 
@@ -394,27 +415,15 @@ static int exynos4_pm_resume(struct sys_device *dev)
 	/* enable L2X0*/
 	writel_relaxed(1, S5P_VA_L2CC + L2X0_CTRL);
 #endif
-
-	return 0;
 }
 
-static struct sysdev_driver exynos4_pm_driver = {
-	.add		= exynos4_pm_add,
+static struct syscore_ops exynos4_pm_syscore_ops = {
 	.resume		= exynos4_pm_resume,
 };
 
-static __init int exynos4_pm_drvinit(void)
+static __init int exynos4_pm_syscore_init(void)
 {
-	unsigned int tmp;
-
-	s3c_pm_init();
-
-	/* All wakeup disable */
-
-	tmp = __raw_readl(S5P_WAKEUP_MASK);
-	tmp |= ((0xFF << 8) | (0x1F << 1));
-	__raw_writel(tmp, S5P_WAKEUP_MASK);
-
-	return sysdev_driver_register(&exynos4_sysclass, &exynos4_pm_driver);
+	register_syscore_ops(&exynos4_pm_syscore_ops);
+	return 0;
 }
-arch_initcall(exynos4_pm_drvinit);
+arch_initcall(exynos4_pm_syscore_init);
diff --git a/arch/arm/mach-s3c2410/irq.c b/arch/arm/mach-s3c2410/irq.c
index 5e2f35332056..2854129f8cc7 100644
--- a/arch/arm/mach-s3c2410/irq.c
+++ b/arch/arm/mach-s3c2410/irq.c
@@ -23,38 +23,12 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <plat/cpu.h>
 #include <plat/pm.h>
 
-static int s3c2410_irq_add(struct sys_device *sysdev)
-{
-	return 0;
-}
-
-static struct sysdev_driver s3c2410_irq_driver = {
-	.add		= s3c2410_irq_add,
+struct syscore_ops s3c24xx_irq_syscore_ops = {
 	.suspend	= s3c24xx_irq_suspend,
 	.resume		= s3c24xx_irq_resume,
 };
-
-static int __init s3c2410_irq_init(void)
-{
-	return sysdev_driver_register(&s3c2410_sysclass, &s3c2410_irq_driver);
-}
-
-arch_initcall(s3c2410_irq_init);
-
-static struct sysdev_driver s3c2410a_irq_driver = {
-	.add		= s3c2410_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
-};
-
-static int __init s3c2410a_irq_init(void)
-{
-	return sysdev_driver_register(&s3c2410a_sysclass, &s3c2410a_irq_driver);
-}
-
-arch_initcall(s3c2410a_irq_init);
diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 2970ea9f7c2b..1e2d536adda9 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -17,7 +17,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/gpio.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/dm9000.h>
@@ -214,17 +214,16 @@ static struct s3c2410_uartcfg bast_uartcfgs[] __initdata = {
 /* NAND Flash on BAST board */
 
 #ifdef CONFIG_PM
-static int bast_pm_suspend(struct sys_device *sd, pm_message_t state)
+static int bast_pm_suspend(void)
 {
 	/* ensure that an nRESET is not generated on resume. */
 	gpio_direction_output(S3C2410_GPA(21), 1);
 	return 0;
 }
 
-static int bast_pm_resume(struct sys_device *sd)
+static void bast_pm_resume(void)
 {
 	s3c_gpio_cfgpin(S3C2410_GPA(21), S3C2410_GPA21_nRSTOUT);
-	return 0;
 }
 
 #else
@@ -232,16 +231,11 @@ static int bast_pm_resume(struct sys_device *sd)
 #define bast_pm_resume NULL
 #endif
 
-static struct sysdev_class bast_pm_sysclass = {
-	.name		= "mach-bast",
+static struct syscore_ops bast_pm_syscore_ops = {
 	.suspend	= bast_pm_suspend,
 	.resume		= bast_pm_resume,
 };
 
-static struct sys_device bast_pm_sysdev = {
-	.cls		= &bast_pm_sysclass,
-};
-
 static int smartmedia_map[] = { 0 };
 static int chip0_map[] = { 1 };
 static int chip1_map[] = { 2 };
@@ -642,8 +636,7 @@ static void __init bast_map_io(void)
 
 static void __init bast_init(void)
 {
-	sysdev_class_register(&bast_pm_sysclass);
-	sysdev_register(&bast_pm_sysdev);
+	register_syscore_ops(&bast_pm_syscore_ops);
 
 	s3c_i2c0_set_platdata(&bast_i2c_info);
 	s3c_nand_set_platdata(&bast_nand_info);
diff --git a/arch/arm/mach-s3c2410/pm.c b/arch/arm/mach-s3c2410/pm.c
index 725636fc4dc3..4728f9aa7df1 100644
--- a/arch/arm/mach-s3c2410/pm.c
+++ b/arch/arm/mach-s3c2410/pm.c
@@ -25,6 +25,7 @@
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/gpio.h>
 #include <linux/io.h>
 
@@ -92,7 +93,7 @@ static void s3c2410_pm_prepare(void)
 	}
 }
 
-static int s3c2410_pm_resume(struct sys_device *dev)
+static void s3c2410_pm_resume(void)
 {
 	unsigned long tmp;
 
@@ -104,10 +105,12 @@ static int s3c2410_pm_resume(struct sys_device *dev)
 
 	if ( machine_is_aml_m5900() )
 		s3c2410_gpio_setpin(S3C2410_GPF(2), 0);
-
-	return 0;
 }
 
+struct syscore_ops s3c2410_pm_syscore_ops = {
+	.resume		= s3c2410_pm_resume,
+};
+
 static int s3c2410_pm_add(struct sys_device *dev)
 {
 	pm_cpu_prep = s3c2410_pm_prepare;
@@ -119,7 +122,6 @@ static int s3c2410_pm_add(struct sys_device *dev)
 #if defined(CONFIG_CPU_S3C2410)
 static struct sysdev_driver s3c2410_pm_driver = {
 	.add		= s3c2410_pm_add,
-	.resume		= s3c2410_pm_resume,
 };
 
 /* register ourselves */
@@ -133,7 +135,6 @@ arch_initcall(s3c2410_pm_drvinit);
 
 static struct sysdev_driver s3c2410a_pm_driver = {
 	.add		= s3c2410_pm_add,
-	.resume		= s3c2410_pm_resume,
 };
 
 static int __init s3c2410a_pm_drvinit(void)
@@ -147,7 +148,6 @@ arch_initcall(s3c2410a_pm_drvinit);
 #if defined(CONFIG_CPU_S3C2440)
 static struct sysdev_driver s3c2440_pm_driver = {
 	.add		= s3c2410_pm_add,
-	.resume		= s3c2410_pm_resume,
 };
 
 static int __init s3c2440_pm_drvinit(void)
@@ -161,7 +161,6 @@ arch_initcall(s3c2440_pm_drvinit);
 #if defined(CONFIG_CPU_S3C2442)
 static struct sysdev_driver s3c2442_pm_driver = {
 	.add		= s3c2410_pm_add,
-	.resume		= s3c2410_pm_resume,
 };
 
 static int __init s3c2442_pm_drvinit(void)
diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c
index adc90a3c5890..f1d3bd8f6f17 100644
--- a/arch/arm/mach-s3c2410/s3c2410.c
+++ b/arch/arm/mach-s3c2410/s3c2410.c
@@ -19,6 +19,7 @@
 #include <linux/gpio.h>
 #include <linux/clk.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
@@ -40,6 +41,7 @@
 #include <plat/devs.h>
 #include <plat/clock.h>
 #include <plat/pll.h>
+#include <plat/pm.h>
 
 #include <plat/gpio-core.h>
 #include <plat/gpio-cfg.h>
@@ -168,6 +170,9 @@ int __init s3c2410_init(void)
 {
 	printk("S3C2410: Initialising architecture\n");
 
+	register_syscore_ops(&s3c2410_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	return sysdev_register(&s3c2410_sysdev);
 }
 
diff --git a/arch/arm/mach-s3c2412/irq.c b/arch/arm/mach-s3c2412/irq.c
index f3355d2ec634..1a1aa220972b 100644
--- a/arch/arm/mach-s3c2412/irq.c
+++ b/arch/arm/mach-s3c2412/irq.c
@@ -202,8 +202,6 @@ static int s3c2412_irq_add(struct sys_device *sysdev)
 
 static struct sysdev_driver s3c2412_irq_driver = {
 	.add		= s3c2412_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
 };
 
 static int s3c2412_irq_init(void)
diff --git a/arch/arm/mach-s3c2412/mach-jive.c b/arch/arm/mach-s3c2412/mach-jive.c
index 923e01bdf017..85dcaeb9e62f 100644
--- a/arch/arm/mach-s3c2412/mach-jive.c
+++ b/arch/arm/mach-s3c2412/mach-jive.c
@@ -17,7 +17,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/gpio.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
@@ -486,7 +486,7 @@ static struct s3c2410_udc_mach_info jive_udc_cfg __initdata = {
 /* Jive power management device */
 
 #ifdef CONFIG_PM
-static int jive_pm_suspend(struct sys_device *sd, pm_message_t state)
+static int jive_pm_suspend(void)
 {
 	/* Write the magic value u-boot uses to check for resume into
 	 * the INFORM0 register, and ensure INFORM1 is set to the
@@ -498,10 +498,9 @@ static int jive_pm_suspend(struct sys_device *sd, pm_message_t state)
 	return 0;
 }
 
-static int jive_pm_resume(struct sys_device *sd)
+static void jive_pm_resume(void)
 {
 	__raw_writel(0x0, S3C2412_INFORM0);
-	return 0;
 }
 
 #else
@@ -509,16 +508,11 @@ static int jive_pm_resume(struct sys_device *sd)
 #define jive_pm_resume NULL
 #endif
 
-static struct sysdev_class jive_pm_sysclass = {
-	.name		= "jive-pm",
+static struct syscore_ops jive_pm_syscore_ops = {
 	.suspend	= jive_pm_suspend,
 	.resume		= jive_pm_resume,
 };
 
-static struct sys_device jive_pm_sysdev = {
-	.cls		= &jive_pm_sysclass,
-};
-
 static void __init jive_map_io(void)
 {
 	s3c24xx_init_io(jive_iodesc, ARRAY_SIZE(jive_iodesc));
@@ -536,10 +530,9 @@ static void jive_power_off(void)
 
 static void __init jive_machine_init(void)
 {
-	/* register system devices for managing low level suspend */
+	/* register system core operations for managing low level suspend */
 
-	sysdev_class_register(&jive_pm_sysclass);
-	sysdev_register(&jive_pm_sysdev);
+	register_syscore_ops(&jive_pm_syscore_ops);
 
 	/* write our sleep configurations for the IO. Pull down all unused
 	 * IO, ensure that we have turned off all peripherals we do not
diff --git a/arch/arm/mach-s3c2412/pm.c b/arch/arm/mach-s3c2412/pm.c
index a7417c479ffe..752b13a7b3db 100644
--- a/arch/arm/mach-s3c2412/pm.c
+++ b/arch/arm/mach-s3c2412/pm.c
@@ -17,6 +17,7 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 
@@ -86,13 +87,24 @@ static struct sleep_save s3c2412_sleep[] = {
 	SAVE_ITEM(S3C2413_GPJSLPCON),
 };
 
-static int s3c2412_pm_suspend(struct sys_device *dev, pm_message_t state)
+static struct sysdev_driver s3c2412_pm_driver = {
+	.add		= s3c2412_pm_add,
+};
+
+static __init int s3c2412_pm_init(void)
+{
+	return sysdev_driver_register(&s3c2412_sysclass, &s3c2412_pm_driver);
+}
+
+arch_initcall(s3c2412_pm_init);
+
+static int s3c2412_pm_suspend(void)
 {
 	s3c_pm_do_save(s3c2412_sleep, ARRAY_SIZE(s3c2412_sleep));
 	return 0;
 }
 
-static int s3c2412_pm_resume(struct sys_device *dev)
+static void s3c2412_pm_resume(void)
 {
 	unsigned long tmp;
 
@@ -102,18 +114,9 @@ static int s3c2412_pm_resume(struct sys_device *dev)
 	__raw_writel(tmp, S3C2412_PWRCFG);
 
 	s3c_pm_do_restore(s3c2412_sleep, ARRAY_SIZE(s3c2412_sleep));
-	return 0;
 }
 
-static struct sysdev_driver s3c2412_pm_driver = {
-	.add		= s3c2412_pm_add,
+struct syscore_ops s3c2412_pm_syscore_ops = {
 	.suspend	= s3c2412_pm_suspend,
 	.resume		= s3c2412_pm_resume,
 };
-
-static __init int s3c2412_pm_init(void)
-{
-	return sysdev_driver_register(&s3c2412_sysclass, &s3c2412_pm_driver);
-}
-
-arch_initcall(s3c2412_pm_init);
diff --git a/arch/arm/mach-s3c2412/s3c2412.c b/arch/arm/mach-s3c2412/s3c2412.c
index 4c6df51ddf33..ef0958d3e5c6 100644
--- a/arch/arm/mach-s3c2412/s3c2412.c
+++ b/arch/arm/mach-s3c2412/s3c2412.c
@@ -19,6 +19,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
@@ -244,5 +245,8 @@ int __init s3c2412_init(void)
 {
 	printk("S3C2412: Initialising architecture\n");
 
+	register_syscore_ops(&s3c2412_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	return sysdev_register(&s3c2412_sysdev);
 }
diff --git a/arch/arm/mach-s3c2416/irq.c b/arch/arm/mach-s3c2416/irq.c
index 77b38f2381c1..28ad20d42445 100644
--- a/arch/arm/mach-s3c2416/irq.c
+++ b/arch/arm/mach-s3c2416/irq.c
@@ -236,8 +236,6 @@ static int __init s3c2416_irq_add(struct sys_device *sysdev)
 
 static struct sysdev_driver s3c2416_irq_driver = {
 	.add		= s3c2416_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
 };
 
 static int __init s3c2416_irq_init(void)
diff --git a/arch/arm/mach-s3c2416/pm.c b/arch/arm/mach-s3c2416/pm.c
index 4a04205b04d5..41db2b21e213 100644
--- a/arch/arm/mach-s3c2416/pm.c
+++ b/arch/arm/mach-s3c2416/pm.c
@@ -11,6 +11,7 @@
 */
 
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
@@ -55,30 +56,26 @@ static int s3c2416_pm_add(struct sys_device *sysdev)
 	return 0;
 }
 
-static int s3c2416_pm_suspend(struct sys_device *dev, pm_message_t state)
+static struct sysdev_driver s3c2416_pm_driver = {
+	.add		= s3c2416_pm_add,
+};
+
+static __init int s3c2416_pm_init(void)
 {
-	return 0;
+	return sysdev_driver_register(&s3c2416_sysclass, &s3c2416_pm_driver);
 }
 
-static int s3c2416_pm_resume(struct sys_device *dev)
+arch_initcall(s3c2416_pm_init);
+
+
+static void s3c2416_pm_resume(void)
 {
 	/* unset the return-from-sleep amd inform flags */
 	__raw_writel(0x0, S3C2443_PWRMODE);
 	__raw_writel(0x0, S3C2412_INFORM0);
 	__raw_writel(0x0, S3C2412_INFORM1);
-
-	return 0;
 }
 
-static struct sysdev_driver s3c2416_pm_driver = {
-	.add		= s3c2416_pm_add,
-	.suspend	= s3c2416_pm_suspend,
+struct syscore_ops s3c2416_pm_syscore_ops = {
 	.resume		= s3c2416_pm_resume,
 };
-
-static __init int s3c2416_pm_init(void)
-{
-	return sysdev_driver_register(&s3c2416_sysclass, &s3c2416_pm_driver);
-}
-
-arch_initcall(s3c2416_pm_init);
diff --git a/arch/arm/mach-s3c2416/s3c2416.c b/arch/arm/mach-s3c2416/s3c2416.c
index ba7fd8737434..494ce913dc95 100644
--- a/arch/arm/mach-s3c2416/s3c2416.c
+++ b/arch/arm/mach-s3c2416/s3c2416.c
@@ -32,6 +32,7 @@
 #include <linux/platform_device.h>
 #include <linux/serial_core.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/clk.h>
 #include <linux/io.h>
 
@@ -54,6 +55,7 @@
 #include <plat/devs.h>
 #include <plat/cpu.h>
 #include <plat/sdhci.h>
+#include <plat/pm.h>
 
 #include <plat/iic-core.h>
 #include <plat/fb-core.h>
@@ -95,6 +97,9 @@ int __init s3c2416_init(void)
 
 	s3c_fb_setname("s3c2443-fb");
 
+	register_syscore_ops(&s3c2416_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	return sysdev_register(&s3c2416_sysdev);
 }
 
diff --git a/arch/arm/mach-s3c2440/mach-osiris.c b/arch/arm/mach-s3c2440/mach-osiris.c
index 14dc67897757..d88536393310 100644
--- a/arch/arm/mach-s3c2440/mach-osiris.c
+++ b/arch/arm/mach-s3c2440/mach-osiris.c
@@ -17,7 +17,7 @@
 #include <linux/init.h>
 #include <linux/gpio.h>
 #include <linux/device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/serial_core.h>
 #include <linux/clk.h>
 #include <linux/i2c.h>
@@ -284,7 +284,7 @@ static struct platform_device osiris_pcmcia = {
 #ifdef CONFIG_PM
 static unsigned char pm_osiris_ctrl0;
 
-static int osiris_pm_suspend(struct sys_device *sd, pm_message_t state)
+static int osiris_pm_suspend(void)
 {
 	unsigned int tmp;
 
@@ -304,7 +304,7 @@ static int osiris_pm_suspend(struct sys_device *sd, pm_message_t state)
 	return 0;
 }
 
-static int osiris_pm_resume(struct sys_device *sd)
+static void osiris_pm_resume(void)
 {
 	if (pm_osiris_ctrl0 & OSIRIS_CTRL0_FIX8)
 		__raw_writeb(OSIRIS_CTRL1_FIX8, OSIRIS_VA_CTRL1);
@@ -312,8 +312,6 @@ static int osiris_pm_resume(struct sys_device *sd)
 	__raw_writeb(pm_osiris_ctrl0, OSIRIS_VA_CTRL0);
 
 	s3c_gpio_cfgpin(S3C2410_GPA(21), S3C2410_GPA21_nRSTOUT);
-
-	return 0;
 }
 
 #else
@@ -321,16 +319,11 @@ static int osiris_pm_resume(struct sys_device *sd)
 #define osiris_pm_resume NULL
 #endif
 
-static struct sysdev_class osiris_pm_sysclass = {
-	.name		= "mach-osiris",
+static struct syscore_ops osiris_pm_syscore_ops = {
 	.suspend	= osiris_pm_suspend,
 	.resume		= osiris_pm_resume,
 };
 
-static struct sys_device osiris_pm_sysdev = {
-	.cls		= &osiris_pm_sysclass,
-};
-
 /* Link for DVS driver to TPS65011 */
 
 static void osiris_tps_release(struct device *dev)
@@ -439,8 +432,7 @@ static void __init osiris_map_io(void)
 
 static void __init osiris_init(void)
 {
-	sysdev_class_register(&osiris_pm_sysclass);
-	sysdev_register(&osiris_pm_sysdev);
+	register_syscore_ops(&osiris_pm_syscore_ops);
 
 	s3c_i2c0_set_platdata(NULL);
 	s3c_nand_set_platdata(&osiris_nand_info);
diff --git a/arch/arm/mach-s3c2440/s3c2440.c b/arch/arm/mach-s3c2440/s3c2440.c
index f7663f731ea0..ce99ff72838d 100644
--- a/arch/arm/mach-s3c2440/s3c2440.c
+++ b/arch/arm/mach-s3c2440/s3c2440.c
@@ -19,6 +19,7 @@
 #include <linux/platform_device.h>
 #include <linux/serial_core.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/gpio.h>
 #include <linux/clk.h>
 #include <linux/io.h>
@@ -33,6 +34,7 @@
 #include <plat/devs.h>
 #include <plat/cpu.h>
 #include <plat/s3c244x.h>
+#include <plat/pm.h>
 
 #include <plat/gpio-core.h>
 #include <plat/gpio-cfg.h>
@@ -51,6 +53,12 @@ int __init s3c2440_init(void)
 	s3c_device_wdt.resource[1].start = IRQ_S3C2440_WDT;
 	s3c_device_wdt.resource[1].end   = IRQ_S3C2440_WDT;
 
+	/* register suspend/resume handlers */
+
+	register_syscore_ops(&s3c2410_pm_syscore_ops);
+	register_syscore_ops(&s3c244x_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	/* register our system device for everything else */
 
 	return sysdev_register(&s3c2440_sysdev);
diff --git a/arch/arm/mach-s3c2440/s3c2442.c b/arch/arm/mach-s3c2440/s3c2442.c
index ecf813546554..6224bad4d604 100644
--- a/arch/arm/mach-s3c2440/s3c2442.c
+++ b/arch/arm/mach-s3c2440/s3c2442.c
@@ -29,6 +29,7 @@
 #include <linux/err.h>
 #include <linux/device.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/mutex.h>
@@ -45,6 +46,7 @@
 #include <plat/clock.h>
 #include <plat/cpu.h>
 #include <plat/s3c244x.h>
+#include <plat/pm.h>
 
 #include <plat/gpio-core.h>
 #include <plat/gpio-cfg.h>
@@ -167,6 +169,10 @@ int __init s3c2442_init(void)
 {
 	printk("S3C2442: Initialising architecture\n");
 
+	register_syscore_ops(&s3c2410_pm_syscore_ops);
+	register_syscore_ops(&s3c244x_pm_syscore_ops);
+	register_syscore_ops(&s3c24xx_irq_syscore_ops);
+
 	return sysdev_register(&s3c2442_sysdev);
 }
 
diff --git a/arch/arm/mach-s3c2440/s3c244x-irq.c b/arch/arm/mach-s3c2440/s3c244x-irq.c
index de07c2feaa32..c63e8f26d901 100644
--- a/arch/arm/mach-s3c2440/s3c244x-irq.c
+++ b/arch/arm/mach-s3c2440/s3c244x-irq.c
@@ -116,8 +116,6 @@ static int s3c244x_irq_add(struct sys_device *sysdev)
 
 static struct sysdev_driver s3c2440_irq_driver = {
 	.add		= s3c244x_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
 };
 
 static int s3c2440_irq_init(void)
@@ -129,8 +127,6 @@ arch_initcall(s3c2440_irq_init);
 
 static struct sysdev_driver s3c2442_irq_driver = {
 	.add		= s3c244x_irq_add,
-	.suspend	= s3c24xx_irq_suspend,
-	.resume		= s3c24xx_irq_resume,
 };
 
 
diff --git a/arch/arm/mach-s3c2440/s3c244x.c b/arch/arm/mach-s3c2440/s3c244x.c
index 90c1707b9c95..7e8a23d2098a 100644
--- a/arch/arm/mach-s3c2440/s3c244x.c
+++ b/arch/arm/mach-s3c2440/s3c244x.c
@@ -19,6 +19,7 @@
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/clk.h>
 #include <linux/io.h>
 
@@ -134,45 +135,14 @@ void __init s3c244x_init_clocks(int xtal)
 	s3c2410_baseclk_add();
 }
 
-#ifdef CONFIG_PM
-
-static struct sleep_save s3c244x_sleep[] = {
-	SAVE_ITEM(S3C2440_DSC0),
-	SAVE_ITEM(S3C2440_DSC1),
-	SAVE_ITEM(S3C2440_GPJDAT),
-	SAVE_ITEM(S3C2440_GPJCON),
-	SAVE_ITEM(S3C2440_GPJUP)
-};
-
-static int s3c244x_suspend(struct sys_device *dev, pm_message_t state)
-{
-	s3c_pm_do_save(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
-	return 0;
-}
-
-static int s3c244x_resume(struct sys_device *dev)
-{
-	s3c_pm_do_restore(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
-	return 0;
-}
-
-#else
-#define s3c244x_suspend NULL
-#define s3c244x_resume  NULL
-#endif
-
 /* Since the S3C2442 and S3C2440 share  items, put both sysclasses here */
 
 struct sysdev_class s3c2440_sysclass = {
 	.name		= "s3c2440-core",
-	.suspend	= s3c244x_suspend,
-	.resume		= s3c244x_resume
 };
 
 struct sysdev_class s3c2442_sysclass = {
 	.name		= "s3c2442-core",
-	.suspend	= s3c244x_suspend,
-	.resume		= s3c244x_resume
 };
 
 /* need to register class before we actually register the device, and
@@ -194,3 +164,33 @@ static int __init s3c2442_core_init(void)
 }
 
 core_initcall(s3c2442_core_init);
+
+
+#ifdef CONFIG_PM
+static struct sleep_save s3c244x_sleep[] = {
+	SAVE_ITEM(S3C2440_DSC0),
+	SAVE_ITEM(S3C2440_DSC1),
+	SAVE_ITEM(S3C2440_GPJDAT),
+	SAVE_ITEM(S3C2440_GPJCON),
+	SAVE_ITEM(S3C2440_GPJUP)
+};
+
+static int s3c244x_suspend(void)
+{
+	s3c_pm_do_save(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
+	return 0;
+}
+
+static void s3c244x_resume(void)
+{
+	s3c_pm_do_restore(s3c244x_sleep, ARRAY_SIZE(s3c244x_sleep));
+}
+#else
+#define s3c244x_suspend NULL
+#define s3c244x_resume  NULL
+#endif
+
+struct syscore_ops s3c244x_pm_syscore_ops = {
+	.suspend	= s3c244x_suspend,
+	.resume		= s3c244x_resume,
+};
diff --git a/arch/arm/mach-s3c64xx/irq-pm.c b/arch/arm/mach-s3c64xx/irq-pm.c
index da1bec64b9da..8bec61e242c7 100644
--- a/arch/arm/mach-s3c64xx/irq-pm.c
+++ b/arch/arm/mach-s3c64xx/irq-pm.c
@@ -13,7 +13,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/serial_core.h>
 #include <linux/irq.h>
@@ -54,7 +54,7 @@ static struct irq_grp_save {
 
 static u32 irq_uart_mask[CONFIG_SERIAL_SAMSUNG_UARTS];
 
-static int s3c64xx_irq_pm_suspend(struct sys_device *dev, pm_message_t state)
+static int s3c64xx_irq_pm_suspend(void)
 {
 	struct irq_grp_save *grp = eint_grp_save;
 	int i;
@@ -75,7 +75,7 @@ static int s3c64xx_irq_pm_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int s3c64xx_irq_pm_resume(struct sys_device *dev)
+static void s3c64xx_irq_pm_resume(void)
 {
 	struct irq_grp_save *grp = eint_grp_save;
 	int i;
@@ -94,18 +94,18 @@ static int s3c64xx_irq_pm_resume(struct sys_device *dev)
 	}
 
 	S3C_PMDBG("%s: IRQ configuration restored\n", __func__);
-	return 0;
 }
 
-static struct sysdev_driver s3c64xx_irq_driver = {
+struct syscore_ops s3c64xx_irq_syscore_ops = {
 	.suspend = s3c64xx_irq_pm_suspend,
 	.resume	 = s3c64xx_irq_pm_resume,
 };
 
-static int __init s3c64xx_irq_pm_init(void)
+static __init int s3c64xx_syscore_init(void)
 {
-	return sysdev_driver_register(&s3c64xx_sysclass, &s3c64xx_irq_driver);
-}
+	register_syscore_ops(&s3c64xx_irq_syscore_ops);
 
-arch_initcall(s3c64xx_irq_pm_init);
+	return 0;
+}
 
+core_initcall(s3c64xx_syscore_init);
diff --git a/arch/arm/mach-s5pv210/pm.c b/arch/arm/mach-s5pv210/pm.c
index 549d7924fd4c..24febae3d4c0 100644
--- a/arch/arm/mach-s5pv210/pm.c
+++ b/arch/arm/mach-s5pv210/pm.c
@@ -16,6 +16,7 @@
 
 #include <linux/init.h>
 #include <linux/suspend.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 
 #include <plat/cpu.h>
@@ -140,7 +141,17 @@ static int s5pv210_pm_add(struct sys_device *sysdev)
 	return 0;
 }
 
-static int s5pv210_pm_resume(struct sys_device *dev)
+static struct sysdev_driver s5pv210_pm_driver = {
+	.add		= s5pv210_pm_add,
+};
+
+static __init int s5pv210_pm_drvinit(void)
+{
+	return sysdev_driver_register(&s5pv210_sysclass, &s5pv210_pm_driver);
+}
+arch_initcall(s5pv210_pm_drvinit);
+
+static void s5pv210_pm_resume(void)
 {
 	u32 tmp;
 
@@ -150,17 +161,15 @@ static int s5pv210_pm_resume(struct sys_device *dev)
 	__raw_writel(tmp , S5P_OTHERS);
 
 	s3c_pm_do_restore_core(s5pv210_core_save, ARRAY_SIZE(s5pv210_core_save));
-
-	return 0;
 }
 
-static struct sysdev_driver s5pv210_pm_driver = {
-	.add		= s5pv210_pm_add,
+static struct syscore_ops s5pv210_pm_syscore_ops = {
 	.resume		= s5pv210_pm_resume,
 };
 
-static __init int s5pv210_pm_drvinit(void)
+static __init int s5pv210_pm_syscore_init(void)
 {
-	return sysdev_driver_register(&s5pv210_sysclass, &s5pv210_pm_driver);
+	register_syscore_ops(&s5pv210_pm_syscore_ops);
+	return 0;
 }
-arch_initcall(s5pv210_pm_drvinit);
+arch_initcall(s5pv210_pm_syscore_init);
diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c
index 27ea852e3370..c10d10c56e2e 100644
--- a/arch/arm/plat-s3c24xx/dma.c
+++ b/arch/arm/plat-s3c24xx/dma.c
@@ -22,7 +22,7 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/io.h>
@@ -1195,19 +1195,12 @@ int s3c2410_dma_getposition(unsigned int channel, dma_addr_t *src, dma_addr_t *d
 
 EXPORT_SYMBOL(s3c2410_dma_getposition);
 
-static inline struct s3c2410_dma_chan *to_dma_chan(struct sys_device *dev)
-{
-	return container_of(dev, struct s3c2410_dma_chan, dev);
-}
-
-/* system device class */
+/* system core operations */
 
 #ifdef CONFIG_PM
 
-static int s3c2410_dma_suspend(struct sys_device *dev, pm_message_t state)
+static void s3c2410_dma_suspend_chan(s3c2410_dma_chan *cp)
 {
-	struct s3c2410_dma_chan *cp = to_dma_chan(dev);
-
 	printk(KERN_DEBUG "suspending dma channel %d\n", cp->number);
 
 	if (dma_rdreg(cp, S3C2410_DMA_DMASKTRIG) & S3C2410_DMASKTRIG_ON) {
@@ -1222,13 +1215,21 @@ static int s3c2410_dma_suspend(struct sys_device *dev, pm_message_t state)
 
 		s3c2410_dma_dostop(cp);
 	}
+}
+
+static int s3c2410_dma_suspend(void)
+{
+	struct s3c2410_dma_chan *cp = s3c2410_chans;
+	int channel;
+
+	for (channel = 0; channel < dma_channels; cp++, channel++)
+		s3c2410_dma_suspend_chan(cp);
 
 	return 0;
 }
 
-static int s3c2410_dma_resume(struct sys_device *dev)
+static void s3c2410_dma_resume_chan(struct s3c2410_dma_chan *cp)
 {
-	struct s3c2410_dma_chan *cp = to_dma_chan(dev);
 	unsigned int no = cp->number | DMACH_LOW_LEVEL;
 
 	/* restore channel's hardware configuration */
@@ -1249,13 +1250,21 @@ static int s3c2410_dma_resume(struct sys_device *dev)
 	return 0;
 }
 
+static void s3c2410_dma_resume(void)
+{
+	struct s3c2410_dma_chan *cp = s3c2410_chans + dma_channels - 1;
+	int channel;
+
+	for (channel = dma_channels - 1; channel >= 0; cp++, channel--)
+		s3c2410_dma_resume_chan(cp);
+}
+
 #else
 #define s3c2410_dma_suspend NULL
 #define s3c2410_dma_resume  NULL
 #endif /* CONFIG_PM */
 
-struct sysdev_class dma_sysclass = {
-	.name		= "s3c24xx-dma",
+struct syscore_ops dma_syscore_ops = {
 	.suspend	= s3c2410_dma_suspend,
 	.resume		= s3c2410_dma_resume,
 };
@@ -1269,39 +1278,14 @@ static void s3c2410_dma_cache_ctor(void *p)
 
 /* initialisation code */
 
-static int __init s3c24xx_dma_sysclass_init(void)
+static int __init s3c24xx_dma_syscore_init(void)
 {
-	int ret = sysdev_class_register(&dma_sysclass);
-
-	if (ret != 0)
-		printk(KERN_ERR "dma sysclass registration failed\n");
-
-	return ret;
-}
-
-core_initcall(s3c24xx_dma_sysclass_init);
-
-static int __init s3c24xx_dma_sysdev_register(void)
-{
-	struct s3c2410_dma_chan *cp = s3c2410_chans;
-	int channel, ret;
-
-	for (channel = 0; channel < dma_channels; cp++, channel++) {
-		cp->dev.cls = &dma_sysclass;
-		cp->dev.id  = channel;
-		ret = sysdev_register(&cp->dev);
-
-		if (ret) {
-			printk(KERN_ERR "error registering dev for dma %d\n",
-			       channel);
- 			return ret;
-		}
-	}
+	register_syscore_ops(&dma_syscore_ops);
 
 	return 0;
 }
 
-late_initcall(s3c24xx_dma_sysdev_register);
+late_initcall(s3c24xx_dma_syscore_init);
 
 int __init s3c24xx_dma_init(unsigned int channels, unsigned int irq,
 			    unsigned int stride)
diff --git a/arch/arm/plat-s3c24xx/irq-pm.c b/arch/arm/plat-s3c24xx/irq-pm.c
index c3624d898630..0efb2e2848c8 100644
--- a/arch/arm/plat-s3c24xx/irq-pm.c
+++ b/arch/arm/plat-s3c24xx/irq-pm.c
@@ -14,7 +14,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
 #include <linux/irq.h>
 
 #include <plat/cpu.h>
@@ -65,7 +64,7 @@ static unsigned long save_extint[3];
 static unsigned long save_eintflt[4];
 static unsigned long save_eintmask;
 
-int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state)
+int s3c24xx_irq_suspend(void)
 {
 	unsigned int i;
 
@@ -81,7 +80,7 @@ int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-int s3c24xx_irq_resume(struct sys_device *dev)
+void s3c24xx_irq_resume(void)
 {
 	unsigned int i;
 
@@ -93,6 +92,4 @@ int s3c24xx_irq_resume(struct sys_device *dev)
 
 	s3c_pm_do_restore(irq_save, ARRAY_SIZE(irq_save));
 	__raw_writel(save_eintmask, S3C24XX_EINTMASK);
-
-	return 0;
 }
diff --git a/arch/arm/plat-s5p/irq-pm.c b/arch/arm/plat-s5p/irq-pm.c
index 5259ad458bc8..327acb3a4464 100644
--- a/arch/arm/plat-s5p/irq-pm.c
+++ b/arch/arm/plat-s5p/irq-pm.c
@@ -16,7 +16,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
 
 #include <plat/cpu.h>
 #include <plat/irqs.h>
@@ -77,17 +76,15 @@ static struct sleep_save eint_save[] = {
 	SAVE_ITEM(S5P_EINT_MASK(3)),
 };
 
-int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state)
+int s3c24xx_irq_suspend(void)
 {
 	s3c_pm_do_save(eint_save, ARRAY_SIZE(eint_save));
 
 	return 0;
 }
 
-int s3c24xx_irq_resume(struct sys_device *dev)
+void s3c24xx_irq_resume(void)
 {
 	s3c_pm_do_restore(eint_save, ARRAY_SIZE(eint_save));
-
-	return 0;
 }
 
diff --git a/arch/arm/plat-samsung/include/plat/cpu.h b/arch/arm/plat-samsung/include/plat/cpu.h
index cedfff51c82b..3aedac0034ba 100644
--- a/arch/arm/plat-samsung/include/plat/cpu.h
+++ b/arch/arm/plat-samsung/include/plat/cpu.h
@@ -68,6 +68,12 @@ extern void s3c24xx_init_uartdevs(char *name,
 struct sys_timer;
 extern struct sys_timer s3c24xx_timer;
 
+extern struct syscore_ops s3c2410_pm_syscore_ops;
+extern struct syscore_ops s3c2412_pm_syscore_ops;
+extern struct syscore_ops s3c2416_pm_syscore_ops;
+extern struct syscore_ops s3c244x_pm_syscore_ops;
+extern struct syscore_ops s3c64xx_irq_syscore_ops;
+
 /* system device classes */
 
 extern struct sysdev_class s3c2410_sysclass;
diff --git a/arch/arm/plat-samsung/include/plat/pm.h b/arch/arm/plat-samsung/include/plat/pm.h
index 937cc2ace517..7fb6f6be8c81 100644
--- a/arch/arm/plat-samsung/include/plat/pm.h
+++ b/arch/arm/plat-samsung/include/plat/pm.h
@@ -103,14 +103,16 @@ extern void s3c_pm_do_restore_core(struct sleep_save *ptr, int count);
 
 #ifdef CONFIG_PM
 extern int s3c_irqext_wake(struct irq_data *data, unsigned int state);
-extern int s3c24xx_irq_suspend(struct sys_device *dev, pm_message_t state);
-extern int s3c24xx_irq_resume(struct sys_device *dev);
+extern int s3c24xx_irq_suspend(void);
+extern void s3c24xx_irq_resume(void);
 #else
 #define s3c_irqext_wake NULL
 #define s3c24xx_irq_suspend NULL
 #define s3c24xx_irq_resume  NULL
 #endif
 
+extern struct syscore_ops s3c24xx_irq_syscore_ops;
+
 /* PM debug functions */
 
 #ifdef CONFIG_SAMSUNG_PM_DEBUG
-- 
cgit v1.2.3


From 67f9cbf9affe39f67cd3f1d2e2a2a43089d9ab3a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 22 Apr 2011 22:03:31 +0200
Subject: PM / Blackfin: Use struct syscore_ops instead of sysdevs for PM

Convert some Blackfin architecture's code to using struct syscore_ops
objects for power management instead of sysdev classes and sysdevs.

This simplifies the code and reduces the kernel's memory footprint.
It also is necessary for removing sysdevs from the kernel entirely in
the future.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Mike Frysinger <vapier@gentoo.org>
---
 arch/blackfin/kernel/nmi.c | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

(limited to 'arch')

diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c
index 0b5f72f17fd0..401eb1d8e3b4 100644
--- a/arch/blackfin/kernel/nmi.c
+++ b/arch/blackfin/kernel/nmi.c
@@ -12,7 +12,7 @@
 
 #include <linux/bitops.h>
 #include <linux/hardirq.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/pm.h>
 #include <linux/nmi.h>
 #include <linux/smp.h>
@@ -196,43 +196,31 @@ void touch_nmi_watchdog(void)
 
 /* Suspend/resume support */
 #ifdef CONFIG_PM
-static int nmi_wdt_suspend(struct sys_device *dev, pm_message_t state)
+static int nmi_wdt_suspend(void)
 {
 	nmi_wdt_stop();
 	return 0;
 }
 
-static int nmi_wdt_resume(struct sys_device *dev)
+static void nmi_wdt_resume(void)
 {
 	if (nmi_active)
 		nmi_wdt_start();
-	return 0;
 }
 
-static struct sysdev_class nmi_sysclass = {
-	.name		= DRV_NAME,
+static struct syscore_ops nmi_syscore_ops = {
 	.resume		= nmi_wdt_resume,
 	.suspend	= nmi_wdt_suspend,
 };
 
-static struct sys_device device_nmi_wdt = {
-	.id	= 0,
-	.cls	= &nmi_sysclass,
-};
-
-static int __init init_nmi_wdt_sysfs(void)
+static int __init init_nmi_wdt_syscore(void)
 {
-	int error;
-
-	if (!nmi_active)
-		return 0;
+	if (nmi_active)
+		register_syscore_ops(&nmi_syscore_ops);
 
-	error = sysdev_class_register(&nmi_sysclass);
-	if (!error)
-		error = sysdev_register(&device_nmi_wdt);
-	return error;
+	return 0;
 }
-late_initcall(init_nmi_wdt_sysfs);
+late_initcall(init_nmi_wdt_syscore);
 
 #endif	/* CONFIG_PM */
 
-- 
cgit v1.2.3


From 745b1f4f62852340e97a67d8c414d52fa1529185 Mon Sep 17 00:00:00 2001
From: Paul Parsons <lost.distance@yahoo.com>
Date: Fri, 15 Apr 2011 14:39:27 +0000
Subject: ARM: pxa/hx4700: bq24022 regulator needs to be enabled

Add REGULATOR_CHANGE_STATUS flag to hx4700 bq24022 regulator. Without this
flag the bq24022 cannot be enabled and the battery will not charge.

Signed-off-by: Paul Parsons <lost.distance@yahoo.com>
Cc: Philipp Zabel <philipp.zabel@gmail.com>
Tested-by: Dmitry Artamonow <mad_soft@inbox.ru>
Signed-off-by: Eric Miao <eric.y.miao@gmail.com>
---
 arch/arm/mach-pxa/hx4700.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 6de0ad0eea65..9cdcca597924 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -711,7 +711,7 @@ static struct regulator_consumer_supply bq24022_consumers[] = {
 static struct regulator_init_data bq24022_init_data = {
 	.constraints = {
 		.max_uA         = 500000,
-		.valid_ops_mask = REGULATOR_CHANGE_CURRENT,
+		.valid_ops_mask = REGULATOR_CHANGE_CURRENT|REGULATOR_CHANGE_STATUS,
 	},
 	.num_consumer_supplies  = ARRAY_SIZE(bq24022_consumers),
 	.consumer_supplies      = bq24022_consumers,
-- 
cgit v1.2.3


From e454d1632000b6e86003209811bda1186b34f8f3 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Wed, 20 Apr 2011 15:41:29 +0200
Subject: ARM: pxa/magician: bq24022 regulator needs to be enabled

Add REGULATOR_CHANGE_STATUS flag to magician bq24022 regulator to enable charging.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: Eric Miao <eric.y.miao@gmail.com>
---
 arch/arm/mach-pxa/magician.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index a72993dde2b3..9984ef70bd79 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -599,7 +599,7 @@ static struct regulator_consumer_supply bq24022_consumers[] = {
 static struct regulator_init_data bq24022_init_data = {
 	.constraints = {
 		.max_uA         = 500000,
-		.valid_ops_mask = REGULATOR_CHANGE_CURRENT,
+		.valid_ops_mask = REGULATOR_CHANGE_CURRENT | REGULATOR_CHANGE_STATUS,
 	},
 	.num_consumer_supplies  = ARRAY_SIZE(bq24022_consumers),
 	.consumer_supplies      = bq24022_consumers,
-- 
cgit v1.2.3


From ad10e1051ec97a7e56e001eb1e9721b3d883e030 Mon Sep 17 00:00:00 2001
From: Michael Williamson <michael.williamson@criticallink.com>
Date: Wed, 23 Mar 2011 12:15:41 +0000
Subject: davinci: mityomapl138: Use correct id for NAND controller

For the MityDSP-L138/MityARM-1808 SOMS, the NAND controller id (which needs
to correspond to the chipselect, and is used for controlling the HW ECC
computation) is not correct.  Fix it.

Signed-off-by: Michael Williamson <michael.williamson@criticallink.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/board-mityomapl138.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index 2aa79c54f98e..e5d554cc79a6 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -414,7 +414,7 @@ static struct resource mityomapl138_nandflash_resource[] = {
 
 static struct platform_device mityomapl138_nandflash_device = {
 	.name		= "davinci_nand",
-	.id		= 0,
+	.id		= 1,
 	.dev		= {
 		.platform_data	= &mityomapl138_nandflash_data,
 	},
-- 
cgit v1.2.3


From 336f402790defe4c4d33838b41bcd239537a5431 Mon Sep 17 00:00:00 2001
From: Michael Williamson <michael.williamson@criticallink.com>
Date: Wed, 23 Mar 2011 12:15:42 +0000
Subject: davinci: mityomapl138: Use auto-probe to determine attached PHY ID

Current board configurations involving the MityDSP-L138 and MityARM-1808
only have one attached PHY, but it's address may not be the same.  Default
the behavior to auto-probe for the PHY and use the first one found.

Signed-off-by: Michael Williamson <michael.williamson@criticallink.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/board-mityomapl138.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index e5d554cc79a6..606a6f27ed6c 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -29,7 +29,7 @@
 #include <mach/mux.h>
 #include <mach/spi.h>
 
-#define MITYOMAPL138_PHY_ID		"0:03"
+#define MITYOMAPL138_PHY_ID		""
 
 #define FACTORY_CONFIG_MAGIC	0x012C0138
 #define FACTORY_CONFIG_VERSION	0x00010001
-- 
cgit v1.2.3


From 9e7d24f622fc57ff5d58af3d44ba503430148354 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Wed, 6 Apr 2011 17:17:21 +0000
Subject: DA830: fix SPI1 base address

Commit 54ce6883d29630ff334bee4256a25e3f8719a181 (davinci: da8xx: add spi
resources and registration routine) wrongly assumed that SPI1 is mapped at
the same address on DA830/OMAP-L137 and DA850/OMAP-L138; actually, the base
address was valid only for the latter SoC. Teach the code to pass the correct
SPI1 memory resource for both SoCs...

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/devices-da8xx.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index 625d4b66718b..58a02dc7b15a 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -39,7 +39,8 @@
 #define DA8XX_GPIO_BASE			0x01e26000
 #define DA8XX_I2C1_BASE			0x01e28000
 #define DA8XX_SPI0_BASE			0x01c41000
-#define DA8XX_SPI1_BASE			0x01f0e000
+#define DA830_SPI1_BASE			0x01e12000
+#define DA850_SPI1_BASE			0x01f0e000
 
 #define DA8XX_EMAC_CTRL_REG_OFFSET	0x3000
 #define DA8XX_EMAC_MOD_REG_OFFSET	0x2000
@@ -762,8 +763,8 @@ static struct resource da8xx_spi0_resources[] = {
 
 static struct resource da8xx_spi1_resources[] = {
 	[0] = {
-		.start	= DA8XX_SPI1_BASE,
-		.end	= DA8XX_SPI1_BASE + SZ_4K - 1,
+		.start	= DA830_SPI1_BASE,
+		.end	= DA830_SPI1_BASE + SZ_4K - 1,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
@@ -832,5 +833,10 @@ int __init da8xx_register_spi(int instance, struct spi_board_info *info,
 
 	da8xx_spi_pdata[instance].num_chipselect = len;
 
+	if (instance == 1 && cpu_is_davinci_da850()) {
+		da8xx_spi1_resources[0].start = DA850_SPI1_BASE;
+		da8xx_spi1_resources[0].end = DA850_SPI1_BASE + SZ_4K - 1;
+	}
+
 	return platform_device_register(&da8xx_spi_device[instance]);
 }
-- 
cgit v1.2.3


From 45b146d746ea1b7f87b023a79d5186d0e87793eb Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Date: Fri, 15 Apr 2011 12:32:40 +0000
Subject: ARM: Davinci: Fix I2C build errors

Several Davinci platforms select the I2C EEPROM support, but don't
select I2C support.  This causes I2C EEPROM support to be built into
the kernel, but I2C support may not be configured to be built in.
This leads to linker errors due to missing I2C symbols.

Arrange for I2C to be selected whenever EEPROM_AT24 is selected.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/Kconfig | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig
index 32f147998cd9..c0deacae778d 100644
--- a/arch/arm/mach-davinci/Kconfig
+++ b/arch/arm/mach-davinci/Kconfig
@@ -63,6 +63,7 @@ config MACH_DAVINCI_EVM
 	depends on ARCH_DAVINCI_DM644x
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Configure this option to specify the whether the board used
 	  for development is a DM644x EVM
@@ -72,6 +73,7 @@ config MACH_SFFSDR
 	depends on ARCH_DAVINCI_DM644x
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Say Y here to select the Lyrtech Small Form Factor
 	  Software Defined Radio (SFFSDR) board.
@@ -105,6 +107,7 @@ config MACH_DAVINCI_DM6467_EVM
 	select MACH_DAVINCI_DM6467TEVM
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Configure this option to specify the whether the board used
 	  for development is a DM6467 EVM
@@ -118,6 +121,7 @@ config MACH_DAVINCI_DM365_EVM
 	depends on ARCH_DAVINCI_DM365
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Configure this option to specify whether the board used
 	  for development is a DM365 EVM
@@ -129,6 +133,7 @@ config MACH_DAVINCI_DA830_EVM
 	select GPIO_PCF857X
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Say Y here to select the TI DA830/OMAP-L137/AM17x Evaluation Module.
 
@@ -205,6 +210,7 @@ config MACH_MITYOMAPL138
 	depends on ARCH_DAVINCI_DA850
 	select MISC_DEVICES
 	select EEPROM_AT24
+	select I2C
 	help
 	  Say Y here to select the Critical Link MityDSP-L138/MityARM-1808
 	  System on Module.  Information on this SoM may be found at
-- 
cgit v1.2.3


From 87dc669ba25777b67796d7262c569429e58b1ed4 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 8 Apr 2011 17:29:36 +0200
Subject: x86, hw_breakpoints: Fix racy access to ptrace breakpoints

While the tracer accesses ptrace breakpoints, the child task may
concurrently exit due to a SIGKILL and thus release its breakpoints
at the same time. We can then dereference some freed pointers.

To fix this, hold a reference on the child breakpoints before
manipulating them.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: v2.6.33.. <stable@kernel.org>
Link: http://lkml.kernel.org/r/1302284067-7860-3-git-send-email-fweisbec@gmail.com
---
 arch/x86/kernel/ptrace.c | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 45892dc4b72a..f65e5b521dbd 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -608,6 +608,9 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
 	unsigned len, type;
 	struct perf_event *bp;
 
+	if (ptrace_get_breakpoints(tsk) < 0)
+		return -ESRCH;
+
 	data &= ~DR_CONTROL_RESERVED;
 	old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
 restore:
@@ -655,6 +658,9 @@ restore:
 		}
 		goto restore;
 	}
+
+	ptrace_put_breakpoints(tsk);
+
 	return ((orig_ret < 0) ? orig_ret : rc);
 }
 
@@ -668,10 +674,17 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
 
 	if (n < HBP_NUM) {
 		struct perf_event *bp;
+
+		if (ptrace_get_breakpoints(tsk) < 0)
+			return -ESRCH;
+
 		bp = thread->ptrace_bps[n];
 		if (!bp)
-			return 0;
-		val = bp->hw.info.address;
+			val = 0;
+		else
+			val = bp->hw.info.address;
+
+		ptrace_put_breakpoints(tsk);
 	} else if (n == 6) {
 		val = thread->debugreg6;
 	 } else if (n == 7) {
@@ -686,6 +699,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 	struct perf_event *bp;
 	struct thread_struct *t = &tsk->thread;
 	struct perf_event_attr attr;
+	int err = 0;
+
+	if (ptrace_get_breakpoints(tsk) < 0)
+		return -ESRCH;
 
 	if (!t->ptrace_bps[nr]) {
 		ptrace_breakpoint_init(&attr);
@@ -709,24 +726,23 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 		 * writing for the user. And anyway this is the previous
 		 * behaviour.
 		 */
-		if (IS_ERR(bp))
-			return PTR_ERR(bp);
+		if (IS_ERR(bp)) {
+			err = PTR_ERR(bp);
+			goto put;
+		}
 
 		t->ptrace_bps[nr] = bp;
 	} else {
-		int err;
-
 		bp = t->ptrace_bps[nr];
 
 		attr = bp->attr;
 		attr.bp_addr = addr;
 		err = modify_user_hw_breakpoint(bp, &attr);
-		if (err)
-			return err;
 	}
 
-
-	return 0;
+put:
+	ptrace_put_breakpoints(tsk);
+	return err;
 }
 
 /*
-- 
cgit v1.2.3


From 07fa7a0a8a586c01a8b416358c7012dcb9dc688d Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 8 Apr 2011 17:29:36 +0200
Subject: powerpc, hw_breakpoints: Fix racy access to ptrace breakpoints

While the tracer accesses ptrace breakpoints, the child task may
concurrently exit due to a SIGKILL and thus release its breakpoints
at the same time. We can then dereference some freed pointers.

To fix this, hold a reference on the child breakpoints before
manipulating them.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Prasad <prasad@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: v2.6.33.. <stable@kernel.org>
Link: http://lkml.kernel.org/r/1302284067-7860-4-git-send-email-fweisbec@gmail.com
---
 arch/powerpc/kernel/ptrace.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 55613e33e263..4edeeb325429 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1591,7 +1591,10 @@ long arch_ptrace(struct task_struct *child, long request,
 	}
 
 	case PTRACE_SET_DEBUGREG:
+		if (ptrace_get_breakpoints(child) < 0)
+			return -ESRCH;
 		ret = ptrace_set_debugreg(child, addr, data);
+		ptrace_put_breakpoints(child);
 		break;
 
 #ifdef CONFIG_PPC64
-- 
cgit v1.2.3


From bf0b8f4b55e591ba417c2dbaff42769e1fc773b0 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 8 Apr 2011 17:29:36 +0200
Subject: arm, hw_breakpoints: Fix racy access to ptrace breakpoints

While the tracer accesses ptrace breakpoints, the child task may
concurrently exit due to a SIGKILL and thus release its breakpoints
at the same time. We can then dereference some freed pointers.

To fix this, hold a reference on the child breakpoints before
manipulating them.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Link: http://lkml.kernel.org/r/1302284067-7860-5-git-send-email-fweisbec@gmail.com
---
 arch/arm/kernel/ptrace.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 2bf27f364d09..8182f45ca493 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -767,12 +767,20 @@ long arch_ptrace(struct task_struct *child, long request,
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		case PTRACE_GETHBPREGS:
+			if (ptrace_get_breakpoints(child) < 0)
+				return -ESRCH;
+
 			ret = ptrace_gethbpregs(child, addr,
 						(unsigned long __user *)data);
+			ptrace_put_breakpoints(child);
 			break;
 		case PTRACE_SETHBPREGS:
+			if (ptrace_get_breakpoints(child) < 0)
+				return -ESRCH;
+
 			ret = ptrace_sethbpregs(child, addr,
 						(unsigned long __user *)data);
+			ptrace_put_breakpoints(child);
 			break;
 #endif
 
-- 
cgit v1.2.3


From e0ac8457d020c0289ea566917267da9e5e6d9865 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 8 Apr 2011 17:29:36 +0200
Subject: sh, hw_breakpoints: Fix racy access to ptrace breakpoints

While the tracer accesses ptrace breakpoints, the child task may
concurrently exit due to a SIGKILL and thus release its breakpoints
at the same time. We can then dereference some freed pointers.

To fix this, hold a reference on the child breakpoints before
manipulating them.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1302284067-7860-6-git-send-email-fweisbec@gmail.com
---
 arch/sh/kernel/ptrace_32.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 2130ca674e9b..3d7b209b2178 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -117,7 +117,11 @@ void user_enable_single_step(struct task_struct *child)
 
 	set_tsk_thread_flag(child, TIF_SINGLESTEP);
 
+	if (ptrace_get_breakpoints(child) < 0)
+		return;
+
 	set_single_step(child, pc);
+	ptrace_put_breakpoints(child);
 }
 
 void user_disable_single_step(struct task_struct *child)
-- 
cgit v1.2.3


From 39b68976ac653cfdc7f872a293e8b7928de2dcc6 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Mon, 25 Apr 2011 14:52:37 -0700
Subject: x86, setup: When probing memory with e801, use ax/bx as a pair

When we use BIOS function e801 to probe memory, we should use ax/bx
(or cx/dx) as a pair, not mix and match.  This was a typo during the
translation from assembly code, and breaks at least one set of
machines in the field (which return cx = dx = 0).

Reported-and-tested-by: Chris Samuel <chris@csamuel.org>
Fix-proposed-by: Thomas Meyer <thomas@m3y3r.de>
Link: http://lkml.kernel.org/r/1303566747.12067.10.camel@localhost.localdomain
---
 arch/x86/boot/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index cae3feb1035e..db75d07c3645 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -91,7 +91,7 @@ static int detect_memory_e801(void)
 	if (oreg.ax > 15*1024) {
 		return -1;	/* Bogus! */
 	} else if (oreg.ax == 15*1024) {
-		boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax;
+		boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
 	} else {
 		/*
 		 * This ignores memory above 16MB if we have a memory
-- 
cgit v1.2.3


From 6ba5932ca4b610d036cb89d0ce2a465d06504c4d Mon Sep 17 00:00:00 2001
From: Oskar Andero <oskar.andero@sonyericsson.com>
Date: Tue, 26 Apr 2011 02:24:50 -0700
Subject: arm: omap2: enable smc instruction for sleep34xx

This fixes broken build when using binutils 2.21.

Signed-off-by: Oskar Andero <oskar.andero@sonyericsson.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index a45cd6409686..512b15204450 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -68,7 +68,7 @@ obj-$(CONFIG_OMAP_SMARTREFLEX)          += sr_device.o smartreflex.o
 obj-$(CONFIG_OMAP_SMARTREFLEX_CLASS3)	+= smartreflex-class3.o
 
 AFLAGS_sleep24xx.o			:=-Wa,-march=armv6
-AFLAGS_sleep34xx.o			:=-Wa,-march=armv7-a
+AFLAGS_sleep34xx.o			:=-Wa,-march=armv7-a$(plus_sec)
 
 ifeq ($(CONFIG_PM_VERBOSE),y)
 CFLAGS_pm_bus.o				+= -DDEBUG
-- 
cgit v1.2.3


From bc16b3777ec3749c086a17f81c99f8643f4a6576 Mon Sep 17 00:00:00 2001
From: omar ramirez <omar.ramirez@ti.com>
Date: Tue, 26 Apr 2011 02:24:50 -0700
Subject: OMAP3: l3: fix for "irq 10: nobody cared" message

If an error occurs in the L3 on any other initiator than MPU,
the interrupt goes unhandled given that the 'base' register
was calculated with the initialized err_source value (which
coincidentally points to MPU) and not with the actual source
of the error.

Removed parenthesis that are not needed for the touched lines.

Signed-off-by: Omar Ramirez Luna <omar.ramirez@ti.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/omap_l3_smx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/omap_l3_smx.c b/arch/arm/mach-omap2/omap_l3_smx.c
index 5f2da7565b68..4321e7938929 100644
--- a/arch/arm/mach-omap2/omap_l3_smx.c
+++ b/arch/arm/mach-omap2/omap_l3_smx.c
@@ -196,11 +196,11 @@ static irqreturn_t omap3_l3_app_irq(int irq, void *_l3)
 		/* No timeout error for debug sources */
 	}
 
-	base = ((l3->rt) + (*(omap3_l3_bases[int_type] + err_source)));
-
 	/* identify the error source */
 	for (err_source = 0; !(status & (1 << err_source)); err_source++)
 									;
+
+	base = l3->rt + *(omap3_l3_bases[int_type] + err_source);
 	error = omap3_l3_readll(base, L3_ERROR_LOG);
 
 	if (error) {
-- 
cgit v1.2.3


From 26a064d5246e8ac51241d6ec2792aebf24f3b41a Mon Sep 17 00:00:00 2001
From: Felipe Contreras <felipe.contreras@gmail.com>
Date: Tue, 26 Apr 2011 02:45:28 -0700
Subject: omap: rx51: mark reserved memory earlier

So that omap_vram_set_sdram_vram() is called before
omap_vram_reserve_sdram_memblock().

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
Acked-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/board-rx51.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c
index e964895b80e8..f8ba20a14e62 100644
--- a/arch/arm/mach-omap2/board-rx51.c
+++ b/arch/arm/mach-omap2/board-rx51.c
@@ -141,14 +141,19 @@ static void __init rx51_init(void)
 static void __init rx51_map_io(void)
 {
 	omap2_set_globals_3xxx();
-	rx51_video_mem_init();
 	omap34xx_map_common_io();
 }
 
+static void __init rx51_reserve(void)
+{
+	rx51_video_mem_init();
+	omap_reserve();
+}
+
 MACHINE_START(NOKIA_RX51, "Nokia RX-51 board")
 	/* Maintainer: Lauri Leukkunen <lauri.leukkunen@nokia.com> */
 	.boot_params	= 0x80000100,
-	.reserve	= omap_reserve,
+	.reserve	= rx51_reserve,
 	.map_io		= rx51_map_io,
 	.init_early	= rx51_init_early,
 	.init_irq	= omap_init_irq,
-- 
cgit v1.2.3


From 919686458fabc67a13ffa412f9e5a8fed46d10b8 Mon Sep 17 00:00:00 2001
From: Shweta Gulati <shweta.gulati@ti.com>
Date: Tue, 26 Apr 2011 02:32:26 -0700
Subject: OMAP4: Intialize IVA Device in addition to DSP device.

OMAP4 has two different Devices IVA and DSP. DSP is bound
with IVA for DVFS. The registration of IVA dev in API
'omap2_init_processor_devices' was missing. Init dev for
'iva_dev' is added.

This also fixes the following error seen during boot as
omap2_set_init_voltage can now find the iva device

	omap2_set_init_voltage: Invalid parameters!
	omap2_set_init_voltage: Unable to put vdd_iva to its init voltage

Signed-off-by: Shweta Gulati <shweta.gulati@ti.com>
Acked-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/pm.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 30af3351c2d6..49486f522dca 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -89,6 +89,7 @@ static void omap2_init_processor_devices(void)
 	if (cpu_is_omap44xx()) {
 		_init_omap_device("l3_main_1", &l3_dev);
 		_init_omap_device("dsp", &dsp_dev);
+		_init_omap_device("iva", &iva_dev);
 	} else {
 		_init_omap_device("l3_main", &l3_dev);
 	}
-- 
cgit v1.2.3


From 3f126087ee143775961947b39416aad03044c988 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Tue, 26 Apr 2011 02:33:10 -0700
Subject: OMAP3+: voltage: remove initial voltage

Blindly setting 1.2V in the initial structure may not even match the
default voltages stored in the voltage table which are supported for
the domain. For example, OMAP3430 core domain does not use 1.2V and
ends up generating a warning on the first transition.

Further, since omap2_set_init_voltage is called as part of the pm
framework's initialization sequence to configure the voltage required
for the current OPP, the call does(and has to) setup the system
voltage(curr_volt as a result) using the right mechanisms appropriate
for the system at that point of time. This also overrides
initialization we are currently doing in voltage.c making it
redundant. So, remove the wrong and redundant initialization.

Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Kevin Hilman <khilman@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/voltage.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/voltage.c b/arch/arm/mach-omap2/voltage.c
index 6fb520999b6e..0c1552d9d995 100644
--- a/arch/arm/mach-omap2/voltage.c
+++ b/arch/arm/mach-omap2/voltage.c
@@ -114,7 +114,6 @@ static int __init _config_common_vdd_data(struct omap_vdd_info *vdd)
 	sys_clk_speed /= 1000;
 
 	/* Generic voltage parameters */
-	vdd->curr_volt = 1200000;
 	vdd->volt_scale = vp_forceupdate_scale_voltage;
 	vdd->vp_enabled = false;
 
-- 
cgit v1.2.3


From 18a073a3acd3a47fbb5e23333df7fad28d576345 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 26 Apr 2011 13:24:33 +0200
Subject: perf, x86: Fix BTS condition

Currently the x86 backend incorrectly assumes that any BRANCH_INSN
with sample_period==1 is a BTS request. This is not true when we do
frequency driven profiling such as 'perf record -e branches'.

Solves this error:

  $ perf record -e branches ./array
  Error: sys_perf_event_open() syscall returned with 95 (Operation not supported).

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reported-by: Ingo Molnar <mingo@elte.hu>
Cc: "Metzger, Markus T" <markus.t.metzger@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-rd2y4ct71hjawzz6fpvsy9hg@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c       | 4 ++--
 arch/x86/kernel/cpu/perf_event_intel.c | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 632e5dc9c9c0..fac0654021b8 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -613,8 +613,8 @@ static int x86_setup_perfctr(struct perf_event *event)
 	/*
 	 * Branch tracing:
 	 */
-	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
-	    (hwc->sample_period == 1)) {
+	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+	    !attr->freq && hwc->sample_period == 1) {
 		/* BTS is not supported by this architecture. */
 		if (!x86_pmu.bts_active)
 			return -EOPNOTSUPP;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 43fa20b13817..9194b0698d63 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -998,6 +998,9 @@ intel_bts_constraints(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned int hw_event, bts_event;
 
+	if (event->attr.freq)
+		return NULL;
+
 	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
 	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
 
-- 
cgit v1.2.3


From ec75a71634dabe439db91c1ef51d5099f4493808 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 27 Apr 2011 11:51:41 +0200
Subject: perf events, x86: Work around the Nehalem AAJ80 erratum

On Nehalem CPUs the retired branch-misses event can be completely bogus,
when there are no branch-misses occuring. When there are a lot of branch
misses then the count is pretty accurate. Still, this leaves us with an
event that over-counts a lot.

Detect this erratum and work it around by using BR_MISP_EXEC.ANY events.
These will also count speculated branches but still it's a lot more
precise in practice than the architectural event.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-yyfg0bxo9jsqxd6a0ovfny27@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9194b0698d63..9ae4a2aa7398 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -25,7 +25,7 @@ struct intel_percore {
 /*
  * Intel PerfMon, used on Core and later.
  */
-static const u64 intel_perfmon_event_map[] =
+static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
 {
   [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
   [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
@@ -1308,7 +1308,7 @@ static void intel_clovertown_quirks(void)
 	 * AJ106 could possibly be worked around by not allowing LBR
 	 *       usage from PEBS, including the fixup.
 	 * AJ68  could possibly be worked around by always programming
-	 * 	 a pebs_event_reset[0] value and coping with the lost events.
+	 *	 a pebs_event_reset[0] value and coping with the lost events.
 	 *
 	 * But taken together it might just make sense to not enable PEBS on
 	 * these chips.
@@ -1412,6 +1412,18 @@ static __init int intel_pmu_init(void)
 		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
+
+		if (ebx & 0x40) {
+			/*
+			 * Erratum AAJ80 detected, we work it around by using
+			 * the BR_MISP_EXEC.ANY event. This will over-count
+			 * branch-misses, but it's still much better than the
+			 * architectural event which is often completely bogus:
+			 */
+			intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+
+			pr_cont("erratum AAJ80 worked around, ");
+		}
 		pr_cont("Nehalem events, ");
 		break;
 
-- 
cgit v1.2.3


From 94403f8863d0d1d2005291b2ef0719c2534aa303 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 24 Apr 2011 08:18:31 +0200
Subject: perf events: Add stalled cycles generic event -
 PERF_COUNT_HW_STALLED_CYCLES

The new PERF_COUNT_HW_STALLED_CYCLES event tries to approximate
cycles the CPU does nothing useful, because it is stalled on a
cache-miss or some other condition.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-fue11vymwqsoo5to72jxxjyl@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 3 +++
 include/linux/perf_event.h             | 1 +
 tools/perf/util/parse-events.c         | 1 +
 tools/perf/util/python.c               | 1 +
 4 files changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9ae4a2aa7398..efa2704c9dfd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1413,6 +1413,9 @@ static __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
+		/* Install the stalled-cycles event: 0xff: All reasons, 0xa2: Resource stalls */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0xffa2;
+
 		if (ebx & 0x40) {
 			/*
 			 * Erratum AAJ80 detected, we work it around by using
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ee9f1e782800..ac636dd20a0c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -52,6 +52,7 @@ enum perf_hw_id {
 	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
 	PERF_COUNT_HW_BRANCH_MISSES		= 5,
 	PERF_COUNT_HW_BUS_CYCLES		= 6,
+	PERF_COUNT_HW_STALLED_CYCLES		= 7,
 
 	PERF_COUNT_HW_MAX,			/* non-ABI */
 };
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 952b4ae3d954..1869e4c646db 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -38,6 +38,7 @@ static struct event_symbol event_symbols[] = {
   { CHW(BRANCH_INSTRUCTIONS),	"branch-instructions",	"branches"	},
   { CHW(BRANCH_MISSES),		"branch-misses",	""		},
   { CHW(BUS_CYCLES),		"bus-cycles",		""		},
+  { CHW(STALLED_CYCLES),	"stalled-cycles",	""		},
 
   { CSW(CPU_CLOCK),		"cpu-clock",		""		},
   { CSW(TASK_CLOCK),		"task-clock",		""		},
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index f5e38451fdc5..406f613ee619 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -798,6 +798,7 @@ static struct {
 	{ "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
 	{ "COUNT_HW_BRANCH_MISSES",	  PERF_COUNT_HW_BRANCH_MISSES },
 	{ "COUNT_HW_BUS_CYCLES",	  PERF_COUNT_HW_BUS_CYCLES },
+	{ "COUNT_HW_STALLED_CYCLES",	  PERF_COUNT_HW_STALLED_CYCLES },
 	{ "COUNT_HW_CACHE_L1D",		  PERF_COUNT_HW_CACHE_L1D },
 	{ "COUNT_HW_CACHE_L1I",		  PERF_COUNT_HW_CACHE_L1I },
 	{ "COUNT_HW_CACHE_LL",	  	  PERF_COUNT_HW_CACHE_LL },
-- 
cgit v1.2.3


From 5c543e3c442d6382db127152c7096ca6a55283de Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 27 Apr 2011 12:02:04 +0200
Subject: perf events, x86: Mark constrant tables read mostly

Various constraint tables were not marked read-mostly.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-wpqwwvmhxucy5e718wnamjiv@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index efa2704c9dfd..067a48b13a76 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -36,7 +36,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
-static struct event_constraint intel_core_event_constraints[] =
+static struct event_constraint intel_core_event_constraints[] __read_mostly =
 {
 	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
 	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -47,7 +47,7 @@ static struct event_constraint intel_core_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_core2_event_constraints[] =
+static struct event_constraint intel_core2_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -70,7 +70,7 @@ static struct event_constraint intel_core2_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_nehalem_event_constraints[] =
+static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -86,19 +86,19 @@ static struct event_constraint intel_nehalem_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct extra_reg intel_nehalem_extra_regs[] =
+static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_nehalem_percore_constraints[] =
+static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
 {
 	INTEL_EVENT_CONSTRAINT(0xb7, 0),
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_westmere_event_constraints[] =
+static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -110,7 +110,7 @@ static struct event_constraint intel_westmere_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_snb_event_constraints[] =
+static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
@@ -123,21 +123,21 @@ static struct event_constraint intel_snb_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
-static struct extra_reg intel_westmere_extra_regs[] =
+static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
 	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
 	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_westmere_percore_constraints[] =
+static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
 {
 	INTEL_EVENT_CONSTRAINT(0xb7, 0),
 	INTEL_EVENT_CONSTRAINT(0xbb, 0),
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_gen_event_constraints[] =
+static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-- 
cgit v1.2.3


From 76b4eda866c4936af8d696f040abea56bf688e16 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 14 Apr 2011 22:32:01 +0000
Subject: powerpc: Add A2 cpu support

Add the cputable entry, regs and setup & restore entries for
the PowerPC A2 core.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/cputable.h    |   8 +-
 arch/powerpc/include/asm/reg_a2.h      | 156 +++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/Makefile           |   1 +
 arch/powerpc/kernel/cpu_setup_a2.S     | 114 ++++++++++++++++++++++++
 arch/powerpc/kernel/cputable.c         |  25 +++++-
 arch/powerpc/platforms/Kconfig.cputype |   4 +
 6 files changed, 304 insertions(+), 4 deletions(-)
 create mode 100644 arch/powerpc/include/asm/reg_a2.h
 create mode 100644 arch/powerpc/kernel/cpu_setup_a2.S

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 2fe37d781933..2d71523ebb03 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -437,9 +437,13 @@ extern const char *powerpc_base_platform;
 	    CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_NO_SLBIE_B)
 #define CPU_FTRS_COMPATIBLE	(CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
 
+#define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \
+	    CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN | \
+	    CPU_FTR_16M_PAGE)
+
 #ifdef __powerpc64__
 #ifdef CONFIG_PPC_BOOK3E
-#define CPU_FTRS_POSSIBLE	(CPU_FTRS_E5500)
+#define CPU_FTRS_POSSIBLE	(CPU_FTRS_E5500 | CPU_FTRS_A2)
 #else
 #define CPU_FTRS_POSSIBLE	\
 	    (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |	\
@@ -488,7 +492,7 @@ enum {
 
 #ifdef __powerpc64__
 #ifdef CONFIG_PPC_BOOK3E
-#define CPU_FTRS_ALWAYS		(CPU_FTRS_E5500)
+#define CPU_FTRS_ALWAYS		(CPU_FTRS_E5500 & CPU_FTRS_A2)
 #else
 #define CPU_FTRS_ALWAYS		\
 	    (CPU_FTRS_POWER3 & CPU_FTRS_RS64 & CPU_FTRS_POWER4 &	\
diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h
new file mode 100644
index 000000000000..3ba9c6f096fc
--- /dev/null
+++ b/arch/powerpc/include/asm/reg_a2.h
@@ -0,0 +1,156 @@
+/*
+ *  Register definitions specific to the A2 core
+ *
+ *  Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ASM_POWERPC_REG_A2_H__
+#define __ASM_POWERPC_REG_A2_H__
+
+#define SPRN_TENSR	0x1b5
+#define SPRN_TENS	0x1b6	/* Thread ENable Set */
+#define SPRN_TENC	0x1b7	/* Thread ENable Clear */
+
+#define SPRN_A2_CCR0	0x3f0	/* Core Configuration Register 0 */
+#define SPRN_A2_CCR1	0x3f1	/* Core Configuration Register 1 */
+#define SPRN_A2_CCR2	0x3f2	/* Core Configuration Register 2 */
+#define SPRN_MMUCR0	0x3fc	/* MMU Control Register 0 */
+#define SPRN_MMUCR1	0x3fd	/* MMU Control Register 1 */
+#define SPRN_MMUCR2	0x3fe	/* MMU Control Register 2 */
+#define SPRN_MMUCR3	0x3ff	/* MMU Control Register 3 */
+
+#define SPRN_IAR	0x372
+
+#define SPRN_IUCR0	0x3f3
+#define IUCR0_ICBI_ACK	0x1000
+
+#define SPRN_XUCR0	0x3f6	/* Execution Unit Config Register 0 */
+
+#define A2_IERAT_SIZE	16
+#define A2_DERAT_SIZE	32
+
+/* A2 MMUCR0 bits */
+#define MMUCR0_ECL	0x80000000	/* Extended Class for TLB fills */
+#define MMUCR0_TID_NZ	0x40000000	/* TID is non-zero */
+#define MMUCR0_TS	0x10000000	/* Translation space for TLB fills */
+#define MMUCR0_TGS	0x20000000	/* Guest space for TLB fills */
+#define MMUCR0_TLBSEL	0x0c000000	/* TLB or ERAT target for TLB fills */
+#define MMUCR0_TLBSEL_U	0x00000000	/*  TLBSEL = UTLB */
+#define MMUCR0_TLBSEL_I	0x08000000	/*  TLBSEL = I-ERAT */
+#define MMUCR0_TLBSEL_D	0x0c000000	/*  TLBSEL = D-ERAT */
+#define MMUCR0_LOCKSRSH	0x02000000	/* Use TLB lock on tlbsx. */
+#define MMUCR0_TID_MASK	0x000000ff	/* TID field */
+
+/* A2 MMUCR1 bits */
+#define MMUCR1_IRRE		0x80000000	/* I-ERAT round robin enable */
+#define MMUCR1_DRRE		0x40000000	/* D-ERAT round robin enable */
+#define MMUCR1_REE		0x20000000	/* Reference Exception Enable*/
+#define MMUCR1_CEE		0x10000000	/* Change exception enable */
+#define MMUCR1_CSINV_ALL	0x00000000	/* Inval ERAT on all CS evts */
+#define MMUCR1_CSINV_NISYNC	0x04000000	/* Inval ERAT on all ex isync*/
+#define MMUCR1_CSINV_NEVER	0x0c000000	/* Don't inval ERAT on CS */
+#define MMUCR1_ICTID		0x00080000	/* IERAT class field as TID */
+#define MMUCR1_ITTID		0x00040000	/* IERAT thdid field as TID */
+#define MMUCR1_DCTID		0x00020000	/* DERAT class field as TID */
+#define MMUCR1_DTTID		0x00010000	/* DERAT thdid field as TID */
+#define MMUCR1_DCCD		0x00008000	/* DERAT class ignore */
+#define MMUCR1_TLBWE_BINV	0x00004000	/* back invalidate on tlbwe */
+
+/* A2 MMUCR2 bits */
+#define MMUCR2_PSSEL_SHIFT	4
+
+/* A2 MMUCR3 bits */
+#define MMUCR3_THID		0x0000000f	/* Thread ID */
+
+/* *** ERAT TLB bits definitions */
+#define TLB0_EPN_MASK		ASM_CONST(0xfffffffffffff000)
+#define TLB0_CLASS_MASK		ASM_CONST(0x0000000000000c00)
+#define TLB0_CLASS_00		ASM_CONST(0x0000000000000000)
+#define TLB0_CLASS_01		ASM_CONST(0x0000000000000400)
+#define TLB0_CLASS_10		ASM_CONST(0x0000000000000800)
+#define TLB0_CLASS_11		ASM_CONST(0x0000000000000c00)
+#define TLB0_V			ASM_CONST(0x0000000000000200)
+#define TLB0_X			ASM_CONST(0x0000000000000100)
+#define TLB0_SIZE_MASK		ASM_CONST(0x00000000000000f0)
+#define TLB0_SIZE_4K		ASM_CONST(0x0000000000000010)
+#define TLB0_SIZE_64K		ASM_CONST(0x0000000000000030)
+#define TLB0_SIZE_1M		ASM_CONST(0x0000000000000050)
+#define TLB0_SIZE_16M		ASM_CONST(0x0000000000000070)
+#define TLB0_SIZE_1G		ASM_CONST(0x00000000000000a0)
+#define TLB0_THDID_MASK		ASM_CONST(0x000000000000000f)
+#define TLB0_THDID_0		ASM_CONST(0x0000000000000001)
+#define TLB0_THDID_1		ASM_CONST(0x0000000000000002)
+#define TLB0_THDID_2		ASM_CONST(0x0000000000000004)
+#define TLB0_THDID_3		ASM_CONST(0x0000000000000008)
+#define TLB0_THDID_ALL		ASM_CONST(0x000000000000000f)
+
+#define TLB1_RESVATTR		ASM_CONST(0x00f0000000000000)
+#define TLB1_U0			ASM_CONST(0x0008000000000000)
+#define TLB1_U1			ASM_CONST(0x0004000000000000)
+#define TLB1_U2			ASM_CONST(0x0002000000000000)
+#define TLB1_U3			ASM_CONST(0x0001000000000000)
+#define TLB1_R			ASM_CONST(0x0000800000000000)
+#define TLB1_C			ASM_CONST(0x0000400000000000)
+#define TLB1_RPN_MASK		ASM_CONST(0x000003fffffff000)
+#define TLB1_W			ASM_CONST(0x0000000000000800)
+#define TLB1_I			ASM_CONST(0x0000000000000400)
+#define TLB1_M			ASM_CONST(0x0000000000000200)
+#define TLB1_G			ASM_CONST(0x0000000000000100)
+#define TLB1_E			ASM_CONST(0x0000000000000080)
+#define TLB1_VF			ASM_CONST(0x0000000000000040)
+#define TLB1_UX			ASM_CONST(0x0000000000000020)
+#define TLB1_SX			ASM_CONST(0x0000000000000010)
+#define TLB1_UW			ASM_CONST(0x0000000000000008)
+#define TLB1_SW			ASM_CONST(0x0000000000000004)
+#define TLB1_UR			ASM_CONST(0x0000000000000002)
+#define TLB1_SR			ASM_CONST(0x0000000000000001)
+
+/* A2 erativax attributes definitions */
+#define ERATIVAX_RS_IS_ALL		0x000
+#define ERATIVAX_RS_IS_TID		0x040
+#define ERATIVAX_RS_IS_CLASS		0x080
+#define ERATIVAX_RS_IS_FULLMATCH	0x0c0
+#define ERATIVAX_CLASS_00		0x000
+#define ERATIVAX_CLASS_01		0x010
+#define ERATIVAX_CLASS_10		0x020
+#define ERATIVAX_CLASS_11		0x030
+#define ERATIVAX_PSIZE_4K		(TLB_PSIZE_4K >> 1)
+#define ERATIVAX_PSIZE_64K		(TLB_PSIZE_64K >> 1)
+#define ERATIVAX_PSIZE_1M		(TLB_PSIZE_1M >> 1)
+#define ERATIVAX_PSIZE_16M		(TLB_PSIZE_16M >> 1)
+#define ERATIVAX_PSIZE_1G		(TLB_PSIZE_1G >> 1)
+
+/* A2 eratilx attributes definitions */
+#define ERATILX_T_ALL			0
+#define ERATILX_T_TID			1
+#define ERATILX_T_TGS			2
+#define ERATILX_T_FULLMATCH		3
+#define ERATILX_T_CLASS0		4
+#define ERATILX_T_CLASS1		5
+#define ERATILX_T_CLASS2		6
+#define ERATILX_T_CLASS3		7
+
+/* XUCR0 bits */
+#define XUCR0_TRACE_UM_T0		0x40000000	/* Thread 0 */
+#define XUCR0_TRACE_UM_T1		0x20000000	/* Thread 1 */
+#define XUCR0_TRACE_UM_T2		0x10000000	/* Thread 2 */
+#define XUCR0_TRACE_UM_T3		0x08000000	/* Thread 3 */
+
+/* A2 CCR0 register */
+#define A2_CCR0_PME_DISABLED		0x00000000
+#define A2_CCR0_PME_SLEEP		0x40000000
+#define A2_CCR0_PME_RVW			0x80000000
+#define A2_CCR0_PME_DISABLED2		0xc0000000
+
+/* A2 CCR2 register */
+#define A2_CCR2_ERAT_ONLY_MODE		0x00000001
+#define A2_CCR2_ENABLE_ICSWX		0x00000002
+#define A2_CCR2_ENABLE_PC		0x20000000
+#define A2_CCR2_ENABLE_TRACE		0x40000000
+
+#endif /* __ASM_POWERPC_REG_A2_H__ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 0fd6273bb8a9..058bc8bac488 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power7.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
+obj-$(CONFIG_PPC_A2)		+= cpu_setup_a2.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
 obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o
diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S
new file mode 100644
index 000000000000..7f818feaa7a5
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_a2.S
@@ -0,0 +1,114 @@
+/*
+ *  A2 specific assembly support code
+ *
+ *  Copyright 2009 Ben Herrenschmidt, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/processor.h>
+#include <asm/reg_a2.h>
+#include <asm/reg.h>
+#include <asm/thread_info.h>
+
+/*
+ * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity.
+ * This also prevents external LPID accesses but that isn't a problem when not a
+ * guest. Under PV, this setting will be ignored and MMUCR will return the right
+ * number of PID bits we can use.
+ */
+#define MMUCR1_EXTEND_PID \
+	(MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \
+	 MMUCR1_DTTID | MMUCR1_DCCD)
+
+/*
+ * Use extended PIDs if enabled.
+ * Don't clear the ERATs on context sync events and enable I & D LRU.
+ * Enable ERAT back invalidate when tlbwe overwrites an entry.
+ */
+#define INITIAL_MMUCR1 \
+	(MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \
+	 MMUCR1_DRRE | MMUCR1_TLBWE_BINV)
+
+_GLOBAL(__setup_cpu_a2)
+	/* Some of these are actually thread local and some are
+	 * core local but doing it always won't hurt
+	 */
+
+#ifdef CONFIG_PPC_WSP_COPRO
+	/* Make sure ACOP starts out as zero */
+	li	r3,0
+	mtspr   SPRN_ACOP,r3
+
+	/* Enable icswx instruction */
+	mfspr   r3,SPRN_A2_CCR2
+	ori     r3,r3,A2_CCR2_ENABLE_ICSWX
+	mtspr   SPRN_A2_CCR2,r3
+
+	/* Unmask all CTs in HACOP */
+	li      r3,-1
+	mtspr   SPRN_HACOP,r3
+#endif /* CONFIG_PPC_WSP_COPRO */
+
+	/* Enable doorbell */
+	mfspr   r3,SPRN_A2_CCR2
+	oris     r3,r3,A2_CCR2_ENABLE_PC@h
+	mtspr   SPRN_A2_CCR2,r3
+	isync
+
+	/* Setup CCR0 to disable power saving for now as it's busted
+	 * in the current implementations. Setup CCR1 to wake on
+	 * interrupts normally (we write the default value but who
+	 * knows what FW may have clobbered...)
+	 */
+	li	r3,0
+	mtspr	SPRN_A2_CCR0, r3
+	LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f)
+	mtspr	SPRN_A2_CCR1, r3
+
+	/* Initialise MMUCR1 */
+	lis	r3,INITIAL_MMUCR1@h
+	ori	r3,r3,INITIAL_MMUCR1@l
+	mtspr	SPRN_MMUCR1,r3
+
+	/* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
+	LOAD_REG_IMMEDIATE(r3, 0x000a7531)
+	mtspr	SPRN_MMUCR2,r3
+
+	/* Set MMUCR3 to write all thids bit to the TLB */
+	LOAD_REG_IMMEDIATE(r3, 0x0000000f)
+	mtspr	SPRN_MMUCR3,r3
+
+	/* Don't do ERAT stuff if running guest mode */
+	mfmsr	r3
+	andis.	r0,r3,MSR_GS@h
+	bne	1f
+
+	/* Now set the I-ERAT watermark to 15 */
+	lis	r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h
+	mtspr	SPRN_MMUCR0, r4
+	li	r4,A2_IERAT_SIZE-1
+	PPC_ERATWE(r4,r4,3)
+
+	/* Now set the D-ERAT watermark to 31 */
+	lis	r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h
+	mtspr	SPRN_MMUCR0, r4
+	li	r4,A2_DERAT_SIZE-1
+	PPC_ERATWE(r4,r4,3)
+
+	/* And invalidate the beast just in case. That won't get rid of
+	 * a bolted entry though it will be in LRU and so will go away eventually
+	 * but let's not bother for now
+	 */
+	PPC_ERATILX(0,0,0)
+1:
+	blr
+
+_GLOBAL(__restore_cpu_a2)
+	b	__setup_cpu_a2
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b65b4908d3c7..3d7b65ad4962 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -62,10 +62,12 @@ extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_pa6t(void);
 extern void __restore_cpu_ppc970(void);
 extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_power7(void);
+extern void __restore_cpu_a2(void);
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_E500)
 extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -2011,7 +2013,26 @@ static struct cpu_spec __initdata cpu_specs[] = {
 #endif /* CONFIG_PPC32 */
 #endif /* CONFIG_E500 */
 
-#ifdef CONFIG_PPC_BOOK3E_64
+#ifdef CONFIG_PPC_A2
+	{	/* Standard A2 (>= DD2) + FPU core */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00480000,
+		.cpu_name		= "A2 (>= DD2)",
+		.cpu_features		= CPU_FTRS_A2,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.mmu_features		= MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX |
+					  MMU_FTR_USE_TLBIVAX_BCAST |
+					  MMU_FTR_LOCK_BCAST_INVAL |
+					  MMU_FTR_USE_TLBRSRV |
+					  MMU_FTR_USE_PAIRED_MAS,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 0,
+		.cpu_setup		= __setup_cpu_a2,
+		.cpu_restore		= __restore_cpu_a2,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppca2",
+	},
 	{	/* This is a default entry to get going, to be replaced by
 		 * a real one at some stage
 		 */
@@ -2032,7 +2053,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check		= machine_check_generic,
 		.platform		= "power6",
 	},
-#endif
+#endif /* CONFIG_PPC_A2 */
 };
 
 static struct cpu_spec the_cpu_spec;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 111138c55f9c..7c1e1c64437b 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -107,6 +107,10 @@ config POWER4
 	depends on PPC64 && PPC_BOOK3S
 	def_bool y
 
+config PPC_A2
+	bool
+	depends on PPC_BOOK3E_64
+
 config TUNE_CELL
 	bool "Optimize for Cell Broadband Engine"
 	depends on PPC64 && PPC_BOOK3S
-- 
cgit v1.2.3


From bd491781097f150687906008d639936a0c00ed90 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 14 Apr 2011 22:32:02 +0000
Subject: powerpc: Add TLB size detection for TYPE_3E MMUs

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/mmu-book3e.h | 15 +++++++++++++++
 arch/powerpc/mm/mmu_context_nohash.c  | 12 +++++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index 17194fcd4040..80d68afb0200 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -137,6 +137,21 @@
 #define MMUCSR0_TLB2PS	0x00078000	/* TLB2 Page Size */
 #define MMUCSR0_TLB3PS	0x00780000	/* TLB3 Page Size */
 
+/* MMUCFG bits */
+#define MMUCFG_MAVN_NASK	0x00000003
+#define MMUCFG_MAVN_V1_0	0x00000000
+#define MMUCFG_MAVN_V2_0	0x00000001
+#define MMUCFG_NTLB_MASK	0x0000000c
+#define MMUCFG_NTLB_SHIFT	2
+#define MMUCFG_PIDSIZE_MASK	0x000007c0
+#define MMUCFG_PIDSIZE_SHIFT	6
+#define MMUCFG_TWC		0x00008000
+#define MMUCFG_LRAT		0x00010000
+#define MMUCFG_RASIZE_MASK	0x00fe0000
+#define MMUCFG_RASIZE_SHIFT	17
+#define MMUCFG_LPIDSIZE_MASK	0x0f000000
+#define MMUCFG_LPIDSIZE_SHIFT	24
+
 /* TLBnCFG encoding */
 #define TLBnCFG_N_ENTRY		0x00000fff	/* number of entries */
 #define TLBnCFG_HES		0x00002000	/* HW select supported */
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 4d8fa911c73d..336807de550e 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -409,7 +409,17 @@ void __init mmu_context_init(void)
 	} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
 		first_context = 1;
 		last_context = 65535;
-	} else {
+	} else
+#ifdef CONFIG_PPC_BOOK3E_MMU
+	if (mmu_has_feature(MMU_FTR_TYPE_3E)) {
+		u32 mmucfg = mfspr(SPRN_MMUCFG);
+		u32 pid_bits = (mmucfg & MMUCFG_PIDSIZE_MASK)
+				>> MMUCFG_PIDSIZE_SHIFT;
+		first_context = 1;
+		last_context = (1UL << (pid_bits + 1)) - 1;
+	} else
+#endif
+	{
 		first_context = 1;
 		last_context = 255;
 	}
-- 
cgit v1.2.3


From ca1769f7a372898f5e3dbb8e4ff53f53f0626ef4 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 14 Apr 2011 22:32:04 +0000
Subject: powerpc: Index crit/dbg/mcheck stacks using cpu number on 64bit

In exc_lvl_ctx_init() we index into the crit/dbg/mcheck stacks using
the hard cpu id, but that assumes the hard cpu id is zero based and
contiguous. That is not the case on A2.

The root of the problem is that the 32bit code has no equivalent of the
paca to allow it to do the hard->soft mapping in assembler. Until the
32bit code is updated to handle that, index the stacks using the soft
cpu ids on 64bit and hard on 32 bit.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/irq.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index f621b7d2d869..ea09512a68c3 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -397,24 +397,28 @@ struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly;
 void exc_lvl_ctx_init(void)
 {
 	struct thread_info *tp;
-	int i, hw_cpu;
+	int i, cpu_nr;
 
 	for_each_possible_cpu(i) {
-		hw_cpu = get_hard_smp_processor_id(i);
-		memset((void *)critirq_ctx[hw_cpu], 0, THREAD_SIZE);
-		tp = critirq_ctx[hw_cpu];
-		tp->cpu = i;
+#ifdef CONFIG_PPC64
+		cpu_nr = i;
+#else
+		cpu_nr = get_hard_smp_processor_id(i);
+#endif
+		memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
+		tp = critirq_ctx[cpu_nr];
+		tp->cpu = cpu_nr;
 		tp->preempt_count = 0;
 
 #ifdef CONFIG_BOOKE
-		memset((void *)dbgirq_ctx[hw_cpu], 0, THREAD_SIZE);
-		tp = dbgirq_ctx[hw_cpu];
-		tp->cpu = i;
+		memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE);
+		tp = dbgirq_ctx[cpu_nr];
+		tp->cpu = cpu_nr;
 		tp->preempt_count = 0;
 
-		memset((void *)mcheckirq_ctx[hw_cpu], 0, THREAD_SIZE);
-		tp = mcheckirq_ctx[hw_cpu];
-		tp->cpu = i;
+		memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE);
+		tp = mcheckirq_ctx[cpu_nr];
+		tp->cpu = cpu_nr;
 		tp->preempt_count = HARDIRQ_OFFSET;
 #endif
 	}
-- 
cgit v1.2.3


From 1a51dde139d5305b2592c716c50c005d6ab9624b Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 14 Apr 2011 22:32:04 +0000
Subject: powerpc/book3e: Use way 3 for linear mapping bolted entry

An erratum on A2 can lead to the bolted entry we insert for the linear
mapping being evicted, to avoid that write the bolted entry to way 3.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/exceptions-64e.S | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 9651acc3504a..8fe0fc233f02 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -864,8 +864,9 @@ have_hes:
 	 * that will have to be made dependent on whether we are running under
 	 * a hypervisor I suppose.
 	 */
-	ori	r3,r3,MAS0_HES | MAS0_WQ_ALLWAYS
-	mtspr	SPRN_MAS0,r3
+	ori	r11,r3,MAS0_WQ_ALLWAYS
+	oris	r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */
+	mtspr	SPRN_MAS0,r11
 	lis	r3,(MAS1_VALID | MAS1_IPROT)@h
 	ori	r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT
 	mtspr	SPRN_MAS1,r3
-- 
cgit v1.2.3


From f0aae3238fc1c28b543cbaaa0e7c5d57685f5f89 Mon Sep 17 00:00:00 2001
From: Jack Miller <jack@codezen.org>
Date: Thu, 14 Apr 2011 22:32:05 +0000
Subject: powerpc/book3e: Flush IPROT protected TLB entries leftover by
 firmware

When we set up the TLB for ourselves on Book3E, we need to flush out any
old mappings established by the firmware or bootloader.  At present we
attempt this with a tlbilx to flush everything, but this will leave behind
any entries with the IPROT bit set.

There are several good reason firmware might establish mappings with IPROT,
and in fact ePAPR compliant firmwares are required to establish their
initial mapped area with IPROT.

This patch, therefore adds more complex code to scan through the TLB upon
entry and flush away any entries that are not our own.

Signed-off-by: Jack Miller <jack@codezen.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/mmu-book3e.h |  1 +
 arch/powerpc/kernel/exceptions-64e.S  | 45 ++++++++++++++++++++++++++++++++++-
 2 files changed, 45 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index 80d68afb0200..ec61e7b998c0 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -43,6 +43,7 @@
 #define MAS0_TLBSEL(x)		(((x) << 28) & 0x30000000)
 #define MAS0_ESEL(x)		(((x) << 16) & 0x0FFF0000)
 #define MAS0_NV(x)		((x) & 0x00000FFF)
+#define MAS0_ESEL_MASK		0x0FFF0000
 #define MAS0_HES		0x00004000
 #define MAS0_WQ_ALLWAYS		0x00000000
 #define MAS0_WQ_COND		0x00001000
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 8fe0fc233f02..23bd83b20be4 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -886,8 +886,51 @@ have_hes:
 	bctr
 
 1:	/* We are now running at PAGE_OFFSET, clean the TLB of everything
-	 * else (XXX we should scan for bolted crap from the firmware too)
+	 * else (including IPROTed things left by firmware)
+	 * r4 = TLBnCFG
+	 * r3 = current address (more or less)
 	 */
+
+	li	r5,0
+	mtspr	SPRN_MAS6,r5
+	tlbsx	0,r3
+
+	rlwinm	r9,r4,0,TLBnCFG_N_ENTRY
+	rlwinm	r10,r4,8,0xff
+	addi	r10,r10,-1	/* Get inner loop mask */
+
+	li	r3,1
+
+	mfspr	r5,SPRN_MAS1
+	rlwinm	r5,r5,0,(~(MAS1_VALID|MAS1_IPROT))
+
+	mfspr	r6,SPRN_MAS2
+	rldicr	r6,r6,0,51		/* Extract EPN */
+
+	mfspr	r7,SPRN_MAS0
+	rlwinm	r7,r7,0,0xffff0fff	/* Clear HES and WQ */
+
+	rlwinm	r8,r7,16,0xfff		/* Extract ESEL */
+
+2:	add	r4,r3,r8
+	and	r4,r4,r10
+
+	rlwimi	r7,r4,16,MAS0_ESEL_MASK
+
+	mtspr	SPRN_MAS0,r7
+	mtspr	SPRN_MAS1,r5
+	mtspr	SPRN_MAS2,r6
+	tlbwe
+
+	addi	r3,r3,1
+	and.	r4,r3,r10
+
+	bne	3f
+	addis	r6,r6,(1<<30)@h
+3:
+	cmpw	r3,r9
+	blt	2b
+
 	PPC_TLBILX(0,0,0)
 	sync
 	isync
-- 
cgit v1.2.3


From efcac6589a277c10060e4be44b9455cf43838dc1 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@au1.ibm.com>
Date: Wed, 2 Mar 2011 15:18:48 +0000
Subject: powerpc: Per process DSCR + some fixes (try#4)

The DSCR (aka Data Stream Control Register) is supported on some
server PowerPC chips and allow some control over the prefetch
of data streams.

This patch allows the value to be specified per thread by emulating
the corresponding mfspr and mtspr instructions. Children of such
threads inherit the value. Other threads use a default value that
can be specified in sysfs - /sys/devices/system/cpu/dscr_default.

If a thread starts with non default value in the sysfs entry,
all children threads inherit this non default value even if
the sysfs value is changed later.

Signed-off-by: Alexey Kardashevskiy <aik@au1.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/emulated_ops.h |  4 ++++
 arch/powerpc/include/asm/ppc-opcode.h   |  4 ++++
 arch/powerpc/include/asm/processor.h    |  4 ++++
 arch/powerpc/kernel/asm-offsets.c       |  1 +
 arch/powerpc/kernel/entry_64.S          | 15 +++++++++++++
 arch/powerpc/kernel/process.c           | 16 ++++++++++++++
 arch/powerpc/kernel/sysfs.c             | 38 +++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/traps.c             | 24 +++++++++++++++++++++
 8 files changed, 106 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index f0fb4fc1f6e6..45921672b97a 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -52,6 +52,10 @@ extern struct ppc_emulated {
 #ifdef CONFIG_VSX
 	struct ppc_emulated_entry vsx;
 #endif
+#ifdef CONFIG_PPC64
+	struct ppc_emulated_entry mfdscr;
+	struct ppc_emulated_entry mtdscr;
+#endif
 } ppc_emulated;
 
 extern u32 ppc_warn_emulated;
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 3e25b258568e..e472659d906c 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -41,6 +41,10 @@
 #define PPC_INST_RFCI			0x4c000066
 #define PPC_INST_RFDI			0x4c00004e
 #define PPC_INST_RFMCI			0x4c00004c
+#define PPC_INST_MFSPR_DSCR		0x7c1102a6
+#define PPC_INST_MFSPR_DSCR_MASK	0xfc1fffff
+#define PPC_INST_MTSPR_DSCR		0x7c1103a6
+#define PPC_INST_MTSPR_DSCR_MASK	0xfc1fffff
 
 #define PPC_INST_STRING			0x7c00042a
 #define PPC_INST_STRING_MASK		0xfc0007fe
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index de1967a1ff57..d50c2b6d9bc3 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -238,6 +238,10 @@ struct thread_struct {
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	void*		kvm_shadow_vcpu; /* KVM internal data */
 #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
+#ifdef CONFIG_PPC64
+	unsigned long	dscr;
+	int		dscr_inherit;
+#endif
 };
 
 #define ARCH_MIN_TASKALIGN 16
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 23e6a93145ab..6887661ac072 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -74,6 +74,7 @@ int main(void)
 	DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
 	DEFINE(SIGSEGV, SIGSEGV);
 	DEFINE(NMI_MASK, NMI_MASK);
+	DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr));
 #else
 	DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index dbf5bfafd7bc..64693706ebfd 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -421,6 +421,12 @@ BEGIN_FTR_SECTION
 	std	r24,THREAD_VRSAVE(r3)
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+	mfspr	r25,SPRN_DSCR
+	std	r25,THREAD_DSCR(r3)
+END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
+#endif
 	and.	r0,r0,r22
 	beq+	1f
 	andc	r22,r22,r0
@@ -522,6 +528,15 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_VRSAVE,r0		/* if G4, restore VRSAVE reg */
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+	ld	r0,THREAD_DSCR(r4)
+	cmpd	r0,r25
+	beq	1f
+	mtspr	SPRN_DSCR,r0
+1:	
+END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
+#endif
 
 	/* r3-r13 are destroyed -- Cort */
 	REST_8GPRS(14, r1)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index f74f355a9617..a01c2d93fd2f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -702,6 +702,8 @@ void prepare_to_copy(struct task_struct *tsk)
 /*
  * Copy a thread..
  */
+extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
+
 int copy_thread(unsigned long clone_flags, unsigned long usp,
 		unsigned long unused, struct task_struct *p,
 		struct pt_regs *regs)
@@ -769,6 +771,20 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		p->thread.ksp_vsid = sp_vsid;
 	}
 #endif /* CONFIG_PPC_STD_MMU_64 */
+#ifdef CONFIG_PPC64 
+	if (cpu_has_feature(CPU_FTR_DSCR)) {
+		if (current->thread.dscr_inherit) {
+			p->thread.dscr_inherit = 1;
+			p->thread.dscr = current->thread.dscr;
+		} else if (0 != dscr_default) {
+			p->thread.dscr_inherit = 1;
+			p->thread.dscr = dscr_default;
+		} else {
+			p->thread.dscr_inherit = 0;
+			p->thread.dscr = 0;
+		}
+	}
+#endif
 
 	/*
 	 * The PPC64 ABI makes use of a TOC to contain function 
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index c0d8c2006bf4..f0f2199e64e1 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -182,6 +182,41 @@ static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
 static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL);
 static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr);
 static SYSDEV_ATTR(purr, 0600, show_purr, store_purr);
+
+unsigned long dscr_default = 0;
+EXPORT_SYMBOL(dscr_default);
+
+static ssize_t show_dscr_default(struct sysdev_class *class,
+		struct sysdev_class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lx\n", dscr_default);
+}
+
+static ssize_t __used store_dscr_default(struct sysdev_class *class,
+		struct sysdev_class_attribute *attr, const char *buf,
+		size_t count)
+{
+	unsigned long val;
+	int ret = 0;
+	
+	ret = sscanf(buf, "%lx", &val);
+	if (ret != 1)
+		return -EINVAL;
+	dscr_default = val;
+
+	return count;
+}
+
+static SYSDEV_CLASS_ATTR(dscr_default, 0600,
+		show_dscr_default, store_dscr_default);
+
+static void sysfs_create_dscr_default(void)
+{
+	int err = 0;
+	if (cpu_has_feature(CPU_FTR_DSCR))
+		err = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+			&attr_dscr_default.attr);
+}
 #endif /* CONFIG_PPC64 */
 
 #ifdef HAS_PPC_PMC_PA6T
@@ -617,6 +652,9 @@ static int __init topology_init(void)
 		if (cpu_online(cpu))
 			register_cpu_online(cpu);
 	}
+#ifdef CONFIG_PPC64
+	sysfs_create_dscr_default();
+#endif /* CONFIG_PPC64 */
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 5ddb801bc154..cb71cf29edea 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -909,6 +909,26 @@ static int emulate_instruction(struct pt_regs *regs)
 		return emulate_isel(regs, instword);
 	}
 
+#ifdef CONFIG_PPC64
+	/* Emulate the mfspr rD, DSCR. */
+	if (((instword & PPC_INST_MFSPR_DSCR_MASK) == PPC_INST_MFSPR_DSCR) &&
+			cpu_has_feature(CPU_FTR_DSCR)) {
+		PPC_WARN_EMULATED(mfdscr, regs);
+		rd = (instword >> 21) & 0x1f;
+		regs->gpr[rd] = mfspr(SPRN_DSCR);
+		return 0;
+	}
+	/* Emulate the mtspr DSCR, rD. */
+	if (((instword & PPC_INST_MTSPR_DSCR_MASK) == PPC_INST_MTSPR_DSCR) &&
+			cpu_has_feature(CPU_FTR_DSCR)) {
+		PPC_WARN_EMULATED(mtdscr, regs);
+		rd = (instword >> 21) & 0x1f;
+		mtspr(SPRN_DSCR, regs->gpr[rd]);
+		current->thread.dscr_inherit = 1;
+		return 0;
+	}
+#endif
+
 	return -EINVAL;
 }
 
@@ -1506,6 +1526,10 @@ struct ppc_emulated ppc_emulated = {
 #ifdef CONFIG_VSX
 	WARN_EMULATED_SETUP(vsx),
 #endif
+#ifdef CONFIG_PPC64
+	WARN_EMULATED_SETUP(mfdscr),
+	WARN_EMULATED_SETUP(mtdscr),
+#endif
 };
 
 u32 ppc_warn_emulated;
-- 
cgit v1.2.3


From 21176fed25c3b0cb17c6c42d71b4e3d68b8f9dd4 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ozlabs.org>
Date: Mon, 11 Apr 2011 21:25:01 +0000
Subject: powerpc/pci: Split IO vs MMIO indirect access hooks

The goal is to avoid adding overhead to MMIO when only PIO is needed

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/io.h          | 16 +++++++++++-----
 arch/powerpc/platforms/Kconfig         | 10 ++++++++--
 arch/powerpc/platforms/cell/Kconfig    |  3 ++-
 arch/powerpc/platforms/iseries/Kconfig |  3 ++-
 4 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 001f2f11c19b..2f365f5007a0 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -481,10 +481,16 @@ __do_out_asm(_rec_outl, "stwbrx")
 				_memcpy_fromio(dst,PCI_FIX_ADDR(src),n)
 #endif /* !CONFIG_EEH */
 
-#ifdef CONFIG_PPC_INDIRECT_IO
-#define DEF_PCI_HOOK(x)		x
+#ifdef CONFIG_PPC_INDIRECT_PIO
+#define DEF_PCI_HOOK_pio(x)	x
+#else
+#define DEF_PCI_HOOK_pio(x)	NULL
+#endif
+
+#ifdef CONFIG_PPC_INDIRECT_MMIO
+#define DEF_PCI_HOOK_mem(x)	x
 #else
-#define DEF_PCI_HOOK(x)		NULL
+#define DEF_PCI_HOOK_mem(x)	NULL
 #endif
 
 /* Structure containing all the hooks */
@@ -504,7 +510,7 @@ extern struct ppc_pci_io {
 #define DEF_PCI_AC_RET(name, ret, at, al, space, aa)		\
 static inline ret name at					\
 {								\
-	if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL)		\
+	if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL)	\
 		return ppc_pci_io.name al;			\
 	return __do_##name al;					\
 }
@@ -512,7 +518,7 @@ static inline ret name at					\
 #define DEF_PCI_AC_NORET(name, at, al, space, aa)		\
 static inline void name at					\
 {								\
-	if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL)		\
+	if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL)		\
 		ppc_pci_io.name al;				\
 	else							\
 		__do_##name al;					\
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 658ffc50493d..54db9fbab1dc 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -154,11 +154,17 @@ config PPC_P7_NAP
 config PPC_INDIRECT_IO
 	bool
 	select GENERIC_IOMAP
-	default n
+
+config PPC_INDIRECT_PIO
+	bool
+	select PPC_INDIRECT_IO
+
+config PPC_INDIRECT_MMIO
+	bool
+	select PPC_INDIRECT_IO
 
 config GENERIC_IOMAP
 	bool
-	default n
 
 source "drivers/cpufreq/Kconfig"
 
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 81239ebed83f..3c7f1de06cdf 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -6,7 +6,8 @@ config PPC_CELL_COMMON
 	bool
 	select PPC_CELL
 	select PPC_DCR_MMIO
-	select PPC_INDIRECT_IO
+	select PPC_INDIRECT_PIO
+	select PPC_INDIRECT_MMIO
 	select PPC_NATIVE
 	select PPC_RTAS
 	select IRQ_EDGE_EOI_HANDLER
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
index e5bc9f75d474..ea1d3622b41c 100644
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -1,7 +1,8 @@
 config PPC_ISERIES
 	bool "IBM Legacy iSeries"
 	depends on PPC64 && PPC_BOOK3S
-	select PPC_INDIRECT_IO
+	select PPC_INDIRECT_PIO
+	select PPC_INDIRECT_MMIO
 	select PPC_PCI_CHOICE if EXPERT
 
 menu "iSeries device drivers"
-- 
cgit v1.2.3


From 3cc30d0726d258ac336283bcde66a8ab58283b61 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ozlabs.org>
Date: Mon, 11 Apr 2011 21:25:01 +0000
Subject: powerpc/pci: Move IO workarounds to the common kernel dir

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/io-workarounds.h    |  49 +++++++
 arch/powerpc/kernel/Makefile                 |   2 +
 arch/powerpc/kernel/io-workarounds.c         | 184 ++++++++++++++++++++++++++
 arch/powerpc/platforms/Kconfig               |   3 +
 arch/powerpc/platforms/cell/Kconfig          |   1 +
 arch/powerpc/platforms/cell/Makefile         |   8 +-
 arch/powerpc/platforms/cell/celleb_pci.c     |   1 -
 arch/powerpc/platforms/cell/celleb_pci.h     |   3 +-
 arch/powerpc/platforms/cell/io-workarounds.c | 185 ---------------------------
 arch/powerpc/platforms/cell/io-workarounds.h |  49 -------
 arch/powerpc/platforms/cell/qpace_setup.c    |   1 -
 arch/powerpc/platforms/cell/setup.c          |   2 +-
 arch/powerpc/platforms/cell/spider-pci.c     |   3 +-
 13 files changed, 247 insertions(+), 244 deletions(-)
 create mode 100644 arch/powerpc/include/asm/io-workarounds.h
 create mode 100644 arch/powerpc/kernel/io-workarounds.c
 delete mode 100644 arch/powerpc/platforms/cell/io-workarounds.c
 delete mode 100644 arch/powerpc/platforms/cell/io-workarounds.h

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/io-workarounds.h b/arch/powerpc/include/asm/io-workarounds.h
new file mode 100644
index 000000000000..6efc7782ebf2
--- /dev/null
+++ b/arch/powerpc/include/asm/io-workarounds.h
@@ -0,0 +1,49 @@
+/*
+ * Support PCI IO workaround
+ *
+ * (C) Copyright 2007-2008 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _IO_WORKAROUNDS_H
+#define _IO_WORKAROUNDS_H
+
+#include <linux/io.h>
+#include <asm/pci-bridge.h>
+
+/* Bus info */
+struct iowa_bus {
+	struct pci_controller *phb;
+	struct ppc_pci_io *ops;
+	void   *private;
+};
+
+void __devinit io_workaround_init(void);
+void __devinit iowa_register_bus(struct pci_controller *, struct ppc_pci_io *,
+				 int (*)(struct iowa_bus *, void *), void *);
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR);
+struct iowa_bus *iowa_pio_find_bus(unsigned long);
+
+extern struct ppc_pci_io spiderpci_ops;
+extern int spiderpci_iowa_init(struct iowa_bus *, void *);
+
+#define SPIDER_PCI_REG_BASE		0xd000
+#define SPIDER_PCI_REG_SIZE		0x1000
+#define SPIDER_PCI_VCI_CNTL_STAT	0x0110
+#define SPIDER_PCI_DUMMY_READ		0x0810
+#define SPIDER_PCI_DUMMY_READ_BASE	0x0814
+
+#endif /* _IO_WORKAROUNDS_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 058bc8bac488..82e0bed0650d 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -106,6 +106,8 @@ obj-$(CONFIG_KEXEC)		+= machine_kexec.o crash.o \
 obj-$(CONFIG_AUDIT)		+= audit.o
 obj64-$(CONFIG_AUDIT)		+= compat_audit.o
 
+obj-$(CONFIG_PPC_IO_WORKAROUNDS)	+= io-workarounds.o
+
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_callchain.o
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
new file mode 100644
index 000000000000..7e5845798788
--- /dev/null
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -0,0 +1,184 @@
+/*
+ * Support PCI IO workaround
+ *
+ *  Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *		       IBM, Corp.
+ *  (C) Copyright 2007-2008 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#undef DEBUG
+
+#include <linux/kernel.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/pgtable.h>
+#include <asm/ppc-pci.h>
+#include <asm/io-workarounds.h>
+
+#define IOWA_MAX_BUS	8
+
+static struct iowa_bus iowa_busses[IOWA_MAX_BUS];
+static unsigned int iowa_bus_count;
+
+static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
+{
+	int i, j;
+	struct resource *res;
+	unsigned long vstart, vend;
+
+	for (i = 0; i < iowa_bus_count; i++) {
+		struct iowa_bus *bus = &iowa_busses[i];
+		struct pci_controller *phb = bus->phb;
+
+		if (vaddr) {
+			vstart = (unsigned long)phb->io_base_virt;
+			vend = vstart + phb->pci_io_size - 1;
+			if ((vaddr >= vstart) && (vaddr <= vend))
+				return bus;
+		}
+
+		if (paddr)
+			for (j = 0; j < 3; j++) {
+				res = &phb->mem_resources[j];
+				if (paddr >= res->start && paddr <= res->end)
+					return bus;
+			}
+	}
+
+	return NULL;
+}
+
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
+{
+	struct iowa_bus *bus;
+	int token;
+
+	token = PCI_GET_ADDR_TOKEN(addr);
+
+	if (token && token <= iowa_bus_count)
+		bus = &iowa_busses[token - 1];
+	else {
+		unsigned long vaddr, paddr;
+		pte_t *ptep;
+
+		vaddr = (unsigned long)PCI_FIX_ADDR(addr);
+		if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
+			return NULL;
+
+		ptep = find_linux_pte(init_mm.pgd, vaddr);
+		if (ptep == NULL)
+			paddr = 0;
+		else
+			paddr = pte_pfn(*ptep) << PAGE_SHIFT;
+		bus = iowa_pci_find(vaddr, paddr);
+
+		if (bus == NULL)
+			return NULL;
+	}
+
+	return bus;
+}
+
+struct iowa_bus *iowa_pio_find_bus(unsigned long port)
+{
+	unsigned long vaddr = (unsigned long)pci_io_base + port;
+	return iowa_pci_find(vaddr, 0);
+}
+
+
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)		\
+static ret iowa_##name at					\
+{								\
+	struct iowa_bus *bus;					\
+	bus = iowa_##space##_find_bus(aa);			\
+	if (bus && bus->ops && bus->ops->name)			\
+		return bus->ops->name al;			\
+	return __do_##name al;					\
+}
+
+#define DEF_PCI_AC_NORET(name, at, al, space, aa)		\
+static void iowa_##name at					\
+{								\
+	struct iowa_bus *bus;					\
+	bus = iowa_##space##_find_bus(aa);			\
+	if (bus && bus->ops && bus->ops->name) {		\
+		bus->ops->name al;				\
+		return;						\
+	}							\
+	__do_##name al;						\
+}
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+static const struct ppc_pci_io __devinitconst iowa_pci_io = {
+
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)	.name = iowa_##name,
+#define DEF_PCI_AC_NORET(name, at, al, space, aa)	.name = iowa_##name,
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+};
+
+static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
+				  unsigned long flags, void *caller)
+{
+	struct iowa_bus *bus;
+	void __iomem *res = __ioremap_caller(addr, size, flags, caller);
+	int busno;
+
+	bus = iowa_pci_find(0, (unsigned long)addr);
+	if (bus != NULL) {
+		busno = bus - iowa_busses;
+		PCI_SET_ADDR_TOKEN(res, busno + 1);
+	}
+	return res;
+}
+
+/* Regist new bus to support workaround */
+void __devinit iowa_register_bus(struct pci_controller *phb,
+			struct ppc_pci_io *ops,
+			int (*initfunc)(struct iowa_bus *, void *), void *data)
+{
+	struct iowa_bus *bus;
+	struct device_node *np = phb->dn;
+
+	if (iowa_bus_count >= IOWA_MAX_BUS) {
+		pr_err("IOWA:Too many pci bridges, "
+		       "workarounds disabled for %s\n", np->full_name);
+		return;
+	}
+
+	bus = &iowa_busses[iowa_bus_count];
+	bus->phb = phb;
+	bus->ops = ops;
+
+	if (initfunc)
+		if ((*initfunc)(bus, data))
+			return;
+
+	iowa_bus_count++;
+
+	pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
+}
+
+/* enable IO workaround */
+void __devinit io_workaround_init(void)
+{
+	static int io_workaround_inited;
+
+	if (io_workaround_inited)
+		return;
+	ppc_pci_io = iowa_pci_io;
+	ppc_md.ioremap = iowa_ioremap;
+	io_workaround_inited = 1;
+}
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 54db9fbab1dc..f2352fc5cbbe 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -163,6 +163,9 @@ config PPC_INDIRECT_MMIO
 	bool
 	select PPC_INDIRECT_IO
 
+config PPC_IO_WORKAROUNDS
+	bool
+
 config GENERIC_IOMAP
 	bool
 
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 3c7f1de06cdf..67d5009b4e86 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -16,6 +16,7 @@ config PPC_CELL_NATIVE
 	bool
 	select PPC_CELL_COMMON
 	select MPIC
+	select PPC_IO_WORKAROUNDS
 	select IBM_NEW_EMAC_EMAC4
 	select IBM_NEW_EMAC_RGMII
 	select IBM_NEW_EMAC_ZMII #test only
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index 83fafe922641..8839ef6c7188 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_PPC_CELL_COMMON)		+= cbe_regs.o interrupt.o pervasive.o
 
 obj-$(CONFIG_PPC_CELL_NATIVE)		+= iommu.o setup.o spider-pic.o \
-					   pmu.o io-workarounds.o spider-pci.o
+					   pmu.o spider-pci.o
 obj-$(CONFIG_CBE_RAS)			+= ras.o
 
 obj-$(CONFIG_CBE_THERM)			+= cbe_thermal.o
@@ -39,9 +39,9 @@ obj-y					+= celleb_setup.o \
 					   celleb_pci.o celleb_scc_epci.o \
 					   celleb_scc_pciex.o \
 					   celleb_scc_uhc.o \
-					   io-workarounds.o spider-pci.o \
-					   beat.o beat_htab.o beat_hvCall.o \
-					   beat_interrupt.o beat_iommu.o
+					   spider-pci.o beat.o beat_htab.o \
+					   beat_hvCall.o beat_interrupt.o \
+					   beat_iommu.o
 
 obj-$(CONFIG_SMP)			+= beat_smp.o
 obj-$(CONFIG_PPC_UDBG_BEAT)		+= beat_udbg.o
diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
index 404d1fc04d59..c19b783a7c99 100644
--- a/arch/powerpc/platforms/cell/celleb_pci.c
+++ b/arch/powerpc/platforms/cell/celleb_pci.c
@@ -41,7 +41,6 @@
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
 
-#include "io-workarounds.h"
 #include "celleb_pci.h"
 
 #define MAX_PCI_DEVICES    32
diff --git a/arch/powerpc/platforms/cell/celleb_pci.h b/arch/powerpc/platforms/cell/celleb_pci.h
index 4cba1523ec50..a801fcc5f389 100644
--- a/arch/powerpc/platforms/cell/celleb_pci.h
+++ b/arch/powerpc/platforms/cell/celleb_pci.h
@@ -26,8 +26,9 @@
 #include <asm/pci-bridge.h>
 #include <asm/prom.h>
 #include <asm/ppc-pci.h>
+#include <asm/io-workarounds.h>
 
-#include "io-workarounds.h"
+struct iowa_bus;
 
 struct celleb_phb_spec {
 	int (*setup)(struct device_node *, struct pci_controller *);
diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/platforms/cell/io-workarounds.c
deleted file mode 100644
index 5c1118e31940..000000000000
--- a/arch/powerpc/platforms/cell/io-workarounds.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Support PCI IO workaround
- *
- *  Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>
- *		       IBM, Corp.
- *  (C) Copyright 2007-2008 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#undef DEBUG
-
-#include <linux/kernel.h>
-
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/pgtable.h>
-#include <asm/ppc-pci.h>
-
-#include "io-workarounds.h"
-
-#define IOWA_MAX_BUS	8
-
-static struct iowa_bus iowa_busses[IOWA_MAX_BUS];
-static unsigned int iowa_bus_count;
-
-static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
-{
-	int i, j;
-	struct resource *res;
-	unsigned long vstart, vend;
-
-	for (i = 0; i < iowa_bus_count; i++) {
-		struct iowa_bus *bus = &iowa_busses[i];
-		struct pci_controller *phb = bus->phb;
-
-		if (vaddr) {
-			vstart = (unsigned long)phb->io_base_virt;
-			vend = vstart + phb->pci_io_size - 1;
-			if ((vaddr >= vstart) && (vaddr <= vend))
-				return bus;
-		}
-
-		if (paddr)
-			for (j = 0; j < 3; j++) {
-				res = &phb->mem_resources[j];
-				if (paddr >= res->start && paddr <= res->end)
-					return bus;
-			}
-	}
-
-	return NULL;
-}
-
-struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
-{
-	struct iowa_bus *bus;
-	int token;
-
-	token = PCI_GET_ADDR_TOKEN(addr);
-
-	if (token && token <= iowa_bus_count)
-		bus = &iowa_busses[token - 1];
-	else {
-		unsigned long vaddr, paddr;
-		pte_t *ptep;
-
-		vaddr = (unsigned long)PCI_FIX_ADDR(addr);
-		if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
-			return NULL;
-
-		ptep = find_linux_pte(init_mm.pgd, vaddr);
-		if (ptep == NULL)
-			paddr = 0;
-		else
-			paddr = pte_pfn(*ptep) << PAGE_SHIFT;
-		bus = iowa_pci_find(vaddr, paddr);
-
-		if (bus == NULL)
-			return NULL;
-	}
-
-	return bus;
-}
-
-struct iowa_bus *iowa_pio_find_bus(unsigned long port)
-{
-	unsigned long vaddr = (unsigned long)pci_io_base + port;
-	return iowa_pci_find(vaddr, 0);
-}
-
-
-#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)		\
-static ret iowa_##name at					\
-{								\
-	struct iowa_bus *bus;					\
-	bus = iowa_##space##_find_bus(aa);			\
-	if (bus && bus->ops && bus->ops->name)			\
-		return bus->ops->name al;			\
-	return __do_##name al;					\
-}
-
-#define DEF_PCI_AC_NORET(name, at, al, space, aa)		\
-static void iowa_##name at					\
-{								\
-	struct iowa_bus *bus;					\
-	bus = iowa_##space##_find_bus(aa);			\
-	if (bus && bus->ops && bus->ops->name) {		\
-		bus->ops->name al;				\
-		return;						\
-	}							\
-	__do_##name al;						\
-}
-
-#include <asm/io-defs.h>
-
-#undef DEF_PCI_AC_RET
-#undef DEF_PCI_AC_NORET
-
-static const struct ppc_pci_io __devinitconst iowa_pci_io = {
-
-#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)	.name = iowa_##name,
-#define DEF_PCI_AC_NORET(name, at, al, space, aa)	.name = iowa_##name,
-
-#include <asm/io-defs.h>
-
-#undef DEF_PCI_AC_RET
-#undef DEF_PCI_AC_NORET
-
-};
-
-static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
-				  unsigned long flags, void *caller)
-{
-	struct iowa_bus *bus;
-	void __iomem *res = __ioremap_caller(addr, size, flags, caller);
-	int busno;
-
-	bus = iowa_pci_find(0, (unsigned long)addr);
-	if (bus != NULL) {
-		busno = bus - iowa_busses;
-		PCI_SET_ADDR_TOKEN(res, busno + 1);
-	}
-	return res;
-}
-
-/* Regist new bus to support workaround */
-void __devinit iowa_register_bus(struct pci_controller *phb,
-			struct ppc_pci_io *ops,
-			int (*initfunc)(struct iowa_bus *, void *), void *data)
-{
-	struct iowa_bus *bus;
-	struct device_node *np = phb->dn;
-
-	if (iowa_bus_count >= IOWA_MAX_BUS) {
-		pr_err("IOWA:Too many pci bridges, "
-		       "workarounds disabled for %s\n", np->full_name);
-		return;
-	}
-
-	bus = &iowa_busses[iowa_bus_count];
-	bus->phb = phb;
-	bus->ops = ops;
-
-	if (initfunc)
-		if ((*initfunc)(bus, data))
-			return;
-
-	iowa_bus_count++;
-
-	pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
-}
-
-/* enable IO workaround */
-void __devinit io_workaround_init(void)
-{
-	static int io_workaround_inited;
-
-	if (io_workaround_inited)
-		return;
-	ppc_pci_io = iowa_pci_io;
-	ppc_md.ioremap = iowa_ioremap;
-	io_workaround_inited = 1;
-}
diff --git a/arch/powerpc/platforms/cell/io-workarounds.h b/arch/powerpc/platforms/cell/io-workarounds.h
deleted file mode 100644
index 6efc7782ebf2..000000000000
--- a/arch/powerpc/platforms/cell/io-workarounds.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Support PCI IO workaround
- *
- * (C) Copyright 2007-2008 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef _IO_WORKAROUNDS_H
-#define _IO_WORKAROUNDS_H
-
-#include <linux/io.h>
-#include <asm/pci-bridge.h>
-
-/* Bus info */
-struct iowa_bus {
-	struct pci_controller *phb;
-	struct ppc_pci_io *ops;
-	void   *private;
-};
-
-void __devinit io_workaround_init(void);
-void __devinit iowa_register_bus(struct pci_controller *, struct ppc_pci_io *,
-				 int (*)(struct iowa_bus *, void *), void *);
-struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR);
-struct iowa_bus *iowa_pio_find_bus(unsigned long);
-
-extern struct ppc_pci_io spiderpci_ops;
-extern int spiderpci_iowa_init(struct iowa_bus *, void *);
-
-#define SPIDER_PCI_REG_BASE		0xd000
-#define SPIDER_PCI_REG_SIZE		0x1000
-#define SPIDER_PCI_VCI_CNTL_STAT	0x0110
-#define SPIDER_PCI_DUMMY_READ		0x0810
-#define SPIDER_PCI_DUMMY_READ_BASE	0x0814
-
-#endif /* _IO_WORKAROUNDS_H */
diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c
index d31c594cfdf3..51e290126bc1 100644
--- a/arch/powerpc/platforms/cell/qpace_setup.c
+++ b/arch/powerpc/platforms/cell/qpace_setup.c
@@ -42,7 +42,6 @@
 #include "interrupt.h"
 #include "pervasive.h"
 #include "ras.h"
-#include "io-workarounds.h"
 
 static void qpace_show_cpuinfo(struct seq_file *m)
 {
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index fd57bfe00edf..af7b13cd7110 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -51,11 +51,11 @@
 #include <asm/udbg.h>
 #include <asm/mpic.h>
 #include <asm/cell-regs.h>
+#include <asm/io-workarounds.h>
 
 #include "interrupt.h"
 #include "pervasive.h"
 #include "ras.h"
-#include "io-workarounds.h"
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index ca7731c0b595..f1f7878893f3 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -27,8 +27,7 @@
 
 #include <asm/ppc-pci.h>
 #include <asm/pci-bridge.h>
-
-#include "io-workarounds.h"
+#include <asm/io-workarounds.h>
 
 #define SPIDER_PCI_DISABLE_PREFETCH
 
-- 
cgit v1.2.3


From d1109b7529f362c06c47140ae09dbd2b853ffddc Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ozlabs.org>
Date: Mon, 11 Apr 2011 21:25:02 +0000
Subject: powerpc/pci: Make IO workarounds init implicit when first bus is
 registered

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/io-workarounds.h |  1 -
 arch/powerpc/kernel/io-workarounds.c      | 27 +++++++++++++++------------
 arch/powerpc/platforms/cell/celleb_pci.c  | 18 +++++-------------
 arch/powerpc/platforms/cell/setup.c       |  2 --
 4 files changed, 20 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/io-workarounds.h b/arch/powerpc/include/asm/io-workarounds.h
index 6efc7782ebf2..fbae49286926 100644
--- a/arch/powerpc/include/asm/io-workarounds.h
+++ b/arch/powerpc/include/asm/io-workarounds.h
@@ -31,7 +31,6 @@ struct iowa_bus {
 	void   *private;
 };
 
-void __devinit io_workaround_init(void);
 void __devinit iowa_register_bus(struct pci_controller *, struct ppc_pci_io *,
 				 int (*)(struct iowa_bus *, void *), void *);
 struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR);
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 7e5845798788..d36515eaa314 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -144,7 +144,19 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
 	return res;
 }
 
-/* Regist new bus to support workaround */
+/* Enable IO workaround */
+static void __devinit io_workaround_init(void)
+{
+	static int io_workaround_inited;
+
+	if (io_workaround_inited)
+		return;
+	ppc_pci_io = iowa_pci_io;
+	ppc_md.ioremap = iowa_ioremap;
+	io_workaround_inited = 1;
+}
+
+/* Register new bus to support workaround */
 void __devinit iowa_register_bus(struct pci_controller *phb,
 			struct ppc_pci_io *ops,
 			int (*initfunc)(struct iowa_bus *, void *), void *data)
@@ -152,6 +164,8 @@ void __devinit iowa_register_bus(struct pci_controller *phb,
 	struct iowa_bus *bus;
 	struct device_node *np = phb->dn;
 
+	io_workaround_init();
+
 	if (iowa_bus_count >= IOWA_MAX_BUS) {
 		pr_err("IOWA:Too many pci bridges, "
 		       "workarounds disabled for %s\n", np->full_name);
@@ -171,14 +185,3 @@ void __devinit iowa_register_bus(struct pci_controller *phb,
 	pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
 }
 
-/* enable IO workaround */
-void __devinit io_workaround_init(void)
-{
-	static int io_workaround_inited;
-
-	if (io_workaround_inited)
-		return;
-	ppc_pci_io = iowa_pci_io;
-	ppc_md.ioremap = iowa_ioremap;
-	io_workaround_inited = 1;
-}
diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
index c19b783a7c99..2904b0a6b2c5 100644
--- a/arch/powerpc/platforms/cell/celleb_pci.c
+++ b/arch/powerpc/platforms/cell/celleb_pci.c
@@ -468,18 +468,6 @@ static struct of_device_id celleb_phb_match[] __initdata = {
 	},
 };
 
-static int __init celleb_io_workaround_init(struct pci_controller *phb,
-					    struct celleb_phb_spec *phb_spec)
-{
-	if (phb_spec->ops) {
-		iowa_register_bus(phb, phb_spec->ops, phb_spec->iowa_init,
-				  phb_spec->iowa_data);
-		io_workaround_init();
-	}
-
-	return 0;
-}
-
 int __init celleb_setup_phb(struct pci_controller *phb)
 {
 	struct device_node *dev = phb->dn;
@@ -499,7 +487,11 @@ int __init celleb_setup_phb(struct pci_controller *phb)
 	if (rc)
 		return 1;
 
-	return celleb_io_workaround_init(phb, phb_spec);
+	if (phb_spec->ops)
+		iowa_register_bus(phb, phb_spec->ops,
+				  phb_spec->iowa_init,
+				  phb_spec->iowa_data);
+	return 0;
 }
 
 int celleb_pci_probe_mode(struct pci_bus *bus)
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index af7b13cd7110..c73cf4c43fc2 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -136,8 +136,6 @@ static int __devinit cell_setup_phb(struct pci_controller *phb)
 
 	iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init,
 				  (void *)SPIDER_PCI_REG_BASE);
-	io_workaround_init();
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 69b123684b50040b0926eed1e02795dac8cb9587 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ozlabs.org>
Date: Mon, 11 Apr 2011 21:25:02 +0000
Subject: powerpc/pci: Properly initialize IO workaround "private"

Even when no initfunc is provided.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/io-workarounds.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index d36515eaa314..ffafaea3d261 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -175,6 +175,7 @@ void __devinit iowa_register_bus(struct pci_controller *phb,
 	bus = &iowa_busses[iowa_bus_count];
 	bus->phb = phb;
 	bus->ops = ops;
+	bus->private = data;
 
 	if (initfunc)
 		if ((*initfunc)(bus, data))
-- 
cgit v1.2.3


From e70606eb9beb683ce3991936267deab64ab56d95 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Sun, 10 Apr 2011 20:42:05 +0000
Subject: powerpc/numa: Look for ibm, associativity-reference-points at the
 root

If we don't find ibm,associativity-reference-points as a child of
/rtas, look for it at the root of the tree instead. We use this on
Book3E where we have no RTAS but still use the sPAPR conventions
for NUMA.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/mm/numa.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 5ec1dad2a19d..e49b799b59a3 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -311,14 +311,13 @@ EXPORT_SYMBOL_GPL(of_node_to_nid);
 static int __init find_min_common_depth(void)
 {
 	int depth;
-	struct device_node *rtas_root;
 	struct device_node *chosen;
+	struct device_node *root;
 	const char *vec5;
 
-	rtas_root = of_find_node_by_path("/rtas");
-
-	if (!rtas_root)
-		return -1;
+	root = of_find_node_by_path("/rtas");
+	if (!root)
+		root = of_find_node_by_path("/");
 
 	/*
 	 * This property is a set of 32-bit integers, each representing
@@ -332,7 +331,7 @@ static int __init find_min_common_depth(void)
 	 * NUMA boundary and the following are progressively less significant
 	 * boundaries. There can be more than one level of NUMA.
 	 */
-	distance_ref_points = of_get_property(rtas_root,
+	distance_ref_points = of_get_property(root,
 					"ibm,associativity-reference-points",
 					&distance_ref_points_depth);
 
@@ -376,11 +375,11 @@ static int __init find_min_common_depth(void)
 		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
 	}
 
-	of_node_put(rtas_root);
+	of_node_put(root);
 	return depth;
 
 err:
-	of_node_put(rtas_root);
+	of_node_put(root);
 	return -1;
 }
 
-- 
cgit v1.2.3


From 73706c3283d755d3725c6a48a18e677a15ced8be Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Sun, 10 Apr 2011 20:26:15 +0000
Subject: powerpc/irq: Dump chip data pointer in virq_mapping

This can be useful for differentiating interrupts on the same host
but with different chip data.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/irq.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ea09512a68c3..4f5d6e751a65 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -1086,10 +1086,11 @@ static int virq_debug_show(struct seq_file *m, void *private)
 	struct irq_desc *desc;
 	const char *p;
 	static const char none[] = "none";
+	void *data;
 	int i;
 
-	seq_printf(m, "%-5s  %-7s  %-15s  %s\n", "virq", "hwirq",
-		      "chip name", "host name");
+	seq_printf(m, "%-5s  %-7s  %-15s  %-18s  %s\n", "virq", "hwirq",
+		      "chip name", "chip data", "host name");
 
 	for (i = 1; i < nr_irqs; i++) {
 		desc = irq_to_desc(i);
@@ -1111,6 +1112,9 @@ static int virq_debug_show(struct seq_file *m, void *private)
 				p = none;
 			seq_printf(m, "%-15s  ", p);
 
+			data = irq_desc_get_chip_data(desc);
+			seq_printf(m, "0x%16p  ", data);
+
 			if (irq_map[i].host && irq_map[i].host->of_node)
 				p = irq_map[i].host->of_node->full_name;
 			else
-- 
cgit v1.2.3


From b618d2f043506e45b1d72b48a4ff7cb5b1a7011c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sat, 9 Apr 2011 22:59:07 +0000
Subject: powerpc/ps3: Update debug message for irq_set_chip_data()

commit ec775d0e70eb6b7116406b3441cb8501c2849dd2 ("powerpc: Convert to new irq_*
function names") changed a call from set_irq_chip_data() to
irq_set_chip_data(), but forgot to update the corresponding debug message

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Geoff Levand <geoff@infradead.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/ps3/interrupt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index f2f6413b81d3..523bd0d34d9d 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -197,7 +197,7 @@ static int ps3_virq_setup(enum ps3_cpu_binding cpu, unsigned long outlet,
 	result = irq_set_chip_data(*virq, pd);
 
 	if (result) {
-		pr_debug("%s:%d: set_irq_chip_data failed\n",
+		pr_debug("%s:%d: irq_set_chip_data failed\n",
 			__func__, __LINE__);
 		goto fail_set;
 	}
-- 
cgit v1.2.3


From 0407a31429500e7e56da33a326ca7cf35c2c9d65 Mon Sep 17 00:00:00 2001
From: Wanlong Gao <wanlong.gao@gmail.com>
Date: Sat, 9 Apr 2011 08:09:46 +0000
Subject: powerpc: Fix build warning of the defconfigs

BT_L2CAP and BT_SCO have changed to bool .
Value 'm' has invalid .

Signed-off-by: Wanlong Gao <wanlong.gao@gmail.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/configs/c2k_defconfig    | 4 ++--
 arch/powerpc/configs/pmac32_defconfig | 4 ++--
 arch/powerpc/configs/ppc6xx_defconfig | 4 ++--
 arch/powerpc/configs/ps3_defconfig    | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig
index f9e6a3ea5a64..2a84fd7f631c 100644
--- a/arch/powerpc/configs/c2k_defconfig
+++ b/arch/powerpc/configs/c2k_defconfig
@@ -132,8 +132,8 @@ CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_IND=y
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index ac4fc41035f6..f8b394a76ac3 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -112,8 +112,8 @@ CONFIG_IRDA_CACHE_LAST_LSAP=y
 CONFIG_IRDA_FAST_RR=y
 CONFIG_IRTTY_SIR=m
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 0a10fb009ef7..214208924a9c 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -351,8 +351,8 @@ CONFIG_VLSI_FIR=m
 CONFIG_VIA_FIR=m
 CONFIG_MCS_FIR=m
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index caba919f65d8..6472322bf13b 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -52,8 +52,8 @@ CONFIG_IP_PNP_DHCP=y
 # CONFIG_INET_DIAG is not set
 CONFIG_IPV6=y
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
-- 
cgit v1.2.3


From 9d4a2925c290a053bb279e75e7a649069fdcaf6b Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 7 Apr 2011 21:56:02 +0000
Subject: powerpc: Add MSR_64BIT

The MSR bit which indicates 64-bit-ness is different between server and
booke, so add a #define which gives you the right mask regardless.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/reg.h       | 10 ++++++++--
 arch/powerpc/include/asm/reg_booke.h |  6 ++++--
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 76d7d5fea5be..1f9ac12742e6 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -99,17 +99,23 @@
 #define MSR_LE		__MASK(MSR_LE_LG)	/* Little Endian */
 
 #if defined(CONFIG_PPC_BOOK3S_64)
+#define MSR_64BIT	MSR_SF
+
 /* Server variant */
 #define MSR_		MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV
-#define MSR_KERNEL      MSR_ | MSR_SF
+#define MSR_KERNEL	MSR_ | MSR_64BIT
 #define MSR_USER32	MSR_ | MSR_PR | MSR_EE
-#define MSR_USER64	MSR_USER32 | MSR_SF
+#define MSR_USER64	MSR_USER32 | MSR_64BIT
 #elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx)
 /* Default MSR for kernel mode. */
 #define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR)
 #define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
 #endif
 
+#ifndef MSR_64BIT
+#define MSR_64BIT	0
+#endif
+
 /* Floating Point Status and Control Register (FPSCR) Fields */
 #define FPSCR_FX	0x80000000	/* FPU exception summary */
 #define FPSCR_FEX	0x40000000	/* FPU enabled exception summary */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index b316794aa2b5..817bd1ac1752 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -27,10 +27,12 @@
 #define MSR_CM		(1<<31) /* Computation Mode (0=32-bit, 1=64-bit) */
 
 #if defined(CONFIG_PPC_BOOK3E_64)
+#define MSR_64BIT	MSR_CM
+
 #define MSR_		MSR_ME | MSR_CE
-#define MSR_KERNEL      MSR_ | MSR_CM
+#define MSR_KERNEL	MSR_ | MSR_64BIT
 #define MSR_USER32	MSR_ | MSR_PR | MSR_EE | MSR_DE
-#define MSR_USER64	MSR_USER32 | MSR_CM | MSR_DE
+#define MSR_USER64	MSR_USER32 | MSR_64BIT
 #elif defined (CONFIG_40x)
 #define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE)
 #define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
-- 
cgit v1.2.3


From 9f0b079320ad1cc71ad7ea4e0ed0b64cd72bbd6d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 7 Apr 2011 21:56:03 +0000
Subject: powerpc: Use MSR_64BIT in places

Use the new MSR_64BIT in a few places. Some of these are already ifdef'ed
for BOOKE vs BOOKS, but it's still clearer, MSR_SF does not immediately
parse as "MSR bit for 64bit".

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/head_64.S   |  2 +-
 arch/powerpc/kernel/signal_64.c |  4 ++--
 arch/powerpc/kernel/traps.c     |  2 +-
 arch/powerpc/xmon/xmon.c        | 14 +++++++-------
 4 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 6d17c37f22a1..73d6e9afcdf1 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -655,7 +655,7 @@ _GLOBAL(enable_64b_mode)
 	oris	r11,r11,0x8000		/* CM bit set, we'll set ICM later */
 	mtmsr	r11
 #else /* CONFIG_PPC_BOOK3E */
-	li	r12,(MSR_SF | MSR_ISF)@highest
+	li	r12,(MSR_64BIT | MSR_ISF)@highest
 	sldi	r12,r12,48
 	or	r11,r11,r12
 	mtmsrd	r11
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 27c4a4584f80..da989fff19cc 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -381,7 +381,7 @@ badframe:
 	       regs, uc, &uc->uc_mcontext);
 #endif
 	if (show_unhandled_signals && printk_ratelimit())
-		printk(regs->msr & MSR_SF ? fmt64 : fmt32,
+		printk(regs->msr & MSR_64BIT ? fmt64 : fmt32,
 			current->comm, current->pid, "rt_sigreturn",
 			(long)uc, regs->nip, regs->link);
 
@@ -469,7 +469,7 @@ badframe:
 	       regs, frame, newsp);
 #endif
 	if (show_unhandled_signals && printk_ratelimit())
-		printk(regs->msr & MSR_SF ? fmt64 : fmt32,
+		printk(regs->msr & MSR_64BIT ? fmt64 : fmt32,
 			current->comm, current->pid, "setup_rt_frame",
 			(long)frame, regs->nip, regs->link);
 
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index cb71cf29edea..4a6a109b6816 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -199,7 +199,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
 	} else if (show_unhandled_signals &&
 		    unhandled_signal(current, signr) &&
 		    printk_ratelimit()) {
-			printk(regs->msr & MSR_SF ? fmt64 : fmt32,
+			printk(regs->msr & MSR_64BIT ? fmt64 : fmt32,
 				current->comm, current->pid, signr,
 				addr, regs->nip, regs->link, code);
 		}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 33794c1d92c3..ef9756ee284e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -399,7 +399,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 	cpu_set(cpu, cpus_in_xmon);
 
 	bp = NULL;
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF))
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT))
 		bp = at_breakpoint(regs->nip);
 	if (bp || unrecoverable_excp(regs))
 		fromipi = 0;
@@ -529,7 +529,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		}
 	}
 #else
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) {
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) {
 		bp = at_breakpoint(regs->nip);
 		if (bp != NULL) {
 			int stepped = emulate_step(regs, bp->instr[0]);
@@ -578,7 +578,7 @@ static int xmon_bpt(struct pt_regs *regs)
 	struct bpt *bp;
 	unsigned long offset;
 
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF))
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
 		return 0;
 
 	/* Are we at the trap at bp->instr[1] for some bp? */
@@ -609,7 +609,7 @@ static int xmon_sstep(struct pt_regs *regs)
 
 static int xmon_dabr_match(struct pt_regs *regs)
 {
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF))
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
 		return 0;
 	if (dabr.enabled == 0)
 		return 0;
@@ -619,7 +619,7 @@ static int xmon_dabr_match(struct pt_regs *regs)
 
 static int xmon_iabr_match(struct pt_regs *regs)
 {
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) != (MSR_IR|MSR_SF))
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
 		return 0;
 	if (iabr == NULL)
 		return 0;
@@ -644,7 +644,7 @@ static int xmon_fault_handler(struct pt_regs *regs)
 	if (in_xmon && catch_memory_errors)
 		handle_fault(regs);	/* doesn't return */
 
-	if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF)) {
+	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) {
 		bp = in_breakpoint_table(regs->nip, &offset);
 		if (bp != NULL) {
 			regs->nip = bp->address + offset;
@@ -929,7 +929,7 @@ static int do_step(struct pt_regs *regs)
 	int stepped;
 
 	/* check we are in 64-bit kernel mode, translation enabled */
-	if ((regs->msr & (MSR_SF|MSR_PR|MSR_IR)) == (MSR_SF|MSR_IR)) {
+	if ((regs->msr & (MSR_64BIT|MSR_PR|MSR_IR)) == (MSR_64BIT|MSR_IR)) {
 		if (mread(regs->nip, &instr, 4) == 4) {
 			stepped = emulate_step(regs, instr);
 			if (stepped < 0) {
-- 
cgit v1.2.3


From b91e136cdf88e19e998dbf4631ead266de4b80b5 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 7 Apr 2011 21:56:04 +0000
Subject: powerpc: Use MSR_64BIT in sstep.c, fix kprobes on BOOK3E

We check MSR_SF a lot in sstep.c, to decide if we need to emulate the
truncation of values when running in 32-bit mode. Factor out that code
into a helper, and convert it and the other uses to use MSR_64BIT.

This fixes a bug on BOOK3E where kprobes would end up returning to a
32-bit address, because regs->nip was truncated, because (msr & MSR_SF)
was false.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/lib/sstep.c | 61 +++++++++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 34 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index ae5189ab0049..0e5e540c7778 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -44,6 +44,18 @@ extern int do_lxvd2x(int rn, unsigned long ea);
 extern int do_stxvd2x(int rn, unsigned long ea);
 #endif
 
+/*
+ * Emulate the truncation of 64 bit values in 32-bit mode.
+ */
+static unsigned long truncate_if_32bit(unsigned long msr, unsigned long val)
+{
+#ifdef __powerpc64__
+	if ((msr & MSR_64BIT) == 0)
+		val &= 0xffffffffUL;
+#endif
+	return val;
+}
+
 /*
  * Determine whether a conditional branch instruction would branch.
  */
@@ -90,11 +102,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs
 		if (instr & 0x04000000)		/* update forms */
 			regs->gpr[ra] = ea;
 	}
-#ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF))
-		ea &= 0xffffffffUL;
-#endif
-	return ea;
+
+	return truncate_if_32bit(regs->msr, ea);
 }
 
 #ifdef __powerpc64__
@@ -113,9 +122,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg
 		if ((instr & 3) == 1)		/* update forms */
 			regs->gpr[ra] = ea;
 	}
-	if (!(regs->msr & MSR_SF))
-		ea &= 0xffffffffUL;
-	return ea;
+
+	return truncate_if_32bit(regs->msr, ea);
 }
 #endif /* __powerpc64 */
 
@@ -136,11 +144,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs
 		if (do_update)		/* update forms */
 			regs->gpr[ra] = ea;
 	}
-#ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF))
-		ea &= 0xffffffffUL;
-#endif
-	return ea;
+
+	return truncate_if_32bit(regs->msr, ea);
 }
 
 /*
@@ -466,7 +471,7 @@ static void __kprobes set_cr0(struct pt_regs *regs, int rd)
 
 	regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
 #ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF))
+	if (!(regs->msr & MSR_64BIT))
 		val = (int) val;
 #endif
 	if (val < 0)
@@ -487,7 +492,7 @@ static void __kprobes add_with_carry(struct pt_regs *regs, int rd,
 		++val;
 	regs->gpr[rd] = val;
 #ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF)) {
+	if (!(regs->msr & MSR_64BIT)) {
 		val = (unsigned int) val;
 		val1 = (unsigned int) val1;
 	}
@@ -570,8 +575,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		if ((instr & 2) == 0)
 			imm += regs->nip;
 		regs->nip += 4;
-		if ((regs->msr & MSR_SF) == 0)
-			regs->nip &= 0xffffffffUL;
+		regs->nip = truncate_if_32bit(regs->msr, regs->nip);
 		if (instr & 1)
 			regs->link = regs->nip;
 		if (branch_taken(instr, regs))
@@ -604,13 +608,9 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 			imm -= 0x04000000;
 		if ((instr & 2) == 0)
 			imm += regs->nip;
-		if (instr & 1) {
-			regs->link = regs->nip + 4;
-			if ((regs->msr & MSR_SF) == 0)
-				regs->link &= 0xffffffffUL;
-		}
-		if ((regs->msr & MSR_SF) == 0)
-			imm &= 0xffffffffUL;
+		if (instr & 1)
+			regs->link = truncate_if_32bit(regs->msr, regs->nip + 4);
+		imm = truncate_if_32bit(regs->msr, imm);
 		regs->nip = imm;
 		return 1;
 	case 19:
@@ -618,11 +618,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		case 16:	/* bclr */
 		case 528:	/* bcctr */
 			imm = (instr & 0x400)? regs->ctr: regs->link;
-			regs->nip += 4;
-			if ((regs->msr & MSR_SF) == 0) {
-				regs->nip &= 0xffffffffUL;
-				imm &= 0xffffffffUL;
-			}
+			regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
+			imm = truncate_if_32bit(regs->msr, imm);
 			if (instr & 1)
 				regs->link = regs->nip;
 			if (branch_taken(instr, regs))
@@ -1616,11 +1613,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		return 0;	/* invoke DSI if -EFAULT? */
 	}
  instr_done:
-	regs->nip += 4;
-#ifdef __powerpc64__
-	if ((regs->msr & MSR_SF) == 0)
-		regs->nip &= 0xffffffffUL;
-#endif
+	regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
 	return 1;
 
  logical_done:
-- 
cgit v1.2.3


From a7b8ad405862fb10e496ce839d423dfc94ac821b Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Thu, 7 Apr 2011 21:22:23 +0000
Subject: powerpc/book3e: Fix extlb size

The calculation of the size for the exception save area of the TLB
miss handler is wrong, luckily it's too big not too small.

Rework it to make it a bit clearer, and also correct. We want 3 save
areas, each EX_TLB_SIZE _bytes_.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Acked-by: Kumar Gala <galak@kernel.crashing.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/paca.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index f6da4f517fca..65c13c48db43 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -106,7 +106,8 @@ struct paca_struct {
 	pgd_t *pgd;			/* Current PGD */
 	pgd_t *kernel_pgd;		/* Kernel PGD */
 	u64 exgen[8] __attribute__((aligned(0x80)));
-	u64 extlb[EX_TLB_SIZE*3] __attribute__((aligned(0x80)));
+	/* We can have up to 3 levels of reentrancy in the TLB miss handler */
+	u64 extlb[3][EX_TLB_SIZE / sizeof(u64)] __attribute__((aligned(0x80)));
 	u64 exmc[8];		/* used for machine checks */
 	u64 excrit[8];		/* used for crit interrupts */
 	u64 exdbg[8];		/* used for debug interrupts */
-- 
cgit v1.2.3


From eca590f402332ab873d13f2d8d00fa0b91cfff36 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 7 Apr 2011 01:54:07 +0000
Subject: powerpc/rtas: Only sleep in rtas_busy_delay if we have useful work to
 do

RTAS returns extended error codes as a hint of how long the
OS might want to wait before retrying a call. If we have nothing
else useful to do we may as well call back straight away.

This was found when testing the new dynamic dma window feature.
Firmware split the zeroing of the TCE table into 32k chunks but
returned 9901 (which is a suggested wait of 10ms). All up this took
about 10 minutes to complete since msleep is jiffies based and will
round 10ms up to 20ms.

With the patch below we take 3 seconds to complete the same test.
The hint firmware is returning in the RTAS call should definitely
be decreased, but even if we slept 1ms each iteration this would
take 32s.

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/rtas.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 2097f2b3cba8..f48446635c89 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -494,7 +494,7 @@ unsigned int rtas_busy_delay(int status)
 
 	might_sleep();
 	ms = rtas_busy_delay_time(status);
-	if (ms)
+	if (ms && need_resched())
 		msleep(ms);
 
 	return ms;
-- 
cgit v1.2.3


From 44ae3ab3358e962039c36ad4ae461ae9fb29596c Mon Sep 17 00:00:00 2001
From: Matt Evans <matt@ozlabs.org>
Date: Wed, 6 Apr 2011 19:48:50 +0000
Subject: powerpc: Free up some CPU feature bits by moving out MMU-related
 features

Some of the 64bit PPC CPU features are MMU-related, so this patch moves
them to MMU_FTR_ bits.  All cpu_has_feature()-style tests are moved to
mmu_has_feature(), and seven feature bits are freed as a result.

Signed-off-by: Matt Evans <matt@ozlabs.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/cputable.h        | 37 +++++++++--------------
 arch/powerpc/include/asm/mmu.h             | 48 ++++++++++++++++++++++++++++++
 arch/powerpc/include/asm/mmu_context.h     |  2 +-
 arch/powerpc/kernel/cputable.c             | 45 ++++++++++++----------------
 arch/powerpc/kernel/entry_64.S             |  8 ++---
 arch/powerpc/kernel/exceptions-64s.S       |  4 +--
 arch/powerpc/kernel/process.c              |  4 +--
 arch/powerpc/kernel/prom.c                 | 17 ++++++-----
 arch/powerpc/kernel/setup_64.c             |  2 +-
 arch/powerpc/mm/hash_low_64.S              |  8 ++---
 arch/powerpc/mm/hash_native_64.c           |  8 ++---
 arch/powerpc/mm/hash_utils_64.c            | 18 +++++------
 arch/powerpc/mm/hugetlbpage.c              |  2 +-
 arch/powerpc/mm/slb.c                      |  4 +--
 arch/powerpc/mm/slb_low.S                  |  8 ++---
 arch/powerpc/mm/stab.c                     |  2 +-
 arch/powerpc/platforms/iseries/exception.S |  3 +-
 arch/powerpc/platforms/iseries/setup.c     |  4 +--
 arch/powerpc/platforms/pseries/lpar.c      |  2 +-
 arch/powerpc/xmon/xmon.c                   |  2 +-
 20 files changed, 132 insertions(+), 96 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 2d71523ebb03..3db2476704d6 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -178,23 +178,17 @@ extern const char *powerpc_base_platform;
 #define LONG_ASM_CONST(x)		0
 #endif
 
-#define CPU_FTR_SLB			LONG_ASM_CONST(0x0000000100000000)
-#define CPU_FTR_16M_PAGE		LONG_ASM_CONST(0x0000000200000000)
-#define CPU_FTR_TLBIEL			LONG_ASM_CONST(0x0000000400000000)
+
 #define CPU_FTR_HVMODE_206		LONG_ASM_CONST(0x0000000800000000)
 #define CPU_FTR_IABR			LONG_ASM_CONST(0x0000002000000000)
 #define CPU_FTR_MMCRA			LONG_ASM_CONST(0x0000004000000000)
 #define CPU_FTR_CTRL			LONG_ASM_CONST(0x0000008000000000)
 #define CPU_FTR_SMT			LONG_ASM_CONST(0x0000010000000000)
-#define CPU_FTR_LOCKLESS_TLBIE		LONG_ASM_CONST(0x0000040000000000)
-#define CPU_FTR_CI_LARGE_PAGE		LONG_ASM_CONST(0x0000100000000000)
 #define CPU_FTR_PAUSE_ZERO		LONG_ASM_CONST(0x0000200000000000)
 #define CPU_FTR_PURR			LONG_ASM_CONST(0x0000400000000000)
 #define CPU_FTR_CELL_TB_BUG		LONG_ASM_CONST(0x0000800000000000)
 #define CPU_FTR_SPURR			LONG_ASM_CONST(0x0001000000000000)
 #define CPU_FTR_DSCR			LONG_ASM_CONST(0x0002000000000000)
-#define CPU_FTR_1T_SEGMENT		LONG_ASM_CONST(0x0004000000000000)
-#define CPU_FTR_NO_SLBIE_B		LONG_ASM_CONST(0x0008000000000000)
 #define CPU_FTR_VSX			LONG_ASM_CONST(0x0010000000000000)
 #define CPU_FTR_SAO			LONG_ASM_CONST(0x0020000000000000)
 #define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0040000000000000)
@@ -206,9 +200,10 @@ extern const char *powerpc_base_platform;
 
 #ifndef __ASSEMBLY__
 
-#define CPU_FTR_PPCAS_ARCH_V2	(CPU_FTR_SLB | \
-				 CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | \
-				 CPU_FTR_NODSISRALIGN | CPU_FTR_16M_PAGE)
+#define CPU_FTR_PPCAS_ARCH_V2	(CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
+
+#define MMU_FTR_PPCAS_ARCH_V2 	(MMU_FTR_SLB | MMU_FTR_TLBIEL | \
+				 MMU_FTR_16M_PAGE)
 
 /* We only set the altivec features if the kernel was compiled with altivec
  * support
@@ -408,38 +403,34 @@ extern const char *powerpc_base_platform;
 #define CPU_FTRS_POWER5	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
-	    CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS | \
-	    CPU_FTR_POPCNTB)
+	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
 #define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
+	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
 	    CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
 #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_HVMODE_206 |\
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
+	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
 	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
 #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
-	    CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | \
-	    CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
+	    CPU_FTR_PAUSE_ZERO  | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
 	    CPU_FTR_UNALIGNED_LD_STD)
 #define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
-	    CPU_FTR_PPCAS_ARCH_V2 | \
-	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \
-	    CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_NO_SLBIE_B)
+	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_PURR | CPU_FTR_REAL_LE)
 #define CPU_FTRS_COMPATIBLE	(CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
 
 #define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \
-	    CPU_FTR_TLBIEL | CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN | \
-	    CPU_FTR_16M_PAGE)
+		     CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
 
 #ifdef __powerpc64__
 #ifdef CONFIG_PPC_BOOK3E
@@ -449,7 +440,7 @@ extern const char *powerpc_base_platform;
 	    (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |	\
 	    CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 |	\
 	    CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T |		\
-	    CPU_FTR_1T_SEGMENT | CPU_FTR_VSX)
+	    CPU_FTR_VSX)
 #endif
 #else
 enum {
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index bb40a06d3b77..a39304b74f84 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -70,6 +70,54 @@
  */
 #define MMU_FTR_USE_PAIRED_MAS		ASM_CONST(0x01000000)
 
+/* MMU is SLB-based
+ */
+#define MMU_FTR_SLB			ASM_CONST(0x02000000)
+
+/* Support 16M large pages
+ */
+#define MMU_FTR_16M_PAGE		ASM_CONST(0x04000000)
+
+/* Supports TLBIEL variant
+ */
+#define MMU_FTR_TLBIEL			ASM_CONST(0x08000000)
+
+/* Supports tlbies w/o locking
+ */
+#define MMU_FTR_LOCKLESS_TLBIE		ASM_CONST(0x10000000)
+
+/* Large pages can be marked CI
+ */
+#define MMU_FTR_CI_LARGE_PAGE		ASM_CONST(0x20000000)
+
+/* 1T segments available
+ */
+#define MMU_FTR_1T_SEGMENT		ASM_CONST(0x40000000)
+
+/* Doesn't support the B bit (1T segment) in SLBIE
+ */
+#define MMU_FTR_NO_SLBIE_B		ASM_CONST(0x80000000)
+
+/* MMU feature bit sets for various CPUs */
+#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2	\
+	MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
+#define MMU_FTRS_POWER4		MMU_FTRS_DEFAULT_HPTE_ARCH_V2
+#define MMU_FTRS_PPC970		MMU_FTRS_POWER4
+#define MMU_FTRS_POWER5		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER6		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER7		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | \
+				MMU_FTR_TLBIE_206
+#define MMU_FTRS_CELL		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
+				MMU_FTR_CI_LARGE_PAGE
+#define MMU_FTRS_PA6T		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
+				MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B
+#define MMU_FTRS_A2		MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | \
+				MMU_FTR_USE_TLBIVAX_BCAST | \
+				MMU_FTR_LOCK_BCAST_INVAL | \
+				MMU_FTR_USE_TLBRSRV | \
+				MMU_FTR_USE_PAIRED_MAS | \
+				MMU_FTR_TLBIEL | \
+				MMU_FTR_16M_PAGE
 #ifndef __ASSEMBLY__
 #include <asm/cputable.h>
 
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 81fb41289d6c..8e13f65b498c 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -67,7 +67,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 * sub architectures.
 	 */
 #ifdef CONFIG_PPC_STD_MMU_64
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		switch_slb(tsk, next);
 	else
 		switch_stab(tsk, next);
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 3d7b65ad4962..34d2722b9451 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -201,7 +201,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER4 (gp)",
 		.cpu_features		= CPU_FTRS_POWER4,
 		.cpu_user_features	= COMMON_USER_POWER4,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER4,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -216,7 +216,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER4+ (gq)",
 		.cpu_features		= CPU_FTRS_POWER4,
 		.cpu_user_features	= COMMON_USER_POWER4,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER4,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -232,7 +232,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_PPC970,
 		.cpu_user_features	= COMMON_USER_POWER4 |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PPC970,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -250,7 +250,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_PPC970,
 		.cpu_user_features	= COMMON_USER_POWER4 |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PPC970,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -286,7 +286,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_PPC970,
 		.cpu_user_features	= COMMON_USER_POWER4 |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PPC970,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -304,7 +304,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_PPC970,
 		.cpu_user_features	= COMMON_USER_POWER4 |
 			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PPC970,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 8,
@@ -320,7 +320,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER5 (gr)",
 		.cpu_features		= CPU_FTRS_POWER5,
 		.cpu_user_features	= COMMON_USER_POWER5,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER5,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -340,7 +340,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER5+ (gs)",
 		.cpu_features		= CPU_FTRS_POWER5,
 		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER5,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -356,7 +356,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER5+ (gs)",
 		.cpu_features		= CPU_FTRS_POWER5,
 		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER5,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -373,7 +373,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER5+",
 		.cpu_features		= CPU_FTRS_POWER5,
 		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER5,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
@@ -387,7 +387,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_features		= CPU_FTRS_POWER6,
 		.cpu_user_features	= COMMON_USER_POWER6 |
 			PPC_FEATURE_POWER6_EXT,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER6,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -406,7 +406,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER6 (architected)",
 		.cpu_features		= CPU_FTRS_POWER6,
 		.cpu_user_features	= COMMON_USER_POWER6,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_POWER6,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
@@ -419,8 +419,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7 (architected)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
-		.mmu_features		= MMU_FTR_HPTE_TABLE |
-			MMU_FTR_TLBIE_206,
+		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.oprofile_type		= PPC_OPROFILE_POWER4,
@@ -435,8 +434,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7 (raw)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
-		.mmu_features		= MMU_FTR_HPTE_TABLE |
-			MMU_FTR_TLBIE_206,
+		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -453,8 +451,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER7+ (raw)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
-		.mmu_features		= MMU_FTR_HPTE_TABLE |
-			MMU_FTR_TLBIE_206,
+		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -473,7 +470,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_user_features	= COMMON_USER_PPC64 |
 			PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP |
 			PPC_FEATURE_SMT,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_CELL,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 4,
@@ -488,7 +485,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "PA6T",
 		.cpu_features		= CPU_FTRS_PA6T,
 		.cpu_user_features	= COMMON_USER_PA6T,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_PA6T,
 		.icache_bsize		= 64,
 		.dcache_bsize		= 64,
 		.num_pmcs		= 6,
@@ -505,7 +502,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "POWER4 (compatible)",
 		.cpu_features		= CPU_FTRS_COMPATIBLE,
 		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.mmu_features		= MMU_FTRS_DEFAULT_HPTE_ARCH_V2,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
@@ -2020,11 +2017,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_name		= "A2 (>= DD2)",
 		.cpu_features		= CPU_FTRS_A2,
 		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX |
-					  MMU_FTR_USE_TLBIVAX_BCAST |
-					  MMU_FTR_LOCK_BCAST_INVAL |
-					  MMU_FTR_USE_TLBRSRV |
-					  MMU_FTR_USE_PAIRED_MAS,
+		.mmu_features		= MMU_FTRS_A2,
 		.icache_bsize		= 64,
 		.dcache_bsize		= 64,
 		.num_pmcs		= 0,
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 64693706ebfd..d834425186ae 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -468,10 +468,10 @@ BEGIN_FTR_SECTION
   FTR_SECTION_ELSE_NESTED(95)
 	clrrdi	r6,r8,40	/* get its 1T ESID */
 	clrrdi	r9,r1,40	/* get current sp 1T ESID */
-  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_1T_SEGMENT, 95)
+  ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95)
 FTR_SECTION_ELSE
 	b	2f
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_SLB)
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB)
 	clrldi.	r0,r6,2		/* is new ESID c00000000? */
 	cmpd	cr1,r6,r9	/* or is new ESID the same as current ESID? */
 	cror	eq,4*cr1+eq,eq
@@ -485,7 +485,7 @@ BEGIN_FTR_SECTION
 	li	r9,MMU_SEGSIZE_1T	/* insert B field */
 	oris	r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
 	rldimi	r7,r9,SLB_VSID_SSIZE_SHIFT,0
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 
 	/* Update the last bolted SLB.  No write barriers are needed
 	 * here, provided we only update the current CPU's SLB shadow
@@ -497,7 +497,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
 	std	r7,SLBSHADOW_STACKVSID(r9)  /* Save VSID */
 	std	r0,SLBSHADOW_STACKESID(r9)  /* Save ESID */
 
-	/* No need to check for CPU_FTR_NO_SLBIE_B here, since when
+	/* No need to check for MMU_FTR_NO_SLBIE_B here, since when
 	 * we have 1TB segments, the only CPUs known to have the errata
 	 * only support less than 1TB of system memory and we'll never
 	 * actually hit this code path.
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ad06333631ac..226cc8c62224 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -102,7 +102,7 @@ BEGIN_FTR_SECTION
 	EXCEPTION_PROLOG_PSERIES_1(data_access_common, EXC_STD)
 FTR_SECTION_ELSE
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD)
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB)
 
 	. = 0x380
 	.globl data_access_slb_pSeries
@@ -840,7 +840,7 @@ _STATIC(do_hash_page)
 BEGIN_FTR_SECTION
 	andis.	r0,r4,0x0020		/* Is it a segment table fault? */
 	bne-	do_ste_alloc		/* If so handle it */
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
 
 	clrrdi	r11,r1,THREAD_SHIFT
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a01c2d93fd2f..095043d79946 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -757,11 +757,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 				_ALIGN_UP(sizeof(struct thread_info), 16);
 
 #ifdef CONFIG_PPC_STD_MMU_64
-	if (cpu_has_feature(CPU_FTR_SLB)) {
+	if (mmu_has_feature(MMU_FTR_SLB)) {
 		unsigned long sp_vsid;
 		unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
 
-		if (cpu_has_feature(CPU_FTR_1T_SEGMENT))
+		if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 			sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
 				<< SLB_VSID_SHIFT_1T;
 		else
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index c391dc4c8bad..5f5e6aed2b70 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -123,18 +123,19 @@ static void __init move_device_tree(void)
  */
 static struct ibm_pa_feature {
 	unsigned long	cpu_features;	/* CPU_FTR_xxx bit */
+	unsigned long	mmu_features;	/* MMU_FTR_xxx bit */
 	unsigned int	cpu_user_ftrs;	/* PPC_FEATURE_xxx bit */
 	unsigned char	pabyte;		/* byte number in ibm,pa-features */
 	unsigned char	pabit;		/* bit number (big-endian) */
 	unsigned char	invert;		/* if 1, pa bit set => clear feature */
 } ibm_pa_features[] __initdata = {
-	{0, PPC_FEATURE_HAS_MMU,	0, 0, 0},
-	{0, PPC_FEATURE_HAS_FPU,	0, 1, 0},
-	{CPU_FTR_SLB, 0,		0, 2, 0},
-	{CPU_FTR_CTRL, 0,		0, 3, 0},
-	{CPU_FTR_NOEXECUTE, 0,		0, 6, 0},
-	{CPU_FTR_NODSISRALIGN, 0,	1, 1, 1},
-	{CPU_FTR_CI_LARGE_PAGE, 0,	1, 2, 0},
+	{0, 0, PPC_FEATURE_HAS_MMU,	0, 0, 0},
+	{0, 0, PPC_FEATURE_HAS_FPU,	0, 1, 0},
+	{0, MMU_FTR_SLB, 0,		0, 2, 0},
+	{CPU_FTR_CTRL, 0, 0,		0, 3, 0},
+	{CPU_FTR_NOEXECUTE, 0, 0,	0, 6, 0},
+	{CPU_FTR_NODSISRALIGN, 0, 0,	1, 1, 1},
+	{0, MMU_FTR_CI_LARGE_PAGE, 0,	1, 2, 0},
 	{CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
 };
 
@@ -166,9 +167,11 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs,
 		if (bit ^ fp->invert) {
 			cur_cpu_spec->cpu_features |= fp->cpu_features;
 			cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
+			cur_cpu_spec->mmu_features |= fp->mmu_features;
 		} else {
 			cur_cpu_spec->cpu_features &= ~fp->cpu_features;
 			cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
+			cur_cpu_spec->mmu_features &= ~fp->mmu_features;
 		}
 	}
 }
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 91a5cc5f0d02..959c63cf62e4 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -436,7 +436,7 @@ void __init setup_system(void)
 
 static u64 slb0_limit(void)
 {
-	if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) {
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
 		return 1UL << SID_SHIFT_1T;
 	}
 	return 1UL << SID_SHIFT;
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 5b7dd4ea02b5..a242b5d7cbe4 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -118,7 +118,7 @@ _GLOBAL(__hash_page_4K)
 BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	/* Calc va and put it in r29 */
 	rldicr	r29,r5,28,63-28
 	rldicl	r3,r3,0,36
@@ -401,7 +401,7 @@ _GLOBAL(__hash_page_4K)
 BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	/* Calc va and put it in r29 */
 	rldicr	r29,r5,28,63-28		/* r29 = (vsid << 28) */
 	rldicl	r3,r3,0,36		/* r3 = (ea & 0x0fffffff) */
@@ -715,7 +715,7 @@ BEGIN_FTR_SECTION
 	andi.	r0,r31,_PAGE_NO_CACHE
 	/* If so, bail out and refault as a 4k page */
 	bne-	ht64_bail_ok
-END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_CI_LARGE_PAGE)
 	/* Prepare new PTE value (turn access RW into DIRTY, then
 	 * add BUSY and ACCESSED)
 	 */
@@ -736,7 +736,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
 BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	/* Calc va and put it in r29 */
 	rldicr	r29,r5,28,63-28
 	rldicl	r3,r3,0,36
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 784a400e0781..c23eef2b81a6 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -98,8 +98,8 @@ static inline void __tlbiel(unsigned long va, int psize, int ssize)
 
 static inline void tlbie(unsigned long va, int psize, int ssize, int local)
 {
-	unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
-	int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
 	if (use_local)
 		use_local = mmu_psize_defs[psize].tlbiel;
@@ -503,7 +503,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 		} pte_iterate_hashed_end();
 	}
 
-	if (cpu_has_feature(CPU_FTR_TLBIEL) &&
+	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
 	    mmu_psize_defs[psize].tlbiel && local) {
 		asm volatile("ptesync":::"memory");
 		for (i = 0; i < number; i++) {
@@ -517,7 +517,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 		}
 		asm volatile("ptesync":::"memory");
 	} else {
-		int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
 		if (lock_tlbie)
 			raw_spin_lock(&native_tlbie_lock);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index d95d8f484d2f..26b2872b3d00 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -259,11 +259,11 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
 	for (; size >= 4; size -= 4, ++prop) {
 		if (prop[0] == 40) {
 			DBG("1T segment support detected\n");
-			cur_cpu_spec->cpu_features |= CPU_FTR_1T_SEGMENT;
+			cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
 			return 1;
 		}
 	}
-	cur_cpu_spec->cpu_features &= ~CPU_FTR_NO_SLBIE_B;
+	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
 	return 0;
 }
 
@@ -289,7 +289,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 	if (prop != NULL) {
 		DBG("Page sizes from device-tree:\n");
 		size /= 4;
-		cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
+		cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
 		while(size > 0) {
 			unsigned int shift = prop[0];
 			unsigned int slbenc = prop[1];
@@ -317,7 +317,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 				break;
 			case 0x18:
 				idx = MMU_PAGE_16M;
-				cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
+				cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
 				break;
 			case 0x22:
 				idx = MMU_PAGE_16G;
@@ -412,7 +412,7 @@ static void __init htab_init_page_sizes(void)
 	 * Not in the device-tree, let's fallback on known size
 	 * list for 16M capable GP & GR
 	 */
-	if (cpu_has_feature(CPU_FTR_16M_PAGE))
+	if (mmu_has_feature(MMU_FTR_16M_PAGE))
 		memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
 		       sizeof(mmu_psize_defaults_gp));
  found:
@@ -442,7 +442,7 @@ static void __init htab_init_page_sizes(void)
 		mmu_vmalloc_psize = MMU_PAGE_64K;
 		if (mmu_linear_psize == MMU_PAGE_4K)
 			mmu_linear_psize = MMU_PAGE_64K;
-		if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) {
+		if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
 			/*
 			 * Don't use 64k pages for ioremap on pSeries, since
 			 * that would stop us accessing the HEA ethernet.
@@ -608,7 +608,7 @@ static void __init htab_initialize(void)
 	/* Initialize page sizes */
 	htab_init_page_sizes();
 
-	if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) {
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
 		mmu_kernel_ssize = MMU_SEGSIZE_1T;
 		mmu_highuser_ssize = MMU_SEGSIZE_1T;
 		printk(KERN_INFO "Using 1TB segments\n");
@@ -749,7 +749,7 @@ void __init early_init_mmu(void)
 
 	/* Initialize stab / SLB management except on iSeries
 	 */
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		slb_initialize();
 	else if (!firmware_has_feature(FW_FEATURE_ISERIES))
 		stab_initialize(get_paca()->stab_real);
@@ -766,7 +766,7 @@ void __cpuinit early_init_mmu_secondary(void)
 	 * in real mode on pSeries and we want a virtual address on
 	 * iSeries anyway
 	 */
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		slb_initialize();
 	else
 		stab_initialize(get_paca()->stab_addr);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9bb249c3046e..0b9a5c1901b9 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -529,7 +529,7 @@ static int __init hugetlbpage_init(void)
 {
 	int psize;
 
-	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
+	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
 		return -ENODEV;
 
 	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 5500712781d4..e22276cb67a4 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -167,7 +167,7 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
 	int esid_1t_count;
 
 	/* System is not 1T segment size capable. */
-	if (!cpu_has_feature(CPU_FTR_1T_SEGMENT))
+	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		return (GET_ESID(addr1) == GET_ESID(addr2));
 
 	esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
@@ -202,7 +202,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 	 */
 	hard_irq_disable();
 	offset = get_paca()->slb_cache_ptr;
-	if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) &&
+	if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
 	    offset <= SLB_CACHE_ENTRIES) {
 		int i;
 		asm volatile("isync" : : : "memory");
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 95ce35581696..ef653dc95b65 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -58,7 +58,7 @@ _GLOBAL(slb_miss_kernel_load_linear)
 	li	r11,0
 BEGIN_FTR_SECTION
 	b	slb_finish_load
-END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 	b	slb_finish_load_1T
 
 1:
@@ -87,7 +87,7 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
 6:
 BEGIN_FTR_SECTION
 	b	slb_finish_load
-END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 	b	slb_finish_load_1T
 
 0:	/* user address: proto-VSID = context << 15 | ESID. First check
@@ -138,11 +138,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
 	ld	r9,PACACONTEXTID(r13)
 BEGIN_FTR_SECTION
 	cmpldi	r10,0x1000
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	rldimi	r10,r9,USER_ESID_BITS,0
 BEGIN_FTR_SECTION
 	bge	slb_finish_load_1T
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	b	slb_finish_load
 
 8:	/* invalid EA */
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 446a01842a73..41e31642a86a 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -243,7 +243,7 @@ void __init stabs_alloc(void)
 {
 	int cpu;
 
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		return;
 
 	for_each_possible_cpu(cpu) {
diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S
index 32a56c6dfa72..a67984c04954 100644
--- a/arch/powerpc/platforms/iseries/exception.S
+++ b/arch/powerpc/platforms/iseries/exception.S
@@ -31,6 +31,7 @@
 #include <asm/thread_info.h>
 #include <asm/ptrace.h>
 #include <asm/cputable.h>
+#include <asm/mmu.h>
 
 #include "exception.h"
 
@@ -157,7 +158,7 @@ BEGIN_FTR_SECTION
 FTR_SECTION_ELSE
 	EXCEPTION_PROLOG_1(PACA_EXGEN)
 	EXCEPTION_PROLOG_ISERIES_1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB)
 	b	data_access_common
 
 .do_stab_bolted_iSeries:
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 2946ae10fbfd..81cb8d2c4132 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -249,7 +249,7 @@ static unsigned long iSeries_process_mainstore_vpd(struct MemoryBlock *mb_array,
 	unsigned long i;
 	unsigned long mem_blocks = 0;
 
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		mem_blocks = iSeries_process_Regatta_mainstore_vpd(mb_array,
 				max_entries);
 	else
@@ -634,7 +634,7 @@ static int __init iseries_probe(void)
 
 	hpte_init_iSeries();
 	/* iSeries does not support 16M pages */
-	cur_cpu_spec->cpu_features &= ~CPU_FTR_16M_PAGE;
+	cur_cpu_spec->mmu_features &= ~MMU_FTR_16M_PAGE;
 
 	return 1;
 }
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index ca5d5898d320..6f0ed3aac77f 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -573,7 +573,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
 	unsigned long i, pix, rc;
 	unsigned long flags = 0;
 	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
-	int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 	unsigned long param[9];
 	unsigned long va;
 	unsigned long hash, index, shift, hidx, slot;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index ef9756ee284e..60593ad861e8 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2663,7 +2663,7 @@ static void dump_stab(void)
 
 void dump_segments(void)
 {
-	if (cpu_has_feature(CPU_FTR_SLB))
+	if (mmu_has_feature(MMU_FTR_SLB))
 		dump_slb();
 	else
 		dump_stab();
-- 
cgit v1.2.3


From 65f47f1339dfcffcd5837a307172fb41aa39e479 Mon Sep 17 00:00:00 2001
From: "Richard A. Lary" <rlary@linux.vnet.ibm.com>
Date: Wed, 6 Apr 2011 12:50:45 +0000
Subject: powerpc/eeh: Add support for ibm,configure-pe RTAS call

Added support for ibm,configure-pe RTAS call introduced with
PAPR 2.2.

Signed-off-by: Richard A. Lary <rlary@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/eeh.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 89649173d3a3..229373053864 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -93,6 +93,7 @@ static int ibm_slot_error_detail;
 static int ibm_get_config_addr_info;
 static int ibm_get_config_addr_info2;
 static int ibm_configure_bridge;
+static int ibm_configure_pe;
 
 int eeh_subsystem_enabled;
 EXPORT_SYMBOL(eeh_subsystem_enabled);
@@ -261,6 +262,8 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
 	pci_regs_buf[0] = 0;
 
 	rtas_pci_enable(pdn, EEH_THAW_MMIO);
+	rtas_configure_bridge(pdn);
+	eeh_restore_bars(pdn);
 	loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
 
 	rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
@@ -895,13 +898,20 @@ rtas_configure_bridge(struct pci_dn *pdn)
 {
 	int config_addr;
 	int rc;
+	int token;
 
 	/* Use PE configuration address, if present */
 	config_addr = pdn->eeh_config_addr;
 	if (pdn->eeh_pe_config_addr)
 		config_addr = pdn->eeh_pe_config_addr;
 
-	rc = rtas_call(ibm_configure_bridge,3,1, NULL,
+	/* Use new configure-pe function, if supported */
+	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
+		token = ibm_configure_pe;
+	else
+		token = ibm_configure_bridge;
+
+	rc = rtas_call(token, 3, 1, NULL,
 	               config_addr,
 	               BUID_HI(pdn->phb->buid),
 	               BUID_LO(pdn->phb->buid));
@@ -1077,6 +1087,7 @@ void __init eeh_init(void)
 	ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
 	ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
 	ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
+	ibm_configure_pe = rtas_token("ibm,configure-pe");
 
 	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
 		return;
-- 
cgit v1.2.3


From e9c549998dc24209847007e1f209f3b6c88d21ba Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Date: Tue, 26 Apr 2011 23:28:26 -0700
Subject: Revert wrong fixes for common misspellings

These changes were incorrectly fixed by codespell. They were now
manually corrected.

Signed-off-by: Lucas De Marchi <lucas.demarchi@profusion.mobi>
---
 arch/arm/mach-davinci/dm355.c                | 2 +-
 arch/arm/mach-davinci/dm644x.c               | 2 +-
 arch/powerpc/include/asm/uninorth.h          | 2 +-
 drivers/infiniband/hw/qib/qib_iba6120.c      | 2 +-
 drivers/infiniband/hw/qib/qib_iba7220.c      | 2 +-
 drivers/infiniband/hw/qib/qib_iba7322.c      | 2 +-
 drivers/media/radio/radio-sf16fmr2.c         | 2 +-
 drivers/mtd/nand/diskonchip.c                | 2 +-
 drivers/pcmcia/pcmcia_resource.c             | 2 +-
 drivers/staging/rt2860/common/cmm_data_pci.c | 2 +-
 drivers/staging/rt2860/common/cmm_data_usb.c | 2 +-
 drivers/staging/spectra/ffsport.c            | 2 +-
 drivers/staging/tidspbridge/dynload/cload.c  | 2 +-
 drivers/staging/tty/specialix.c              | 2 +-
 fs/btrfs/ctree.h                             | 2 +-
 fs/ocfs2/ocfs2_fs.h                          | 2 +-
 net/l2tp/l2tp_ip.c                           | 2 +-
 net/sctp/ulpevent.c                          | 2 +-
 sound/aoa/codecs/tas.c                       | 2 +-
 19 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index f68012239641..a3a94e9c9378 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -314,7 +314,7 @@ static struct clk timer2_clk = {
 	.name = "timer2",
 	.parent = &pll1_aux_clk,
 	.lpsc = DAVINCI_LPSC_TIMER2,
-	.usecount = 1,              /* REVISIT: why can't' this be disabled? */
+	.usecount = 1,              /* REVISIT: why can't this be disabled? */
 };
 
 static struct clk timer3_clk = {
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 5f8a65424184..4c82c2716293 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -274,7 +274,7 @@ static struct clk timer2_clk = {
 	.name = "timer2",
 	.parent = &pll1_aux_clk,
 	.lpsc = DAVINCI_LPSC_TIMER2,
-	.usecount = 1,              /* REVISIT: why can't' this be disabled? */
+	.usecount = 1,              /* REVISIT: why can't this be disabled? */
 };
 
 static struct clk_lookup dm644x_clks[] = {
diff --git a/arch/powerpc/include/asm/uninorth.h b/arch/powerpc/include/asm/uninorth.h
index ae9c899c8a6d..d12b11d7641e 100644
--- a/arch/powerpc/include/asm/uninorth.h
+++ b/arch/powerpc/include/asm/uninorth.h
@@ -60,7 +60,7 @@
  *
  * Obviously, the GART is not cache coherent and so any change to it
  * must be flushed to memory (or maybe just make the GART space non
- * cachable). AGP memory itself does't seem to be cache coherent neither.
+ * cachable). AGP memory itself doesn't seem to be cache coherent neither.
  *
  * In order to invalidate the GART (which is probably necessary to inval
  * the bridge internal TLBs), the following sequence has to be written,
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 7de4b7ebffc5..d8ca0a0b970d 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1799,7 +1799,7 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
 	/*
 	 * Keep chip from being accessed until we are ready.  Use
 	 * writeq() directly, to allow the write even though QIB_PRESENT
-	 * isn't' set.
+	 * isn't set.
 	 */
 	dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
 	dd->int_counter = 0; /* so we check interrupts work again */
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 74fe0360bec7..c765a2eb04cf 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -2111,7 +2111,7 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
 	/*
 	 * Keep chip from being accessed until we are ready.  Use
 	 * writeq() directly, to allow the write even though QIB_PRESENT
-	 * isn't' set.
+	 * isn't set.
 	 */
 	dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
 	dd->int_counter = 0; /* so we check interrupts work again */
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 55de3cf3441c..6bab3eaea70f 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -3299,7 +3299,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
 	/*
 	 * Keep chip from being accessed until we are ready.  Use
 	 * writeq() directly, to allow the write even though QIB_PRESENT
-	 * isn't' set.
+	 * isn't set.
 	 */
 	dd->flags &= ~(QIB_INITTED | QIB_PRESENT | QIB_BADINTR);
 	dd->flags |= QIB_DOING_RESET;
diff --git a/drivers/media/radio/radio-sf16fmr2.c b/drivers/media/radio/radio-sf16fmr2.c
index dc3f04c52d5e..87bad7678d92 100644
--- a/drivers/media/radio/radio-sf16fmr2.c
+++ b/drivers/media/radio/radio-sf16fmr2.c
@@ -170,7 +170,7 @@ static int fmr2_setfreq(struct fmr2 *dev)
 	return 0;
 }
 
-/* !!! not tested, in my card this does't work !!! */
+/* !!! not tested, in my card this doesn't work !!! */
 static int fmr2_setvolume(struct fmr2 *dev)
 {
 	int vol[16] = { 0x021, 0x084, 0x090, 0x104,
diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index 96c0b34ba8db..657b9f4b6f9b 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c
@@ -400,7 +400,7 @@ static uint16_t __init doc200x_ident_chip(struct mtd_info *mtd, int nr)
 	doc200x_hwcontrol(mtd, 0, NAND_CTRL_ALE | NAND_CTRL_CHANGE);
 	doc200x_hwcontrol(mtd, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
 
-	/* We can't' use dev_ready here, but at least we wait for the
+	/* We can't use dev_ready here, but at least we wait for the
 	 * command to complete
 	 */
 	udelay(50);
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index fe77e8223841..e8c19def1b0f 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -173,7 +173,7 @@ static int pcmcia_access_config(struct pcmcia_device *p_dev,
 	c = p_dev->function_config;
 
 	if (!(c->state & CONFIG_LOCKED)) {
-		dev_dbg(&p_dev->dev, "Configuration isn't't locked\n");
+		dev_dbg(&p_dev->dev, "Configuration isn't locked\n");
 		mutex_unlock(&s->ops_mutex);
 		return -EACCES;
 	}
diff --git a/drivers/staging/rt2860/common/cmm_data_pci.c b/drivers/staging/rt2860/common/cmm_data_pci.c
index bef0bbd8cef7..f01a51c381f1 100644
--- a/drivers/staging/rt2860/common/cmm_data_pci.c
+++ b/drivers/staging/rt2860/common/cmm_data_pci.c
@@ -444,7 +444,7 @@ int RTMPCheckRxError(struct rt_rtmp_adapter *pAd,
 			return (NDIS_STATUS_FAILURE);
 		}
 	}
-	/* Drop not U2M frames, can't's drop here because we will drop beacon in this case */
+	/* Drop not U2M frames, can't drop here because we will drop beacon in this case */
 	/* I am kind of doubting the U2M bit operation */
 	/* if (pRxD->U2M == 0) */
 	/*      return(NDIS_STATUS_FAILURE); */
diff --git a/drivers/staging/rt2860/common/cmm_data_usb.c b/drivers/staging/rt2860/common/cmm_data_usb.c
index 5637857ae9eb..83a62faa7e57 100644
--- a/drivers/staging/rt2860/common/cmm_data_usb.c
+++ b/drivers/staging/rt2860/common/cmm_data_usb.c
@@ -860,7 +860,7 @@ int RTMPCheckRxError(struct rt_rtmp_adapter *pAd,
 		DBGPRINT_RAW(RT_DEBUG_ERROR, ("received packet too long\n"));
 		return NDIS_STATUS_FAILURE;
 	}
-	/* Drop not U2M frames, can't's drop here because we will drop beacon in this case */
+	/* Drop not U2M frames, can't drop here because we will drop beacon in this case */
 	/* I am kind of doubting the U2M bit operation */
 	/* if (pRxD->U2M == 0) */
 	/*      return(NDIS_STATUS_FAILURE); */
diff --git a/drivers/staging/spectra/ffsport.c b/drivers/staging/spectra/ffsport.c
index 20dae73d3b78..506547b603e1 100644
--- a/drivers/staging/spectra/ffsport.c
+++ b/drivers/staging/spectra/ffsport.c
@@ -653,7 +653,7 @@ static int SBD_setup_device(struct spectra_nand_dev *dev, int which)
 	}
 	dev->queue->queuedata = dev;
 
-	/* As Linux block layer does't support >4KB hardware sector,  */
+	/* As Linux block layer doesn't support >4KB hardware sector,  */
 	/* Here we force report 512 byte hardware sector size to Kernel */
 	blk_queue_logical_block_size(dev->queue, 512);
 
diff --git a/drivers/staging/tidspbridge/dynload/cload.c b/drivers/staging/tidspbridge/dynload/cload.c
index 5cecd237e3f6..fe1ef0addb09 100644
--- a/drivers/staging/tidspbridge/dynload/cload.c
+++ b/drivers/staging/tidspbridge/dynload/cload.c
@@ -718,7 +718,7 @@ static void dload_symbols(struct dload_state *dlthis)
 	 * as a temporary for .dllview record construction.
 	 * Allocate storage for the whole table.  Add 1 to the section count
 	 * in case a trampoline section is auto-generated as well as the
-	 * size of the trampoline section name so DLLView does't get lost.
+	 * size of the trampoline section name so DLLView doesn't get lost.
 	 */
 
 	siz = sym_count * sizeof(struct local_symbol);
diff --git a/drivers/staging/tty/specialix.c b/drivers/staging/tty/specialix.c
index cb24c6d999db..5c3598ec7456 100644
--- a/drivers/staging/tty/specialix.c
+++ b/drivers/staging/tty/specialix.c
@@ -978,7 +978,7 @@ static void sx_change_speed(struct specialix_board *bp,
 	spin_lock_irqsave(&bp->lock, flags);
 	sx_out(bp, CD186x_CAR, port_No(port));
 
-	/* The Specialix board does't implement the RTS lines.
+	/* The Specialix board doesn't implement the RTS lines.
 	   They are used to set the IRQ level. Don't touch them. */
 	if (sx_crtscts(tty))
 		port->MSVR = MSVR_DTR | (sx_in(bp, CD186x_MSVR) & MSVR_RTS);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2e61fe1b6b8c..8f4b81de3ae2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -718,7 +718,7 @@ struct btrfs_space_info {
 	u64 total_bytes;	/* total bytes in the space,
 				   this doesn't take mirrors into account */
 	u64 bytes_used;		/* total bytes used,
-				   this does't take mirrors into account */
+				   this doesn't take mirrors into account */
 	u64 bytes_pinned;	/* total bytes pinned, will be freed when the
 				   transaction finishes */
 	u64 bytes_reserved;	/* total bytes the allocator has reserved for
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index b68f87a83924..938387a10d5d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1019,7 +1019,7 @@ struct ocfs2_xattr_entry {
 	__le16	xe_name_offset;  /* byte offset from the 1st entry in the
 				    local xattr storage(inode, xattr block or
 				    xattr bucket). */
-	__u8	xe_name_len;	 /* xattr name len, does't include prefix. */
+	__u8	xe_name_len;	 /* xattr name len, doesn't include prefix. */
 	__u8	xe_type;         /* the low 7 bits indicate the name prefix
 				  * type and the highest bit indicates whether
 				  * the EA is stored in the local storage. */
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index fce9bd3bd3fe..5c04f3e42704 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -667,7 +667,7 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
 MODULE_DESCRIPTION("L2TP over IP");
 MODULE_VERSION("1.0");
 
-/* Use the value of SOCK_DGRAM (2) directory, because __stringify does't like
+/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like
  * enums
  */
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP);
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index dff27d5e22fd..61b1f5ada96a 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -554,7 +554,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
 	memcpy(&ssf->ssf_info, &chunk->sinfo, sizeof(struct sctp_sndrcvinfo));
 
 	/* Per TSVWG discussion with Randy. Allow the application to
-	 * resemble a fragmented message.
+	 * reassemble a fragmented message.
 	 */
 	ssf->ssf_info.sinfo_flags = chunk->chunk_hdr->flags;
 
diff --git a/sound/aoa/codecs/tas.c b/sound/aoa/codecs/tas.c
index 58804c7acfcf..fd2188c3df2b 100644
--- a/sound/aoa/codecs/tas.c
+++ b/sound/aoa/codecs/tas.c
@@ -170,7 +170,7 @@ static void tas_set_volume(struct tas *tas)
 	/* analysing the volume and mixer tables shows
 	 * that they are similar enough when we shift
 	 * the mixer table down by 4 bits. The error
-	 * is minuscule, in just one item the error
+	 * is miniscule, in just one item the error
 	 * is 1, at a value of 0x07f17b (mixer table
 	 * value is 0x07f17a) */
 	tmp = tas_gaintable[left];
-- 
cgit v1.2.3


From ed961581a7ca91d6a4852af2e44333e983100505 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 27 Apr 2011 09:34:33 +0200
Subject: [S390] prng: fix pointer arithmetic

The git commit c708c57e247775928b9a6bce7b4d8d14883bf39b fixed the
access beyond the end of the stack in prng_seed but the pointer
arithmetic is still incorrect. The calculation has been off by
a factor of 64, now it is only off by a factor of 8. prng_seed
is called with a maximum of 16 for nbytes, small enough that
the incorrect calculation stays insides the limits of the stack.
Place parentheses for correct pointer arithmetic.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/crypto/prng.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 44bca3f994b0..8b16c479585b 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -76,7 +76,7 @@ static void prng_seed(int nbytes)
 
 	/* Add the entropy */
 	while (nbytes >= 8) {
-		*((__u64 *)parm_block) ^= *((__u64 *)buf+i);
+		*((__u64 *)parm_block) ^= *((__u64 *)(buf+i));
 		prng_add_entropy();
 		i += 8;
 		nbytes -= 8;
-- 
cgit v1.2.3


From 4aac0b4815ba592052758f4b468f253d383dc9d6 Mon Sep 17 00:00:00 2001
From: Michael Schmitz <schmitzmic@googlemail.com>
Date: Tue, 26 Apr 2011 14:51:53 +1200
Subject: m68k/mm: Set all online nodes in N_NORMAL_MEMORY

For m68k, N_NORMAL_MEMORY represents all nodes that have present memory
since it does not support HIGHMEM.  This patch sets the bit at the time
node_present_pages has been set by free_area_init_node.
At the time the node is brought online, the node state would have to be
done unconditionally since information about present memory has not yet
been recorded.

If N_NORMAL_MEMORY is not accurate, slub may encounter errors since it
uses this nodemask to setup per-cache kmem_cache_node data structures.

This pach is an alternative to the one proposed by David Rientjes
<rientjes@google.com> attempting to set node state immediately when
bringing the node online.

Signed-off-by: Michael Schmitz <schmitz@debian.org>
Tested-by: Thorsten Glaser <tg@debian.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
CC: stable@kernel.org
---
 arch/m68k/mm/motorola.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 02b7a03e4226..8b3db1c587fc 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -300,6 +300,8 @@ void __init paging_init(void)
 		zones_size[ZONE_DMA] = m68k_memory[i].size >> PAGE_SHIFT;
 		free_area_init_node(i, zones_size,
 				    m68k_memory[i].addr >> PAGE_SHIFT, NULL);
+		if (node_present_pages(i))
+			node_set_state(i, N_NORMAL_MEMORY);
 	}
 }
 
-- 
cgit v1.2.3


From 2bce5daca28346f19c190dbdb5542c9fe3e8c6e6 Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Wed, 27 Apr 2011 06:32:33 -0400
Subject: perf, x86, nmi: Move LVT un-masking into irq handlers

It was noticed that P4 machines were generating double NMIs for
each perf event.  These extra NMIs lead to 'Dazed and confused'
messages on the screen.

I tracked this down to a P4 quirk that said the overflow bit had
to be cleared before re-enabling the apic LVT mask.  My first
attempt was to move the un-masking inside the perf nmi handler
from before the chipset NMI handler to after.

This broke Nehalem boxes that seem to like the unmasking before
the counters themselves are re-enabled.

In order to keep this change simple for 2.6.39, I decided to
just simply move the apic LVT un-masking to the beginning of all
the chipset NMI handlers, with the exception of Pentium4's to
fix the double NMI issue.

Later on we can move the un-masking to later in the handlers to
save a number of 'extra' NMIs on those particular chipsets.

I tested this change on a P4 machine, an AMD machine, a Nehalem
box, and a core2quad box.  'perf top' worked correctly along
with various other small 'perf record' runs.  Anything high
stress breaks all the machines but that is a different problem.

Thanks to various people for testing different versions of this
patch.

Reported-and-tested-by: Shaun Ruffell <sruffell@digium.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Link: http://lkml.kernel.org/r/1303900353-10242-1-git-send-email-dzickus@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
CC: Cyrill Gorcunov <gorcunov@gmail.com>
---
 arch/x86/kernel/cpu/perf_event.c       | 12 ++++++++++--
 arch/x86/kernel/cpu/perf_event_intel.c | 10 ++++++++++
 arch/x86/kernel/cpu/perf_event_p4.c    | 17 +++++++++++++----
 3 files changed, 33 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fac0654021b8..e638689279d3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1288,6 +1288,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler.  As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This generic handler doesn't seem to have any issues where the
+	 * unmasking occurs so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask)) {
 			/*
@@ -1374,8 +1384,6 @@ perf_event_nmi_handler(struct notifier_block *self,
 		return NOTIFY_DONE;
 	}
 
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
 	handled = x86_pmu.handle_irq(args->regs);
 	if (!handled)
 		return NOTIFY_DONE;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9ae4a2aa7398..e61539b07d2c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -933,6 +933,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler.  As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This handler doesn't seem to have any issues with the unmasking
+	 * so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index d1f77e2934a1..e93fcd55fae1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -950,11 +950,20 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 			x86_pmu_stop(event, 0);
 	}
 
-	if (handled) {
-		/* p4 quirk: unmask it again */
-		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+	if (handled)
 		inc_irq_stat(apic_perf_irqs);
-	}
+
+	/*
+	 * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
+	 * been observed that the OVF bit flag has to be cleared first _before_
+	 * the LVTPC can be unmasked.
+	 *
+	 * The reason is the NMI line will continue to be asserted while the OVF
+	 * bit is set.  This causes a second NMI to generate if the LVTPC is
+	 * unmasked before the OVF bit is cleared, leading to unknown NMI
+	 * messages.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
 
 	return handled;
 }
-- 
cgit v1.2.3


From 860ad7823fdc00cd61dc70e7f35e07fb327cc9a4 Mon Sep 17 00:00:00 2001
From: Sonny Rao <sonnyrao@chromium.org>
Date: Mon, 18 Apr 2011 22:12:59 +0100
Subject: ARM: 6884/1: Fix infinite loop in ARM user perf_event backtrace code

The ARM user backtrace code can get into an infinite loop if it
runs into an invalid stack frame which points back to itself.
This situation has been observed in practice.  Fix it by capping
the number of entries in the backtrace.  This is also what other
architectures do in their backtrace code.

Signed-off-by: Sonny Rao <sonnyrao@chromium.org>
Acked-by: Jamie Iles <jamie@jamieiles.com>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/perf_event.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 979da3947f42..139e3c827369 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -746,7 +746,8 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
 	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
-	while (tail && !((unsigned long)tail & 0x3))
+	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+	       tail && !((unsigned long)tail & 0x3))
 		tail = user_backtrace(tail, entry);
 }
 
-- 
cgit v1.2.3


From a8d2518c2a7235f0b772c1f1bd30218130e631a7 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Wed, 27 Apr 2011 01:34:27 +0100
Subject: ARM: 6887/1: Mark broadcast_timer_setup() __cpuinit

This function is only called by percpu_timer_setup() which is
also __cpuinit marked. Thus it's safe to mark this function as
__cpuinit as well.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 8fe05ad932e4..f29b8a29b174 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -479,7 +479,7 @@ static void broadcast_timer_set_mode(enum clock_event_mode mode,
 {
 }
 
-static void broadcast_timer_setup(struct clock_event_device *evt)
+static void __cpuinit broadcast_timer_setup(struct clock_event_device *evt)
 {
 	evt->name	= "dummy_timer";
 	evt->features	= CLOCK_EVT_FEAT_ONESHOT |
-- 
cgit v1.2.3


From 0a14842f5a3c0e88a1e59fac5c3025db39721f74 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 20 Apr 2011 09:27:32 +0000
Subject: net: filter: Just In Time compiler for x86-64

In order to speedup packet filtering, here is an implementation of a
JIT compiler for x86_64

It is disabled by default, and must be enabled by the admin.

echo 1 >/proc/sys/net/core/bpf_jit_enable

It uses module_alloc() and module_free() to get memory in the 2GB text
kernel range since we call helpers functions from the generated code.

EAX : BPF A accumulator
EBX : BPF X accumulator
RDI : pointer to skb   (first argument given to JIT function)
RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
r9d : skb->len - skb->data_len (headlen)
r8  : skb->data

To get a trace of generated code, use :

echo 2 >/proc/sys/net/core/bpf_jit_enable

Example of generated code :

# tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24

flen=18 proglen=147 pass=3 image=ffffffffa00b5000
JIT code: ffffffffa00b5000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 60
JIT code: ffffffffa00b5010: 44 2b 4f 64 4c 8b 87 b8 00 00 00 be 0c 00 00 00
JIT code: ffffffffa00b5020: e8 24 7b f7 e0 3d 00 08 00 00 75 28 be 1a 00 00
JIT code: ffffffffa00b5030: 00 e8 fe 7a f7 e0 24 00 3d 00 14 a8 c0 74 49 be
JIT code: ffffffffa00b5040: 1e 00 00 00 e8 eb 7a f7 e0 24 00 3d 00 14 a8 c0
JIT code: ffffffffa00b5050: 74 36 eb 3b 3d 06 08 00 00 74 07 3d 35 80 00 00
JIT code: ffffffffa00b5060: 75 2d be 1c 00 00 00 e8 c8 7a f7 e0 24 00 3d 00
JIT code: ffffffffa00b5070: 14 a8 c0 74 13 be 26 00 00 00 e8 b5 7a f7 e0 24
JIT code: ffffffffa00b5080: 00 3d 00 14 a8 c0 75 07 b8 ff ff 00 00 eb 02 31
JIT code: ffffffffa00b5090: c0 c9 c3

BPF program is 144 bytes long, so native program is almost same size ;)

(000) ldh      [12]
(001) jeq      #0x800           jt 2    jf 8
(002) ld       [26]
(003) and      #0xffffff00
(004) jeq      #0xc0a81400      jt 16   jf 5
(005) ld       [30]
(006) and      #0xffffff00
(007) jeq      #0xc0a81400      jt 16   jf 17
(008) jeq      #0x806           jt 10   jf 9
(009) jeq      #0x8035          jt 10   jf 17
(010) ld       [28]
(011) and      #0xffffff00
(012) jeq      #0xc0a81400      jt 16   jf 13
(013) ld       [38]
(014) and      #0xffffff00
(015) jeq      #0xc0a81400      jt 16   jf 17
(016) ret      #65535
(017) ret      #0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/sysctl/net.txt |  11 +
 MAINTAINERS                  |   1 +
 arch/x86/Kbuild              |   1 +
 arch/x86/Kconfig             |   1 +
 arch/x86/net/Makefile        |   4 +
 arch/x86/net/bpf_jit.S       | 140 +++++++++
 arch/x86/net/bpf_jit_comp.c  | 654 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/filter.h       |  76 +++++
 include/linux/netdevice.h    |   1 +
 include/linux/skbuff.h       |   2 +-
 net/Kconfig                  |  13 +
 net/core/filter.c            |  65 +----
 net/core/sysctl_net_core.c   |   9 +
 net/packet/af_packet.c       |   2 +-
 14 files changed, 918 insertions(+), 62 deletions(-)
 create mode 100644 arch/x86/net/Makefile
 create mode 100644 arch/x86/net/bpf_jit.S
 create mode 100644 arch/x86/net/bpf_jit_comp.c

(limited to 'arch')

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index cbd05ffc606b..3201a7097e4d 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -32,6 +32,17 @@ Table : Subdirectories in /proc/sys/net
 1. /proc/sys/net/core - Network core options
 -------------------------------------------------------
 
+bpf_jit_enable
+--------------
+
+This enables Berkeley Packet Filter Just in Time compiler.
+Currently supported on x86_64 architecture, bpf_jit provides a framework
+to speed packet filtering, the one used by tcpdump/libpcap for example.
+Values :
+	0 - disable the JIT (default value)
+	1 - enable the JIT
+	2 - enable the JIT and ask the compiler to emit traces on kernel log.
+
 rmem_default
 ------------
 
diff --git a/MAINTAINERS b/MAINTAINERS
index b5266ad50167..17c0917a26ea 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4372,6 +4372,7 @@ S:	Maintained
 F:	net/ipv4/
 F:	net/ipv6/
 F:	include/net/ip*
+F:	arch/x86/net/*
 
 NETWORKING [LABELED] (NetLabel, CIPSO, Labeled IPsec, SECMARK)
 M:	Paul Moore <paul.moore@hp.com>
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 0e103236b754..0e9dec6cadd1 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -15,3 +15,4 @@ obj-y += vdso/
 obj-$(CONFIG_IA32_EMULATION) += ia32/
 
 obj-y += platform/
+obj-y += net/
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a95bfd..855a1bdc437d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -72,6 +72,7 @@ config X86
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select ARCH_NO_SYSDEV_OPS
+	select HAVE_BPF_JIT if X86_64
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
new file mode 100644
index 000000000000..90568c33ddb0
--- /dev/null
+++ b/arch/x86/net/Makefile
@@ -0,0 +1,4 @@
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
new file mode 100644
index 000000000000..66870223f8c5
--- /dev/null
+++ b/arch/x86/net/bpf_jit.S
@@ -0,0 +1,140 @@
+/* bpf_jit.S : BPF JIT helper functions
+ *
+ * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+/*
+ * Calling convention :
+ * rdi : skb pointer
+ * esi : offset of byte(s) to fetch in skb (can be scratched)
+ * r8  : copy of skb->data
+ * r9d : hlen = skb->len - skb->data_len
+ */
+#define SKBDATA	%r8
+
+sk_load_word_ind:
+	.globl	sk_load_word_ind
+
+	add	%ebx,%esi	/* offset += X */
+#	test    %esi,%esi	/* if (offset < 0) goto bpf_error; */
+	js	bpf_error
+
+sk_load_word:
+	.globl	sk_load_word
+
+	mov	%r9d,%eax		# hlen
+	sub	%esi,%eax		# hlen - offset
+	cmp	$3,%eax
+	jle	bpf_slow_path_word
+	mov     (SKBDATA,%rsi),%eax
+	bswap   %eax  			/* ntohl() */
+	ret
+
+
+sk_load_half_ind:
+	.globl sk_load_half_ind
+
+	add	%ebx,%esi	/* offset += X */
+	js	bpf_error
+
+sk_load_half:
+	.globl	sk_load_half
+
+	mov	%r9d,%eax
+	sub	%esi,%eax		#	hlen - offset
+	cmp	$1,%eax
+	jle	bpf_slow_path_half
+	movzwl	(SKBDATA,%rsi),%eax
+	rol	$8,%ax			# ntohs()
+	ret
+
+sk_load_byte_ind:
+	.globl sk_load_byte_ind
+	add	%ebx,%esi	/* offset += X */
+	js	bpf_error
+
+sk_load_byte:
+	.globl	sk_load_byte
+
+	cmp	%esi,%r9d   /* if (offset >= hlen) goto bpf_slow_path_byte */
+	jle	bpf_slow_path_byte
+	movzbl	(SKBDATA,%rsi),%eax
+	ret
+
+/**
+ * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
+ *
+ * Implements BPF_S_LDX_B_MSH : ldxb  4*([offset]&0xf)
+ * Must preserve A accumulator (%eax)
+ * Inputs : %esi is the offset value, already known positive
+ */
+ENTRY(sk_load_byte_msh)
+	CFI_STARTPROC
+	cmp	%esi,%r9d      /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
+	jle	bpf_slow_path_byte_msh
+	movzbl	(SKBDATA,%rsi),%ebx
+	and	$15,%bl
+	shl	$2,%bl
+	ret
+	CFI_ENDPROC
+ENDPROC(sk_load_byte_msh)
+
+bpf_error:
+# force a return 0 from jit handler
+	xor		%eax,%eax
+	mov		-8(%rbp),%rbx
+	leaveq
+	ret
+
+/* rsi contains offset and can be scratched */
+#define bpf_slow_path_common(LEN)		\
+	push	%rdi;    /* save skb */		\
+	push	%r9;				\
+	push	SKBDATA;			\
+/* rsi already has offset */			\
+	mov	$LEN,%ecx;	/* len */	\
+	lea	-12(%rbp),%rdx;			\
+	call	skb_copy_bits;			\
+	test    %eax,%eax;			\
+	pop	SKBDATA;			\
+	pop	%r9;				\
+	pop	%rdi
+
+
+bpf_slow_path_word:
+	bpf_slow_path_common(4)
+	js	bpf_error
+	mov	-12(%rbp),%eax
+	bswap	%eax
+	ret
+
+bpf_slow_path_half:
+	bpf_slow_path_common(2)
+	js	bpf_error
+	mov	-12(%rbp),%ax
+	rol	$8,%ax
+	movzwl	%ax,%eax
+	ret
+
+bpf_slow_path_byte:
+	bpf_slow_path_common(1)
+	js	bpf_error
+	movzbl	-12(%rbp),%eax
+	ret
+
+bpf_slow_path_byte_msh:
+	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
+	bpf_slow_path_common(1)
+	js	bpf_error
+	movzbl	-12(%rbp),%eax
+	and	$15,%al
+	shl	$2,%al
+	xchg	%eax,%ebx
+	ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
new file mode 100644
index 000000000000..bfab3fa10edc
--- /dev/null
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -0,0 +1,654 @@
+/* bpf_jit_comp.c : BPF JIT compiler
+ *
+ * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+
+/*
+ * Conventions :
+ *  EAX : BPF A accumulator
+ *  EBX : BPF X accumulator
+ *  RDI : pointer to skb   (first argument given to JIT function)
+ *  RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
+ *  ECX,EDX,ESI : scratch registers
+ *  r9d : skb->len - skb->data_len (headlen)
+ *  r8  : skb->data
+ * -8(RBP) : saved RBX value
+ * -16(RBP)..-80(RBP) : BPF_MEMWORDS values
+ */
+int bpf_jit_enable __read_mostly;
+
+/*
+ * assembly code in arch/x86/net/bpf_jit.S
+ */
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+
+static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
+{
+	if (len == 1)
+		*ptr = bytes;
+	else if (len == 2)
+		*(u16 *)ptr = bytes;
+	else {
+		*(u32 *)ptr = bytes;
+		barrier();
+	}
+	return ptr + len;
+}
+
+#define EMIT(bytes, len)	do { prog = emit_code(prog, bytes, len); } while (0)
+
+#define EMIT1(b1)		EMIT(b1, 1)
+#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
+#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
+#define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+#define EMIT1_off32(b1, off)	do { EMIT1(b1); EMIT(off, 4);} while (0)
+
+#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */
+#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */
+
+static inline bool is_imm8(int value)
+{
+	return value <= 127 && value >= -128;
+}
+
+static inline bool is_near(int offset)
+{
+	return offset <= 127 && offset >= -128;
+}
+
+#define EMIT_JMP(offset)						\
+do {									\
+	if (offset) {							\
+		if (is_near(offset))					\
+			EMIT2(0xeb, offset); /* jmp .+off8 */		\
+		else							\
+			EMIT1_off32(0xe9, offset); /* jmp .+off32 */	\
+	}								\
+} while (0)
+
+/* list of x86 cond jumps opcodes (. + s8)
+ * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
+ */
+#define X86_JB  0x72
+#define X86_JAE 0x73
+#define X86_JE  0x74
+#define X86_JNE 0x75
+#define X86_JBE 0x76
+#define X86_JA  0x77
+
+#define EMIT_COND_JMP(op, offset)				\
+do {								\
+	if (is_near(offset))					\
+		EMIT2(op, offset); /* jxx .+off8 */		\
+	else {							\
+		EMIT2(0x0f, op + 0x10);				\
+		EMIT(offset, 4); /* jxx .+off32 */		\
+	}							\
+} while (0)
+
+#define COND_SEL(CODE, TOP, FOP)	\
+	case CODE:			\
+		t_op = TOP;		\
+		f_op = FOP;		\
+		goto cond_branch
+
+
+#define SEEN_DATAREF 1 /* might call external helpers */
+#define SEEN_XREG    2 /* ebx is used */
+#define SEEN_MEM     4 /* use mem[] for temporary storage */
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(KERNEL_DS);
+	smp_wmb();
+	flush_icache_range((unsigned long)start, (unsigned long)end);
+	set_fs(old_fs);
+}
+
+
+void bpf_jit_compile(struct sk_filter *fp)
+{
+	u8 temp[64];
+	u8 *prog;
+	unsigned int proglen, oldproglen = 0;
+	int ilen, i;
+	int t_offset, f_offset;
+	u8 t_op, f_op, seen = 0, pass;
+	u8 *image = NULL;
+	u8 *func;
+	int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */
+	unsigned int cleanup_addr; /* epilogue code offset */
+	unsigned int *addrs;
+	const struct sock_filter *filter = fp->insns;
+	int flen = fp->len;
+
+	if (!bpf_jit_enable)
+		return;
+
+	addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
+	if (addrs == NULL)
+		return;
+
+	/* Before first pass, make a rough estimation of addrs[]
+	 * each bpf instruction is translated to less than 64 bytes
+	 */
+	for (proglen = 0, i = 0; i < flen; i++) {
+		proglen += 64;
+		addrs[i] = proglen;
+	}
+	cleanup_addr = proglen; /* epilogue address */
+
+	for (pass = 0; pass < 10; pass++) {
+		/* no prologue/epilogue for trivial filters (RET something) */
+		proglen = 0;
+		prog = temp;
+
+		if (seen) {
+			EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
+			EMIT4(0x48, 0x83, 0xec, 96);	/* subq  $96,%rsp	*/
+			/* note : must save %rbx in case bpf_error is hit */
+			if (seen & (SEEN_XREG | SEEN_DATAREF))
+				EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
+			if (seen & SEEN_XREG)
+				CLEAR_X(); /* make sure we dont leek kernel memory */
+
+			/*
+			 * If this filter needs to access skb data,
+			 * loads r9 and r8 with :
+			 *  r9 = skb->len - skb->data_len
+			 *  r8 = skb->data
+			 */
+			if (seen & SEEN_DATAREF) {
+				if (offsetof(struct sk_buff, len) <= 127)
+					/* mov    off8(%rdi),%r9d */
+					EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
+				else {
+					/* mov    off32(%rdi),%r9d */
+					EMIT3(0x44, 0x8b, 0x8f);
+					EMIT(offsetof(struct sk_buff, len), 4);
+				}
+				if (is_imm8(offsetof(struct sk_buff, data_len)))
+					/* sub    off8(%rdi),%r9d */
+					EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len));
+				else {
+					EMIT3(0x44, 0x2b, 0x8f);
+					EMIT(offsetof(struct sk_buff, data_len), 4);
+				}
+
+				if (is_imm8(offsetof(struct sk_buff, data)))
+					/* mov off8(%rdi),%r8 */
+					EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data));
+				else {
+					/* mov off32(%rdi),%r8 */
+					EMIT3(0x4c, 0x8b, 0x87);
+					EMIT(offsetof(struct sk_buff, data), 4);
+				}
+			}
+		}
+
+		switch (filter[0].code) {
+		case BPF_S_RET_K:
+		case BPF_S_LD_W_LEN:
+		case BPF_S_ANC_PROTOCOL:
+		case BPF_S_ANC_IFINDEX:
+		case BPF_S_ANC_MARK:
+		case BPF_S_ANC_RXHASH:
+		case BPF_S_ANC_CPU:
+		case BPF_S_ANC_QUEUE:
+		case BPF_S_LD_W_ABS:
+		case BPF_S_LD_H_ABS:
+		case BPF_S_LD_B_ABS:
+			/* first instruction sets A register (or is RET 'constant') */
+			break;
+		default:
+			/* make sure we dont leak kernel information to user */
+			CLEAR_A(); /* A = 0 */
+		}
+
+		for (i = 0; i < flen; i++) {
+			unsigned int K = filter[i].k;
+
+			switch (filter[i].code) {
+			case BPF_S_ALU_ADD_X: /* A += X; */
+				seen |= SEEN_XREG;
+				EMIT2(0x01, 0xd8);		/* add %ebx,%eax */
+				break;
+			case BPF_S_ALU_ADD_K: /* A += K; */
+				if (!K)
+					break;
+				if (is_imm8(K))
+					EMIT3(0x83, 0xc0, K);	/* add imm8,%eax */
+				else
+					EMIT1_off32(0x05, K);	/* add imm32,%eax */
+				break;
+			case BPF_S_ALU_SUB_X: /* A -= X; */
+				seen |= SEEN_XREG;
+				EMIT2(0x29, 0xd8);		/* sub    %ebx,%eax */
+				break;
+			case BPF_S_ALU_SUB_K: /* A -= K */
+				if (!K)
+					break;
+				if (is_imm8(K))
+					EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */
+				else
+					EMIT1_off32(0x2d, K); /* sub imm32,%eax */
+				break;
+			case BPF_S_ALU_MUL_X: /* A *= X; */
+				seen |= SEEN_XREG;
+				EMIT3(0x0f, 0xaf, 0xc3);	/* imul %ebx,%eax */
+				break;
+			case BPF_S_ALU_MUL_K: /* A *= K */
+				if (is_imm8(K))
+					EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */
+				else {
+					EMIT2(0x69, 0xc0);		/* imul imm32,%eax */
+					EMIT(K, 4);
+				}
+				break;
+			case BPF_S_ALU_DIV_X: /* A /= X; */
+				seen |= SEEN_XREG;
+				EMIT2(0x85, 0xdb);	/* test %ebx,%ebx */
+				if (pc_ret0 != -1)
+					EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4));
+				else {
+					EMIT_COND_JMP(X86_JNE, 2 + 5);
+					CLEAR_A();
+					EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
+				}
+				EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
+				break;
+			case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
+				EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */
+				EMIT(K, 4);
+				EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */
+				break;
+			case BPF_S_ALU_AND_X:
+				seen |= SEEN_XREG;
+				EMIT2(0x21, 0xd8);		/* and %ebx,%eax */
+				break;
+			case BPF_S_ALU_AND_K:
+				if (K >= 0xFFFFFF00) {
+					EMIT2(0x24, K & 0xFF); /* and imm8,%al */
+				} else if (K >= 0xFFFF0000) {
+					EMIT2(0x66, 0x25);	/* and imm16,%ax */
+					EMIT2(K, 2);
+				} else {
+					EMIT1_off32(0x25, K);	/* and imm32,%eax */
+				}
+				break;
+			case BPF_S_ALU_OR_X:
+				seen |= SEEN_XREG;
+				EMIT2(0x09, 0xd8);		/* or %ebx,%eax */
+				break;
+			case BPF_S_ALU_OR_K:
+				if (is_imm8(K))
+					EMIT3(0x83, 0xc8, K); /* or imm8,%eax */
+				else
+					EMIT1_off32(0x0d, K);	/* or imm32,%eax */
+				break;
+			case BPF_S_ALU_LSH_X: /* A <<= X; */
+				seen |= SEEN_XREG;
+				EMIT4(0x89, 0xd9, 0xd3, 0xe0);	/* mov %ebx,%ecx; shl %cl,%eax */
+				break;
+			case BPF_S_ALU_LSH_K:
+				if (K == 0)
+					break;
+				else if (K == 1)
+					EMIT2(0xd1, 0xe0); /* shl %eax */
+				else
+					EMIT3(0xc1, 0xe0, K);
+				break;
+			case BPF_S_ALU_RSH_X: /* A >>= X; */
+				seen |= SEEN_XREG;
+				EMIT4(0x89, 0xd9, 0xd3, 0xe8);	/* mov %ebx,%ecx; shr %cl,%eax */
+				break;
+			case BPF_S_ALU_RSH_K: /* A >>= K; */
+				if (K == 0)
+					break;
+				else if (K == 1)
+					EMIT2(0xd1, 0xe8); /* shr %eax */
+				else
+					EMIT3(0xc1, 0xe8, K);
+				break;
+			case BPF_S_ALU_NEG:
+				EMIT2(0xf7, 0xd8);		/* neg %eax */
+				break;
+			case BPF_S_RET_K:
+				if (!K) {
+					if (pc_ret0 == -1)
+						pc_ret0 = i;
+					CLEAR_A();
+				} else {
+					EMIT1_off32(0xb8, K);	/* mov $imm32,%eax */
+				}
+				/* fallinto */
+			case BPF_S_RET_A:
+				if (seen) {
+					if (i != flen - 1) {
+						EMIT_JMP(cleanup_addr - addrs[i]);
+						break;
+					}
+					if (seen & SEEN_XREG)
+						EMIT4(0x48, 0x8b, 0x5d, 0xf8);  /* mov  -8(%rbp),%rbx */
+					EMIT1(0xc9);		/* leaveq */
+				}
+				EMIT1(0xc3);		/* ret */
+				break;
+			case BPF_S_MISC_TAX: /* X = A */
+				seen |= SEEN_XREG;
+				EMIT2(0x89, 0xc3);	/* mov    %eax,%ebx */
+				break;
+			case BPF_S_MISC_TXA: /* A = X */
+				seen |= SEEN_XREG;
+				EMIT2(0x89, 0xd8);	/* mov    %ebx,%eax */
+				break;
+			case BPF_S_LD_IMM: /* A = K */
+				if (!K)
+					CLEAR_A();
+				else
+					EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
+				break;
+			case BPF_S_LDX_IMM: /* X = K */
+				seen |= SEEN_XREG;
+				if (!K)
+					CLEAR_X();
+				else
+					EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */
+				break;
+			case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */
+				seen |= SEEN_MEM;
+				EMIT3(0x8b, 0x45, 0xf0 - K*4);
+				break;
+			case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */
+				seen |= SEEN_XREG | SEEN_MEM;
+				EMIT3(0x8b, 0x5d, 0xf0 - K*4);
+				break;
+			case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */
+				seen |= SEEN_MEM;
+				EMIT3(0x89, 0x45, 0xf0 - K*4);
+				break;
+			case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
+				seen |= SEEN_XREG | SEEN_MEM;
+				EMIT3(0x89, 0x5d, 0xf0 - K*4);
+				break;
+			case BPF_S_LD_W_LEN: /*	A = skb->len; */
+				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+				if (is_imm8(offsetof(struct sk_buff, len)))
+					/* mov    off8(%rdi),%eax */
+					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len));
+				else {
+					EMIT2(0x8b, 0x87);
+					EMIT(offsetof(struct sk_buff, len), 4);
+				}
+				break;
+			case BPF_S_LDX_W_LEN: /* X = skb->len; */
+				seen |= SEEN_XREG;
+				if (is_imm8(offsetof(struct sk_buff, len)))
+					/* mov off8(%rdi),%ebx */
+					EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len));
+				else {
+					EMIT2(0x8b, 0x9f);
+					EMIT(offsetof(struct sk_buff, len), 4);
+				}
+				break;
+			case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+				if (is_imm8(offsetof(struct sk_buff, protocol))) {
+					/* movzwl off8(%rdi),%eax */
+					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol));
+				} else {
+					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
+					EMIT(offsetof(struct sk_buff, protocol), 4);
+				}
+				EMIT2(0x86, 0xc4); /* ntohs() : xchg   %al,%ah */
+				break;
+			case BPF_S_ANC_IFINDEX:
+				if (is_imm8(offsetof(struct sk_buff, dev))) {
+					/* movq off8(%rdi),%rax */
+					EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev));
+				} else {
+					EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */
+					EMIT(offsetof(struct sk_buff, dev), 4);
+				}
+				EMIT3(0x48, 0x85, 0xc0);	/* test %rax,%rax */
+				EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6));
+				BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+				EMIT2(0x8b, 0x80);	/* mov off32(%rax),%eax */
+				EMIT(offsetof(struct net_device, ifindex), 4);
+				break;
+			case BPF_S_ANC_MARK:
+				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+				if (is_imm8(offsetof(struct sk_buff, mark))) {
+					/* mov off8(%rdi),%eax */
+					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark));
+				} else {
+					EMIT2(0x8b, 0x87);
+					EMIT(offsetof(struct sk_buff, mark), 4);
+				}
+				break;
+			case BPF_S_ANC_RXHASH:
+				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
+				if (is_imm8(offsetof(struct sk_buff, rxhash))) {
+					/* mov off8(%rdi),%eax */
+					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash));
+				} else {
+					EMIT2(0x8b, 0x87);
+					EMIT(offsetof(struct sk_buff, rxhash), 4);
+				}
+				break;
+			case BPF_S_ANC_QUEUE:
+				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+				if (is_imm8(offsetof(struct sk_buff, queue_mapping))) {
+					/* movzwl off8(%rdi),%eax */
+					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping));
+				} else {
+					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
+					EMIT(offsetof(struct sk_buff, queue_mapping), 4);
+				}
+				break;
+			case BPF_S_ANC_CPU:
+#ifdef CONFIG_SMP
+				EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */
+				EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */
+#else
+				CLEAR_A();
+#endif
+				break;
+			case BPF_S_LD_W_ABS:
+				func = sk_load_word;
+common_load:			seen |= SEEN_DATAREF;
+				if ((int)K < 0)
+					goto out;
+				t_offset = func - (image + addrs[i]);
+				EMIT1_off32(0xbe, K); /* mov imm32,%esi */
+				EMIT1_off32(0xe8, t_offset); /* call */
+				break;
+			case BPF_S_LD_H_ABS:
+				func = sk_load_half;
+				goto common_load;
+			case BPF_S_LD_B_ABS:
+				func = sk_load_byte;
+				goto common_load;
+			case BPF_S_LDX_B_MSH:
+				if ((int)K < 0) {
+					if (pc_ret0 != -1) {
+						EMIT_JMP(addrs[pc_ret0] - addrs[i]);
+						break;
+					}
+					CLEAR_A();
+					EMIT_JMP(cleanup_addr - addrs[i]);
+					break;
+				}
+				seen |= SEEN_DATAREF | SEEN_XREG;
+				t_offset = sk_load_byte_msh - (image + addrs[i]);
+				EMIT1_off32(0xbe, K);	/* mov imm32,%esi */
+				EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
+				break;
+			case BPF_S_LD_W_IND:
+				func = sk_load_word_ind;
+common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
+				t_offset = func - (image + addrs[i]);
+				EMIT1_off32(0xbe, K);	/* mov imm32,%esi   */
+				EMIT1_off32(0xe8, t_offset);	/* call sk_load_xxx_ind */
+				break;
+			case BPF_S_LD_H_IND:
+				func = sk_load_half_ind;
+				goto common_load_ind;
+			case BPF_S_LD_B_IND:
+				func = sk_load_byte_ind;
+				goto common_load_ind;
+			case BPF_S_JMP_JA:
+				t_offset = addrs[i + K] - addrs[i];
+				EMIT_JMP(t_offset);
+				break;
+			COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE);
+			COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB);
+			COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE);
+			COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE);
+			COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE);
+			COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB);
+			COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE);
+			COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE);
+
+cond_branch:			f_offset = addrs[i + filter[i].jf] - addrs[i];
+				t_offset = addrs[i + filter[i].jt] - addrs[i];
+
+				/* same targets, can avoid doing the test :) */
+				if (filter[i].jt == filter[i].jf) {
+					EMIT_JMP(t_offset);
+					break;
+				}
+
+				switch (filter[i].code) {
+				case BPF_S_JMP_JGT_X:
+				case BPF_S_JMP_JGE_X:
+				case BPF_S_JMP_JEQ_X:
+					seen |= SEEN_XREG;
+					EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */
+					break;
+				case BPF_S_JMP_JSET_X:
+					seen |= SEEN_XREG;
+					EMIT2(0x85, 0xd8); /* test %ebx,%eax */
+					break;
+				case BPF_S_JMP_JEQ_K:
+					if (K == 0) {
+						EMIT2(0x85, 0xc0); /* test   %eax,%eax */
+						break;
+					}
+				case BPF_S_JMP_JGT_K:
+				case BPF_S_JMP_JGE_K:
+					if (K <= 127)
+						EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */
+					else
+						EMIT1_off32(0x3d, K); /* cmp imm32,%eax */
+					break;
+				case BPF_S_JMP_JSET_K:
+					if (K <= 0xFF)
+						EMIT2(0xa8, K); /* test imm8,%al */
+					else if (!(K & 0xFFFF00FF))
+						EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */
+					else if (K <= 0xFFFF) {
+						EMIT2(0x66, 0xa9); /* test imm16,%ax */
+						EMIT(K, 2);
+					} else {
+						EMIT1_off32(0xa9, K); /* test imm32,%eax */
+					}
+					break;
+				}
+				if (filter[i].jt != 0) {
+					if (filter[i].jf)
+						t_offset += is_near(f_offset) ? 2 : 6;
+					EMIT_COND_JMP(t_op, t_offset);
+					if (filter[i].jf)
+						EMIT_JMP(f_offset);
+					break;
+				}
+				EMIT_COND_JMP(f_op, f_offset);
+				break;
+			default:
+				/* hmm, too complex filter, give up with jit compiler */
+				goto out;
+			}
+			ilen = prog - temp;
+			if (image) {
+				if (unlikely(proglen + ilen > oldproglen)) {
+					pr_err("bpb_jit_compile fatal error\n");
+					kfree(addrs);
+					module_free(NULL, image);
+					return;
+				}
+				memcpy(image + proglen, temp, ilen);
+			}
+			proglen += ilen;
+			addrs[i] = proglen;
+			prog = temp;
+		}
+		/* last bpf instruction is always a RET :
+		 * use it to give the cleanup instruction(s) addr
+		 */
+		cleanup_addr = proglen - 1; /* ret */
+		if (seen)
+			cleanup_addr -= 1; /* leaveq */
+		if (seen & SEEN_XREG)
+			cleanup_addr -= 4; /* mov  -8(%rbp),%rbx */
+
+		if (image) {
+			WARN_ON(proglen != oldproglen);
+			break;
+		}
+		if (proglen == oldproglen) {
+			image = module_alloc(max_t(unsigned int,
+						   proglen,
+						   sizeof(struct work_struct)));
+			if (!image)
+				goto out;
+		}
+		oldproglen = proglen;
+	}
+	if (bpf_jit_enable > 1)
+		pr_err("flen=%d proglen=%u pass=%d image=%p\n",
+		       flen, proglen, pass, image);
+
+	if (image) {
+		if (bpf_jit_enable > 1)
+			print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_ADDRESS,
+				       16, 1, image, proglen, false);
+
+		bpf_flush_icache(image, image + proglen);
+
+		fp->bpf_func = (void *)image;
+	}
+out:
+	kfree(addrs);
+	return;
+}
+
+static void jit_free_defer(struct work_struct *arg)
+{
+	module_free(NULL, arg);
+}
+
+/* run from softirq, we must use a work_struct to call
+ * module_free() from process context
+ */
+void bpf_jit_free(struct sk_filter *fp)
+{
+	if (fp->bpf_func != sk_run_filter) {
+		struct work_struct *work = (struct work_struct *)fp->bpf_func;
+
+		INIT_WORK(work, jit_free_defer);
+		schedule_work(work);
+	}
+}
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 45266b75409a..4609b85e559d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -135,6 +135,8 @@ struct sk_filter
 {
 	atomic_t		refcnt;
 	unsigned int         	len;	/* Number of filter blocks */
+	unsigned int		(*bpf_func)(const struct sk_buff *skb,
+					    const struct sock_filter *filter);
 	struct rcu_head		rcu;
 	struct sock_filter     	insns[0];
 };
@@ -153,6 +155,80 @@ extern unsigned int sk_run_filter(const struct sk_buff *skb,
 extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 extern int sk_detach_filter(struct sock *sk);
 extern int sk_chk_filter(struct sock_filter *filter, int flen);
+
+#ifdef CONFIG_BPF_JIT
+extern void bpf_jit_compile(struct sk_filter *fp);
+extern void bpf_jit_free(struct sk_filter *fp);
+#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
+#else
+static inline void bpf_jit_compile(struct sk_filter *fp)
+{
+}
+static inline void bpf_jit_free(struct sk_filter *fp)
+{
+}
+#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
+#endif
+
+enum {
+	BPF_S_RET_K = 1,
+	BPF_S_RET_A,
+	BPF_S_ALU_ADD_K,
+	BPF_S_ALU_ADD_X,
+	BPF_S_ALU_SUB_K,
+	BPF_S_ALU_SUB_X,
+	BPF_S_ALU_MUL_K,
+	BPF_S_ALU_MUL_X,
+	BPF_S_ALU_DIV_X,
+	BPF_S_ALU_AND_K,
+	BPF_S_ALU_AND_X,
+	BPF_S_ALU_OR_K,
+	BPF_S_ALU_OR_X,
+	BPF_S_ALU_LSH_K,
+	BPF_S_ALU_LSH_X,
+	BPF_S_ALU_RSH_K,
+	BPF_S_ALU_RSH_X,
+	BPF_S_ALU_NEG,
+	BPF_S_LD_W_ABS,
+	BPF_S_LD_H_ABS,
+	BPF_S_LD_B_ABS,
+	BPF_S_LD_W_LEN,
+	BPF_S_LD_W_IND,
+	BPF_S_LD_H_IND,
+	BPF_S_LD_B_IND,
+	BPF_S_LD_IMM,
+	BPF_S_LDX_W_LEN,
+	BPF_S_LDX_B_MSH,
+	BPF_S_LDX_IMM,
+	BPF_S_MISC_TAX,
+	BPF_S_MISC_TXA,
+	BPF_S_ALU_DIV_K,
+	BPF_S_LD_MEM,
+	BPF_S_LDX_MEM,
+	BPF_S_ST,
+	BPF_S_STX,
+	BPF_S_JMP_JA,
+	BPF_S_JMP_JEQ_K,
+	BPF_S_JMP_JEQ_X,
+	BPF_S_JMP_JGE_K,
+	BPF_S_JMP_JGE_X,
+	BPF_S_JMP_JGT_K,
+	BPF_S_JMP_JGT_X,
+	BPF_S_JMP_JSET_K,
+	BPF_S_JMP_JSET_X,
+	/* Ancillary data */
+	BPF_S_ANC_PROTOCOL,
+	BPF_S_ANC_PKTTYPE,
+	BPF_S_ANC_IFINDEX,
+	BPF_S_ANC_NLATTR,
+	BPF_S_ANC_NLATTR_NEST,
+	BPF_S_ANC_MARK,
+	BPF_S_ANC_QUEUE,
+	BPF_S_ANC_HATYPE,
+	BPF_S_ANC_RXHASH,
+	BPF_S_ANC_CPU,
+};
+
 #endif /* __KERNEL__ */
 
 #endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cb8178ab3c52..364bcf212f71 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2514,6 +2514,7 @@ extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
+extern int		bpf_jit_enable;
 extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
 extern int netdev_set_bond_master(struct net_device *dev,
 				  struct net_device *master);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d0ae90af0b40..79aafbbf430a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -391,8 +391,8 @@ struct sk_buff {
 
 	__u32			rxhash;
 
+	__u16			queue_mapping;
 	kmemcheck_bitfield_begin(flags2);
-	__u16			queue_mapping:16;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
diff --git a/net/Kconfig b/net/Kconfig
index 79cabf1ee68b..745fb02d2fda 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,6 +232,19 @@ config XPS
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
 	default y
 
+config HAVE_BPF_JIT
+	bool
+
+config BPF_JIT
+	bool "enable BPF Just In Time compiler"
+	depends on HAVE_BPF_JIT
+	---help---
+	  Berkeley Packet Filter filtering capabilities are normally handled
+	  by an interpreter. This option allows kernel to generate a native
+	  code when filter is loaded in memory. This should speedup
+	  packet sniffing (libpcap/tcpdump). Note : Admin should enable
+	  this feature changing /proc/sys/net/core/bpf_jit_enable
+
 menu "Network testing"
 
 config NET_PKTGEN
diff --git a/net/core/filter.c b/net/core/filter.c
index afb8afb066bb..0eb8c4466eaa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,65 +39,6 @@
 #include <linux/filter.h>
 #include <linux/reciprocal_div.h>
 
-enum {
-	BPF_S_RET_K = 1,
-	BPF_S_RET_A,
-	BPF_S_ALU_ADD_K,
-	BPF_S_ALU_ADD_X,
-	BPF_S_ALU_SUB_K,
-	BPF_S_ALU_SUB_X,
-	BPF_S_ALU_MUL_K,
-	BPF_S_ALU_MUL_X,
-	BPF_S_ALU_DIV_X,
-	BPF_S_ALU_AND_K,
-	BPF_S_ALU_AND_X,
-	BPF_S_ALU_OR_K,
-	BPF_S_ALU_OR_X,
-	BPF_S_ALU_LSH_K,
-	BPF_S_ALU_LSH_X,
-	BPF_S_ALU_RSH_K,
-	BPF_S_ALU_RSH_X,
-	BPF_S_ALU_NEG,
-	BPF_S_LD_W_ABS,
-	BPF_S_LD_H_ABS,
-	BPF_S_LD_B_ABS,
-	BPF_S_LD_W_LEN,
-	BPF_S_LD_W_IND,
-	BPF_S_LD_H_IND,
-	BPF_S_LD_B_IND,
-	BPF_S_LD_IMM,
-	BPF_S_LDX_W_LEN,
-	BPF_S_LDX_B_MSH,
-	BPF_S_LDX_IMM,
-	BPF_S_MISC_TAX,
-	BPF_S_MISC_TXA,
-	BPF_S_ALU_DIV_K,
-	BPF_S_LD_MEM,
-	BPF_S_LDX_MEM,
-	BPF_S_ST,
-	BPF_S_STX,
-	BPF_S_JMP_JA,
-	BPF_S_JMP_JEQ_K,
-	BPF_S_JMP_JEQ_X,
-	BPF_S_JMP_JGE_K,
-	BPF_S_JMP_JGE_X,
-	BPF_S_JMP_JGT_K,
-	BPF_S_JMP_JGT_X,
-	BPF_S_JMP_JSET_K,
-	BPF_S_JMP_JSET_X,
-	/* Ancillary data */
-	BPF_S_ANC_PROTOCOL,
-	BPF_S_ANC_PKTTYPE,
-	BPF_S_ANC_IFINDEX,
-	BPF_S_ANC_NLATTR,
-	BPF_S_ANC_NLATTR_NEST,
-	BPF_S_ANC_MARK,
-	BPF_S_ANC_QUEUE,
-	BPF_S_ANC_HATYPE,
-	BPF_S_ANC_RXHASH,
-	BPF_S_ANC_CPU,
-};
-
 /* No hurry in this branch */
 static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
 {
@@ -145,7 +86,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 	rcu_read_lock();
 	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
-		unsigned int pkt_len = sk_run_filter(skb, filter->insns);
+		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
 
 		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 	}
@@ -638,6 +579,7 @@ void sk_filter_release_rcu(struct rcu_head *rcu)
 {
 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
 
+	bpf_jit_free(fp);
 	kfree(fp);
 }
 EXPORT_SYMBOL(sk_filter_release_rcu);
@@ -672,6 +614,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 
 	atomic_set(&fp->refcnt, 1);
 	fp->len = fprog->len;
+	fp->bpf_func = sk_run_filter;
 
 	err = sk_chk_filter(fp->insns, fp->len);
 	if (err) {
@@ -679,6 +622,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 		return err;
 	}
 
+	bpf_jit_compile(fp);
+
 	old_fp = rcu_dereference_protected(sk->sk_filter,
 					   sock_owned_by_user(sk));
 	rcu_assign_pointer(sk->sk_filter, fp);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 385b6095fdc4..a829e3f60aeb 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -122,6 +122,15 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+#ifdef CONFIG_BPF_JIT
+	{
+		.procname	= "bpf_jit_enable",
+		.data		= &bpf_jit_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+#endif
 	{
 		.procname	= "netdev_tstamp_prequeue",
 		.data		= &netdev_tstamp_prequeue,
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b5362e96022b..549527bca87a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -538,7 +538,7 @@ static inline unsigned int run_filter(const struct sk_buff *skb,
 	rcu_read_lock();
 	filter = rcu_dereference(sk->sk_filter);
 	if (filter != NULL)
-		res = sk_run_filter(skb, filter->insns);
+		res = SK_RUN_FILTER(filter, skb);
 	rcu_read_unlock();
 
 	return res;
-- 
cgit v1.2.3


From 8a850cadca0e387c87a0911a61e99fd66aeb57ec Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 28 Apr 2011 11:16:44 +0200
Subject: perf event, x86: Use better stalled cycles metric

Use the UOPS_EXECUTED.*,c=1,i=1 event on Intel CPUs - it is a rather
good indicator of CPU execution stalls, more sensitive and more inclusive
than the 0xa2 resource stalls event (which does not count nearly as many
stall types).

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n1eqio7hjpn2dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 067a48b13a76..1ea94224f62e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1413,8 +1413,8 @@ static __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
-		/* Install the stalled-cycles event: 0xff: All reasons, 0xa2: Resource stalls */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0xffa2;
+		/* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0x1803fb1;
 
 		if (ebx & 0x40) {
 			/*
-- 
cgit v1.2.3


From 20443598d9bdfe3563f901e27fd482a3f5d3d231 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 27 Apr 2011 16:30:52 +0200
Subject: x86: devicetree: Configure IOAPIC pin only once

We use io_apic_setup_irq_pin() in order to configure pin's interrupt
number polarity and type. This is done on every irq_create_of_mapping()
which happens for instance during pci enable calls. Level typed
interrupts are masked by default, edge are unmasked.

On the first ->xlate() call the level interrupt is configured and
masked. The driver calls request_irq() and the line is unmasked. Lets
assume the interrupt line is shared with another device and we call
pci_enable_device() for this device. The ->xlate() configures the pin
again and it is masked. request_irq() does not unmask the line because
it _is_ already unmasked according to its internal state. So the
interrupt will never be unmasked again.

This patch is based on an earlier work by Torben Hohn and solves the
problem by configuring the pin only once. Since all devices must agree
on the same type and polarity there is no point in configuring the pin
more than once.

[ tglx: Split out the ce4100 part into a separate patch ]

Cc: Torben Hohn <torbenh@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: http://lkml.kernel.org/r/%3C20110427143052.GA15211%40linutronix.de%3E
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/io_apic.h |  2 +-
 arch/x86/kernel/apic/io_apic.c | 10 +++++-----
 arch/x86/kernel/devicetree.c   |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index c4bd267dfc50..a97a240f67f3 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -150,7 +150,7 @@ void setup_IO_APIC_irq_extra(u32 gsi);
 extern void ioapic_and_gsi_init(void);
 extern void ioapic_insert_resources(void);
 
-int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr);
+int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
 
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
 extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 68df09bba92e..45fd33d1fd3a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -128,8 +128,8 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
-				      struct io_apic_irq_attr *attr);
+static int io_apic_setup_irq_pin(unsigned int irq, int node,
+				 struct io_apic_irq_attr *attr);
 
 /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
@@ -3570,7 +3570,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
-int
+static int
 io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 {
 	struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
@@ -3585,8 +3585,8 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 	return ret;
 }
 
-static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
-				      struct io_apic_irq_attr *attr)
+int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+			       struct io_apic_irq_attr *attr)
 {
 	unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
 	int ret;
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 706a9fb46a58..e90f08458e6b 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -391,7 +391,7 @@ static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,
 
 	set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
 
-	return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr);
+	return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr);
 }
 
 static void __init ioapic_add_ofnode(struct device_node *np)
-- 
cgit v1.2.3


From 1ff42c32c7614c2e810ed388fd1ba04a5626b74c Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 27 Apr 2011 16:30:52 +0200
Subject: x86: ce4100: Configure IOAPIC pins for USB and SATA to level type

The USB and SATA ioapic interrrupt pins are configured as edge type,
but need to be level type interrupts to work correctly.

[ tglx: Split out from the combo patch ]

Cc: Torben Hohn <torbenh@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: http://lkml.kernel.org/r/%3C20110427143052.GA15211%40linutronix.de%3E
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/platform/ce4100/falconfalls.dts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
index 2d6d226f2b10..e70be38ce039 100644
--- a/arch/x86/platform/ce4100/falconfalls.dts
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -347,7 +347,7 @@
 						   "pciclass0c03";
 
 					reg = <0x16800 0x0 0x0 0x0 0x0>;
-					interrupts = <22 3>;
+					interrupts = <22 1>;
 				};
 
 				usb@d,1 {
@@ -357,7 +357,7 @@
 						   "pciclass0c03";
 
 					reg = <0x16900 0x0 0x0 0x0 0x0>;
-					interrupts = <22 3>;
+					interrupts = <22 1>;
 				};
 
 				sata@e,0 {
@@ -367,7 +367,7 @@
 						   "pciclass0106";
 
 					reg = <0x17000 0x0 0x0 0x0 0x0>;
-					interrupts = <23 3>;
+					interrupts = <23 1>;
 				};
 
 				flash@f,0 {
-- 
cgit v1.2.3


From 83a5d2d1b4cc06ce17b44873e004fc0b55552183 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sat, 12 Mar 2011 03:27:22 +0000
Subject: ARM: Fix .size directive for xscale_dma_a0_map_area

gas used to accept (and ignore?) .size directives which referred to
undefined symbols, as this does.  In binutils 2.21 these are treated
as fatal errors.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Eric Miao <eric.y.miao@gmail.com>
---
 arch/arm/mm/proc-xscale.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index ce233bcbf506..42af97664c9d 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -395,7 +395,7 @@ ENTRY(xscale_dma_a0_map_area)
 	teq	r2, #DMA_TO_DEVICE
 	beq	xscale_dma_clean_range
 	b	xscale_dma_flush_range
-ENDPROC(xscsale_dma_a0_map_area)
+ENDPROC(xscale_dma_a0_map_area)
 
 /*
  *	dma_unmap_area(start, size, dir)
-- 
cgit v1.2.3


From e6a585801b451443480ff66914a522b482457460 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 27 Apr 2011 21:04:28 +0200
Subject: x86: signal: handle_signal() should use set_current_blocked()

This is ugly, but if sigprocmask() needs retarget_shared_pending() then
handle signal should follow this logic. In theory it is newer correct to
add the new signals to current->blocked, the signal handler can sleep/etc
so we should notify other threads in case we block the pending signal and
nobody else has TIF_SIGPENDING.

Of course, this change doesn't make signals faster :/

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Matt Fleming <matt.fleming@linux.intel.com>
Acked-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/signal.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4fd173cd8e57..5a8f5e68bb61 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -682,6 +682,7 @@ static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	      sigset_t *oldset, struct pt_regs *regs)
 {
+	sigset_t blocked;
 	int ret;
 
 	/* Are we from a system call? */
@@ -741,12 +742,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;
 
-	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
 	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&current->blocked, sig);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+		sigaddset(&blocked, sig);
+	set_current_blocked(&blocked);
 
 	tracehook_signal_handler(sig, info, ka, regs,
 				 test_thread_flag(TIF_SINGLESTEP));
-- 
cgit v1.2.3


From e9bd3f0faa90084f188830d77723bafe422e486b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 27 Apr 2011 21:09:39 +0200
Subject: x86: signal: sys_rt_sigreturn() should use set_current_blocked()

Normally sys_rt_sigreturn() restores the old current->blocked which was
changed by handle_signal(), and unblocking is always fine.

But the debugger or application itself can change frame->uc_sigmask and
thus we need set_current_blocked()->retarget_shared_pending().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Matt Fleming <matt.fleming@linux.intel.com>
Acked-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/kernel/signal.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 5a8f5e68bb61..40a24932a8a1 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -601,10 +601,7 @@ long sys_rt_sigreturn(struct pt_regs *regs)
 		goto badframe;
 
 	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
-- 
cgit v1.2.3


From 57d8e02e3cd21bccf2b84b26b42feb79e1f0f83e Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 27 Apr 2011 15:26:51 -0700
Subject: um: mdd support for 64 bit atomic operations

This adds support for 64 bit atomic operations on 32 bit UML systems.  XFS
needs them since 2.6.38.

  $ make ARCH=um SUBARCH=i386
  ...
    LD      .tmp_vmlinux1
  fs/built-in.o: In function `xlog_regrant_reserve_log_space':
  xfs_log.c:(.text+0xd8584): undefined reference to `atomic64_read_386'
  xfs_log.c:(.text+0xd85ac): undefined reference to `cmpxchg8b_emu'
  ...

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=32812

Reported-by: Martin Walch <walch.martin@web.de>
Tested-by: Martin Walch <walch.martin@web.de>
Cc: Martin Walch <walch.martin@web.de>
Cc: <stable@kernel.org> [2.6.38.x 084189a: um: disable CONFIG_CMPXCHG_LOCAL]
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/sys-i386/Makefile          |   2 +-
 arch/um/sys-i386/atomic64_cx8_32.S | 225 +++++++++++++++++++++++++++++++++++++
 2 files changed, 226 insertions(+), 1 deletion(-)
 create mode 100644 arch/um/sys-i386/atomic64_cx8_32.S

(limited to 'arch')

diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 804b28dd0328..b1da91c1b200 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -4,7 +4,7 @@
 
 obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
 	ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
-	sys_call_table.o tls.o
+	sys_call_table.o tls.o atomic64_cx8_32.o
 
 obj-$(CONFIG_BINFMT_ELF) += elfcore.o
 
diff --git a/arch/um/sys-i386/atomic64_cx8_32.S b/arch/um/sys-i386/atomic64_cx8_32.S
new file mode 100644
index 000000000000..1e901d3d4a95
--- /dev/null
+++ b/arch/um/sys-i386/atomic64_cx8_32.S
@@ -0,0 +1,225 @@
+/*
+ * atomic64_t for 586+
+ *
+ * Copied from arch/x86/lib/atomic64_cx8_32.S
+ *
+ * Copyright © 2010  Luca Barbieri
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/dwarf2.h>
+
+.macro SAVE reg
+	pushl_cfi %\reg
+	CFI_REL_OFFSET \reg, 0
+.endm
+
+.macro RESTORE reg
+	popl_cfi %\reg
+	CFI_RESTORE \reg
+.endm
+
+.macro read64 reg
+	movl %ebx, %eax
+	movl %ecx, %edx
+/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */
+	LOCK_PREFIX
+	cmpxchg8b (\reg)
+.endm
+
+ENTRY(atomic64_read_cx8)
+	CFI_STARTPROC
+
+	read64 %ecx
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_read_cx8)
+
+ENTRY(atomic64_set_cx8)
+	CFI_STARTPROC
+
+1:
+/* we don't need LOCK_PREFIX since aligned 64-bit writes
+ * are atomic on 586 and newer */
+	cmpxchg8b (%esi)
+	jne 1b
+
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_set_cx8)
+
+ENTRY(atomic64_xchg_cx8)
+	CFI_STARTPROC
+
+	movl %ebx, %eax
+	movl %ecx, %edx
+1:
+	LOCK_PREFIX
+	cmpxchg8b (%esi)
+	jne 1b
+
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_xchg_cx8)
+
+.macro addsub_return func ins insc
+ENTRY(atomic64_\func\()_return_cx8)
+	CFI_STARTPROC
+	SAVE ebp
+	SAVE ebx
+	SAVE esi
+	SAVE edi
+
+	movl %eax, %esi
+	movl %edx, %edi
+	movl %ecx, %ebp
+
+	read64 %ebp
+1:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	\ins\()l %esi, %ebx
+	\insc\()l %edi, %ecx
+	LOCK_PREFIX
+	cmpxchg8b (%ebp)
+	jne 1b
+
+10:
+	movl %ebx, %eax
+	movl %ecx, %edx
+	RESTORE edi
+	RESTORE esi
+	RESTORE ebx
+	RESTORE ebp
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_\func\()_return_cx8)
+.endm
+
+addsub_return add add adc
+addsub_return sub sub sbb
+
+.macro incdec_return func ins insc
+ENTRY(atomic64_\func\()_return_cx8)
+	CFI_STARTPROC
+	SAVE ebx
+
+	read64 %esi
+1:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	\ins\()l $1, %ebx
+	\insc\()l $0, %ecx
+	LOCK_PREFIX
+	cmpxchg8b (%esi)
+	jne 1b
+
+10:
+	movl %ebx, %eax
+	movl %ecx, %edx
+	RESTORE ebx
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_\func\()_return_cx8)
+.endm
+
+incdec_return inc add adc
+incdec_return dec sub sbb
+
+ENTRY(atomic64_dec_if_positive_cx8)
+	CFI_STARTPROC
+	SAVE ebx
+
+	read64 %esi
+1:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	subl $1, %ebx
+	sbb $0, %ecx
+	js 2f
+	LOCK_PREFIX
+	cmpxchg8b (%esi)
+	jne 1b
+
+2:
+	movl %ebx, %eax
+	movl %ecx, %edx
+	RESTORE ebx
+	ret
+	CFI_ENDPROC
+ENDPROC(atomic64_dec_if_positive_cx8)
+
+ENTRY(atomic64_add_unless_cx8)
+	CFI_STARTPROC
+	SAVE ebp
+	SAVE ebx
+/* these just push these two parameters on the stack */
+	SAVE edi
+	SAVE esi
+
+	movl %ecx, %ebp
+	movl %eax, %esi
+	movl %edx, %edi
+
+	read64 %ebp
+1:
+	cmpl %eax, 0(%esp)
+	je 4f
+2:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	addl %esi, %ebx
+	adcl %edi, %ecx
+	LOCK_PREFIX
+	cmpxchg8b (%ebp)
+	jne 1b
+
+	movl $1, %eax
+3:
+	addl $8, %esp
+	CFI_ADJUST_CFA_OFFSET -8
+	RESTORE ebx
+	RESTORE ebp
+	ret
+4:
+	cmpl %edx, 4(%esp)
+	jne 2b
+	xorl %eax, %eax
+	jmp 3b
+	CFI_ENDPROC
+ENDPROC(atomic64_add_unless_cx8)
+
+ENTRY(atomic64_inc_not_zero_cx8)
+	CFI_STARTPROC
+	SAVE ebx
+
+	read64 %esi
+1:
+	testl %eax, %eax
+	je 4f
+2:
+	movl %eax, %ebx
+	movl %edx, %ecx
+	addl $1, %ebx
+	adcl $0, %ecx
+	LOCK_PREFIX
+	cmpxchg8b (%esi)
+	jne 1b
+
+	movl $1, %eax
+3:
+	RESTORE ebx
+	ret
+4:
+	testl %edx, %edx
+	jne 2b
+	jmp 3b
+	CFI_ENDPROC
+ENDPROC(atomic64_inc_not_zero_cx8)
-- 
cgit v1.2.3


From 365a0deae2b6743b3263a71871bbd8c9f66ac34c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 27 Apr 2011 15:26:53 -0700
Subject: uml: fix hppfs build

Make HoneyPot ProcFS depend on CONFIG_PROC_FS so that it will build.
Recommended by Christoph Hellwig.

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33692

Reported-by: Simon Danner <danner.simon@gmail.com>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/Kconfig.um | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um
index 90a438acbfaf..b5e675e370c6 100644
--- a/arch/um/Kconfig.um
+++ b/arch/um/Kconfig.um
@@ -47,7 +47,7 @@ config HOSTFS
 
 config HPPFS
 	tristate "HoneyPot ProcFS (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on EXPERIMENTAL && PROC_FS
 	help
 	  hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc
 	  entries to be overridden, removed, or fabricated from the host.
-- 
cgit v1.2.3


From 534e3adbd22efa327e6ff27cf2d8ebaad8382ecd Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 27 Apr 2011 15:26:54 -0700
Subject: um: adjust current_thread_info() for newer gcc versions

In some cases gcc >= 4.5.2 will optimize away current_thread_info().  To
prevent gcc from doing so the stack address has to be obtained via inline
asm.

Signed-off-by: Richard Weinberger <richard@nod.at>
Acked-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/include/asm/thread_info.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index e2cf786bda0a..5bd1bad33fab 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -49,7 +49,10 @@ static inline struct thread_info *current_thread_info(void)
 {
 	struct thread_info *ti;
 	unsigned long mask = THREAD_SIZE - 1;
-	ti = (struct thread_info *) (((unsigned long) &ti) & ~mask);
+	void *p;
+
+	asm volatile ("" : "=r" (p) : "0" (&ti));
+	ti = (struct thread_info *) (((unsigned long)p) & ~mask);
 	return ti;
 }
 
-- 
cgit v1.2.3


From 38ade3a1fa0421c12627c7b48c33e89414fc9b76 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 29 Apr 2011 00:36:21 +0200
Subject: shmobile: Use power domains for platform runtime PM

shmobile platforms replace the runtime PM callbacks of the platform
bus type with their own routines, but this means that the callbacks
are replaced system-wide.  This may not be the right approach if the
platform devices on the system are not of the same type (e.g. some
of them belong to an SoC and the others are located in separate
chips), because in those cases they may require different handling.
Thus it is better to use power domains to override the platform bus
type's PM handling, as it generally is possible to use different
power domains for devices with different PM requirements.

Define a default power domain for shmobile in both the SH and ARM
falvors and use it to override the platform bus type's PM callbacks.
Since the suspend and hibernate callbacks of the new "default" power
domains need to be the same and the platform bus type's suspend and
hibernate callbacks for the time being, export those callbacks so
that can be used outside of the platform bus type code.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/arm/mach-shmobile/pm_runtime.c      | 25 +++++++++++++++++-------
 arch/sh/kernel/cpu/shmobile/pm_runtime.c | 33 +++++++++++++++++++++-----------
 2 files changed, 40 insertions(+), 18 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c
index 94912d3944d3..12bb504c7f49 100644
--- a/arch/arm/mach-shmobile/pm_runtime.c
+++ b/arch/arm/mach-shmobile/pm_runtime.c
@@ -66,11 +66,11 @@ static void platform_pm_runtime_bug(struct device *dev,
 		dev_err(dev, "runtime pm suspend before resume\n");
 }
 
-int platform_pm_runtime_suspend(struct device *dev)
+static int default_platform_runtime_suspend(struct device *dev)
 {
 	struct pm_runtime_data *prd = __to_prd(dev);
 
-	dev_dbg(dev, "platform_pm_runtime_suspend()\n");
+	dev_dbg(dev, "%s()\n", __func__);
 
 	platform_pm_runtime_bug(dev, prd);
 
@@ -82,11 +82,11 @@ int platform_pm_runtime_suspend(struct device *dev)
 	return 0;
 }
 
-int platform_pm_runtime_resume(struct device *dev)
+static int default_platform_runtime_resume(struct device *dev)
 {
 	struct pm_runtime_data *prd = __to_prd(dev);
 
-	dev_dbg(dev, "platform_pm_runtime_resume()\n");
+	dev_dbg(dev, "%s()\n", __func__);
 
 	platform_pm_runtime_init(dev, prd);
 
@@ -98,12 +98,21 @@ int platform_pm_runtime_resume(struct device *dev)
 	return 0;
 }
 
-int platform_pm_runtime_idle(struct device *dev)
+static int default_platform_runtime_idle(struct device *dev)
 {
 	/* suspend synchronously to disable clocks immediately */
 	return pm_runtime_suspend(dev);
 }
 
+static struct dev_power_domain default_power_domain = {
+	.ops = {
+		.runtime_suspend = default_platform_runtime_suspend,
+		.runtime_resume = default_platform_runtime_resume,
+		.runtime_idle = default_platform_runtime_idle,
+		USE_PLATFORM_PM_SLEEP_OPS
+	},
+};
+
 static int platform_bus_notify(struct notifier_block *nb,
 			       unsigned long action, void *data)
 {
@@ -114,10 +123,12 @@ static int platform_bus_notify(struct notifier_block *nb,
 
 	if (action == BUS_NOTIFY_BIND_DRIVER) {
 		prd = devres_alloc(__devres_release, sizeof(*prd), GFP_KERNEL);
-		if (prd)
+		if (prd) {
 			devres_add(dev, prd);
-		else
+			dev->pwr_domain = &default_power_domain;
+		} else {
 			dev_err(dev, "unable to alloc memory for runtime pm\n");
+		}
 	}
 
 	return 0;
diff --git a/arch/sh/kernel/cpu/shmobile/pm_runtime.c b/arch/sh/kernel/cpu/shmobile/pm_runtime.c
index 6dcb8166a64d..22db127afa7b 100644
--- a/arch/sh/kernel/cpu/shmobile/pm_runtime.c
+++ b/arch/sh/kernel/cpu/shmobile/pm_runtime.c
@@ -139,7 +139,7 @@ void platform_pm_runtime_suspend_idle(void)
 	queue_work(pm_wq, &hwblk_work);
 }
 
-int platform_pm_runtime_suspend(struct device *dev)
+static int default_platform_runtime_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct pdev_archdata *ad = &pdev->archdata;
@@ -147,7 +147,7 @@ int platform_pm_runtime_suspend(struct device *dev)
 	int hwblk = ad->hwblk_id;
 	int ret = 0;
 
-	dev_dbg(dev, "platform_pm_runtime_suspend() [%d]\n", hwblk);
+	dev_dbg(dev, "%s() [%d]\n", __func__, hwblk);
 
 	/* ignore off-chip platform devices */
 	if (!hwblk)
@@ -183,20 +183,20 @@ int platform_pm_runtime_suspend(struct device *dev)
 	mutex_unlock(&ad->mutex);
 
 out:
-	dev_dbg(dev, "platform_pm_runtime_suspend() [%d] returns %d\n",
-		hwblk, ret);
+	dev_dbg(dev, "%s() [%d] returns %d\n",
+		 __func__, hwblk, ret);
 
 	return ret;
 }
 
-int platform_pm_runtime_resume(struct device *dev)
+static int default_platform_runtime_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct pdev_archdata *ad = &pdev->archdata;
 	int hwblk = ad->hwblk_id;
 	int ret = 0;
 
-	dev_dbg(dev, "platform_pm_runtime_resume() [%d]\n", hwblk);
+	dev_dbg(dev, "%s() [%d]\n", __func__, hwblk);
 
 	/* ignore off-chip platform devices */
 	if (!hwblk)
@@ -228,19 +228,19 @@ int platform_pm_runtime_resume(struct device *dev)
 	 */
 	mutex_unlock(&ad->mutex);
 out:
-	dev_dbg(dev, "platform_pm_runtime_resume() [%d] returns %d\n",
-		hwblk, ret);
+	dev_dbg(dev, "%s() [%d] returns %d\n",
+		__func__, hwblk, ret);
 
 	return ret;
 }
 
-int platform_pm_runtime_idle(struct device *dev)
+static int default_platform_runtime_idle(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	int hwblk = pdev->archdata.hwblk_id;
 	int ret = 0;
 
-	dev_dbg(dev, "platform_pm_runtime_idle() [%d]\n", hwblk);
+	dev_dbg(dev, "%s() [%d]\n", __func__, hwblk);
 
 	/* ignore off-chip platform devices */
 	if (!hwblk)
@@ -252,10 +252,19 @@ int platform_pm_runtime_idle(struct device *dev)
 	/* suspend synchronously to disable clocks immediately */
 	ret = pm_runtime_suspend(dev);
 out:
-	dev_dbg(dev, "platform_pm_runtime_idle() [%d] done!\n", hwblk);
+	dev_dbg(dev, "%s() [%d] done!\n", __func__, hwblk);
 	return ret;
 }
 
+static struct dev_power_domain default_power_domain = {
+	.ops = {
+		.runtime_suspend = default_platform_runtime_suspend,
+		.runtime_resume = default_platform_runtime_resume,
+		.runtime_idle = default_platform_runtime_idle,
+		USE_PLATFORM_PM_SLEEP_OPS
+	},
+};
+
 static int platform_bus_notify(struct notifier_block *nb,
 			       unsigned long action, void *data)
 {
@@ -276,6 +285,7 @@ static int platform_bus_notify(struct notifier_block *nb,
 		hwblk_disable(hwblk_info, hwblk);
 		/* make sure driver re-inits itself once */
 		__set_bit(PDEV_ARCHDATA_FLAG_INIT, &pdev->archdata.flags);
+		dev->pwr_domain = &default_power_domain;
 		break;
 	/* TODO: add BUS_NOTIFY_BIND_DRIVER and increase idle count */
 	case BUS_NOTIFY_BOUND_DRIVER:
@@ -289,6 +299,7 @@ static int platform_bus_notify(struct notifier_block *nb,
 		__set_bit(PDEV_ARCHDATA_FLAG_INIT, &pdev->archdata.flags);
 		break;
 	case BUS_NOTIFY_DEL_DEVICE:
+		dev->pwr_domain = NULL;
 		break;
 	}
 	return 0;
-- 
cgit v1.2.3


From 638080c37ae08fd0c44cec13d7948ca5385ae851 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@ti.com>
Date: Fri, 29 Apr 2011 00:36:42 +0200
Subject: OMAP2+ / PM: move runtime PM implementation to use device power
 domains

In commit 7538e3db6e015e890825fbd9f8659952896ddd5b (PM: add support
for device power domains) a better way for handling platform-specific
power hooks was introduced.

Rather than using the platform_bus dev_pm_ops overrides
(platform_bus_set_pm_ops()), this patch moves the OMAP runtime PM
implementation over to using device power domains.

Since OMAP is the only user of platform_bus_set_pm_ops(), that
interface can be removed (and will be in a forthcoming patch.)

[rjw: Rebased on top of a previous change modifying the handling of
 power domains by the PM core so that power domain callbacks take
 precendence over subsystem-level PM callbacks.]

Signed-off-by: Kevin Hilman <khilman@ti.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/arm/mach-omap2/Makefile     |  6 +--
 arch/arm/mach-omap2/pm_bus.c     | 85 ----------------------------------------
 arch/arm/plat-omap/omap_device.c | 23 +++++++++++
 3 files changed, 26 insertions(+), 88 deletions(-)
 delete mode 100644 arch/arm/mach-omap2/pm_bus.c

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index a45cd6409686..b3535842f4f8 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -59,10 +59,10 @@ endif
 # Power Management
 ifeq ($(CONFIG_PM),y)
 obj-$(CONFIG_ARCH_OMAP2)		+= pm24xx.o
-obj-$(CONFIG_ARCH_OMAP2)		+= sleep24xx.o pm_bus.o
+obj-$(CONFIG_ARCH_OMAP2)		+= sleep24xx.o
 obj-$(CONFIG_ARCH_OMAP3)		+= pm34xx.o sleep34xx.o \
-					   cpuidle34xx.o pm_bus.o
-obj-$(CONFIG_ARCH_OMAP4)		+= pm44xx.o pm_bus.o
+					   cpuidle34xx.o
+obj-$(CONFIG_ARCH_OMAP4)		+= pm44xx.o
 obj-$(CONFIG_PM_DEBUG)			+= pm-debug.o
 obj-$(CONFIG_OMAP_SMARTREFLEX)          += sr_device.o smartreflex.o
 obj-$(CONFIG_OMAP_SMARTREFLEX_CLASS3)	+= smartreflex-class3.o
diff --git a/arch/arm/mach-omap2/pm_bus.c b/arch/arm/mach-omap2/pm_bus.c
deleted file mode 100644
index 5acd2ab298b1..000000000000
--- a/arch/arm/mach-omap2/pm_bus.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Runtime PM support code for OMAP
- *
- * Author: Kevin Hilman, Deep Root Systems, LLC
- *
- * Copyright (C) 2010 Texas Instruments, Inc.
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/io.h>
-#include <linux/pm_runtime.h>
-#include <linux/platform_device.h>
-#include <linux/mutex.h>
-
-#include <plat/omap_device.h>
-#include <plat/omap-pm.h>
-
-#ifdef CONFIG_PM_RUNTIME
-static int omap_pm_runtime_suspend(struct device *dev)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	int r, ret = 0;
-
-	dev_dbg(dev, "%s\n", __func__);
-
-	ret = pm_generic_runtime_suspend(dev);
-
-	if (!ret && dev->parent == &omap_device_parent) {
-		r = omap_device_idle(pdev);
-		WARN_ON(r);
-	}
-
-	return ret;
-};
-
-static int omap_pm_runtime_resume(struct device *dev)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	int r;
-
-	dev_dbg(dev, "%s\n", __func__);
-
-	if (dev->parent == &omap_device_parent) {
-		r = omap_device_enable(pdev);
-		WARN_ON(r);
-	}
-
-	return pm_generic_runtime_resume(dev);
-};
-#else
-#define omap_pm_runtime_suspend NULL
-#define omap_pm_runtime_resume NULL
-#endif /* CONFIG_PM_RUNTIME */
-
-static int __init omap_pm_runtime_init(void)
-{
-	const struct dev_pm_ops *pm;
-	struct dev_pm_ops *omap_pm;
-
-	pm = platform_bus_get_pm_ops();
-	if (!pm) {
-		pr_err("%s: unable to get dev_pm_ops from platform_bus\n",
-			__func__);
-		return -ENODEV;
-	}
-
-	omap_pm = kmemdup(pm, sizeof(struct dev_pm_ops), GFP_KERNEL);
-	if (!omap_pm) {
-		pr_err("%s: unable to alloc memory for new dev_pm_ops\n",
-			__func__);
-		return -ENOMEM;
-	}
-
-	omap_pm->runtime_suspend = omap_pm_runtime_suspend;
-	omap_pm->runtime_resume = omap_pm_runtime_resume;
-
-	platform_bus_set_pm_ops(omap_pm);
-
-	return 0;
-}
-core_initcall(omap_pm_runtime_init);
diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c
index 9bbda9acb73b..a37b8eb65b76 100644
--- a/arch/arm/plat-omap/omap_device.c
+++ b/arch/arm/plat-omap/omap_device.c
@@ -536,6 +536,28 @@ int omap_early_device_register(struct omap_device *od)
 	return 0;
 }
 
+static int _od_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	return omap_device_idle(pdev);
+}
+
+static int _od_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	return omap_device_enable(pdev);
+}
+
+static struct dev_power_domain omap_device_power_domain = {
+	.ops = {
+		.runtime_suspend = _od_runtime_suspend,
+		.runtime_resume = _od_runtime_resume,
+		USE_PLATFORM_PM_SLEEP_OPS
+	}
+};
+
 /**
  * omap_device_register - register an omap_device with one omap_hwmod
  * @od: struct omap_device * to register
@@ -549,6 +571,7 @@ int omap_device_register(struct omap_device *od)
 	pr_debug("omap_device: %s: registering\n", od->pdev.name);
 
 	od->pdev.dev.parent = &omap_device_parent;
+	od->pdev.dev.pwr_domain = &omap_device_power_domain;
 	return platform_device_register(&od->pdev);
 }
 
-- 
cgit v1.2.3


From 1d2b71f61b6a10216274e27b717becf9ae101fc7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 29 Apr 2011 00:36:53 +0200
Subject: PM / Runtime: Add subsystem data field to struct dev_pm_info

Some subsystems need to attach PM-related data to struct device and
they need to use devres for this purpose.  For their convenience
and to make code more straightforward, add a new field called
subsys_data to struct dev_pm_info and let subsystems use it for
attaching PM-related information to devices.

Convert the ARM shmobile platform to using the new field.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/arm/mach-shmobile/pm_runtime.c | 34 +++++++++++++++++-----------------
 include/linux/pm.h                  |  1 +
 2 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c
index 12bb504c7f49..30bbe9a99ae1 100644
--- a/arch/arm/mach-shmobile/pm_runtime.c
+++ b/arch/arm/mach-shmobile/pm_runtime.c
@@ -18,6 +18,7 @@
 #include <linux/clk.h>
 #include <linux/sh_clk.h>
 #include <linux/bitmap.h>
+#include <linux/slab.h>
 
 #ifdef CONFIG_PM_RUNTIME
 #define BIT_ONCE 0
@@ -29,22 +30,9 @@ struct pm_runtime_data {
 	struct clk *clk;
 };
 
-static void __devres_release(struct device *dev, void *res)
-{
-	struct pm_runtime_data *prd = res;
-
-	dev_dbg(dev, "__devres_release()\n");
-
-	if (test_bit(BIT_CLK_ENABLED, &prd->flags))
-		clk_disable(prd->clk);
-
-	if (test_bit(BIT_ACTIVE, &prd->flags))
-		clk_put(prd->clk);
-}
-
 static struct pm_runtime_data *__to_prd(struct device *dev)
 {
-	return devres_find(dev, __devres_release, NULL, NULL);
+	return dev ? dev->power.subsys_data : NULL;
 }
 
 static void platform_pm_runtime_init(struct device *dev,
@@ -121,14 +109,26 @@ static int platform_bus_notify(struct notifier_block *nb,
 
 	dev_dbg(dev, "platform_bus_notify() %ld !\n", action);
 
-	if (action == BUS_NOTIFY_BIND_DRIVER) {
-		prd = devres_alloc(__devres_release, sizeof(*prd), GFP_KERNEL);
+	switch (action) {
+	case BUS_NOTIFY_BIND_DRIVER:
+		prd = kzalloc(sizeof(*prd), GFP_KERNEL);
 		if (prd) {
-			devres_add(dev, prd);
+			dev->power.subsys_data = prd;
 			dev->pwr_domain = &default_power_domain;
 		} else {
 			dev_err(dev, "unable to alloc memory for runtime pm\n");
 		}
+		break;
+	case BUS_NOTIFY_UNBOUND_DRIVER:
+		prd = __to_prd(dev);
+		if (prd) {
+			if (test_bit(BIT_CLK_ENABLED, &prd->flags))
+				clk_disable(prd->clk);
+
+			if (test_bit(BIT_ACTIVE, &prd->flags))
+				clk_put(prd->clk);
+		}
+		break;
 	}
 
 	return 0;
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 512e09177e57..f4167d0faa67 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -460,6 +460,7 @@ struct dev_pm_info {
 	unsigned long		active_jiffies;
 	unsigned long		suspended_jiffies;
 	unsigned long		accounting_timestamp;
+	void			*subsys_data;  /* Owned by the subsystem. */
 #endif
 };
 
-- 
cgit v1.2.3


From cf3cc1aa9b6d0bf1750143af65829f4368d77492 Mon Sep 17 00:00:00 2001
From: Viktor Rosendahl <viktor.rosendahl@nokia.com>
Date: Mon, 28 Mar 2011 18:56:05 +0300
Subject: kprobes/arm: Fix ldrd/strd emulation

Currently emulate_ldrd and emulate_strd don't even have the adjustment
of the PC value, so in case of Rn == PC, it will not update the PC
incorrectly but instead load/store from the wrong address.  Let's add
both the adjustment of the PC value and the check for PC == PC.

Signed-off-by: Viktor Rosendahl <viktor.rosendahl@nokia.com>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 23891317dc4b..3b0cf90cb449 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -540,9 +540,12 @@ static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
+	long ppc = (long)p->addr + 8;
 	int rd = (insn >> 12) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm = insn & 0xf;  /* rm may be invalid, don't care. */
+	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
+	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
 
 	/* Not following the C calling convention here, so need asm(). */
 	__asm__ __volatile__ (
@@ -554,29 +557,36 @@ static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs)
 		"str	r0, %[rn]	\n\t"	/* in case of writeback */
 		"str	r2, %[rd0]	\n\t"
 		"str	r3, %[rd1]	\n\t"
-		: [rn]  "+m" (regs->uregs[rn]),
+		: [rn]  "+m" (rnv),
 		  [rd0] "=m" (regs->uregs[rd]),
 		  [rd1] "=m" (regs->uregs[rd+1])
-		: [rm]   "m" (regs->uregs[rm]),
+		: [rm]   "m" (rmv),
 		  [cpsr] "r" (regs->ARM_cpsr),
 		  [i_fn] "r" (i_fn)
 		: "r0", "r1", "r2", "r3", "lr", "cc"
 	);
+	if (rn != 15)
+		regs->uregs[rn] = rnv;  /* Save Rn in case of writeback. */
 }
 
 static void __kprobes emulate_strd(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_4arg_fn_t *i_fn = (insn_4arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
+	long ppc = (long)p->addr + 8;
 	int rd = (insn >> 12) & 0xf;
 	int rn = (insn >> 16) & 0xf;
 	int rm  = insn & 0xf;
-	long rnv = regs->uregs[rn];
-	long rmv = regs->uregs[rm];  /* rm/rmv may be invalid, don't care. */
+	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
+	/* rm/rmv may be invalid, don't care. */
+	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
+	long rnv_wb;
 
-	regs->uregs[rn] = insnslot_4arg_rflags(rnv, rmv, regs->uregs[rd],
+	rnv_wb = insnslot_4arg_rflags(rnv, rmv, regs->uregs[rd],
 					       regs->uregs[rd+1],
 					       regs->ARM_cpsr, i_fn);
+	if (rn != 15)
+		regs->uregs[rn] = rnv_wb;  /* Save Rn in case of writeback. */
 }
 
 static void __kprobes emulate_ldr(struct kprobe *p, struct pt_regs *regs)
-- 
cgit v1.2.3


From 073090cb701148396b1130be81f8ac84a41f196d Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Wed, 6 Apr 2011 11:17:09 +0100
Subject: ARM: kprobes: Fix probing of conditionally executed instructions

When a kprobe is placed onto conditionally executed ARM instructions,
many of the emulation routines used to single step them produce corrupt
register results. Rather than fix all of these cases we modify the
framework which calls them to test the relevant condition flags and, if
the test fails, skip calling the emulation code.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/include/asm/kprobes.h   |  3 ++
 arch/arm/kernel/kprobes-decode.c | 91 ++++++++++++++++++++++++++++++++++++++++
 arch/arm/kernel/kprobes.c        |  3 +-
 3 files changed, 96 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h
index bb8a19bd5822..e46bdd0097eb 100644
--- a/arch/arm/include/asm/kprobes.h
+++ b/arch/arm/include/asm/kprobes.h
@@ -39,10 +39,13 @@ typedef u32 kprobe_opcode_t;
 struct kprobe;
 typedef void (kprobe_insn_handler_t)(struct kprobe *, struct pt_regs *);
 
+typedef unsigned long (kprobe_check_cc)(unsigned long);
+
 /* Architecture specific copy of original instruction. */
 struct arch_specific_insn {
 	kprobe_opcode_t		*insn;
 	kprobe_insn_handler_t	*insn_handler;
+	kprobe_check_cc		*insn_check_cc;
 };
 
 struct prev_kprobe {
diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 3b0cf90cb449..2e84169b9e91 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1396,6 +1396,96 @@ space_cccc_111x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	return INSN_GOOD;
 }
 
+static unsigned long __kprobes __check_eq(unsigned long cpsr)
+{
+	return cpsr & PSR_Z_BIT;
+}
+
+static unsigned long __kprobes __check_ne(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_Z_BIT;
+}
+
+static unsigned long __kprobes __check_cs(unsigned long cpsr)
+{
+	return cpsr & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_cc(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_mi(unsigned long cpsr)
+{
+	return cpsr & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_pl(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_vs(unsigned long cpsr)
+{
+	return cpsr & PSR_V_BIT;
+}
+
+static unsigned long __kprobes __check_vc(unsigned long cpsr)
+{
+	return (~cpsr) & PSR_V_BIT;
+}
+
+static unsigned long __kprobes __check_hi(unsigned long cpsr)
+{
+	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+	return cpsr & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_ls(unsigned long cpsr)
+{
+	cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+	return (~cpsr) & PSR_C_BIT;
+}
+
+static unsigned long __kprobes __check_ge(unsigned long cpsr)
+{
+	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	return (~cpsr) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_lt(unsigned long cpsr)
+{
+	cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	return cpsr & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_gt(unsigned long cpsr)
+{
+	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
+	return (~temp) & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_le(unsigned long cpsr)
+{
+	unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+	temp |= (cpsr << 1);			 /* PSR_N_BIT |= PSR_Z_BIT */
+	return temp & PSR_N_BIT;
+}
+
+static unsigned long __kprobes __check_al(unsigned long cpsr)
+{
+	return true;
+}
+
+static kprobe_check_cc * const condition_checks[16] = {
+	&__check_eq, &__check_ne, &__check_cs, &__check_cc,
+	&__check_mi, &__check_pl, &__check_vs, &__check_vc,
+	&__check_hi, &__check_ls, &__check_ge, &__check_lt,
+	&__check_gt, &__check_le, &__check_al, &__check_al
+};
+
 /* Return:
  *   INSN_REJECTED     If instruction is one not allowed to kprobe,
  *   INSN_GOOD         If instruction is supported and uses instruction slot,
@@ -1411,6 +1501,7 @@ space_cccc_111x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 enum kprobe_insn __kprobes
 arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
+	asi->insn_check_cc = condition_checks[insn>>28];
 	asi->insn[1] = KPROBE_RETURN_INSTRUCTION;
 
 	if ((insn & 0xf0000000) == 0xf0000000) {
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 2ba7deb3072e..1656c87501c0 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -134,7 +134,8 @@ static void __kprobes singlestep(struct kprobe *p, struct pt_regs *regs,
 				 struct kprobe_ctlblk *kcb)
 {
 	regs->ARM_pc += 4;
-	p->ainsn.insn_handler(p, regs);
+	if (p->ainsn.insn_check_cc(regs->ARM_cpsr))
+		p->ainsn.insn_handler(p, regs);
 }
 
 /*
-- 
cgit v1.2.3


From a539f5d46c868cb1f37b92e62e040b400571aed4 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Wed, 6 Apr 2011 11:17:10 +0100
Subject: ARM: kprobes: Remove redundant condition checks from simulation
 routines

Now we have the framework code handling conditionally executed
instructions we can remove redundant checks in individual simulation
routines.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 30 +++---------------------------
 1 file changed, 3 insertions(+), 27 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 2e84169b9e91..f52fac0c59f0 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -71,10 +71,6 @@
 #define PSR_fs	(PSR_f|PSR_s)
 
 #define KPROBE_RETURN_INSTRUCTION	0xe1a0f00e	/* mov pc, lr */
-#define SET_R0_TRUE_INSTRUCTION		0xe3a00001	/* mov	r0, #1 */
-
-#define	truecc_insn(insn)	(((insn) & 0xf0000000) | \
-				 (SET_R0_TRUE_INSTRUCTION & 0x0fffffff))
 
 typedef long (insn_0arg_fn_t)(void);
 typedef long (insn_1arg_fn_t)(long);
@@ -419,14 +415,10 @@ insnslot_llret_4arg_rwflags(long r0, long r1, long r2, long r3, long *cpsr,
 
 static void __kprobes simulate_bbl(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
 	long iaddr = (long)p->addr;
 	int disp  = branch_displacement(insn);
 
-	if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn))
-		return;
-
 	if (insn & (1 << 24))
 		regs->ARM_lr = iaddr + 4;
 
@@ -446,14 +438,10 @@ static void __kprobes simulate_blx1(struct kprobe *p, struct pt_regs *regs)
 
 static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
 	int rm = insn & 0xf;
 	long rmv = regs->uregs[rm];
 
-	if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn))
-		return;
-
 	if (insn & (1 << 5))
 		regs->ARM_lr = (long)p->addr + 4;
 
@@ -465,7 +453,6 @@ static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs)
 
 static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
 	kprobe_opcode_t insn = p->opcode;
 	int rn = (insn >> 16) & 0xf;
 	int lbit = insn & (1 << 20);
@@ -476,9 +463,6 @@ static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
 	int reg_bit_vector;
 	int reg_count;
 
-	if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn))
-		return;
-
 	reg_count = 0;
 	reg_bit_vector = insn & 0xffff;
 	while (reg_bit_vector) {
@@ -510,11 +494,6 @@ static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
 
 static void __kprobes simulate_stm1_pc(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-
-	if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn))
-		return;
-
 	regs->ARM_pc = (long)p->addr + str_pc_offset;
 	simulate_ldm1stm1(p, regs);
 	regs->ARM_pc = (long)p->addr + 4;
@@ -1056,9 +1035,8 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		/* BLX(2) : cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */
 		/* BX     : cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */
 		if ((insn & 0x0ff000d0) == 0x01200010) {
-			asi->insn[0] = truecc_insn(insn);
 			asi->insn_handler = simulate_blx2bx;
-			return INSN_GOOD;
+			return INSN_GOOD_NO_SLOT;
 		}
 
 		/* CLZ : cccc 0001 0110 xxxx xxxx xxxx 0001 xxxx */
@@ -1333,10 +1311,9 @@ space_cccc_100x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 	/* LDM(1) : cccc 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
 	/* STM(1) : cccc 100x x0x0 xxxx xxxx xxxx xxxx xxxx */
-	asi->insn[0] = truecc_insn(insn);
 	asi->insn_handler = ((insn & 0x108000) == 0x008000) ? /* STM & R15 */
 				simulate_stm1_pc : simulate_ldm1stm1;
-	return INSN_GOOD;
+	return INSN_GOOD_NO_SLOT;
 }
 
 static enum kprobe_insn __kprobes
@@ -1344,9 +1321,8 @@ space_cccc_101x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
 	/* B  : cccc 1010 xxxx xxxx xxxx xxxx xxxx xxxx */
 	/* BL : cccc 1011 xxxx xxxx xxxx xxxx xxxx xxxx */
-	asi->insn[0] = truecc_insn(insn);
 	asi->insn_handler = simulate_bbl;
-	return INSN_GOOD;
+	return INSN_GOOD_NO_SLOT;
 }
 
 static enum kprobe_insn __kprobes
-- 
cgit v1.2.3


From ad111ce46674a473370d302325db8f418ac4fe92 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Wed, 6 Apr 2011 11:17:11 +0100
Subject: ARM: kprobes: Fix emulation of CMP, CMN, TST and TEQ instructions.

Probing these instructions was corrupting R0 because the emulation code
didn't account for the fact that they don't write a result to a
register.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 55 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 53 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index f52fac0c59f0..ee29d3350437 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -807,6 +807,17 @@ emulate_alu_imm_rwflags(struct kprobe *p, struct pt_regs *regs)
 	regs->uregs[rd] = insnslot_1arg_rwflags(rnv, &regs->ARM_cpsr, i_fn);
 }
 
+static void __kprobes
+emulate_alu_tests_imm(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rn = (insn >> 16) & 0xf;
+	long rnv = (rn == 15) ? (long)p->addr + 8 : regs->uregs[rn];
+
+	insnslot_1arg_rwflags(rnv, &regs->ARM_cpsr, i_fn);
+}
+
 static void __kprobes
 emulate_alu_rflags(struct kprobe *p, struct pt_regs *regs)
 {
@@ -843,6 +854,22 @@ emulate_alu_rwflags(struct kprobe *p, struct pt_regs *regs)
 		insnslot_3arg_rwflags(rnv, rmv, rsv, &regs->ARM_cpsr, i_fn);
 }
 
+static void __kprobes
+emulate_alu_tests(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	long ppc = (long)p->addr + 8;
+	int rn = (insn >> 16) & 0xf;
+	int rs = (insn >> 8) & 0xf;	/* rs/rsv may be invalid, don't care. */
+	int rm = insn & 0xf;
+	long rnv = (rn == 15) ? ppc : regs->uregs[rn];
+	long rmv = (rm == 15) ? ppc : regs->uregs[rm];
+	long rsv = regs->uregs[rs];
+
+	insnslot_3arg_rwflags(rnv, rmv, rsv, &regs->ARM_cpsr, i_fn);
+}
+
 static enum kprobe_insn __kprobes
 prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
@@ -1142,8 +1169,20 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		insn |= 0x00000200;     /* Rs = r2 */
 	}
 	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
+
+	if ((insn & 0x0f900000) == 0x01100000) {
+		/*
+		 * TST : cccc 0001 0001 xxxx xxxx xxxx xxxx xxxx
+		 * TEQ : cccc 0001 0011 xxxx xxxx xxxx xxxx xxxx
+		 * CMP : cccc 0001 0101 xxxx xxxx xxxx xxxx xxxx
+		 * CMN : cccc 0001 0111 xxxx xxxx xxxx xxxx xxxx
+		 */
+		asi->insn_handler = emulate_alu_tests;
+	} else {
+		/* ALU ops which write to Rd */
+		asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
 				emulate_alu_rwflags : emulate_alu_rflags;
+	}
 	return INSN_GOOD;
 }
 
@@ -1170,8 +1209,20 @@ space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	 */
 	insn &= 0xffff0fff;	/* Rd = r0 */
 	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
+
+	if ((insn & 0x0f900000) == 0x03100000) {
+		/*
+		 * TST : cccc 0011 0001 xxxx xxxx xxxx xxxx xxxx
+		 * TEQ : cccc 0011 0011 xxxx xxxx xxxx xxxx xxxx
+		 * CMP : cccc 0011 0101 xxxx xxxx xxxx xxxx xxxx
+		 * CMN : cccc 0011 0111 xxxx xxxx xxxx xxxx xxxx
+		 */
+		asi->insn_handler = emulate_alu_tests_imm;
+	} else {
+		/* ALU ops which write to Rd */
+		asi->insn_handler = (insn & (1 << 20)) ?  /* S-bit */
 			emulate_alu_imm_rwflags : emulate_alu_imm_rflags;
+	}
 	return INSN_GOOD;
 }
 
-- 
cgit v1.2.3


From 896a74e19d0131413a96502429994bc8e6bbbe5a Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Wed, 6 Apr 2011 11:17:12 +0100
Subject: ARM: kprobes: Fix emulation of Data-processing (immediate)
 instructions

Emulation of instructions like "ADD rd, rn, #<const>" would result in a
corrupted value for rd.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index ee29d3350437..baf053ea96eb 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1207,7 +1207,7 @@ space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	 * *S (bit 20) updates condition codes
 	 * ADC/SBC/RSC reads the C flag
 	 */
-	insn &= 0xffff0fff;	/* Rd = r0 */
+	insn &= 0xfff00fff;	/* Rn = r0 and Rd = r0 */
 	asi->insn[0] = insn;
 
 	if ((insn & 0x0f900000) == 0x03100000) {
-- 
cgit v1.2.3


From 51468ea91efad9c7e6dbae43cd8bdc423ec61709 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Thu, 7 Apr 2011 13:25:15 +0100
Subject: ARM: kprobes: Reject probing MRS instructions which read SPSR

We need to reject probing of instructions which read SPSR because
we can't handle this as the value in SPSR is lost when the exception
handler for the probe breakpoint first runs.

This patch also fixes the bitmask for MRS instructions decoding to
include checking bits 5-7.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index baf053ea96eb..e5bc576ba3fb 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1026,14 +1026,16 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	/* cccc 0001 0xx0 xxxx xxxx xxxx xxxx xxx0 xxxx */
 	if ((insn & 0x0f900010) == 0x01000000) {
 
-		/* BXJ  : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */
-		/* MSR  : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */
+		/* BXJ      : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */
+		/* MSR      : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */
+		/* MRS spsr : cccc 0001 0100 xxxx xxxx xxxx 0000 xxxx */
 		if ((insn & 0x0ff000f0) == 0x01200020 ||
-		    (insn & 0x0fb000f0) == 0x01200000)
+		    (insn & 0x0fb000f0) == 0x01200000 ||
+		    (insn & 0x0ff000f0) == 0x01400000)
 			return INSN_REJECTED;
 
-		/* MRS : cccc 0001 0x00 xxxx xxxx xxxx 0000 xxxx */
-		if ((insn & 0x0fb00010) == 0x01000000)
+		/* MRS cpsr : cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */
+		if ((insn & 0x0ff000f0) == 0x01000000)
 			return prep_emulate_rd12(insn, asi);
 
 		/* SMLALxy : cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */
-- 
cgit v1.2.3


From c412aba2a1243192a4d53736805a96bdda915608 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Thu, 7 Apr 2011 13:25:16 +0100
Subject: ARM: kprobes: Fix emulation of MRS instruction

The MRS instruction should set mode and interrupt bits in the read value
so it is simpler to use a new simulation routine (simulate_mrs) rather
than some modified emulation.

prep_emulate_rd12 is now unused and removed.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index e5bc576ba3fb..4ab83f4c47cf 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -451,6 +451,14 @@ static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs)
 		regs->ARM_cpsr |= PSR_T_BIT;
 }
 
+static void __kprobes simulate_mrs(struct kprobe *p, struct pt_regs *regs)
+{
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	unsigned long mask = 0xf8ff03df; /* Mask out execution state */
+	regs->uregs[rd] = regs->ARM_cpsr & mask;
+}
+
 static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs)
 {
 	kprobe_opcode_t insn = p->opcode;
@@ -895,15 +903,6 @@ prep_emulate_rd12rm0(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	return INSN_GOOD;
 }
 
-static enum kprobe_insn __kprobes
-prep_emulate_rd12(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	insn &= 0xffff0fff;	/* Rd = r0 */
-	asi->insn[0] = insn;
-	asi->insn_handler = emulate_rd12;
-	return INSN_GOOD;
-}
-
 static enum kprobe_insn __kprobes
 prep_emulate_rd12rn16rm0_wflags(kprobe_opcode_t insn,
 				struct arch_specific_insn *asi)
@@ -1035,8 +1034,10 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 			return INSN_REJECTED;
 
 		/* MRS cpsr : cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */
-		if ((insn & 0x0ff000f0) == 0x01000000)
-			return prep_emulate_rd12(insn, asi);
+		if ((insn & 0x0ff000f0) == 0x01000000) {
+			asi->insn_handler = simulate_mrs;
+			return INSN_GOOD_NO_SLOT;
+		}
 
 		/* SMLALxy : cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */
 		if ((insn & 0x0ff00090) == 0x01400080)
-- 
cgit v1.2.3


From 983ebd9365096c3386f6ed0978333e15f66024f5 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Thu, 7 Apr 2011 13:25:17 +0100
Subject: ARM: kprobes: Reject probing of instructions which write to PC
 unpredictably.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 4ab83f4c47cf..6d09db9c84ab 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -68,6 +68,8 @@
 
 #define branch_displacement(insn) sign_extend(((insn) & 0xffffff) << 2, 25)
 
+#define is_r15(insn, bitpos) (((insn) & (0xf << bitpos)) == (0xf << bitpos))
+
 #define PSR_fs	(PSR_f|PSR_s)
 
 #define KPROBE_RETURN_INSTRUCTION	0xe1a0f00e	/* mov pc, lr */
@@ -897,6 +899,9 @@ prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 static enum kprobe_insn __kprobes
 prep_emulate_rd12rm0(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
 	insn &= 0xffff0ff0;	/* Rd = r0, Rm = r0 */
 	asi->insn[0] = insn;
 	asi->insn_handler = emulate_rd12rm0;
@@ -907,6 +912,9 @@ static enum kprobe_insn __kprobes
 prep_emulate_rd12rn16rm0_wflags(kprobe_opcode_t insn,
 				struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
 	insn &= 0xfff00ff0;	/* Rd = r0, Rn = r0 */
 	insn |= 0x00000001;	/* Rm = r1 */
 	asi->insn[0] = insn;
@@ -918,6 +926,9 @@ static enum kprobe_insn __kprobes
 prep_emulate_rd16rs8rm0_wflags(kprobe_opcode_t insn,
 			       struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 16))
+		return INSN_REJECTED;	/* Rd is PC */
+
 	insn &= 0xfff0f0f0;	/* Rd = r0, Rs = r0 */
 	insn |= 0x00000001;	/* Rm = r1          */
 	asi->insn[0] = insn;
@@ -929,6 +940,9 @@ static enum kprobe_insn __kprobes
 prep_emulate_rd16rn12rs8rm0_wflags(kprobe_opcode_t insn,
 				   struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 16))
+		return INSN_REJECTED;	/* Rd is PC */
+
 	insn &= 0xfff000f0;	/* Rd = r0, Rn = r0 */
 	insn |= 0x00000102;	/* Rs = r1, Rm = r2 */
 	asi->insn[0] = insn;
@@ -940,6 +954,9 @@ static enum kprobe_insn __kprobes
 prep_emulate_rdhi16rdlo12rs8rm0_wflags(kprobe_opcode_t insn,
 				       struct arch_specific_insn *asi)
 {
+	if (is_r15(insn, 16) || is_r15(insn, 12))
+		return INSN_REJECTED;	/* RdHi or RdLo is PC */
+
 	insn &= 0xfff000f0;	/* RdHi = r0, RdLo = r1 */
 	insn |= 0x00001203;	/* Rs = r2, Rm = r3 */
 	asi->insn[0] = insn;
@@ -1035,6 +1052,8 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 		/* MRS cpsr : cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */
 		if ((insn & 0x0ff000f0) == 0x01000000) {
+			if (is_r15(insn, 12))
+				return INSN_REJECTED;	/* Rd is PC */
 			asi->insn_handler = simulate_mrs;
 			return INSN_GOOD_NO_SLOT;
 		}
@@ -1065,6 +1084,8 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		/* BLX(2) : cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */
 		/* BX     : cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */
 		if ((insn & 0x0ff000d0) == 0x01200010) {
+			if ((insn & 0x0ff000ff) == 0x0120003f)
+				return INSN_REJECTED; /* BLX pc */
 			asi->insn_handler = simulate_blx2bx;
 			return INSN_GOOD_NO_SLOT;
 		}
@@ -1234,6 +1255,8 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
 	/* SEL : cccc 0110 1000 xxxx xxxx xxxx 1011 xxxx GE: !!! */
 	if ((insn & 0x0ff000f0) == 0x068000b0) {
+		if (is_r15(insn, 12))
+			return INSN_REJECTED;	/* Rd is PC */
 		insn &= 0xfff00ff0;	/* Rd = r0, Rn = r0 */
 		insn |= 0x00000001;	/* Rm = r1 */
 		asi->insn[0] = insn;
@@ -1247,6 +1270,8 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	/* USAT16 : cccc 0110 1110 xxxx xxxx xxxx 0011 xxxx :Q */
 	if ((insn & 0x0fa00030) == 0x06a00010 ||
 	    (insn & 0x0fb000f0) == 0x06a00030) {
+		if (is_r15(insn, 12))
+			return INSN_REJECTED;	/* Rd is PC */
 		insn &= 0xffff0ff0;	/* Rd = r0, Rm = r0 */
 		asi->insn[0] = insn;
 		asi->insn_handler = emulate_sat;
@@ -1384,6 +1409,9 @@ space_cccc_1100_010x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
 	/* MCRR : cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */
 	/* MRRC : cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */
+	if (is_r15(insn, 16) || is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rn or Rd is PC */
+
 	insn &= 0xfff00fff;
 	insn |= 0x00001000;	/* Rn = r0, Rd = r1 */
 	asi->insn[0] = insn;
-- 
cgit v1.2.3


From 75539aea4cb96823171cabac7391f1a26fbf9c1b Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Thu, 7 Apr 2011 13:25:18 +0100
Subject: ARM: kprobes: Fix error in comment

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 6d09db9c84ab..54c175906787 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1069,7 +1069,7 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
 
 		/* SMLAxy : cccc 0001 0000 xxxx xxxx xxxx 1xx0 xxxx : Q */
-		/* SMLAWy : cccc 0001 0010 xxxx xxxx xxxx 0x00 xxxx : Q */
+		/* SMLAWy : cccc 0001 0010 xxxx xxxx xxxx 1x00 xxxx : Q */
 		return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
 
 	}
-- 
cgit v1.2.3


From ba48d40713ae5afe758dd5525a0f8740cce71d22 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Thu, 7 Apr 2011 13:25:19 +0100
Subject: ARM: kprobes: Reject probing of undefined multiply instructions

The instructions space for 'Multiply and multiply-accumulate'
instructions contains some undefined patterns. We need to reject
probing of these because they may in future become defined and the
kprobes code may then emulate them faultily.

This has already happened with the new MLS instruction which this patch
also adds correct decoding for as well as tightening up other decoding
tests. (Before this patch the wrong emulation routine was being called
for MLS though it still produced correct results.)

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 54c175906787..8f3f03e46ace 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1102,13 +1102,16 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	}
 
 	/* cccc 0000 xxxx xxxx xxxx xxxx xxxx 1001 xxxx */
-	else if ((insn & 0x0f000090) == 0x00000090) {
+	else if ((insn & 0x0f0000f0) == 0x00000090) {
 
 		/* MUL    : cccc 0000 0000 xxxx xxxx xxxx 1001 xxxx :   */
 		/* MULS   : cccc 0000 0001 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* MLA    : cccc 0000 0010 xxxx xxxx xxxx 1001 xxxx :   */
 		/* MLAS   : cccc 0000 0011 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* UMAAL  : cccc 0000 0100 xxxx xxxx xxxx 1001 xxxx :   */
+		/* undef  : cccc 0000 0101 xxxx xxxx xxxx 1001 xxxx :   */
+		/* MLS    : cccc 0000 0110 xxxx xxxx xxxx 1001 xxxx :   */
+		/* undef  : cccc 0000 0111 xxxx xxxx xxxx 1001 xxxx :   */
 		/* UMULL  : cccc 0000 1000 xxxx xxxx xxxx 1001 xxxx :   */
 		/* UMULLS : cccc 0000 1001 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* UMLAL  : cccc 0000 1010 xxxx xxxx xxxx 1001 xxxx :   */
@@ -1117,9 +1120,11 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		/* SMULLS : cccc 0000 1101 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* SMLAL  : cccc 0000 1110 xxxx xxxx xxxx 1001 xxxx :   */
 		/* SMLALS : cccc 0000 1111 xxxx xxxx xxxx 1001 xxxx :cc */
-		if ((insn & 0x0fe000f0) == 0x00000090) {
+		if ((insn & 0x00d00000) == 0x00500000) {
+			return INSN_REJECTED;
+		} else if ((insn & 0x00e00000) == 0x00000000) {
 		       return prep_emulate_rd16rs8rm0_wflags(insn, asi);
-		} else if  ((insn & 0x0fe000f0) == 0x00200090) {
+		} else if ((insn & 0x00a00000) == 0x00200000) {
 		       return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
 		} else {
 		       return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi);
-- 
cgit v1.2.3


From ec58d7f2373cec47f30b773d40a89f18bf6fc489 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Fri, 8 Apr 2011 15:32:53 +0100
Subject: ARM: kprobes: Reject probing of STREX and LDREX instructions

The emulation code for STREX and LDREX instructions is faulty, however,
rather than attempting to fix this we reject probes of these
instructions. We do this because they can never succeed in gaining
exclusive access as the exception framework clears the exclusivity
monitor when a probes breakpoint is hit. (This is a general problem
when probing all instructions executing between a LDREX and its
corresponding STREX and can lead to infinite retry loops.)

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 8f3f03e46ace..c24e21ec427f 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1136,17 +1136,34 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 		/* SWP   : cccc 0001 0000 xxxx xxxx xxxx 1001 xxxx */
 		/* SWPB  : cccc 0001 0100 xxxx xxxx xxxx 1001 xxxx */
-		/* LDRD  : cccc 000x xxx0 xxxx xxxx xxxx 1101 xxxx */
-		/* STRD  : cccc 000x xxx0 xxxx xxxx xxxx 1111 xxxx */
+		/* ???   : cccc 0001 0x01 xxxx xxxx xxxx 1001 xxxx */
+		/* ???   : cccc 0001 0x10 xxxx xxxx xxxx 1001 xxxx */
+		/* ???   : cccc 0001 0x11 xxxx xxxx xxxx 1001 xxxx */
 		/* STREX : cccc 0001 1000 xxxx xxxx xxxx 1001 xxxx */
 		/* LDREX : cccc 0001 1001 xxxx xxxx xxxx 1001 xxxx */
+		/* STREXD: cccc 0001 1010 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREXD: cccc 0001 1011 xxxx xxxx xxxx 1001 xxxx */
+		/* STREXB: cccc 0001 1100 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREXB: cccc 0001 1101 xxxx xxxx xxxx 1001 xxxx */
+		/* STREXH: cccc 0001 1110 xxxx xxxx xxxx 1001 xxxx */
+		/* LDREXH: cccc 0001 1111 xxxx xxxx xxxx 1001 xxxx */
+
+		/* LDRD  : cccc 000x xxx0 xxxx xxxx xxxx 1101 xxxx */
+		/* STRD  : cccc 000x xxx0 xxxx xxxx xxxx 1111 xxxx */
 		/* LDRH  : cccc 000x xxx1 xxxx xxxx xxxx 1011 xxxx */
 		/* STRH  : cccc 000x xxx0 xxxx xxxx xxxx 1011 xxxx */
 		/* LDRSB : cccc 000x xxx1 xxxx xxxx xxxx 1101 xxxx */
 		/* LDRSH : cccc 000x xxx1 xxxx xxxx xxxx 1111 xxxx */
-		if ((insn & 0x0fb000f0) == 0x01000090) {
-			/* SWP/SWPB */
-			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+		if ((insn & 0x0f0000f0) == 0x01000090) {
+			if ((insn & 0x0fb000f0) == 0x01000090) {
+				/* SWP/SWPB */
+				return prep_emulate_rd12rn16rm0_wflags(insn,
+									asi);
+			} else {
+				/* STREX/LDREX variants and unallocaed space */
+				return INSN_REJECTED;
+			}
+
 		} else if ((insn & 0x0e1000d0) == 0x00000d0) {
 			/* STRD/LDRD */
 			insn &= 0xfff00fff;
-- 
cgit v1.2.3


From 6823fc85fcfba11675f2027aadf2d5291c6f351b Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Fri, 8 Apr 2011 15:32:54 +0100
Subject: ARM: kprobes: Fix emulation of LDRH, STRH, LDRSB and LDRSH
 instructions

The decoding of these instructions got the register indexed and
immediate indexed forms the wrong way around, causing incorrect
emulation.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index c24e21ec427f..348ff47acd06 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -883,11 +883,12 @@ emulate_alu_tests(struct kprobe *p, struct pt_regs *regs)
 static enum kprobe_insn __kprobes
 prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
-	int ibit = (insn & (1 << 26)) ? 25 : 22;
+	int not_imm = (insn & (1 << 26)) ? (insn & (1 << 25))
+					 : (~insn & (1 << 22));
 
 	insn &= 0xfff00fff;
 	insn |= 0x00001000;	/* Rn = r0, Rd = r1 */
-	if (insn & (1 << ibit)) {
+	if (not_imm) {
 		insn &= ~0xf;
 		insn |= 2;	/* Rm = r2 */
 	}
-- 
cgit v1.2.3


From 54823accfcfc715e9e757a621afb40dabc01d033 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Fri, 8 Apr 2011 15:32:55 +0100
Subject: ARM: kprobes: Reject probing of LDR/STR instructions which update PC
 unpredictably

Using PC as an base register with writeback is UNPREDICTABLE, as is non
word-sized loads or stores of PC. (We only really care about preventing
loads to PC but it keeps the code simpler if we also exclude stores.)

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 348ff47acd06..a4dba1f7c87b 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -70,6 +70,12 @@
 
 #define is_r15(insn, bitpos) (((insn) & (0xf << bitpos)) == (0xf << bitpos))
 
+/*
+ * Test if load/store instructions writeback the address register.
+ * if P (bit 24) == 0 or W (bit 21) == 1
+ */
+#define is_writeback(insn) ((insn ^ 0x01000000) & 0x01200000)
+
 #define PSR_fs	(PSR_f|PSR_s)
 
 #define KPROBE_RETURN_INSTRUCTION	0xe1a0f00e	/* mov pc, lr */
@@ -886,6 +892,9 @@ prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	int not_imm = (insn & (1 << 26)) ? (insn & (1 << 25))
 					 : (~insn & (1 << 22));
 
+	if (is_writeback(insn) && is_r15(insn, 16))
+		return INSN_REJECTED;	/* Writeback to PC */
+
 	insn &= 0xfff00fff;
 	insn |= 0x00001000;	/* Rn = r0, Rd = r1 */
 	if (not_imm) {
@@ -1167,6 +1176,11 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 		} else if ((insn & 0x0e1000d0) == 0x00000d0) {
 			/* STRD/LDRD */
+			if ((insn & 0x0000e000) == 0x0000e000)
+				return INSN_REJECTED;	/* Rd is LR or PC */
+			if (is_writeback(insn) && is_r15(insn, 16))
+				return INSN_REJECTED;	/* Writeback to PC */
+
 			insn &= 0xfff00fff;
 			insn |= 0x00002000;	/* Rn = r0, Rd = r2 */
 			if (insn & (1 << 22)) {
@@ -1180,6 +1194,9 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 			return INSN_GOOD;
 		}
 
+		/* LDRH/STRH/LDRSB/LDRSH */
+		if (is_r15(insn, 12))
+			return INSN_REJECTED;	/* Rd is PC */
 		return prep_emulate_ldr_str(insn, asi);
 	}
 
-- 
cgit v1.2.3


From 5c6b76fc7d8220e8f00e7a49fb56ca852d7fb661 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Fri, 8 Apr 2011 15:32:56 +0100
Subject: ARM: kprobes: Fix emulation of LDRD and STRD instructions

The decoding of these instructions got the register indexed and
immediate indexed forms the wrong way around, causing incorrect
emulation.

Instructions like "LDRD Rx, [Rx]" were corrupting Rx because the base
register writeback was being performed unconditionally, overwriting the
value just loaded from memory. The fix is to only writeback the base
register when that form of the instruction is used. Note, now that we
reject probing writeback with PC the emulation code doesn't need the
check rn!=15.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index a4dba1f7c87b..826abc16e670 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -560,8 +560,8 @@ static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs)
 		  [i_fn] "r" (i_fn)
 		: "r0", "r1", "r2", "r3", "lr", "cc"
 	);
-	if (rn != 15)
-		regs->uregs[rn] = rnv;  /* Save Rn in case of writeback. */
+	if (is_writeback(insn))
+		regs->uregs[rn] = rnv;
 }
 
 static void __kprobes emulate_strd(struct kprobe *p, struct pt_regs *regs)
@@ -580,8 +580,8 @@ static void __kprobes emulate_strd(struct kprobe *p, struct pt_regs *regs)
 	rnv_wb = insnslot_4arg_rflags(rnv, rmv, regs->uregs[rd],
 					       regs->uregs[rd+1],
 					       regs->ARM_cpsr, i_fn);
-	if (rn != 15)
-		regs->uregs[rn] = rnv_wb;  /* Save Rn in case of writeback. */
+	if (is_writeback(insn))
+		regs->uregs[rn] = rnv_wb;
 }
 
 static void __kprobes emulate_ldr(struct kprobe *p, struct pt_regs *regs)
@@ -1183,8 +1183,8 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 			insn &= 0xfff00fff;
 			insn |= 0x00002000;	/* Rn = r0, Rd = r2 */
-			if (insn & (1 << 22)) {
-				/* I bit */
+			if (!(insn & (1 << 22))) {
+				/* Register index */
 				insn &= ~0xf;
 				insn |= 1;	/* Rm = r1 */
 			}
-- 
cgit v1.2.3


From 81ff5720b9561b48e3dc640ca15799ba3919f5a6 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Tue, 12 Apr 2011 07:45:21 +0100
Subject: ARM: kprobes: Reject probing of LDRB instructions which load PC

These instructions are specified as UNPREDICTABLE.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 826abc16e670..a37745f2abba 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1416,6 +1416,10 @@ space_cccc_01xx(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	/* STRB  : cccc 01xx x1x0 xxxx xxxx xxxx xxxx xxxx */
 	/* STRBT : cccc 01x0 x110 xxxx xxxx xxxx xxxx xxxx */
 	/* STRT  : cccc 01x0 x010 xxxx xxxx xxxx xxxx xxxx */
+
+	if ((insn & 0x00500000) == 0x00500000 && is_r15(insn, 12))
+		return INSN_REJECTED;	/* LDRB into PC */
+
 	return prep_emulate_ldr_str(insn, asi);
 }
 
-- 
cgit v1.2.3


From 0e384ed164bdc2ad832270e81dbd14a17c143e78 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Tue, 12 Apr 2011 07:45:22 +0100
Subject: ARM: kprobes: Add emulation of RBIT instruction

The v6T2 RBIT instruction was accidentally being emulated correctly,
this patch adds correct decoding for the instruction.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index a37745f2abba..b6bbc0b5ecd6 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1320,9 +1320,10 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 	/* REV    : cccc 0110 1011 xxxx xxxx xxxx 0011 xxxx */
 	/* REV16  : cccc 0110 1011 xxxx xxxx xxxx 1011 xxxx */
+	/* RBIT   : cccc 0110 1111 xxxx xxxx xxxx 0011 xxxx */
 	/* REVSH  : cccc 0110 1111 xxxx xxxx xxxx 1011 xxxx */
 	if ((insn & 0x0ff00070) == 0x06b00030 ||
-	    (insn & 0x0ff000f0) == 0x06f000b0)
+	    (insn & 0x0ff00070) == 0x06f00030)
 		return prep_emulate_rd12rm0(insn, asi);
 
 	/* SADD16    : cccc 0110 0001 xxxx xxxx xxxx 0001 xxxx :GE */
-- 
cgit v1.2.3


From 780b5c1162286cc75ef2dcc0f14215d9f9f06230 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Tue, 12 Apr 2011 07:45:23 +0100
Subject: ARM: kprobes: Reject probing of undefined media instructions

The instructions space for media instructions contains some undefined
patterns. We need to reject probing of these because they may in future
become defined and the kprobes code may then emulate them faultily.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index b6bbc0b5ecd6..f0ca7f4bc4cb 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1326,52 +1326,86 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	    (insn & 0x0ff00070) == 0x06f00030)
 		return prep_emulate_rd12rm0(insn, asi);
 
+	/* ???       : cccc 0110 0000 xxxx xxxx xxxx xxx1 xxxx :   */
 	/* SADD16    : cccc 0110 0001 xxxx xxxx xxxx 0001 xxxx :GE */
 	/* SADDSUBX  : cccc 0110 0001 xxxx xxxx xxxx 0011 xxxx :GE */
 	/* SSUBADDX  : cccc 0110 0001 xxxx xxxx xxxx 0101 xxxx :GE */
 	/* SSUB16    : cccc 0110 0001 xxxx xxxx xxxx 0111 xxxx :GE */
 	/* SADD8     : cccc 0110 0001 xxxx xxxx xxxx 1001 xxxx :GE */
+	/* ???       : cccc 0110 0001 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0001 xxxx xxxx xxxx 1101 xxxx :   */
 	/* SSUB8     : cccc 0110 0001 xxxx xxxx xxxx 1111 xxxx :GE */
 	/* QADD16    : cccc 0110 0010 xxxx xxxx xxxx 0001 xxxx :   */
 	/* QADDSUBX  : cccc 0110 0010 xxxx xxxx xxxx 0011 xxxx :   */
 	/* QSUBADDX  : cccc 0110 0010 xxxx xxxx xxxx 0101 xxxx :   */
 	/* QSUB16    : cccc 0110 0010 xxxx xxxx xxxx 0111 xxxx :   */
 	/* QADD8     : cccc 0110 0010 xxxx xxxx xxxx 1001 xxxx :   */
+	/* ???       : cccc 0110 0010 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0010 xxxx xxxx xxxx 1101 xxxx :   */
 	/* QSUB8     : cccc 0110 0010 xxxx xxxx xxxx 1111 xxxx :   */
 	/* SHADD16   : cccc 0110 0011 xxxx xxxx xxxx 0001 xxxx :   */
 	/* SHADDSUBX : cccc 0110 0011 xxxx xxxx xxxx 0011 xxxx :   */
 	/* SHSUBADDX : cccc 0110 0011 xxxx xxxx xxxx 0101 xxxx :   */
 	/* SHSUB16   : cccc 0110 0011 xxxx xxxx xxxx 0111 xxxx :   */
 	/* SHADD8    : cccc 0110 0011 xxxx xxxx xxxx 1001 xxxx :   */
+	/* ???       : cccc 0110 0011 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0011 xxxx xxxx xxxx 1101 xxxx :   */
 	/* SHSUB8    : cccc 0110 0011 xxxx xxxx xxxx 1111 xxxx :   */
+	/* ???       : cccc 0110 0100 xxxx xxxx xxxx xxx1 xxxx :   */
 	/* UADD16    : cccc 0110 0101 xxxx xxxx xxxx 0001 xxxx :GE */
 	/* UADDSUBX  : cccc 0110 0101 xxxx xxxx xxxx 0011 xxxx :GE */
 	/* USUBADDX  : cccc 0110 0101 xxxx xxxx xxxx 0101 xxxx :GE */
 	/* USUB16    : cccc 0110 0101 xxxx xxxx xxxx 0111 xxxx :GE */
 	/* UADD8     : cccc 0110 0101 xxxx xxxx xxxx 1001 xxxx :GE */
+	/* ???       : cccc 0110 0101 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0101 xxxx xxxx xxxx 1101 xxxx :   */
 	/* USUB8     : cccc 0110 0101 xxxx xxxx xxxx 1111 xxxx :GE */
 	/* UQADD16   : cccc 0110 0110 xxxx xxxx xxxx 0001 xxxx :   */
 	/* UQADDSUBX : cccc 0110 0110 xxxx xxxx xxxx 0011 xxxx :   */
 	/* UQSUBADDX : cccc 0110 0110 xxxx xxxx xxxx 0101 xxxx :   */
 	/* UQSUB16   : cccc 0110 0110 xxxx xxxx xxxx 0111 xxxx :   */
 	/* UQADD8    : cccc 0110 0110 xxxx xxxx xxxx 1001 xxxx :   */
+	/* ???       : cccc 0110 0110 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0110 xxxx xxxx xxxx 1101 xxxx :   */
 	/* UQSUB8    : cccc 0110 0110 xxxx xxxx xxxx 1111 xxxx :   */
 	/* UHADD16   : cccc 0110 0111 xxxx xxxx xxxx 0001 xxxx :   */
 	/* UHADDSUBX : cccc 0110 0111 xxxx xxxx xxxx 0011 xxxx :   */
 	/* UHSUBADDX : cccc 0110 0111 xxxx xxxx xxxx 0101 xxxx :   */
 	/* UHSUB16   : cccc 0110 0111 xxxx xxxx xxxx 0111 xxxx :   */
 	/* UHADD8    : cccc 0110 0111 xxxx xxxx xxxx 1001 xxxx :   */
+	/* ???       : cccc 0110 0111 xxxx xxxx xxxx 1011 xxxx :   */
+	/* ???       : cccc 0110 0111 xxxx xxxx xxxx 1101 xxxx :   */
 	/* UHSUB8    : cccc 0110 0111 xxxx xxxx xxxx 1111 xxxx :   */
+	if ((insn & 0x0f800010) == 0x06000010) {
+		if ((insn & 0x00300000) == 0x00000000 ||
+		    (insn & 0x000000e0) == 0x000000a0 ||
+		    (insn & 0x000000e0) == 0x000000c0)
+			return INSN_REJECTED;	/* Unallocated space */
+		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+	}
+
 	/* PKHBT     : cccc 0110 1000 xxxx xxxx xxxx x001 xxxx :   */
 	/* PKHTB     : cccc 0110 1000 xxxx xxxx xxxx x101 xxxx :   */
+	if ((insn & 0x0ff00030) == 0x06800010)
+		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+
 	/* SXTAB16   : cccc 0110 1000 xxxx xxxx xxxx 0111 xxxx :   */
 	/* SXTB      : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx :   */
+	/* ???       : cccc 0110 1001 xxxx xxxx xxxx 0111 xxxx :   */
 	/* SXTAB     : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx :   */
 	/* SXTAH     : cccc 0110 1011 xxxx xxxx xxxx 0111 xxxx :   */
 	/* UXTAB16   : cccc 0110 1100 xxxx xxxx xxxx 0111 xxxx :   */
+	/* ???       : cccc 0110 1101 xxxx xxxx xxxx 0111 xxxx :   */
 	/* UXTAB     : cccc 0110 1110 xxxx xxxx xxxx 0111 xxxx :   */
 	/* UXTAH     : cccc 0110 1111 xxxx xxxx xxxx 0111 xxxx :   */
-	return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+	if ((insn & 0x0f8000f0) == 0x06800070) {
+		if ((insn & 0x00300000) == 0x00100000)
+			return INSN_REJECTED;	/* Unallocated space */
+		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+	}
+
+	/* Other instruction encodings aren't yet defined */
+	return INSN_REJECTED;
 }
 
 static enum kprobe_insn __kprobes
-- 
cgit v1.2.3


From 8dd7cfbed83c74b1fb991fae264944e041e22e62 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Tue, 12 Apr 2011 07:45:24 +0100
Subject: ARM: kprobes: Fix emulation of SXTB16, SXTB, SXTH, UXTB16, UXTB and
 UXTH instructions

These sign extension instructions are encoded as extend-and-add
instructions where the register to add is specified as r15. The decoding
routines weren't checking for this and were using the incorrect
emulation code, giving incorrect results.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index f0ca7f4bc4cb..1e413a9c17ef 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1390,18 +1390,28 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
 
 	/* SXTAB16   : cccc 0110 1000 xxxx xxxx xxxx 0111 xxxx :   */
-	/* SXTB      : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx :   */
+	/* SXTB16    : cccc 0110 1000 1111 xxxx xxxx 0111 xxxx :   */
 	/* ???       : cccc 0110 1001 xxxx xxxx xxxx 0111 xxxx :   */
 	/* SXTAB     : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx :   */
+	/* SXTB      : cccc 0110 1010 1111 xxxx xxxx 0111 xxxx :   */
 	/* SXTAH     : cccc 0110 1011 xxxx xxxx xxxx 0111 xxxx :   */
+	/* SXTH      : cccc 0110 1011 1111 xxxx xxxx 0111 xxxx :   */
 	/* UXTAB16   : cccc 0110 1100 xxxx xxxx xxxx 0111 xxxx :   */
+	/* UXTB16    : cccc 0110 1100 1111 xxxx xxxx 0111 xxxx :   */
 	/* ???       : cccc 0110 1101 xxxx xxxx xxxx 0111 xxxx :   */
 	/* UXTAB     : cccc 0110 1110 xxxx xxxx xxxx 0111 xxxx :   */
+	/* UXTB      : cccc 0110 1110 1111 xxxx xxxx 0111 xxxx :   */
 	/* UXTAH     : cccc 0110 1111 xxxx xxxx xxxx 0111 xxxx :   */
+	/* UXTH      : cccc 0110 1111 1111 xxxx xxxx 0111 xxxx :   */
 	if ((insn & 0x0f8000f0) == 0x06800070) {
 		if ((insn & 0x00300000) == 0x00100000)
 			return INSN_REJECTED;	/* Unallocated space */
-		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+
+		if ((insn & 0x000f0000) == 0x000f0000) {
+			return prep_emulate_rd12rm0(insn, asi);
+		} else {
+			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+		}
 	}
 
 	/* Other instruction encodings aren't yet defined */
-- 
cgit v1.2.3


From 038c3839c917e3eea1150a1dc55607b9bde2d5ac Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Tue, 12 Apr 2011 07:45:25 +0100
Subject: ARM: kprobes: Fix emulation of SMUAD, SMUSD and SMMUL instructions

The signed multiply instructions were being decoded incorrectly.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 1e413a9c17ef..068e5c846571 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1436,18 +1436,26 @@ space_cccc_0111__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi);
 
 	/* SMLAD  : cccc 0111 0000 xxxx xxxx xxxx 00x1 xxxx :Q */
+	/* SMUAD  : cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx :Q */
 	/* SMLSD  : cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx :Q */
+	/* SMUSD  : cccc 0111 0000 xxxx 1111 xxxx 01x1 xxxx :  */
 	/* SMMLA  : cccc 0111 0101 xxxx xxxx xxxx 00x1 xxxx :  */
-	/* SMMLS  : cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx :  */
+	/* SMMUL  : cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx :  */
 	if ((insn & 0x0ff00090) == 0x07000010 ||
-	    (insn & 0x0ff000d0) == 0x07500010 ||
-	    (insn & 0x0ff000d0) == 0x075000d0)
+	    (insn & 0x0ff000d0) == 0x07500010) {
+
+		if ((insn & 0x0000f000) == 0x0000f000) {
+			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
+		} else {
+			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+		}
+	}
+
+	/* SMMLS  : cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx :  */
+	if ((insn & 0x0ff000d0) == 0x075000d0)
 		return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
 
-	/* SMUSD  : cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx :  */
-	/* SMUAD  : cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx :Q */
-	/* SMMUL  : cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx :  */
-	return prep_emulate_rd16rs8rm0_wflags(insn, asi);
+	return INSN_REJECTED;
 }
 
 static enum kprobe_insn __kprobes
-- 
cgit v1.2.3


From c6e4ae32911c2a0ad02d47ee59ccba352a74c38e Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Tue, 12 Apr 2011 07:45:26 +0100
Subject: ARM: kprobes: Fix emulation of USAD8 instructions

The USAD8 instruction wasn't being explicitly decoded leading
to the incorrect emulation routine being called. It can be correctly
decoded in the same way as the signed multiply instructions so we move
the decoding there.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 068e5c846571..8391dac41855 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1425,11 +1425,6 @@ space_cccc_0111__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	if ((insn & 0x0ff000f0) == 0x03f000f0)
 		return INSN_REJECTED;
 
-	/* USADA8 : cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx */
-	/* USAD8  : cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx */
-	if ((insn & 0x0ff000f0) == 0x07800010)
-		 return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
-
 	/* SMLALD : cccc 0111 0100 xxxx xxxx xxxx 00x1 xxxx */
 	/* SMLSLD : cccc 0111 0100 xxxx xxxx xxxx 01x1 xxxx */
 	if ((insn & 0x0ff00090) == 0x07400010)
@@ -1441,8 +1436,11 @@ space_cccc_0111__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	/* SMUSD  : cccc 0111 0000 xxxx 1111 xxxx 01x1 xxxx :  */
 	/* SMMLA  : cccc 0111 0101 xxxx xxxx xxxx 00x1 xxxx :  */
 	/* SMMUL  : cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx :  */
+	/* USADA8 : cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx :  */
+	/* USAD8  : cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx :  */
 	if ((insn & 0x0ff00090) == 0x07000010 ||
-	    (insn & 0x0ff000d0) == 0x07500010) {
+	    (insn & 0x0ff000d0) == 0x07500010 ||
+	    (insn & 0x0ff000f0) == 0x07800010) {
 
 		if ((insn & 0x0000f000) == 0x0000f000) {
 			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
-- 
cgit v1.2.3


From fa1a03b429b3fd5f28e7fdd20ce99ca572bfd236 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Mon, 18 Apr 2011 08:53:54 +0100
Subject: ARM: kprobes: Reject probing of all coprocessor instructions

The kernel doesn't currently support VFP or Neon code, and probing of
code with CP15 operations is fraught with bad consequences. Therefore we
don't need the ability to probe coprocessor instructions and the code to
support this can be removed.

The removed code also had at least two bugs:
 - MRC into R15 should set CPSR not trash PC
 - LDC and STC which use PC as base register needed the address offset by 8

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 113 ++++-----------------------------------
 1 file changed, 11 insertions(+), 102 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 8391dac41855..ff3970741653 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -520,17 +520,6 @@ static void __kprobes simulate_mov_ipsp(struct kprobe *p, struct pt_regs *regs)
 	regs->uregs[12] = regs->uregs[13];
 }
 
-static void __kprobes emulate_ldcstc(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rn = (insn >> 16) & 0xf;
-	long rnv = regs->uregs[rn];
-
-	/* Save Rn in case of writeback. */
-	regs->uregs[rn] = insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn);
-}
-
 static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
@@ -635,31 +624,6 @@ static void __kprobes emulate_str(struct kprobe *p, struct pt_regs *regs)
 		regs->uregs[rn] = rnv_wb;  /* Save Rn in case of writeback. */
 }
 
-static void __kprobes emulate_mrrc(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_llret_0arg_fn_t *i_fn = (insn_llret_0arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	union reg_pair fnr;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-
-	fnr.dr = insnslot_llret_0arg_rflags(regs->ARM_cpsr, i_fn);
-	regs->uregs[rn] = fnr.r0;
-	regs->uregs[rd] = fnr.r1;
-}
-
-static void __kprobes emulate_mcrr(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-	int rn = (insn >> 16) & 0xf;
-	long rnv = regs->uregs[rn];
-	long rdv = regs->uregs[rd];
-
-	insnslot_2arg_rflags(rnv, rdv, regs->ARM_cpsr, i_fn);
-}
-
 static void __kprobes emulate_sat(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
@@ -693,24 +657,6 @@ static void __kprobes emulate_none(struct kprobe *p, struct pt_regs *regs)
 	insnslot_0arg_rflags(regs->ARM_cpsr, i_fn);
 }
 
-static void __kprobes emulate_rd12(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_0arg_fn_t *i_fn = (insn_0arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rd = (insn >> 12) & 0xf;
-
-	regs->uregs[rd] = insnslot_0arg_rflags(regs->ARM_cpsr, i_fn);
-}
-
-static void __kprobes emulate_ird12(struct kprobe *p, struct pt_regs *regs)
-{
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int ird = (insn >> 12) & 0xf;
-
-	insnslot_1arg_rflags(regs->uregs[ird], regs->ARM_cpsr, i_fn);
-}
-
 static void __kprobes emulate_rn16(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
@@ -1010,40 +956,22 @@ space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	}
 
 	/* SETEND : 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */
-	/* CDP2   : 1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
-	if ((insn & 0xffff00f0) == 0xf1010000 ||
-	    (insn & 0xff000010) == 0xfe000000) {
+	if ((insn & 0xffff00f0) == 0xf1010000) {
 		asi->insn[0] = insn;
 		asi->insn_handler = emulate_none;
 		return INSN_GOOD;
 	}
 
+	/* Coprocessor instructions... */
 	/* MCRR2 : 1111 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */
 	/* MRRC2 : 1111 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */
-	if ((insn & 0xffe00000) == 0xfc400000) {
-		insn &= 0xfff00fff;	/* Rn = r0 */
-		insn |= 0x00001000;	/* Rd = r1 */
-		asi->insn[0] = insn;
-		asi->insn_handler =
-			(insn & (1 << 20)) ? emulate_mrrc : emulate_mcrr;
-		return INSN_GOOD;
-	}
+	/* LDC2  : 1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
+	/* STC2  : 1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
+	/* CDP2  : 1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
+	/* MCR2  : 1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
+	/* MRC2  : 1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
 
-	/* LDC2 : 1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
-	/* STC2 : 1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
-	if ((insn & 0xfe000000) == 0xfc000000) {
-		insn &= 0xfff0ffff;      /* Rn = r0 */
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_ldcstc;
-		return INSN_GOOD;
-	}
-
-	/* MCR2 : 1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
-	/* MRC2 : 1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
-	insn &= 0xffff0fff;	/* Rd = r0 */
-	asi->insn[0]      = insn;
-	asi->insn_handler = (insn & (1 << 20)) ? emulate_rd12 : emulate_ird12;
-	return INSN_GOOD;
+	return INSN_REJECTED;
 }
 
 static enum kprobe_insn __kprobes
@@ -1504,14 +1432,7 @@ space_cccc_1100_010x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
 	/* MCRR : cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */
 	/* MRRC : cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */
-	if (is_r15(insn, 16) || is_r15(insn, 12))
-		return INSN_REJECTED;	/* Rn or Rd is PC */
-
-	insn &= 0xfff00fff;
-	insn |= 0x00001000;	/* Rn = r0, Rd = r1 */
-	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ? emulate_mrrc : emulate_mcrr;
-	return INSN_GOOD;
+	return INSN_REJECTED;
 }
 
 static enum kprobe_insn __kprobes
@@ -1519,10 +1440,7 @@ space_cccc_110x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
 	/* LDC : cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
 	/* STC : cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
-	insn &= 0xfff0ffff;	/* Rn = r0 */
-	asi->insn[0] = insn;
-	asi->insn_handler = emulate_ldcstc;
-	return INSN_GOOD;
+	return INSN_REJECTED;
 }
 
 static enum kprobe_insn __kprobes
@@ -1535,18 +1453,9 @@ space_cccc_111x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		return INSN_REJECTED;
 
 	/* CDP : cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
-	if ((insn & 0x0f000010) == 0x0e000000) {
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_none;
-		return INSN_GOOD;
-	}
-
 	/* MCR : cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
 	/* MRC : cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
-	insn &= 0xffff0fff;	/* Rd = r0 */
-	asi->insn[0] = insn;
-	asi->insn_handler = (insn & (1 << 20)) ? emulate_rd12 : emulate_ird12;
-	return INSN_GOOD;
+	return INSN_REJECTED;
 }
 
 static unsigned long __kprobes __check_eq(unsigned long cpsr)
-- 
cgit v1.2.3


From ac211c6994fb5f1f282745054c00d29e53639cb1 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Mon, 18 Apr 2011 08:53:55 +0100
Subject: ARM: kprobes: Consolidate stub decoding functions

Following the change to remove support for coprocessor instructions
we are left with three stub functions which can be consolidated.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 40 +++++++++-------------------------------
 1 file changed, 9 insertions(+), 31 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index ff3970741653..fb818976f4cb 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1428,33 +1428,19 @@ space_cccc_101x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 }
 
 static enum kprobe_insn __kprobes
-space_cccc_1100_010x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+space_cccc_11xx(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
+	/* Coprocessor instructions... */
 	/* MCRR : cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */
 	/* MRRC : cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */
-	return INSN_REJECTED;
-}
+	/* LDC  : cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
+	/* STC  : cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
+	/* CDP  : cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
+	/* MCR  : cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
+	/* MRC  : cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
 
-static enum kprobe_insn __kprobes
-space_cccc_110x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	/* LDC : cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */
-	/* STC : cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */
-	return INSN_REJECTED;
-}
+	/* SVC  : cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */
 
-static enum kprobe_insn __kprobes
-space_cccc_111x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
-{
-	/* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
-	/* SWI  : cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */
-	if ((insn & 0xfff000f0) == 0xe1200070 ||
-	    (insn & 0x0f000000) == 0x0f000000)
-		return INSN_REJECTED;
-
-	/* CDP : cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */
-	/* MCR : cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */
-	/* MRC : cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */
 	return INSN_REJECTED;
 }
 
@@ -1598,17 +1584,9 @@ arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 		return space_cccc_101x(insn, asi);
 
-	} else if ((insn & 0x0fe00000) == 0x0c400000) {
-
-		return space_cccc_1100_010x(insn, asi);
-
-	} else if ((insn & 0x0e000000) == 0x0c000000) {
-
-		return space_cccc_110x(insn, asi);
-
 	}
 
-	return space_cccc_111x(insn, asi);
+	return space_cccc_11xx(insn, asi);
 }
 
 void __init arm_kprobe_decode_init(void)
-- 
cgit v1.2.3


From f0aeb8bff0fe9de50e1e4093ef86ff8f17a9b1b0 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Mon, 18 Apr 2011 08:53:56 +0100
Subject: ARM: kprobes: Reject probing of SETEND instructions

The emulation of SETEND was broken as it changed the endianess for
the running kprobes handling code. Rather than adding a new simulation
routine to fix this we'll just reject probing of SETEND as these should
be very rare in the kernel.

Note, the function emulate_none is now unused but it is left in the
source code as future patches will use it.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index fb818976f4cb..b81fbfb33151 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -956,11 +956,6 @@ space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	}
 
 	/* SETEND : 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */
-	if ((insn & 0xffff00f0) == 0xf1010000) {
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_none;
-		return INSN_GOOD;
-	}
 
 	/* Coprocessor instructions... */
 	/* MCRR2 : 1111 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */
-- 
cgit v1.2.3


From 41713d1396312a027ec02abc4767041627c178ef Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Mon, 18 Apr 2011 08:53:57 +0100
Subject: ARM: kprobes: Fix emulation of PLD instructions

The PLD instructions wasn't being decoded correctly and the emulation
code wasn't adjusting PC correctly.

As the PLD instruction is only a performance hint we emulate it as a
simple nop, and we can broaden the instruction decoding to take into
account newer PLI and PLDW instructions.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index b81fbfb33151..bddf8d0f3dc1 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -657,14 +657,8 @@ static void __kprobes emulate_none(struct kprobe *p, struct pt_regs *regs)
 	insnslot_0arg_rflags(regs->ARM_cpsr, i_fn);
 }
 
-static void __kprobes emulate_rn16(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes emulate_nop(struct kprobe *p, struct pt_regs *regs)
 {
-	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
-	kprobe_opcode_t insn = p->opcode;
-	int rn = (insn >> 16) & 0xf;
-	long rnv = regs->uregs[rn];
-
-	insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn);
 }
 
 static void __kprobes emulate_rd12rm0(struct kprobe *p, struct pt_regs *regs)
@@ -941,12 +935,13 @@ space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	    (insn & 0xfe5f0f00) == 0xf84d0500)
 		return INSN_REJECTED;
 
-	/* PLD : 1111 01x1 x101 xxxx xxxx xxxx xxxx xxxx : */
-	if ((insn & 0xfd700000) == 0xf4500000) {
-		insn &= 0xfff0ffff;	/* Rn = r0 */
-		asi->insn[0] = insn;
-		asi->insn_handler = emulate_rn16;
-		return INSN_GOOD;
+	/* memory hint : 1111 0100 x001 xxxx xxxx xxxx xxxx xxxx : */
+	/* PLDI        : 1111 0100 x101 xxxx xxxx xxxx xxxx xxxx : */
+	/* PLDW        : 1111 0101 x001 xxxx xxxx xxxx xxxx xxxx : */
+	/* PLD         : 1111 0101 x101 xxxx xxxx xxxx xxxx xxxx : */
+	if ((insn & 0xfe300000) == 0xf4100000) {
+		asi->insn_handler = emulate_nop;
+		return INSN_GOOD_NO_SLOT;
 	}
 
 	/* BLX(1) : 1111 101x xxxx xxxx xxxx xxxx xxxx xxxx : */
-- 
cgit v1.2.3


From 72c2bab2be734d63dee4342e67b1754c54fded70 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Mon, 18 Apr 2011 08:53:58 +0100
Subject: ARM: kprobes: Remove redundant code in space_1111

The tests to explicitly reject probing CPS, RFE and SRS instructions
are redundant as the default case is now to reject undecoded patterns.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index bddf8d0f3dc1..dc315c12d5f3 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -927,14 +927,6 @@ prep_emulate_rdhi16rdlo12rs8rm0_wflags(kprobe_opcode_t insn,
 static enum kprobe_insn __kprobes
 space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
-	/* CPS mmod == 1 : 1111 0001 0000 xx10 xxxx xxxx xx0x xxxx */
-	/* RFE           : 1111 100x x0x1 xxxx xxxx 1010 xxxx xxxx */
-	/* SRS           : 1111 100x x1x0 1101 xxxx 0101 xxxx xxxx */
-	if ((insn & 0xfff30020) == 0xf1020000 ||
-	    (insn & 0xfe500f00) == 0xf8100a00 ||
-	    (insn & 0xfe5f0f00) == 0xf84d0500)
-		return INSN_REJECTED;
-
 	/* memory hint : 1111 0100 x001 xxxx xxxx xxxx xxxx xxxx : */
 	/* PLDI        : 1111 0100 x101 xxxx xxxx xxxx xxxx xxxx : */
 	/* PLDW        : 1111 0101 x001 xxxx xxxx xxxx xxxx xxxx : */
@@ -950,7 +942,11 @@ space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		return INSN_GOOD_NO_SLOT;
 	}
 
-	/* SETEND : 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */
+	/* CPS   : 1111 0001 0000 xxx0 xxxx xxxx xx0x xxxx */
+	/* SETEND: 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */
+
+	/* SRS   : 1111 100x x1x0 xxxx xxxx xxxx xxxx xxxx */
+	/* RFE   : 1111 100x x0x1 xxxx xxxx xxxx xxxx xxxx */
 
 	/* Coprocessor instructions... */
 	/* MCRR2 : 1111 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */
-- 
cgit v1.2.3


From f704a6e25bd07e1381af81f19fb1d123975807b1 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Tue, 19 Apr 2011 10:52:16 +0100
Subject: ARM: kprobes: Reject probing of undefined data processing
 instructions

The instruction decoding in space_cccc_000x needs to reject probing of
instructions with undefined patterns as they may in future become
defined and then emulated faultily - as has already happened with the
SMC instruction.

This fix is achieved by testing for the instruction patterns we want to
probe and making the the default fall-through paths reject probes. This
also allows us to remove some explicit tests for instructions that we
wish to reject, as that is now the default action.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index dc315c12d5f3..4715537b490c 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -966,14 +966,6 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	/* cccc 0001 0xx0 xxxx xxxx xxxx xxxx xxx0 xxxx */
 	if ((insn & 0x0f900010) == 0x01000000) {
 
-		/* BXJ      : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */
-		/* MSR      : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */
-		/* MRS spsr : cccc 0001 0100 xxxx xxxx xxxx 0000 xxxx */
-		if ((insn & 0x0ff000f0) == 0x01200020 ||
-		    (insn & 0x0fb000f0) == 0x01200000 ||
-		    (insn & 0x0ff000f0) == 0x01400000)
-			return INSN_REJECTED;
-
 		/* MRS cpsr : cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */
 		if ((insn & 0x0ff000f0) == 0x01000000) {
 			if (is_r15(insn, 12))
@@ -994,17 +986,21 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 		/* SMLAxy : cccc 0001 0000 xxxx xxxx xxxx 1xx0 xxxx : Q */
 		/* SMLAWy : cccc 0001 0010 xxxx xxxx xxxx 1x00 xxxx : Q */
-		return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+		if ((insn & 0x0ff00090) == 0x01000080 ||
+		    (insn & 0x0ff000b0) == 0x01200080)
+			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+
+		/* BXJ      : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */
+		/* MSR      : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */
+		/* MRS spsr : cccc 0001 0100 xxxx xxxx xxxx 0000 xxxx */
 
+		/* Other instruction encodings aren't yet defined */
+		return INSN_REJECTED;
 	}
 
 	/* cccc 0001 0xx0 xxxx xxxx xxxx xxxx 0xx1 xxxx */
 	else if ((insn & 0x0f900090) == 0x01000010) {
 
-		/* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
-		if ((insn & 0xfff000f0) == 0xe1200070)
-			return INSN_REJECTED;
-
 		/* BLX(2) : cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */
 		/* BX     : cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */
 		if ((insn & 0x0ff000d0) == 0x01200010) {
@@ -1022,7 +1018,14 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		/* QSUB    : cccc 0001 0010 xxxx xxxx xxxx 0101 xxxx :Q */
 		/* QDADD   : cccc 0001 0100 xxxx xxxx xxxx 0101 xxxx :Q */
 		/* QDSUB   : cccc 0001 0110 xxxx xxxx xxxx 0101 xxxx :Q */
-		return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+		if ((insn & 0x0f9000f0) == 0x01000050)
+			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
+
+		/* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */
+		/* SMC  : cccc 0001 0110 xxxx xxxx xxxx 0111 xxxx */
+
+		/* Other instruction encodings aren't yet defined */
+		return INSN_REJECTED;
 	}
 
 	/* cccc 0000 xxxx xxxx xxxx xxxx xxxx 1001 xxxx */
-- 
cgit v1.2.3


From c9836777d5f22e64eefc759d682511560b6d6d78 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Tue, 19 Apr 2011 10:52:17 +0100
Subject: ARM: kprobes: Add emulation of MOVW and MOVT instructions

The MOVW and MOVT instructions account for approximately 7% of all
instructions in a ARMv7 kernel as GCC uses them instead of a literal
pool.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 4715537b490c..a063f152d9b3 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -661,6 +661,17 @@ static void __kprobes emulate_nop(struct kprobe *p, struct pt_regs *regs)
 {
 }
 
+static void __kprobes
+emulate_rd12_modify(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	long rdv = regs->uregs[rd];
+
+	regs->uregs[rd] = insnslot_1arg_rflags(rdv, regs->ARM_cpsr, i_fn);
+}
+
 static void __kprobes emulate_rd12rm0(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
@@ -846,6 +857,18 @@ prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	return INSN_GOOD;
 }
 
+static enum kprobe_insn __kprobes
+prep_emulate_rd12_modify(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xffff0fff;	/* Rd = r0 */
+	asi->insn[0] = insn;
+	asi->insn_handler = emulate_rd12_modify;
+	return INSN_GOOD;
+}
+
 static enum kprobe_insn __kprobes
 prep_emulate_rd12rm0(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
@@ -1170,14 +1193,17 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 static enum kprobe_insn __kprobes
 space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
+	/* MOVW  : cccc 0011 0000 xxxx xxxx xxxx xxxx xxxx */
+	/* MOVT  : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx */
+	if ((insn & 0x0fb00000) == 0x03000000)
+		return prep_emulate_rd12_modify(insn, asi);
+
 	/*
 	 * MSR   : cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx
-	 * Undef : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx
 	 * ALU op with S bit and Rd == 15 :
 	 *	   cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx
 	 */
 	if ((insn & 0x0fb00000) == 0x03200000 ||	/* MSR */
-	    (insn & 0x0ff00000) == 0x03400000 ||	/* Undef */
 	    (insn & 0x0e10f000) == 0x0210f000)		/* ALU s-bit, R15  */
 		return INSN_REJECTED;
 
-- 
cgit v1.2.3


From 20e8155e24ba5c04558780d0a8da13c39f98c002 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Tue, 19 Apr 2011 10:52:18 +0100
Subject: ARM: kprobes: Add emulation of SBFX, UBFX, BFI and BFC instructions

These bit field manipulation instructions occur several thousand
times in an ARMv7 kernel.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 42 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index a063f152d9b3..6bed8b089af0 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -672,6 +672,19 @@ emulate_rd12_modify(struct kprobe *p, struct pt_regs *regs)
 	regs->uregs[rd] = insnslot_1arg_rflags(rdv, regs->ARM_cpsr, i_fn);
 }
 
+static void __kprobes
+emulate_rd12rn0_modify(struct kprobe *p, struct pt_regs *regs)
+{
+	insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0];
+	kprobe_opcode_t insn = p->opcode;
+	int rd = (insn >> 12) & 0xf;
+	int rn = insn & 0xf;
+	long rdv = regs->uregs[rd];
+	long rnv = regs->uregs[rn];
+
+	regs->uregs[rd] = insnslot_2arg_rflags(rdv, rnv, regs->ARM_cpsr, i_fn);
+}
+
 static void __kprobes emulate_rd12rm0(struct kprobe *p, struct pt_regs *regs)
 {
 	insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0];
@@ -869,6 +882,20 @@ prep_emulate_rd12_modify(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	return INSN_GOOD;
 }
 
+static enum kprobe_insn __kprobes
+prep_emulate_rd12rn0_modify(kprobe_opcode_t insn,
+			    struct arch_specific_insn *asi)
+{
+	if (is_r15(insn, 12))
+		return INSN_REJECTED;	/* Rd is PC */
+
+	insn &= 0xffff0ff0;	/* Rd = r0 */
+	insn |= 0x00000001;	/* Rn = r1 */
+	asi->insn[0] = insn;
+	asi->insn_handler = emulate_rd12rn0_modify;
+	return INSN_GOOD;
+}
+
 static enum kprobe_insn __kprobes
 prep_emulate_rd12rm0(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 {
@@ -1396,6 +1423,21 @@ space_cccc_0111__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	if ((insn & 0x0ff000d0) == 0x075000d0)
 		return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
 
+	/* SBFX   : cccc 0111 101x xxxx xxxx xxxx x101 xxxx :  */
+	/* UBFX   : cccc 0111 111x xxxx xxxx xxxx x101 xxxx :  */
+	if ((insn & 0x0fa00070) == 0x07a00050)
+		return prep_emulate_rd12rm0(insn, asi);
+
+	/* BFI    : cccc 0111 110x xxxx xxxx xxxx x001 xxxx :  */
+	/* BFC    : cccc 0111 110x xxxx xxxx xxxx x001 1111 :  */
+	if ((insn & 0x0fe00070) == 0x07c00010) {
+
+		if ((insn & 0x0000000f) == 0x0000000f)
+			return prep_emulate_rd12_modify(insn, asi);
+		else
+			return prep_emulate_rd12rn0_modify(insn, asi);
+	}
+
 	return INSN_REJECTED;
 }
 
-- 
cgit v1.2.3


From 94254930786216106100e9a28c9a244df58be61f Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Tue, 19 Apr 2011 10:52:19 +0100
Subject: ARM: kprobes: Add emulation of hint instructions like NOP and WFI

Being able to probe NOP instructions is useful for hard-coding probeable
locations and is used by the kprobes test code.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 6bed8b089af0..03b85ad525ab 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1225,6 +1225,30 @@ space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	if ((insn & 0x0fb00000) == 0x03000000)
 		return prep_emulate_rd12_modify(insn, asi);
 
+	/* hints : cccc 0011 0010 0000 xxxx xxxx xxxx xxxx */
+	if ((insn & 0x0fff0000) == 0x03200000) {
+		unsigned op2 = insn & 0x000000ff;
+		if (op2 == 0x01 || op2 == 0x04) {
+			/* YIELD : cccc 0011 0010 0000 xxxx xxxx 0000 0001 */
+			/* SEV   : cccc 0011 0010 0000 xxxx xxxx 0000 0100 */
+			asi->insn[0] = insn;
+			asi->insn_handler = emulate_none;
+			return INSN_GOOD;
+		} else if (op2 <= 0x03) {
+			/* NOP   : cccc 0011 0010 0000 xxxx xxxx 0000 0000 */
+			/* WFE   : cccc 0011 0010 0000 xxxx xxxx 0000 0010 */
+			/* WFI   : cccc 0011 0010 0000 xxxx xxxx 0000 0011 */
+			/*
+			 * We make WFE and WFI true NOPs to avoid stalls due
+			 * to missing events whilst processing the probe.
+			 */
+			asi->insn_handler = emulate_nop;
+			return INSN_GOOD_NO_SLOT;
+		}
+		/* For DBG and unallocated hints it's safest to reject them */
+		return INSN_REJECTED;
+	}
+
 	/*
 	 * MSR   : cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx
 	 * ALU op with S bit and Rd == 15 :
-- 
cgit v1.2.3


From cdc253611582f08036ccb6e10efe95b8e760a7e2 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@yxit.co.uk>
Date: Tue, 19 Apr 2011 10:52:20 +0100
Subject: ARM: kprobes: Tidy-up kprobes-decode.c

- Remove coding standard violations reported by checkpatch.pl
- Delete comment about handling of conditional branches which is no
  longer true.
- Delete comment at end of file which lists all ARM instructions. This
  duplicates data available in the ARM ARM and seems like an
  unnecessary maintenance burden to keep this up to date and accurate.

Signed-off-by: Jon Medhurst <tixy@yxit.co.uk>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/kernel/kprobes-decode.c | 131 +++++++--------------------------------
 1 file changed, 23 insertions(+), 108 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 03b85ad525ab..15eeff6aea0e 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -34,9 +34,6 @@
  *
  *   *) If the PC is written to by the instruction, the
  *      instruction must be fully simulated in software.
- *      If it is a conditional instruction, the handler
- *      will use insn[0] to copy its condition code to
- *	set r0 to 1 and insn[1] to "mov pc, lr" to return.
  *
  *   *) Otherwise, a modified form of the instruction is
  *      directly executed.  Its handler calls the
@@ -1026,7 +1023,8 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 		/* SMLALxy : cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */
 		if ((insn & 0x0ff00090) == 0x01400080)
-			return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi);
+			return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn,
+									asi);
 
 		/* SMULWy : cccc 0001 0010 xxxx xxxx xxxx 1x10 xxxx */
 		/* SMULxy : cccc 0001 0110 xxxx xxxx xxxx 1xx0 xxxx */
@@ -1097,15 +1095,15 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		/* SMULLS : cccc 0000 1101 xxxx xxxx xxxx 1001 xxxx :cc */
 		/* SMLAL  : cccc 0000 1110 xxxx xxxx xxxx 1001 xxxx :   */
 		/* SMLALS : cccc 0000 1111 xxxx xxxx xxxx 1001 xxxx :cc */
-		if ((insn & 0x00d00000) == 0x00500000) {
+		if ((insn & 0x00d00000) == 0x00500000)
 			return INSN_REJECTED;
-		} else if ((insn & 0x00e00000) == 0x00000000) {
-		       return prep_emulate_rd16rs8rm0_wflags(insn, asi);
-		} else if ((insn & 0x00a00000) == 0x00200000) {
-		       return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
-		} else {
-		       return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi);
-		}
+		else if ((insn & 0x00e00000) == 0x00000000)
+			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
+		else if ((insn & 0x00a00000) == 0x00200000)
+			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
+		else
+			return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn,
+									asi);
 	}
 
 	/* cccc 000x xxxx xxxx xxxx xxxx xxxx 1xx1 xxxx */
@@ -1171,7 +1169,7 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 
 	/*
 	 * ALU op with S bit and Rd == 15 :
-	 * 	cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx
+	 *	cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx
 	 */
 	if ((insn & 0x0e10f000) == 0x0010f000)
 		return INSN_REJECTED;
@@ -1401,11 +1399,10 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 		if ((insn & 0x00300000) == 0x00100000)
 			return INSN_REJECTED;	/* Unallocated space */
 
-		if ((insn & 0x000f0000) == 0x000f0000) {
+		if ((insn & 0x000f0000) == 0x000f0000)
 			return prep_emulate_rd12rm0(insn, asi);
-		} else {
+		else
 			return prep_emulate_rd12rn16rm0_wflags(insn, asi);
-		}
 	}
 
 	/* Other instruction encodings aren't yet defined */
@@ -1436,11 +1433,10 @@ space_cccc_0111__1(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	    (insn & 0x0ff000d0) == 0x07500010 ||
 	    (insn & 0x0ff000f0) == 0x07800010) {
 
-		if ((insn & 0x0000f000) == 0x0000f000) {
+		if ((insn & 0x0000f000) == 0x0000f000)
 			return prep_emulate_rd16rs8rm0_wflags(insn, asi);
-		} else {
+		else
 			return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi);
-		}
 	}
 
 	/* SMMLS  : cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx :  */
@@ -1633,40 +1629,38 @@ arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
 	asi->insn_check_cc = condition_checks[insn>>28];
 	asi->insn[1] = KPROBE_RETURN_INSTRUCTION;
 
-	if ((insn & 0xf0000000) == 0xf0000000) {
+	if ((insn & 0xf0000000) == 0xf0000000)
 
 		return space_1111(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x00000000) {
+	else if ((insn & 0x0e000000) == 0x00000000)
 
 		return space_cccc_000x(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x02000000) {
+	else if ((insn & 0x0e000000) == 0x02000000)
 
 		return space_cccc_001x(insn, asi);
 
-	} else if ((insn & 0x0f000010) == 0x06000010) {
+	else if ((insn & 0x0f000010) == 0x06000010)
 
 		return space_cccc_0110__1(insn, asi);
 
-	} else if ((insn & 0x0f000010) == 0x07000010) {
+	else if ((insn & 0x0f000010) == 0x07000010)
 
 		return space_cccc_0111__1(insn, asi);
 
-	} else if ((insn & 0x0c000000) == 0x04000000) {
+	else if ((insn & 0x0c000000) == 0x04000000)
 
 		return space_cccc_01xx(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x08000000) {
+	else if ((insn & 0x0e000000) == 0x08000000)
 
 		return space_cccc_100x(insn, asi);
 
-	} else if ((insn & 0x0e000000) == 0x0a000000) {
+	else if ((insn & 0x0e000000) == 0x0a000000)
 
 		return space_cccc_101x(insn, asi);
 
-	}
-
 	return space_cccc_11xx(insn, asi);
 }
 
@@ -1674,82 +1668,3 @@ void __init arm_kprobe_decode_init(void)
 {
 	find_str_pc_offset();
 }
-
-
-/*
- * All ARM instructions listed below.
- *
- * Instructions and their general purpose registers are given.
- * If a particular register may not use R15, it is prefixed with a "!".
- * If marked with a "*" means the value returned by reading R15
- * is implementation defined.
- *
- * ADC/ADD/AND/BIC/CMN/CMP/EOR/MOV/MVN/ORR/RSB/RSC/SBC/SUB/TEQ
- *     TST: Rd, Rn, Rm, !Rs
- * BX: Rm
- * BLX(2): !Rm
- * BX: Rm (R15 legal, but discouraged)
- * BXJ: !Rm,
- * CLZ: !Rd, !Rm
- * CPY: Rd, Rm
- * LDC/2,STC/2 immediate offset & unindex: Rn
- * LDC/2,STC/2 immediate pre/post-indexed: !Rn
- * LDM(1/3): !Rn, register_list
- * LDM(2): !Rn, !register_list
- * LDR,STR,PLD immediate offset: Rd, Rn
- * LDR,STR,PLD register offset: Rd, Rn, !Rm
- * LDR,STR,PLD scaled register offset: Rd, !Rn, !Rm
- * LDR,STR immediate pre/post-indexed: Rd, !Rn
- * LDR,STR register pre/post-indexed: Rd, !Rn, !Rm
- * LDR,STR scaled register pre/post-indexed: Rd, !Rn, !Rm
- * LDRB,STRB immediate offset: !Rd, Rn
- * LDRB,STRB register offset: !Rd, Rn, !Rm
- * LDRB,STRB scaled register offset: !Rd, !Rn, !Rm
- * LDRB,STRB immediate pre/post-indexed: !Rd, !Rn
- * LDRB,STRB register pre/post-indexed: !Rd, !Rn, !Rm
- * LDRB,STRB scaled register pre/post-indexed: !Rd, !Rn, !Rm
- * LDRT,LDRBT,STRBT immediate pre/post-indexed: !Rd, !Rn
- * LDRT,LDRBT,STRBT register pre/post-indexed: !Rd, !Rn, !Rm
- * LDRT,LDRBT,STRBT scaled register pre/post-indexed: !Rd, !Rn, !Rm
- * LDRH/SH/SB/D,STRH/SH/SB/D immediate offset: !Rd, Rn
- * LDRH/SH/SB/D,STRH/SH/SB/D register offset: !Rd, Rn, !Rm
- * LDRH/SH/SB/D,STRH/SH/SB/D immediate pre/post-indexed: !Rd, !Rn
- * LDRH/SH/SB/D,STRH/SH/SB/D register pre/post-indexed: !Rd, !Rn, !Rm
- * LDREX: !Rd, !Rn
- * MCR/2: !Rd
- * MCRR/2,MRRC/2: !Rd, !Rn
- * MLA: !Rd, !Rn, !Rm, !Rs
- * MOV: Rd
- * MRC/2: !Rd (if Rd==15, only changes cond codes, not the register)
- * MRS,MSR: !Rd
- * MUL: !Rd, !Rm, !Rs
- * PKH{BT,TB}: !Rd, !Rn, !Rm
- * QDADD,[U]QADD/16/8/SUBX: !Rd, !Rm, !Rn
- * QDSUB,[U]QSUB/16/8/ADDX: !Rd, !Rm, !Rn
- * REV/16/SH: !Rd, !Rm
- * RFE: !Rn
- * {S,U}[H]ADD{16,8,SUBX},{S,U}[H]SUB{16,8,ADDX}: !Rd, !Rn, !Rm
- * SEL: !Rd, !Rn, !Rm
- * SMLA<x><y>,SMLA{D,W<y>},SMLSD,SMML{A,S}: !Rd, !Rn, !Rm, !Rs
- * SMLAL<x><y>,SMLA{D,LD},SMLSLD,SMMULL,SMULW<y>: !RdHi, !RdLo, !Rm, !Rs
- * SMMUL,SMUAD,SMUL<x><y>,SMUSD: !Rd, !Rm, !Rs
- * SSAT/16: !Rd, !Rm
- * STM(1/2): !Rn, register_list* (R15 in reg list not recommended)
- * STRT immediate pre/post-indexed: Rd*, !Rn
- * STRT register pre/post-indexed: Rd*, !Rn, !Rm
- * STRT scaled register pre/post-indexed: Rd*, !Rn, !Rm
- * STREX: !Rd, !Rn, !Rm
- * SWP/B: !Rd, !Rn, !Rm
- * {S,U}XTA{B,B16,H}: !Rd, !Rn, !Rm
- * {S,U}XT{B,B16,H}: !Rd, !Rm
- * UM{AA,LA,UL}L: !RdHi, !RdLo, !Rm, !Rs
- * USA{D8,A8,T,T16}: !Rd, !Rm, !Rs
- *
- * May transfer control by writing R15 (possible mode changes or alternate
- * mode accesses marked by "*"):
- * ALU op (* with s-bit), B, BL, BKPT, BLX(1/2), BX, BXJ, CPS*, CPY,
- * LDM(1), LDM(2/3)*, LDR, MOV, RFE*, SWI*
- *
- * Instructions that do not take general registers, nor transfer control:
- * CDP/2, SETEND, SRS*
- */
-- 
cgit v1.2.3


From c7a7b814c9dca9ee01b38e63b4a46de87156d3b6 Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Thu, 28 Apr 2011 11:00:30 -0600
Subject: ioremap: Delay sanity check until after a successful mapping

While tracking down the reason for an ioremap() failure I was
distracted  by the WARN_ONCE() in __ioremap_caller().

Performing a WARN_ONCE() sanity check before the mapping
is successful seems pointless if the caller sends bad values.

A case in point is when the BIOS provides erroneous screen_info
values causing vesafb_probe() to request an outrageuous size.
The WARN_ONCE is then wasted on bogosity. Move the warning to a
point where the mapping has been successfully allocated.

Addresses:

  http://bugs.launchpad.net/bugs/772042

Reviewed-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Link: http://lkml.kernel.org/r/4DB99D2E.9080106@canonical.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0369843511dc..be1ef574ce9a 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -90,13 +90,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	if (is_ISA_range(phys_addr, last_addr))
 		return (__force void __iomem *)phys_to_virt(phys_addr);
 
-	/*
-	 * Check if the request spans more than any BAR in the iomem resource
-	 * tree.
-	 */
-	WARN_ONCE(iomem_map_sanity_check(phys_addr, size),
-		  KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
-
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
@@ -170,6 +163,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	ret_addr = (void __iomem *) (vaddr + offset);
 	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
 
+	/*
+	 * Check if the request spans more than any BAR in the iomem resource
+	 * tree.
+	 */
+	WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
+		  KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
+
 	return ret_addr;
 err_free_area:
 	free_vm_area(area);
-- 
cgit v1.2.3


From a9851832857dc1e4efefca1713f5cff3e168a25c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 29 Apr 2011 10:42:19 +0200
Subject: [S390] irqstats: fix counting of pfault, dasd diag and virtio irqs

pfault, dasd diag and virtio all use the same external interrupt number.
The respective interrupt handlers decide by the subcode if they are
meant to handle the interrupt.
Counting is currently done before looking at the subcode which means
each handler counts an interrupt even if it is not handling it.
Fix this by moving the kstat code after the code which looks at the
subcode.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/fault.c           | 2 +-
 drivers/s390/block/dasd_diag.c | 2 +-
 drivers/s390/kvm/kvm_virtio.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 4cf85fef407c..ab988135e5c6 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -543,7 +543,6 @@ static void pfault_interrupt(unsigned int ext_int_code,
 	struct task_struct *tsk;
 	__u16 subcode;
 
-	kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
 	/*
 	 * Get the external interruption subcode & pfault
 	 * initial/completion signal bit. VM stores this 
@@ -553,6 +552,7 @@ static void pfault_interrupt(unsigned int ext_int_code,
 	subcode = ext_int_code >> 16;
 	if ((subcode & 0xff00) != __SUBCODE_MASK)
 		return;
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
 
 	/*
 	 * Get the token (= address of the task structure of the affected task).
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 29143eda9dd9..85dddb1e4126 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -239,7 +239,6 @@ static void dasd_ext_handler(unsigned int ext_int_code,
 	addr_t ip;
 	int rc;
 
-	kstat_cpu(smp_processor_id()).irqs[EXTINT_DSD]++;
 	switch (ext_int_code >> 24) {
 	case DASD_DIAG_CODE_31BIT:
 		ip = (addr_t) param32;
@@ -250,6 +249,7 @@ static void dasd_ext_handler(unsigned int ext_int_code,
 	default:
 		return;
 	}
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_DSD]++;
 	if (!ip) {		/* no intparm: unsolicited interrupt */
 		DBF_EVENT(DBF_NOTICE, "%s", "caught unsolicited "
 			      "interrupt");
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 414427d64a8f..607998f0b7d8 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -381,10 +381,10 @@ static void kvm_extint_handler(unsigned int ext_int_code,
 	u16 subcode;
 	u32 param;
 
-	kstat_cpu(smp_processor_id()).irqs[EXTINT_VRT]++;
 	subcode = ext_int_code >> 16;
 	if ((subcode & 0xff00) != VIRTIO_SUBCODE_64)
 		return;
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_VRT]++;
 
 	/* The LSB might be overloaded, we have to mask it */
 	vq = (struct virtqueue *)(param64 & ~1UL);
-- 
cgit v1.2.3


From 8f62242246351b5a4bc0c1f00c0c7003edea128a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 29 Apr 2011 13:19:47 +0200
Subject: perf events: Add generic front-end and back-end stalled cycle event
 definitions

Add two generic hardware events: front-end and back-end stalled cycles.

These events measure conditions when the CPU is executing code but its
capabilities are not fully utilized. Understanding such situations and
analyzing them is an important sub-task of code optimization workflows.

Both events limit performance: most front end stalls tend to be caused
by branch misprediction or instruction fetch cachemisses, backend
stalls can be caused by various resource shortages or inefficient
instruction scheduling.

Front-end stalls are the more important ones: code cannot run fast
if the instruction stream is not being kept up.

An over-utilized back-end can cause front-end stalls and thus
has to be kept an eye on as well.

The exact composition is very program logic and instruction mix
dependent.

We use the terms 'stall', 'front-end' and 'back-end' loosely and
try to use the best available events from specific CPUs that
approximate these concepts.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n000io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 2 +-
 include/linux/perf_event.h             | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 1ea94224f62e..393085b87a2c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1414,7 +1414,7 @@ static __init int intel_pmu_init(void)
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
 		/* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0x1803fb1;
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
 
 		if (ebx & 0x40) {
 			/*
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ac636dd20a0c..4e2d7ae71499 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -52,7 +52,8 @@ enum perf_hw_id {
 	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
 	PERF_COUNT_HW_BRANCH_MISSES		= 5,
 	PERF_COUNT_HW_BUS_CYCLES		= 6,
-	PERF_COUNT_HW_STALLED_CYCLES		= 7,
+	PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	= 7,
+	PERF_COUNT_HW_STALLED_CYCLES_BACKEND	= 8,
 
 	PERF_COUNT_HW_MAX,			/* non-ABI */
 };
-- 
cgit v1.2.3


From 91fc4cc00099986bc1ba50e1f421c3548cffae42 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 29 Apr 2011 14:17:19 +0200
Subject: perf, x86: Add new stalled cycles events for Intel and AMD CPUs

Extend the Intel and AMD event definitions with generic front-end and
back-end stall events.

( These are only approximations - suggestions are welcome for better events. )

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n001io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_amd.c   | 14 ++++++++------
 arch/x86/kernel/cpu/perf_event_intel.c |  4 +++-
 2 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index cf4e369cea67..fe29c1d2219e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -96,12 +96,14 @@ static __initconst const u64 amd_hw_cache_event_ids
  */
 static const u64 amd_perfmon_event_map[] =
 {
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
+  [PERF_COUNT_HW_CPU_CYCLES]			= 0x0076,
+  [PERF_COUNT_HW_INSTRUCTIONS]			= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]		= 0x0080,
+  [PERF_COUNT_HW_CACHE_MISSES]			= 0x0081,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]		= 0x00c2,
+  [PERF_COUNT_HW_BRANCH_MISSES]			= 0x00c3,
+  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
+  [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x00d1, /* "Dispatch stalls" event */
 };
 
 static u64 amd_pmu_event_map(int hw_event)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 393085b87a2c..7983b9a9533b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1413,7 +1413,9 @@ static __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
-		/* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+		/* UOPS_ISSUED.STALLED_CYCLES */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
 
 		if (ebx & 0x40) {
-- 
cgit v1.2.3


From 301120396b766ae4480e52ece220516a1707822b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 30 Apr 2011 09:14:54 +0200
Subject: perf events, x86: Add Westmere stalled-cycles-frontend/backend events

Extend the Intel Westmere PMU driver with definitions for generic front-end and
back-end stall events.

( These are only approximations. )

Reported-by: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n008io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 7983b9a9533b..be8363adf4e2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1458,6 +1458,12 @@ static __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
+
+		/* UOPS_ISSUED.STALLED_CYCLES */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+
 		pr_cont("Westmere events, ");
 		break;
 
-- 
cgit v1.2.3


From 0f22072ab50cac7983f9660d33974b45184da4f9 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Fri, 29 Apr 2011 15:48:07 +0100
Subject: ARM: 6891/1: prevent heap corruption in OABI semtimedop

When CONFIG_OABI_COMPAT is set, the wrapper for semtimedop does not
bound the nsops argument.  A sufficiently large value will cause an
integer overflow in allocation size, followed by copying too much data
into the allocated buffer.  Fix this by restricting nsops to SEMOPM.
Untested.

Cc: stable@kernel.org
Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/sys_oabi-compat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 4ad8da15ef2b..af0aaebf4de6 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -311,7 +311,7 @@ asmlinkage long sys_oabi_semtimedop(int semid,
 	long err;
 	int i;
 
-	if (nsops < 1)
+	if (nsops < 1 || nsops > SEMOPM)
 		return -EINVAL;
 	sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
 	if (!sops)
-- 
cgit v1.2.3


From 8ae6daca85c8bbd6a32c382db5e2a2a989f8bed2 Mon Sep 17 00:00:00 2001
From: David Decotigny <decot@google.com>
Date: Wed, 27 Apr 2011 18:32:38 +0000
Subject: ethtool: Call ethtool's get/set_settings callbacks with cleaned data

This makes sure that when a driver calls the ethtool's
get/set_settings() callback of another driver, the data passed to it
is clean. This guarantees that speed_hi will be zeroed correctly if
the called callback doesn't explicitely set it: we are sure we don't
get a corrupted speed from the underlying driver. We also take care of
setting the cmd field appropriately (ETHTOOL_GSET/SSET).

This applies to dev_ethtool_get_settings(), which now makes sure it
sets up that ethtool command parameter correctly before passing it to
drivers. This also means that whoever calls dev_ethtool_get_settings()
does not have to clean the ethtool command parameter. This function
also becomes an exported symbol instead of an inline.

All drivers visible to make allyesconfig under x86_64 have been
updated.

Signed-off-by: David Decotigny <decot@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/mips/txx9/generic/setup_tx4939.c | 21 ++++++++-------------
 drivers/net/e100.c                    |  2 +-
 drivers/net/mdio.c                    |  3 +++
 drivers/net/mii.c                     |  3 +++
 drivers/net/pch_gbe/pch_gbe_main.c    |  6 +++---
 drivers/net/pch_gbe/pch_gbe_phy.c     |  2 +-
 drivers/net/pcnet32.c                 | 16 ++++++++--------
 drivers/net/sfc/mdio_10g.c            |  4 ++--
 drivers/net/stmmac/stmmac_ethtool.c   |  5 ++---
 drivers/net/usb/asix.c                | 28 +++++++++++++++-------------
 drivers/net/usb/dm9601.c              |  6 +++---
 drivers/net/usb/smsc75xx.c            |  7 ++++---
 drivers/net/usb/smsc95xx.c            |  7 ++++---
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c     | 11 +++++++----
 drivers/scsi/fcoe/fcoe.c              | 11 +++++++----
 include/linux/ethtool.h               |  4 +++-
 include/linux/netdevice.h             |  9 ++-------
 include/rdma/ib_addr.h                | 13 +++++++------
 net/core/dev.c                        | 24 ++++++++++++++++++++++++
 net/core/net-sysfs.c                  | 24 ++++++++++--------------
 20 files changed, 117 insertions(+), 89 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c
index 3dc19f482959..e9f95dcde379 100644
--- a/arch/mips/txx9/generic/setup_tx4939.c
+++ b/arch/mips/txx9/generic/setup_tx4939.c
@@ -318,19 +318,15 @@ void __init tx4939_sio_init(unsigned int sclk, unsigned int cts_mask)
 }
 
 #if defined(CONFIG_TC35815) || defined(CONFIG_TC35815_MODULE)
-static int tx4939_get_eth_speed(struct net_device *dev)
+static u32 tx4939_get_eth_speed(struct net_device *dev)
 {
-	struct ethtool_cmd cmd = { ETHTOOL_GSET };
-	int speed = 100;	/* default 100Mbps */
-	int err;
-	if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
-		return speed;
-	err = dev->ethtool_ops->get_settings(dev, &cmd);
-	if (err < 0)
-		return speed;
-	speed = cmd.speed == SPEED_100 ? 100 : 10;
-	return speed;
+	struct ethtool_cmd cmd;
+	if (dev_ethtool_get_settings(dev, &cmd))
+		return 100;	/* default 100Mbps */
+
+	return ethtool_cmd_speed(&cmd);
 }
+
 static int tx4939_netdev_event(struct notifier_block *this,
 			       unsigned long event,
 			       void *ptr)
@@ -343,8 +339,7 @@ static int tx4939_netdev_event(struct notifier_block *this,
 		else if (dev->irq == TXX9_IRQ_BASE + TX4939_IR_ETH(1))
 			bit = TX4939_PCFG_SPEED1;
 		if (bit) {
-			int speed = tx4939_get_eth_speed(dev);
-			if (speed == 100)
+			if (tx4939_get_eth_speed(dev) == 100)
 				txx9_set64(&tx4939_ccfgptr->pcfg, bit);
 			else
 				txx9_clear64(&tx4939_ccfgptr->pcfg, bit);
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index b0aa9e68990a..66ba596a4d37 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -1668,7 +1668,7 @@ static void e100_adjust_adaptive_ifs(struct nic *nic, int speed, int duplex)
 static void e100_watchdog(unsigned long data)
 {
 	struct nic *nic = (struct nic *)data;
-	struct ethtool_cmd cmd;
+	struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
 
 	netif_printk(nic, timer, KERN_DEBUG, nic->netdev,
 		     "right now = %ld\n", jiffies);
diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c
index e85bf04cf813..f2d10abd0403 100644
--- a/drivers/net/mdio.c
+++ b/drivers/net/mdio.c
@@ -176,6 +176,9 @@ static u32 mdio45_get_an(const struct mdio_if_info *mdio, u16 addr)
  * @npage_adv: Modes currently advertised on next pages
  * @npage_lpa: Modes advertised by link partner on next pages
  *
+ * The @ecmd parameter is expected to have been cleared before calling
+ * mdio45_ethtool_gset_npage().
+ *
  * Since the CSRs for auto-negotiation using next pages are not fully
  * standardised, this function does not attempt to decode them.  The
  * caller must pass them in.
diff --git a/drivers/net/mii.c b/drivers/net/mii.c
index 0a6c6a2e7550..05acca78f63a 100644
--- a/drivers/net/mii.c
+++ b/drivers/net/mii.c
@@ -58,6 +58,9 @@ static u32 mii_get_an(struct mii_if_info *mii, u16 addr)
  * @mii: MII interface
  * @ecmd: requested ethtool_cmd
  *
+ * The @ecmd parameter is expected to have been cleared before calling
+ * mii_ethtool_gset().
+ *
  * Returns 0 for success, negative on error.
  */
 int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c
index 4cc9872f5ec4..f3e4b0adae93 100644
--- a/drivers/net/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/pch_gbe/pch_gbe_main.c
@@ -888,12 +888,12 @@ static void pch_gbe_watchdog(unsigned long data)
 	struct pch_gbe_adapter *adapter = (struct pch_gbe_adapter *)data;
 	struct net_device *netdev = adapter->netdev;
 	struct pch_gbe_hw *hw = &adapter->hw;
-	struct ethtool_cmd cmd;
 
 	pr_debug("right now = %ld\n", jiffies);
 
 	pch_gbe_update_stats(adapter);
 	if ((mii_link_ok(&adapter->mii)) && (!netif_carrier_ok(netdev))) {
+		struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
 		netdev->tx_queue_len = adapter->tx_queue_len;
 		/* mii library handles link maintenance tasks */
 		if (mii_ethtool_gset(&adapter->mii, &cmd)) {
@@ -903,7 +903,7 @@ static void pch_gbe_watchdog(unsigned long data)
 						PCH_GBE_WATCHDOG_PERIOD));
 			return;
 		}
-		hw->mac.link_speed = cmd.speed;
+		hw->mac.link_speed = ethtool_cmd_speed(&cmd);
 		hw->mac.link_duplex = cmd.duplex;
 		/* Set the RGMII control. */
 		pch_gbe_set_rgmii_ctrl(adapter, hw->mac.link_speed,
@@ -913,7 +913,7 @@ static void pch_gbe_watchdog(unsigned long data)
 				 hw->mac.link_duplex);
 		netdev_dbg(netdev,
 			   "Link is Up %d Mbps %s-Duplex\n",
-			   cmd.speed,
+			   hw->mac.link_speed,
 			   cmd.duplex == DUPLEX_FULL ? "Full" : "Half");
 		netif_carrier_on(netdev);
 		netif_wake_queue(netdev);
diff --git a/drivers/net/pch_gbe/pch_gbe_phy.c b/drivers/net/pch_gbe/pch_gbe_phy.c
index 923a687acd30..9a8207f686fd 100644
--- a/drivers/net/pch_gbe/pch_gbe_phy.c
+++ b/drivers/net/pch_gbe/pch_gbe_phy.c
@@ -247,7 +247,7 @@ inline void pch_gbe_phy_set_rgmii(struct pch_gbe_hw *hw)
 void pch_gbe_phy_init_setting(struct pch_gbe_hw *hw)
 {
 	struct pch_gbe_adapter *adapter;
-	struct ethtool_cmd     cmd;
+	struct ethtool_cmd     cmd = { .cmd = ETHTOOL_GSET };
 	int ret;
 	u16 mii_reg;
 
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
index 0a1efbae1bc0..b48aba9e4227 100644
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -2099,7 +2099,7 @@ static int pcnet32_open(struct net_device *dev)
 		int first_phy = -1;
 		u16 bmcr;
 		u32 bcr9;
-		struct ethtool_cmd ecmd;
+		struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
 
 		/*
 		 * There is really no good other way to handle multiple PHYs
@@ -2115,9 +2115,9 @@ static int pcnet32_open(struct net_device *dev)
 			ecmd.port = PORT_MII;
 			ecmd.transceiver = XCVR_INTERNAL;
 			ecmd.autoneg = AUTONEG_DISABLE;
-			ecmd.speed =
-			    lp->
-			    options & PCNET32_PORT_100 ? SPEED_100 : SPEED_10;
+			ethtool_cmd_speed_set(&ecmd,
+					      (lp->options & PCNET32_PORT_100) ?
+					      SPEED_100 : SPEED_10);
 			bcr9 = lp->a.read_bcr(ioaddr, 9);
 
 			if (lp->options & PCNET32_PORT_FD) {
@@ -2763,11 +2763,11 @@ static void pcnet32_check_media(struct net_device *dev, int verbose)
 		netif_carrier_on(dev);
 		if (lp->mii) {
 			if (netif_msg_link(lp)) {
-				struct ethtool_cmd ecmd;
+				struct ethtool_cmd ecmd = {
+					.cmd = ETHTOOL_GSET };
 				mii_ethtool_gset(&lp->mii_if, &ecmd);
-				netdev_info(dev, "link up, %sMbps, %s-duplex\n",
-					    (ecmd.speed == SPEED_100)
-					    ? "100" : "10",
+				netdev_info(dev, "link up, %uMbps, %s-duplex\n",
+					    ethtool_cmd_speed(&ecmd),
 					    (ecmd.duplex == DUPLEX_FULL)
 					    ? "full" : "half");
 			}
diff --git a/drivers/net/sfc/mdio_10g.c b/drivers/net/sfc/mdio_10g.c
index 19e68c26d103..71159145b4bf 100644
--- a/drivers/net/sfc/mdio_10g.c
+++ b/drivers/net/sfc/mdio_10g.c
@@ -232,12 +232,12 @@ void efx_mdio_set_mmds_lpower(struct efx_nic *efx,
  */
 int efx_mdio_set_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
 {
-	struct ethtool_cmd prev;
+	struct ethtool_cmd prev = { .cmd = ETHTOOL_GSET };
 
 	efx->phy_op->get_settings(efx, &prev);
 
 	if (ecmd->advertising == prev.advertising &&
-	    ecmd->speed == prev.speed &&
+	    ethtool_cmd_speed(ecmd) == ethtool_cmd_speed(&prev) &&
 	    ecmd->duplex == prev.duplex &&
 	    ecmd->port == prev.port &&
 	    ecmd->autoneg == prev.autoneg)
diff --git a/drivers/net/stmmac/stmmac_ethtool.c b/drivers/net/stmmac/stmmac_ethtool.c
index 0e61ac8707cb..6f5aaeb986ff 100644
--- a/drivers/net/stmmac/stmmac_ethtool.c
+++ b/drivers/net/stmmac/stmmac_ethtool.c
@@ -237,13 +237,12 @@ stmmac_set_pauseparam(struct net_device *netdev,
 
 	if (phy->autoneg) {
 		if (netif_running(netdev)) {
-			struct ethtool_cmd cmd;
+			struct ethtool_cmd cmd = { .cmd = ETHTOOL_SSET };
 			/* auto-negotiation automatically restarted */
-			cmd.cmd = ETHTOOL_NWAY_RST;
 			cmd.supported = phy->supported;
 			cmd.advertising = phy->advertising;
 			cmd.autoneg = phy->autoneg;
-			cmd.speed = phy->speed;
+			ethtool_cmd_speed_set(&cmd, phy->speed);
 			cmd.duplex = phy->duplex;
 			cmd.phy_address = phy->addr;
 			ret = phy_ethtool_sset(phy, &cmd);
diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
index 6140b56cce53..6998aa6b7bb7 100644
--- a/drivers/net/usb/asix.c
+++ b/drivers/net/usb/asix.c
@@ -847,7 +847,7 @@ static void ax88172_set_multicast(struct net_device *net)
 static int ax88172_link_reset(struct usbnet *dev)
 {
 	u8 mode;
-	struct ethtool_cmd ecmd;
+	struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
 
 	mii_check_media(&dev->mii, 1, 1);
 	mii_ethtool_gset(&dev->mii, &ecmd);
@@ -856,8 +856,8 @@ static int ax88172_link_reset(struct usbnet *dev)
 	if (ecmd.duplex != DUPLEX_FULL)
 		mode |= ~AX88172_MEDIUM_FD;
 
-	netdev_dbg(dev->net, "ax88172_link_reset() speed: %d duplex: %d setting mode to 0x%04x\n",
-		   ecmd.speed, ecmd.duplex, mode);
+	netdev_dbg(dev->net, "ax88172_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n",
+		   ethtool_cmd_speed(&ecmd), ecmd.duplex, mode);
 
 	asix_write_medium_mode(dev, mode);
 
@@ -947,20 +947,20 @@ static const struct ethtool_ops ax88772_ethtool_ops = {
 static int ax88772_link_reset(struct usbnet *dev)
 {
 	u16 mode;
-	struct ethtool_cmd ecmd;
+	struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
 
 	mii_check_media(&dev->mii, 1, 1);
 	mii_ethtool_gset(&dev->mii, &ecmd);
 	mode = AX88772_MEDIUM_DEFAULT;
 
-	if (ecmd.speed != SPEED_100)
+	if (ethtool_cmd_speed(&ecmd) != SPEED_100)
 		mode &= ~AX_MEDIUM_PS;
 
 	if (ecmd.duplex != DUPLEX_FULL)
 		mode &= ~AX_MEDIUM_FD;
 
-	netdev_dbg(dev->net, "ax88772_link_reset() speed: %d duplex: %d setting mode to 0x%04x\n",
-		   ecmd.speed, ecmd.duplex, mode);
+	netdev_dbg(dev->net, "ax88772_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n",
+		   ethtool_cmd_speed(&ecmd), ecmd.duplex, mode);
 
 	asix_write_medium_mode(dev, mode);
 
@@ -1173,18 +1173,20 @@ static int marvell_led_status(struct usbnet *dev, u16 speed)
 static int ax88178_link_reset(struct usbnet *dev)
 {
 	u16 mode;
-	struct ethtool_cmd ecmd;
+	struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
 	struct asix_data *data = (struct asix_data *)&dev->data;
+	u32 speed;
 
 	netdev_dbg(dev->net, "ax88178_link_reset()\n");
 
 	mii_check_media(&dev->mii, 1, 1);
 	mii_ethtool_gset(&dev->mii, &ecmd);
 	mode = AX88178_MEDIUM_DEFAULT;
+	speed = ethtool_cmd_speed(&ecmd);
 
-	if (ecmd.speed == SPEED_1000)
+	if (speed == SPEED_1000)
 		mode |= AX_MEDIUM_GM;
-	else if (ecmd.speed == SPEED_100)
+	else if (speed == SPEED_100)
 		mode |= AX_MEDIUM_PS;
 	else
 		mode &= ~(AX_MEDIUM_PS | AX_MEDIUM_GM);
@@ -1196,13 +1198,13 @@ static int ax88178_link_reset(struct usbnet *dev)
 	else
 		mode &= ~AX_MEDIUM_FD;
 
-	netdev_dbg(dev->net, "ax88178_link_reset() speed: %d duplex: %d setting mode to 0x%04x\n",
-		   ecmd.speed, ecmd.duplex, mode);
+	netdev_dbg(dev->net, "ax88178_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n",
+		   speed, ecmd.duplex, mode);
 
 	asix_write_medium_mode(dev, mode);
 
 	if (data->phymode == PHY_MODE_MARVELL && data->ledmode)
-		marvell_led_status(dev, ecmd.speed);
+		marvell_led_status(dev, speed);
 
 	return 0;
 }
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index 5002f5be47be..1d93133e9b74 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -599,13 +599,13 @@ static void dm9601_status(struct usbnet *dev, struct urb *urb)
 
 static int dm9601_link_reset(struct usbnet *dev)
 {
-	struct ethtool_cmd ecmd;
+	struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
 
 	mii_check_media(&dev->mii, 1, 1);
 	mii_ethtool_gset(&dev->mii, &ecmd);
 
-	netdev_dbg(dev->net, "link_reset() speed: %d duplex: %d\n",
-		   ecmd.speed, ecmd.duplex);
+	netdev_dbg(dev->net, "link_reset() speed: %u duplex: %d\n",
+		   ethtool_cmd_speed(&ecmd), ecmd.duplex);
 
 	return 0;
 }
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 860a20c938b4..15b3d6888ae9 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -503,7 +503,7 @@ static int smsc75xx_update_flowcontrol(struct usbnet *dev, u8 duplex,
 static int smsc75xx_link_reset(struct usbnet *dev)
 {
 	struct mii_if_info *mii = &dev->mii;
-	struct ethtool_cmd ecmd;
+	struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
 	u16 lcladv, rmtadv;
 	int ret;
 
@@ -519,8 +519,9 @@ static int smsc75xx_link_reset(struct usbnet *dev)
 	lcladv = smsc75xx_mdio_read(dev->net, mii->phy_id, MII_ADVERTISE);
 	rmtadv = smsc75xx_mdio_read(dev->net, mii->phy_id, MII_LPA);
 
-	netif_dbg(dev, link, dev->net, "speed: %d duplex: %d lcladv: %04x"
-		" rmtadv: %04x", ecmd.speed, ecmd.duplex, lcladv, rmtadv);
+	netif_dbg(dev, link, dev->net, "speed: %u duplex: %d lcladv: %04x"
+		  " rmtadv: %04x", ethtool_cmd_speed(&ecmd),
+		  ecmd.duplex, lcladv, rmtadv);
 
 	return smsc75xx_update_flowcontrol(dev, ecmd.duplex, lcladv, rmtadv);
 }
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 24f4b3739dd2..b374a9997908 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -457,7 +457,7 @@ static int smsc95xx_link_reset(struct usbnet *dev)
 {
 	struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
 	struct mii_if_info *mii = &dev->mii;
-	struct ethtool_cmd ecmd;
+	struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };
 	unsigned long flags;
 	u16 lcladv, rmtadv;
 	u32 intdata;
@@ -472,8 +472,9 @@ static int smsc95xx_link_reset(struct usbnet *dev)
 	lcladv = smsc95xx_mdio_read(dev->net, mii->phy_id, MII_ADVERTISE);
 	rmtadv = smsc95xx_mdio_read(dev->net, mii->phy_id, MII_LPA);
 
-	netif_dbg(dev, link, dev->net, "speed: %d duplex: %d lcladv: %04x rmtadv: %04x\n",
-		  ecmd.speed, ecmd.duplex, lcladv, rmtadv);
+	netif_dbg(dev, link, dev->net,
+		  "speed: %u duplex: %d lcladv: %04x rmtadv: %04x\n",
+		  ethtool_cmd_speed(&ecmd), ecmd.duplex, lcladv, rmtadv);
 
 	spin_lock_irqsave(&pdata->mac_cr_lock, flags);
 	if (ecmd.duplex != DUPLEX_FULL) {
diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index e2e647509a73..cd050196a163 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -664,7 +664,7 @@ static void bnx2fc_link_speed_update(struct fc_lport *lport)
 	struct fcoe_port *port = lport_priv(lport);
 	struct bnx2fc_hba *hba = port->priv;
 	struct net_device *netdev = hba->netdev;
-	struct ethtool_cmd ecmd = { ETHTOOL_GSET };
+	struct ethtool_cmd ecmd;
 
 	if (!dev_ethtool_get_settings(netdev, &ecmd)) {
 		lport->link_supported_speeds &=
@@ -675,12 +675,15 @@ static void bnx2fc_link_speed_update(struct fc_lport *lport)
 		if (ecmd.supported & SUPPORTED_10000baseT_Full)
 			lport->link_supported_speeds |= FC_PORTSPEED_10GBIT;
 
-		if (ecmd.speed == SPEED_1000)
+		switch (ethtool_cmd_speed(&ecmd)) {
+		case SPEED_1000:
 			lport->link_speed = FC_PORTSPEED_1GBIT;
-		if (ecmd.speed == SPEED_10000)
+			break;
+		case SPEED_10000:
 			lport->link_speed = FC_PORTSPEED_10GBIT;
+			break;
+		}
 	}
-	return;
 }
 static int bnx2fc_link_ok(struct fc_lport *lport)
 {
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index bde6ee5333eb..04f346b562da 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -2026,7 +2026,7 @@ out_nodev:
 int fcoe_link_speed_update(struct fc_lport *lport)
 {
 	struct net_device *netdev = fcoe_netdev(lport);
-	struct ethtool_cmd ecmd = { ETHTOOL_GSET };
+	struct ethtool_cmd ecmd;
 
 	if (!dev_ethtool_get_settings(netdev, &ecmd)) {
 		lport->link_supported_speeds &=
@@ -2037,11 +2037,14 @@ int fcoe_link_speed_update(struct fc_lport *lport)
 		if (ecmd.supported & SUPPORTED_10000baseT_Full)
 			lport->link_supported_speeds |=
 				FC_PORTSPEED_10GBIT;
-		if (ecmd.speed == SPEED_1000)
+		switch (ethtool_cmd_speed(&ecmd)) {
+		case SPEED_1000:
 			lport->link_speed = FC_PORTSPEED_1GBIT;
-		if (ecmd.speed == SPEED_10000)
+			break;
+		case SPEED_10000:
 			lport->link_speed = FC_PORTSPEED_10GBIT;
-
+			break;
+		}
 		return 0;
 	}
 	return -1;
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 7e6e0a89ca26..4194a2067a14 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -744,7 +744,9 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported);
 /**
  * struct ethtool_ops - optional netdev operations
  * @get_settings: Get various device settings including Ethernet link
- *	settings.  Returns a negative error code or zero.
+ *	settings. The @cmd parameter is expected to have been cleared
+ *	before get_settings is called. Returns a negative error code or
+ *	zero.
  * @set_settings: Set various device settings including Ethernet link
  *	settings.  Returns a negative error code or zero.
  * @get_drvinfo: Report driver/device information.  Should only set the
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e03af35843bc..d5de66af46f9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2597,13 +2597,8 @@ static inline int netif_is_bond_slave(struct net_device *dev)
 
 extern struct pernet_operations __net_initdata loopback_net_ops;
 
-static inline int dev_ethtool_get_settings(struct net_device *dev,
-					   struct ethtool_cmd *cmd)
-{
-	if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
-		return -EOPNOTSUPP;
-	return dev->ethtool_ops->get_settings(dev, cmd);
-}
+int dev_ethtool_get_settings(struct net_device *dev,
+			     struct ethtool_cmd *cmd);
 
 static inline u32 dev_ethtool_get_rx_csum(struct net_device *dev)
 {
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index b5fc9f39122b..ae8c68f30f1b 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -217,18 +217,19 @@ static inline enum ib_mtu iboe_get_mtu(int mtu)
 static inline int iboe_get_rate(struct net_device *dev)
 {
 	struct ethtool_cmd cmd;
+	u32 speed;
 
-	if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings ||
-	    dev->ethtool_ops->get_settings(dev, &cmd))
+	if (dev_ethtool_get_settings(dev, &cmd))
 		return IB_RATE_PORT_CURRENT;
 
-	if (cmd.speed >= 40000)
+	speed = ethtool_cmd_speed(&cmd);
+	if (speed >= 40000)
 		return IB_RATE_40_GBPS;
-	else if (cmd.speed >= 30000)
+	else if (speed >= 30000)
 		return IB_RATE_30_GBPS;
-	else if (cmd.speed >= 20000)
+	else if (speed >= 20000)
 		return IB_RATE_20_GBPS;
-	else if (cmd.speed >= 10000)
+	else if (speed >= 10000)
 		return IB_RATE_10_GBPS;
 	else
 		return IB_RATE_PORT_CURRENT;
diff --git a/net/core/dev.c b/net/core/dev.c
index 7db99b52679f..e95dc30110eb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4495,6 +4495,30 @@ void dev_set_rx_mode(struct net_device *dev)
 	netif_addr_unlock_bh(dev);
 }
 
+/**
+ *	dev_ethtool_get_settings - call device's ethtool_ops::get_settings()
+ *	@dev: device
+ *	@cmd: memory area for ethtool_ops::get_settings() result
+ *
+ *      The cmd arg is initialized properly (cleared and
+ *      ethtool_cmd::cmd field set to ETHTOOL_GSET).
+ *
+ *	Return device's ethtool_ops::get_settings() result value or
+ *	-EOPNOTSUPP when device doesn't expose
+ *	ethtool_ops::get_settings() operation.
+ */
+int dev_ethtool_get_settings(struct net_device *dev,
+			     struct ethtool_cmd *cmd)
+{
+	if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
+		return -EOPNOTSUPP;
+
+	memset(cmd, 0, sizeof(struct ethtool_cmd));
+	cmd->cmd = ETHTOOL_GSET;
+	return dev->ethtool_ops->get_settings(dev, cmd);
+}
+EXPORT_SYMBOL(dev_ethtool_get_settings);
+
 /**
  *	dev_get_flags - get flags reported to userspace
  *	@dev: device
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5ceb257e860c..381813eae46c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -28,6 +28,7 @@
 static const char fmt_hex[] = "%#x\n";
 static const char fmt_long_hex[] = "%#lx\n";
 static const char fmt_dec[] = "%d\n";
+static const char fmt_udec[] = "%u\n";
 static const char fmt_ulong[] = "%lu\n";
 static const char fmt_u64[] = "%llu\n";
 
@@ -145,13 +146,10 @@ static ssize_t show_speed(struct device *dev,
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	if (netif_running(netdev) &&
-	    netdev->ethtool_ops &&
-	    netdev->ethtool_ops->get_settings) {
-		struct ethtool_cmd cmd = { ETHTOOL_GSET };
-
-		if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
-			ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd));
+	if (netif_running(netdev)) {
+		struct ethtool_cmd cmd;
+		if (!dev_ethtool_get_settings(netdev, &cmd))
+			ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd));
 	}
 	rtnl_unlock();
 	return ret;
@@ -166,13 +164,11 @@ static ssize_t show_duplex(struct device *dev,
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	if (netif_running(netdev) &&
-	    netdev->ethtool_ops &&
-	    netdev->ethtool_ops->get_settings) {
-		struct ethtool_cmd cmd = { ETHTOOL_GSET };
-
-		if (!netdev->ethtool_ops->get_settings(netdev, &cmd))
-			ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half");
+	if (netif_running(netdev)) {
+		struct ethtool_cmd cmd;
+		if (!dev_ethtool_get_settings(netdev, &cmd))
+			ret = sprintf(buf, "%s\n",
+				      cmd.duplex ? "full" : "half");
 	}
 	rtnl_unlock();
 	return ret;
-- 
cgit v1.2.3


From 57d5f9f808b7650a92f31e9cd3acd3f415a22530 Mon Sep 17 00:00:00 2001
From: Mike Waychison <mikew@google.com>
Date: Mon, 14 Mar 2011 23:58:45 -0700
Subject: x86: get_bios_ebda_length()

Add a wrapper routine that tells us the length of the EBDA if it is
present.  This guy also ensures that the returned length doesn't let the
EBDA run past the 640KiB mark.

Signed-off-by: Mike Waychison <mikew@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86/include/asm/bios_ebda.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h
index 3c7521063d3f..5174cf0bf590 100644
--- a/arch/x86/include/asm/bios_ebda.h
+++ b/arch/x86/include/asm/bios_ebda.h
@@ -14,6 +14,27 @@ static inline unsigned int get_bios_ebda(void)
 	return address;	/* 0 means none */
 }
 
+/*
+ * Return the sanitized length of the EBDA in bytes, if it exists.
+ */
+static inline unsigned int get_bios_ebda_length(void)
+{
+	unsigned int address;
+	unsigned int length;
+
+	address = get_bios_ebda();
+	if (!address)
+		return 0;
+
+	/* EBDA length is byte 0 of the EBDA (stored in KiB) */
+	length = *(unsigned char *)phys_to_virt(address);
+	length <<= 10;
+
+	/* Trim the length if it extends beyond 640KiB */
+	length = min_t(unsigned int, (640 * 1024) - address, length);
+	return length;
+}
+
 void reserve_ebda_region(void);
 
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
-- 
cgit v1.2.3


From f548ccd47d608e88d432745091e13f927ced83f7 Mon Sep 17 00:00:00 2001
From: Mike Waychison <mikew@google.com>
Date: Mon, 14 Mar 2011 23:58:50 -0700
Subject: x86: Better comments for get_bios_ebda()

Make the comments a bit clearer for get_bios_ebda so that it actually
tells us what it is returning.

Signed-off-by: Mike Waychison <mikew@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86/include/asm/bios_ebda.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h
index 5174cf0bf590..aa6a3170ab5a 100644
--- a/arch/x86/include/asm/bios_ebda.h
+++ b/arch/x86/include/asm/bios_ebda.h
@@ -4,11 +4,14 @@
 #include <asm/io.h>
 
 /*
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E.
+ * Returns physical address of EBDA.  Returns 0 if there is no EBDA.
  */
 static inline unsigned int get_bios_ebda(void)
 {
+	/*
+	 * There is a real-mode segmented pointer pointing to the
+	 * 4K EBDA area at 0x40E.
+	 */
 	unsigned int address = *(unsigned short *)phys_to_virt(0x40E);
 	address <<= 4;
 	return address;	/* 0 means none */
-- 
cgit v1.2.3


From 85eb8c8d0b0900c073b0e6f89979ac9c439ade1a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 30 Apr 2011 00:25:44 +0200
Subject: PM / Runtime: Generic clock manipulation rountines for runtime PM
 (v6)

Many different platforms and subsystems may want to disable device
clocks during suspend and enable them during resume which is going to
be done in a very similar way in all those cases.  For this reason,
provide generic routines for the manipulation of device clocks during
suspend and resume.

Convert the ARM shmobile platform to using the new routines.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/arm/mach-shmobile/pm_runtime.c | 140 +-----------
 drivers/base/power/Makefile         |   1 +
 drivers/base/power/clock_ops.c      | 430 ++++++++++++++++++++++++++++++++++++
 include/linux/pm_runtime.h          |  42 ++++
 kernel/power/Kconfig                |   4 +
 5 files changed, 486 insertions(+), 131 deletions(-)
 create mode 100644 drivers/base/power/clock_ops.c

(limited to 'arch')

diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c
index 30bbe9a99ae1..2d1b67a59e4a 100644
--- a/arch/arm/mach-shmobile/pm_runtime.c
+++ b/arch/arm/mach-shmobile/pm_runtime.c
@@ -21,70 +21,6 @@
 #include <linux/slab.h>
 
 #ifdef CONFIG_PM_RUNTIME
-#define BIT_ONCE 0
-#define BIT_ACTIVE 1
-#define BIT_CLK_ENABLED 2
-
-struct pm_runtime_data {
-	unsigned long flags;
-	struct clk *clk;
-};
-
-static struct pm_runtime_data *__to_prd(struct device *dev)
-{
-	return dev ? dev->power.subsys_data : NULL;
-}
-
-static void platform_pm_runtime_init(struct device *dev,
-				     struct pm_runtime_data *prd)
-{
-	if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags)) {
-		prd->clk = clk_get(dev, NULL);
-		if (!IS_ERR(prd->clk)) {
-			set_bit(BIT_ACTIVE, &prd->flags);
-			dev_info(dev, "clocks managed by runtime pm\n");
-		}
-	}
-}
-
-static void platform_pm_runtime_bug(struct device *dev,
-				    struct pm_runtime_data *prd)
-{
-	if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags))
-		dev_err(dev, "runtime pm suspend before resume\n");
-}
-
-static int default_platform_runtime_suspend(struct device *dev)
-{
-	struct pm_runtime_data *prd = __to_prd(dev);
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	platform_pm_runtime_bug(dev, prd);
-
-	if (prd && test_bit(BIT_ACTIVE, &prd->flags)) {
-		clk_disable(prd->clk);
-		clear_bit(BIT_CLK_ENABLED, &prd->flags);
-	}
-
-	return 0;
-}
-
-static int default_platform_runtime_resume(struct device *dev)
-{
-	struct pm_runtime_data *prd = __to_prd(dev);
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	platform_pm_runtime_init(dev, prd);
-
-	if (prd && test_bit(BIT_ACTIVE, &prd->flags)) {
-		clk_enable(prd->clk);
-		set_bit(BIT_CLK_ENABLED, &prd->flags);
-	}
-
-	return 0;
-}
 
 static int default_platform_runtime_idle(struct device *dev)
 {
@@ -94,87 +30,29 @@ static int default_platform_runtime_idle(struct device *dev)
 
 static struct dev_power_domain default_power_domain = {
 	.ops = {
-		.runtime_suspend = default_platform_runtime_suspend,
-		.runtime_resume = default_platform_runtime_resume,
+		.runtime_suspend = pm_runtime_clk_suspend,
+		.runtime_resume = pm_runtime_clk_resume,
 		.runtime_idle = default_platform_runtime_idle,
 		USE_PLATFORM_PM_SLEEP_OPS
 	},
 };
 
-static int platform_bus_notify(struct notifier_block *nb,
-			       unsigned long action, void *data)
-{
-	struct device *dev = data;
-	struct pm_runtime_data *prd;
-
-	dev_dbg(dev, "platform_bus_notify() %ld !\n", action);
-
-	switch (action) {
-	case BUS_NOTIFY_BIND_DRIVER:
-		prd = kzalloc(sizeof(*prd), GFP_KERNEL);
-		if (prd) {
-			dev->power.subsys_data = prd;
-			dev->pwr_domain = &default_power_domain;
-		} else {
-			dev_err(dev, "unable to alloc memory for runtime pm\n");
-		}
-		break;
-	case BUS_NOTIFY_UNBOUND_DRIVER:
-		prd = __to_prd(dev);
-		if (prd) {
-			if (test_bit(BIT_CLK_ENABLED, &prd->flags))
-				clk_disable(prd->clk);
+#define DEFAULT_PWR_DOMAIN_PTR	(&default_power_domain)
 
-			if (test_bit(BIT_ACTIVE, &prd->flags))
-				clk_put(prd->clk);
-		}
-		break;
-	}
+#else
 
-	return 0;
-}
-
-#else /* CONFIG_PM_RUNTIME */
-
-static int platform_bus_notify(struct notifier_block *nb,
-			       unsigned long action, void *data)
-{
-	struct device *dev = data;
-	struct clk *clk;
-
-	dev_dbg(dev, "platform_bus_notify() %ld !\n", action);
-
-	switch (action) {
-	case BUS_NOTIFY_BIND_DRIVER:
-		clk = clk_get(dev, NULL);
-		if (!IS_ERR(clk)) {
-			clk_enable(clk);
-			clk_put(clk);
-			dev_info(dev, "runtime pm disabled, clock forced on\n");
-		}
-		break;
-	case BUS_NOTIFY_UNBOUND_DRIVER:
-		clk = clk_get(dev, NULL);
-		if (!IS_ERR(clk)) {
-			clk_disable(clk);
-			clk_put(clk);
-			dev_info(dev, "runtime pm disabled, clock forced off\n");
-		}
-		break;
-	}
-
-	return 0;
-}
+#define DEFAULT_PWR_DOMAIN_PTR	NULL
 
 #endif /* CONFIG_PM_RUNTIME */
 
-static struct notifier_block platform_bus_notifier = {
-	.notifier_call = platform_bus_notify
+static struct pm_clk_notifier_block platform_bus_notifier = {
+	.pwr_domain = DEFAULT_PWR_DOMAIN_PTR,
+	.con_ids = { NULL, },
 };
 
 static int __init sh_pm_runtime_init(void)
 {
-	bus_register_notifier(&platform_bus_type, &platform_bus_notifier);
+	pm_runtime_clk_add_notifier(&platform_bus_type, &platform_bus_notifier);
 	return 0;
 }
 core_initcall(sh_pm_runtime_init);
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index 118c1b92a511..06a7073f9027 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_PM_SLEEP)	+= main.o wakeup.o
 obj-$(CONFIG_PM_RUNTIME)	+= runtime.o
 obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
 obj-$(CONFIG_PM_OPP)	+= opp.o
+obj-$(CONFIG_HAVE_CLK)	+= clock_ops.o
 
 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
 ccflags-$(CONFIG_PM_VERBOSE)   += -DDEBUG
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
new file mode 100644
index 000000000000..d74abf334391
--- /dev/null
+++ b/drivers/base/power/clock_ops.c
@@ -0,0 +1,430 @@
+/*
+ * drivers/base/power/clock_ops.c - Generic clock manipulation PM callbacks
+ *
+ * Copyright (c) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/clk.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+
+#ifdef CONFIG_PM_RUNTIME
+
+struct pm_runtime_clk_data {
+	struct list_head clock_list;
+	struct mutex lock;
+};
+
+enum pce_status {
+	PCE_STATUS_NONE = 0,
+	PCE_STATUS_ACQUIRED,
+	PCE_STATUS_ENABLED,
+	PCE_STATUS_ERROR,
+};
+
+struct pm_clock_entry {
+	struct list_head node;
+	char *con_id;
+	struct clk *clk;
+	enum pce_status status;
+};
+
+static struct pm_runtime_clk_data *__to_prd(struct device *dev)
+{
+	return dev ? dev->power.subsys_data : NULL;
+}
+
+/**
+ * pm_runtime_clk_add - Start using a device clock for runtime PM.
+ * @dev: Device whose clock is going to be used for runtime PM.
+ * @con_id: Connection ID of the clock.
+ *
+ * Add the clock represented by @con_id to the list of clocks used for
+ * the runtime PM of @dev.
+ */
+int pm_runtime_clk_add(struct device *dev, const char *con_id)
+{
+	struct pm_runtime_clk_data *prd = __to_prd(dev);
+	struct pm_clock_entry *ce;
+
+	if (!prd)
+		return -EINVAL;
+
+	ce = kzalloc(sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		dev_err(dev, "Not enough memory for clock entry.\n");
+		return -ENOMEM;
+	}
+
+	if (con_id) {
+		ce->con_id = kstrdup(con_id, GFP_KERNEL);
+		if (!ce->con_id) {
+			dev_err(dev,
+				"Not enough memory for clock connection ID.\n");
+			kfree(ce);
+			return -ENOMEM;
+		}
+	}
+
+	mutex_lock(&prd->lock);
+	list_add_tail(&ce->node, &prd->clock_list);
+	mutex_unlock(&prd->lock);
+	return 0;
+}
+
+/**
+ * __pm_runtime_clk_remove - Destroy runtime PM clock entry.
+ * @ce: Runtime PM clock entry to destroy.
+ *
+ * This routine must be called under the mutex protecting the runtime PM list
+ * of clocks corresponding the the @ce's device.
+ */
+static void __pm_runtime_clk_remove(struct pm_clock_entry *ce)
+{
+	if (!ce)
+		return;
+
+	list_del(&ce->node);
+
+	if (ce->status < PCE_STATUS_ERROR) {
+		if (ce->status == PCE_STATUS_ENABLED)
+			clk_disable(ce->clk);
+
+		if (ce->status >= PCE_STATUS_ACQUIRED)
+			clk_put(ce->clk);
+	}
+
+	if (ce->con_id)
+		kfree(ce->con_id);
+
+	kfree(ce);
+}
+
+/**
+ * pm_runtime_clk_remove - Stop using a device clock for runtime PM.
+ * @dev: Device whose clock should not be used for runtime PM any more.
+ * @con_id: Connection ID of the clock.
+ *
+ * Remove the clock represented by @con_id from the list of clocks used for
+ * the runtime PM of @dev.
+ */
+void pm_runtime_clk_remove(struct device *dev, const char *con_id)
+{
+	struct pm_runtime_clk_data *prd = __to_prd(dev);
+	struct pm_clock_entry *ce;
+
+	if (!prd)
+		return;
+
+	mutex_lock(&prd->lock);
+
+	list_for_each_entry(ce, &prd->clock_list, node) {
+		if (!con_id && !ce->con_id) {
+			__pm_runtime_clk_remove(ce);
+			break;
+		} else if (!con_id || !ce->con_id) {
+			continue;
+		} else if (!strcmp(con_id, ce->con_id)) {
+			__pm_runtime_clk_remove(ce);
+			break;
+		}
+	}
+
+	mutex_unlock(&prd->lock);
+}
+
+/**
+ * pm_runtime_clk_init - Initialize a device's list of runtime PM clocks.
+ * @dev: Device to initialize the list of runtime PM clocks for.
+ *
+ * Allocate a struct pm_runtime_clk_data object, initialize its lock member and
+ * make the @dev's power.subsys_data field point to it.
+ */
+int pm_runtime_clk_init(struct device *dev)
+{
+	struct pm_runtime_clk_data *prd;
+
+	prd = kzalloc(sizeof(*prd), GFP_KERNEL);
+	if (!prd) {
+		dev_err(dev, "Not enough memory fo runtime PM data.\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&prd->clock_list);
+	mutex_init(&prd->lock);
+	dev->power.subsys_data = prd;
+	return 0;
+}
+
+/**
+ * pm_runtime_clk_destroy - Destroy a device's list of runtime PM clocks.
+ * @dev: Device to destroy the list of runtime PM clocks for.
+ *
+ * Clear the @dev's power.subsys_data field, remove the list of clock entries
+ * from the struct pm_runtime_clk_data object pointed to by it before and free
+ * that object.
+ */
+void pm_runtime_clk_destroy(struct device *dev)
+{
+	struct pm_runtime_clk_data *prd = __to_prd(dev);
+	struct pm_clock_entry *ce, *c;
+
+	if (!prd)
+		return;
+
+	dev->power.subsys_data = NULL;
+
+	mutex_lock(&prd->lock);
+
+	list_for_each_entry_safe_reverse(ce, c, &prd->clock_list, node)
+		__pm_runtime_clk_remove(ce);
+
+	mutex_unlock(&prd->lock);
+
+	kfree(prd);
+}
+
+/**
+ * pm_runtime_clk_acquire - Acquire a device clock.
+ * @dev: Device whose clock is to be acquired.
+ * @con_id: Connection ID of the clock.
+ */
+static void pm_runtime_clk_acquire(struct device *dev,
+				    struct pm_clock_entry *ce)
+{
+	ce->clk = clk_get(dev, ce->con_id);
+	if (IS_ERR(ce->clk)) {
+		ce->status = PCE_STATUS_ERROR;
+	} else {
+		ce->status = PCE_STATUS_ACQUIRED;
+		dev_dbg(dev, "Clock %s managed by runtime PM.\n", ce->con_id);
+	}
+}
+
+/**
+ * pm_runtime_clk_suspend - Disable clocks in a device's runtime PM clock list.
+ * @dev: Device to disable the clocks for.
+ */
+int pm_runtime_clk_suspend(struct device *dev)
+{
+	struct pm_runtime_clk_data *prd = __to_prd(dev);
+	struct pm_clock_entry *ce;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	if (!prd)
+		return 0;
+
+	mutex_lock(&prd->lock);
+
+	list_for_each_entry_reverse(ce, &prd->clock_list, node) {
+		if (ce->status == PCE_STATUS_NONE)
+			pm_runtime_clk_acquire(dev, ce);
+
+		if (ce->status < PCE_STATUS_ERROR) {
+			clk_disable(ce->clk);
+			ce->status = PCE_STATUS_ACQUIRED;
+		}
+	}
+
+	mutex_unlock(&prd->lock);
+
+	return 0;
+}
+
+/**
+ * pm_runtime_clk_resume - Enable clocks in a device's runtime PM clock list.
+ * @dev: Device to enable the clocks for.
+ */
+int pm_runtime_clk_resume(struct device *dev)
+{
+	struct pm_runtime_clk_data *prd = __to_prd(dev);
+	struct pm_clock_entry *ce;
+
+	dev_dbg(dev, "%s()\n", __func__);
+
+	if (!prd)
+		return 0;
+
+	mutex_lock(&prd->lock);
+
+	list_for_each_entry(ce, &prd->clock_list, node) {
+		if (ce->status == PCE_STATUS_NONE)
+			pm_runtime_clk_acquire(dev, ce);
+
+		if (ce->status < PCE_STATUS_ERROR) {
+			clk_enable(ce->clk);
+			ce->status = PCE_STATUS_ENABLED;
+		}
+	}
+
+	mutex_unlock(&prd->lock);
+
+	return 0;
+}
+
+/**
+ * pm_runtime_clk_notify - Notify routine for device addition and removal.
+ * @nb: Notifier block object this function is a member of.
+ * @action: Operation being carried out by the caller.
+ * @data: Device the routine is being run for.
+ *
+ * For this function to work, @nb must be a member of an object of type
+ * struct pm_clk_notifier_block containing all of the requisite data.
+ * Specifically, the pwr_domain member of that object is copied to the device's
+ * pwr_domain field and its con_ids member is used to populate the device's list
+ * of runtime PM clocks, depending on @action.
+ *
+ * If the device's pwr_domain field is already populated with a value different
+ * from the one stored in the struct pm_clk_notifier_block object, the function
+ * does nothing.
+ */
+static int pm_runtime_clk_notify(struct notifier_block *nb,
+				 unsigned long action, void *data)
+{
+	struct pm_clk_notifier_block *clknb;
+	struct device *dev = data;
+	char *con_id;
+	int error;
+
+	dev_dbg(dev, "%s() %ld\n", __func__, action);
+
+	clknb = container_of(nb, struct pm_clk_notifier_block, nb);
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		if (dev->pwr_domain)
+			break;
+
+		error = pm_runtime_clk_init(dev);
+		if (error)
+			break;
+
+		dev->pwr_domain = clknb->pwr_domain;
+		if (clknb->con_ids[0]) {
+			for (con_id = clknb->con_ids[0]; *con_id; con_id++)
+				pm_runtime_clk_add(dev, con_id);
+		} else {
+			pm_runtime_clk_add(dev, NULL);
+		}
+
+		break;
+	case BUS_NOTIFY_DEL_DEVICE:
+		if (dev->pwr_domain != clknb->pwr_domain)
+			break;
+
+		dev->pwr_domain = NULL;
+		pm_runtime_clk_destroy(dev);
+		break;
+	}
+
+	return 0;
+}
+
+#else /* !CONFIG_PM_RUNTIME */
+
+/**
+ * enable_clock - Enable a device clock.
+ * @dev: Device whose clock is to be enabled.
+ * @con_id: Connection ID of the clock.
+ */
+static void enable_clock(struct device *dev, const char *con_id)
+{
+	struct clk *clk;
+
+	clk = clk_get(dev, con_id);
+	if (!IS_ERR(clk)) {
+		clk_enable(clk);
+		clk_put(clk);
+		dev_info(dev, "Runtime PM disabled, clock forced on.\n");
+	}
+}
+
+/**
+ * disable_clock - Disable a device clock.
+ * @dev: Device whose clock is to be disabled.
+ * @con_id: Connection ID of the clock.
+ */
+static void disable_clock(struct device *dev, const char *con_id)
+{
+	struct clk *clk;
+
+	clk = clk_get(dev, con_id);
+	if (!IS_ERR(clk)) {
+		clk_disable(clk);
+		clk_put(clk);
+		dev_info(dev, "Runtime PM disabled, clock forced off.\n");
+	}
+}
+
+/**
+ * pm_runtime_clk_notify - Notify routine for device addition and removal.
+ * @nb: Notifier block object this function is a member of.
+ * @action: Operation being carried out by the caller.
+ * @data: Device the routine is being run for.
+ *
+ * For this function to work, @nb must be a member of an object of type
+ * struct pm_clk_notifier_block containing all of the requisite data.
+ * Specifically, the con_ids member of that object is used to enable or disable
+ * the device's clocks, depending on @action.
+ */
+static int pm_runtime_clk_notify(struct notifier_block *nb,
+				 unsigned long action, void *data)
+{
+	struct pm_clk_notifier_block *clknb;
+	struct device *dev = data;
+
+	dev_dbg(dev, "%s() %ld\n", __func__, action);
+
+	clknb = container_of(nb, struct pm_clk_notifier_block, nb);
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		if (clknb->con_ids[0]) {
+			for (con_id = clknb->con_ids[0]; *con_id; con_id++)
+				enable_clock(dev, con_id);
+		} else {
+			enable_clock(dev, NULL);
+		}
+		break;
+	case BUS_NOTIFY_DEL_DEVICE:
+		if (clknb->con_ids[0]) {
+			for (con_id = clknb->con_ids[0]; *con_id; con_id++)
+				disable_clock(dev, con_id);
+		} else {
+			disable_clock(dev, NULL);
+		}
+		break;
+	}
+
+	return 0;
+}
+
+#endif /* !CONFIG_PM_RUNTIME */
+
+/**
+ * pm_runtime_clk_add_notifier - Add bus type notifier for runtime PM clocks.
+ * @bus: Bus type to add the notifier to.
+ * @clknb: Notifier to be added to the given bus type.
+ *
+ * The nb member of @clknb is not expected to be initialized and its
+ * notifier_call member will be replaced with pm_runtime_clk_notify().  However,
+ * the remaining members of @clknb should be populated prior to calling this
+ * routine.
+ */
+void pm_runtime_clk_add_notifier(struct bus_type *bus,
+				 struct pm_clk_notifier_block *clknb)
+{
+	if (!bus || !clknb)
+		return;
+
+	clknb->nb.notifier_call = pm_runtime_clk_notify;
+	bus_register_notifier(bus, &clknb->nb);
+}
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 8de9aa6e7def..878cf84baeb1 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -245,4 +245,46 @@ static inline void pm_runtime_dont_use_autosuspend(struct device *dev)
 	__pm_runtime_use_autosuspend(dev, false);
 }
 
+struct pm_clk_notifier_block {
+	struct notifier_block nb;
+	struct dev_power_domain *pwr_domain;
+	char *con_ids[];
+};
+
+#ifdef CONFIG_PM_RUNTIME_CLK
+extern int pm_runtime_clk_init(struct device *dev);
+extern void pm_runtime_clk_destroy(struct device *dev);
+extern int pm_runtime_clk_add(struct device *dev, const char *con_id);
+extern void pm_runtime_clk_remove(struct device *dev, const char *con_id);
+extern int pm_runtime_clk_suspend(struct device *dev);
+extern int pm_runtime_clk_resume(struct device *dev);
+#else
+static inline int pm_runtime_clk_init(struct device *dev)
+{
+	return -EINVAL;
+}
+static inline void pm_runtime_clk_destroy(struct device *dev)
+{
+}
+static inline int pm_runtime_clk_add(struct device *dev, const char *con_id)
+{
+	return -EINVAL;
+}
+static inline void pm_runtime_clk_remove(struct device *dev, const char *con_id)
+{
+}
+#define pm_runtime_clock_suspend	NULL
+#define pm_runtime_clock_resume		NULL
+#endif
+
+#ifdef CONFIG_HAVE_CLK
+extern void pm_runtime_clk_add_notifier(struct bus_type *bus,
+					struct pm_clk_notifier_block *clknb);
+#else
+static inline void pm_runtime_clk_add_notifier(struct bus_type *bus,
+					struct pm_clk_notifier_block *clknb)
+{
+}
+#endif
+
 #endif
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 6de9a8fc3417..d74ad4a90695 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -229,3 +229,7 @@ config PM_OPP
 	  representing individual voltage domains and provides SOC
 	  implementations a ready to use framework to manage OPPs.
 	  For more information, read <file:Documentation/power/opp.txt>
+
+config PM_RUNTIME_CLK
+	def_bool y
+	depends on PM_RUNTIME && HAVE_CLK
-- 
cgit v1.2.3


From e20a2d205c05cef6b5783df339a7d54adeb50962 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <ostr@amd64.org>
Date: Fri, 29 Apr 2011 17:47:43 -0400
Subject: x86, AMD: Fix APIC timer erratum 400 affecting K8 Rev.A-E processors

Older AMD K8 processors (Revisions A-E) are affected by erratum
400 (APIC timer interrupts don't occur in C states greater than
C1). This, for example, means that X86_FEATURE_ARAT flag should
not be set for these parts.

This addresses regression introduced by commit
b87cf80af3ba4b4c008b4face3c68d604e1715c6 ("x86, AMD: Set ARAT
feature on AMD processors") where the system may become
unresponsive until external interrupt (such as keyboard input)
occurs. This results, for example, in time not being reported
correctly, lack of progress on the system and other lockups.

Reported-by: Joerg-Volker Peetz <jvpeetz@web.de>
Tested-by: Joerg-Volker Peetz <jvpeetz@web.de>
Acked-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Boris Ostrovsky <Boris.Ostrovsky@amd.com>
Cc: stable@kernel.org
Link: http://lkml.kernel.org/r/1304113663-6586-1-git-send-email-ostr@amd64.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3532d3bf8105..bb9eb29a52dd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
  */
 
 const int amd_erratum_400[] =
-	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
+	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf),
 			    AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
 EXPORT_SYMBOL_GPL(amd_erratum_400);
 
-- 
cgit v1.2.3


From 2be19102b71c1a45d37fec50303791daa1a06869 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sun, 1 May 2011 19:12:04 +0200
Subject: x86, NUMA: Fix empty memblk detection in numa_cleanup_meminfo()

numa_cleanup_meminfo() trims each memblk between low (0) and
high (max_pfn) limits and discards empty ones.  However, the
emptiness detection incorrectly used equality test.  If the
start of a memblk is higher than max_pfn, it is empty but fails
the equality test and doesn't get discarded.

The condition triggers when max_pfn is lower than start of a
NUMA node and results in memory misconfiguration - leading to
WARN_ON()s and other funnies.  The bug was discovered in devel
branch where 32bit too uses this code path for NUMA init.  If a
node is above the addressing limit, max_pfn ends up lower than
the node triggering this problem.

The failure hasn't been observed on x86-64 but is still possible
with broken hardware e820/NUMA info.  As the fix is very low
risk, it would be better to apply it even for 64bit.

Fix it by using >= instead of ==.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
[ Extracted the actual fix from the original patch and rewrote patch description. ]
Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/20110501171204.GO29280@htj.dyndns.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/numa_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index e8c00cc72033..85b52fc03084 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -306,7 +306,7 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 		bi->end = min(bi->end, high);
 
 		/* and there's no empty block */
-		if (bi->start == bi->end) {
+		if (bi->start >= bi->end) {
 			numa_remove_memblk_from(i--, mi);
 			continue;
 		}
-- 
cgit v1.2.3


From 9de4966a4d218f29c68e96e8e7b4d2840dedec79 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Sun, 1 May 2011 14:09:21 +0200
Subject: x86: Fix spelling error in the memcpy() source code comment

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: http://lkml.kernel.org/r/201105011409.21629.bvanassche@acm.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/memcpy_64.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 75ef61e35e38..2a560bb2573b 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -49,7 +49,7 @@ ENTRY(memcpy)
 	jb .Lhandle_tail
 
 	/*
-	 * We check whether memory false dependece could occur,
+	 * We check whether memory false dependence could occur,
 	 * then jump to corresponding copy mode.
 	 */
 	cmp  %dil, %sil
-- 
cgit v1.2.3


From 9688678a6670c7f0ae3872450a8047c0ad401efb Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:51 +0200
Subject: x86-64, NUMA: Simplify hotadd memory handling

The only special handling NUMA needs to do for hotadd memory is
determining the node for the hotadd memory given the address of it and
there's nothing specific to specific config method used.

srat_64.c does somewhat elaborate error checking on
ACPI_SRAT_MEM_HOT_PLUGGABLE regions, remembers them and implements
memory_add_physaddr_to_nid() which determines the node for given
hotadd address.

This is almost completely redundant.  All the information is already
available to the generic NUMA code which already performs all the
sanity checking and merging.  All that's necessary is not using
__initdata from numa_meminfo and providing a function which uses it to
map address to node.

Drop the specific implementation from srat_64.c and add generic
memory_add_physaddr_to_nid() in numa_64.c, which is enabled if
CONFIG_MEMORY_HOTPLUG is set.  Other than dropping the code, srat_64.c
doesn't need any change as it already calls numa_add_memblk() for hot
pluggable regions which is enough.

While at it, change CONFIG_MEMORY_HOTPLUG_SPARSE in srat_64.c to
CONFIG_MEMORY_HOTPLUG, for NUMA on x86-64, the two are always the
same.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/mm/init_64.c |  8 ------
 arch/x86/mm/numa_64.c | 22 ++++++++++++++-
 arch/x86/mm/srat_64.c | 78 +--------------------------------------------------
 3 files changed, 22 insertions(+), 86 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 794233587287..0404bb3a077e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -679,14 +679,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
-#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
-int memory_add_physaddr_to_nid(u64 start)
-{
-	return 0;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 static struct kcore_list kcore_vsyscall;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a96767cb068f..4057b5d43918 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -28,7 +28,12 @@ EXPORT_SYMBOL(node_data);
 
 nodemask_t numa_nodes_parsed __initdata;
 
-static struct numa_meminfo numa_meminfo __initdata;
+static struct numa_meminfo numa_meminfo
+#ifndef CONFIG_MEMORY_HOTPLUG
+__initdata
+#endif
+;
+
 static int numa_distance_cnt;
 static u8 *numa_distance;
 
@@ -540,3 +545,18 @@ int __cpuinit numa_cpu_node(int cpu)
 		return __apicid_to_node[apicid];
 	return NUMA_NO_NODE;
 }
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int memory_add_physaddr_to_nid(u64 start)
+{
+	struct numa_meminfo *mi = &numa_meminfo;
+	int nid = mi->blk[0].nid;
+	int i;
+
+	for (i = 0; i < mi->nr_blks; i++)
+		if (mi->blk[i].start <= start && mi->blk[i].end > start)
+			nid = mi->blk[i].nid;
+	return nid;
+}
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 8e9d3394f6d4..9994d2cacf72 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -26,8 +26,6 @@
 
 int acpi_numa __initdata;
 
-static struct bootnode nodes_add[MAX_NUMNODES];
-
 static __init int setup_node(int pxm)
 {
 	return acpi_map_pxm_to_node(pxm);
@@ -37,7 +35,6 @@ static __init void bad_srat(void)
 {
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	memset(nodes_add, 0, sizeof(nodes_add));
 }
 
 static __init inline int srat_disabled(void)
@@ -131,67 +128,11 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
 	       pxm, apic_id, node);
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
-/*
- * Update nodes_add[]
- * This code supports one contiguous hot add area per node
- */
-static void __init
-update_nodes_add(int node, unsigned long start, unsigned long end)
-{
-	unsigned long s_pfn = start >> PAGE_SHIFT;
-	unsigned long e_pfn = end >> PAGE_SHIFT;
-	int changed = 0;
-	struct bootnode *nd = &nodes_add[node];
-
-	/* I had some trouble with strange memory hotadd regions breaking
-	   the boot. Be very strict here and reject anything unexpected.
-	   If you want working memory hotadd write correct SRATs.
-
-	   The node size check is a basic sanity check to guard against
-	   mistakes */
-	if ((signed long)(end - start) < NODE_MIN_SIZE) {
-		printk(KERN_ERR "SRAT: Hotplug area too small\n");
-		return;
-	}
-
-	/* This check might be a bit too strict, but I'm keeping it for now. */
-	if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
-		printk(KERN_ERR
-			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
-			s_pfn, e_pfn);
-		return;
-	}
-
-	/* Looks good */
-
-	if (nd->start == nd->end) {
-		nd->start = start;
-		nd->end = end;
-		changed = 1;
-	} else {
-		if (nd->start == end) {
-			nd->start = start;
-			changed = 1;
-		}
-		if (nd->end == start) {
-			nd->end = end;
-			changed = 1;
-		}
-		if (!changed)
-			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
-	}
-
-	if (changed) {
-		node_set(node, numa_nodes_parsed);
-		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
-				 nd->start, nd->end);
-	}
-}
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
 void __init
@@ -228,9 +169,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 
 	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
 	       start, end);
-
-	if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
-		update_nodes_add(node, start, end);
 }
 
 void __init acpi_numa_arch_fixup(void) {}
@@ -244,17 +182,3 @@ int __init x86_acpi_numa_init(void)
 		return ret;
 	return srat_disabled() ? -EINVAL : 0;
 }
-
-#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
-int memory_add_physaddr_to_nid(u64 start)
-{
-	int i, ret = 0;
-
-	for_each_node(i)
-		if (nodes_add[i].start <= start && nodes_add[i].end > start)
-			ret = i;
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-- 
cgit v1.2.3


From ebe685f24eeb85fbdb0f33792f1dabdbf35eff38 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:51 +0200
Subject: x86-64, NUMA: trivial cleanups for setup_node_bootmem()

Make the following trivial changes in preparation for further updates.

* nodeid -> nid, nid -> tnid
* use nd_ prefix for nodedata related variables
* remove start/end_pfn and use start/end directly

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/mm/numa_64.c | 52 +++++++++++++++++++++++----------------------------
 1 file changed, 23 insertions(+), 29 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 4057b5d43918..8043d5e7f0d3 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -128,14 +128,11 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
 
 /* Initialize bootmem allocator for a node */
 void __init
-setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
+setup_node_bootmem(int nid, unsigned long start, unsigned long end)
 {
-	unsigned long start_pfn, last_pfn, nodedata_phys;
-	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
-	int nid;
-
-	if (!end)
-		return;
+	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
+	unsigned long nd_pa;
+	int tnid;
 
 	/*
 	 * Don't confuse VM with a node that doesn't have the
@@ -146,30 +143,27 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 
 	start = roundup(start, ZONE_ALIGN);
 
-	printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
-	       start, end);
-
-	start_pfn = start >> PAGE_SHIFT;
-	last_pfn = end >> PAGE_SHIFT;
+	printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n",
+	       nid, start, end);
 
-	node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size,
-					   SMP_CACHE_BYTES);
-	if (node_data[nodeid] == NULL)
+	node_data[nid] = early_node_mem(nid, start, end, nd_size,
+					SMP_CACHE_BYTES);
+	if (node_data[nid] == NULL)
 		return;
-	nodedata_phys = __pa(node_data[nodeid]);
-	memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
-	printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
-		nodedata_phys + pgdat_size - 1);
-	nid = early_pfn_to_nid(nodedata_phys >> PAGE_SHIFT);
-	if (nid != nodeid)
-		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nodeid, nid);
-
-	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
-	NODE_DATA(nodeid)->node_id = nodeid;
-	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
-	NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
-
-	node_set_online(nodeid);
+	nd_pa = __pa(node_data[nid]);
+	memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
+	printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n",
+	       nd_pa, nd_pa + nd_size - 1);
+	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+	if (tnid != nid)
+		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nid, tnid);
+
+	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+	NODE_DATA(nid)->node_id = nid;
+	NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT;
+	NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT;
+
+	node_set_online(nid);
 }
 
 /**
-- 
cgit v1.2.3


From acd26d611e60c1a7c2a14269ab99760f779121f4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:51 +0200
Subject: x86-64, NUMA: simplify nodedata allocation

With top-down memblock allocation, the allocation range limits in
ealry_node_mem() can be simplified - try node-local first, then any
node but in any case don't allocate below DMA limit.

Remove early_node_mem() and implement simplified allocation directly
in setup_node_bootmem().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/mm/numa_64.c | 53 +++++++++++++++++----------------------------------
 1 file changed, 17 insertions(+), 36 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 8043d5e7f0d3..b4fd25e753cb 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -37,38 +37,6 @@ __initdata
 static int numa_distance_cnt;
 static u8 *numa_distance;
 
-static void * __init early_node_mem(int nodeid, unsigned long start,
-				    unsigned long end, unsigned long size,
-				    unsigned long align)
-{
-	unsigned long mem;
-
-	/*
-	 * put it on high as possible
-	 * something will go with NODE_DATA
-	 */
-	if (start < (MAX_DMA_PFN<<PAGE_SHIFT))
-		start = MAX_DMA_PFN<<PAGE_SHIFT;
-	if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
-	    end > (MAX_DMA32_PFN<<PAGE_SHIFT))
-		start = MAX_DMA32_PFN<<PAGE_SHIFT;
-	mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align);
-	if (mem != MEMBLOCK_ERROR)
-		return __va(mem);
-
-	/* extend the search scope */
-	end = max_pfn_mapped << PAGE_SHIFT;
-	start = MAX_DMA_PFN << PAGE_SHIFT;
-	mem = memblock_find_in_range(start, end, size, align);
-	if (mem != MEMBLOCK_ERROR)
-		return __va(mem);
-
-	printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
-		       size, nodeid);
-
-	return NULL;
-}
-
 static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
 				     struct numa_meminfo *mi)
 {
@@ -130,6 +98,8 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
 void __init
 setup_node_bootmem(int nid, unsigned long start, unsigned long end)
 {
+	const u64 nd_low = (u64)MAX_DMA_PFN << PAGE_SHIFT;
+	const u64 nd_high = (u64)max_pfn_mapped << PAGE_SHIFT;
 	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	unsigned long nd_pa;
 	int tnid;
@@ -146,18 +116,29 @@ setup_node_bootmem(int nid, unsigned long start, unsigned long end)
 	printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n",
 	       nid, start, end);
 
-	node_data[nid] = early_node_mem(nid, start, end, nd_size,
-					SMP_CACHE_BYTES);
-	if (node_data[nid] == NULL)
+	/*
+	 * Try to allocate node data on local node and then fall back to
+	 * all nodes.  Never allocate in DMA zone.
+	 */
+	nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high,
+						nd_size, SMP_CACHE_BYTES);
+	if (nd_pa == MEMBLOCK_ERROR)
+		nd_pa = memblock_find_in_range(nd_low, nd_high,
+					       nd_size, SMP_CACHE_BYTES);
+	if (nd_pa == MEMBLOCK_ERROR) {
+		pr_err("Cannot find %lu bytes in node %d\n", nd_size, nid);
 		return;
-	nd_pa = __pa(node_data[nid]);
+	}
 	memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
+
+	/* report and initialize */
 	printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n",
 	       nd_pa, nd_pa + nd_size - 1);
 	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
 	if (tnid != nid)
 		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nid, tnid);
 
+	node_data[nid] = __va(nd_pa);
 	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
 	NODE_DATA(nid)->node_id = nid;
 	NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT;
-- 
cgit v1.2.3


From c4b90c11992e61123071977c0e5556e59a70852c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:52 +0200
Subject: x86-32, NUMA: Automatically set apicid -> node in setup_local_APIC()

Some x86-32 NUMA implementations (NUMAQ) don't initialize apicid ->
node mapping using set_apicid_to_node() during NUMA init but implement
custom apic->x86_32_numa_cpu_node() instead.

This patch automatically initializes the default apic -> node mapping
table from apic->x86_32_numa_cpu_node() from setup_local_APIC() such
that the mapping table is in sync with the actual mapping.

As the table isn't used by custom implementations, this doesn't make
any difference at this point.  This is in preparation of unifying
numa_cpu_node() between x86-32 and 64.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/kernel/apic/apic.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2bc503bf9e99..a6cd02a92683 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1237,6 +1237,16 @@ void __cpuinit setup_local_APIC(void)
 	/* always use the value from LDR */
 	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
 		logical_smp_processor_id();
+
+	/*
+	 * Some NUMA implementations (NUMAQ) don't initialize apicid to
+	 * node mapping during NUMA init.  Now that logical apicid is
+	 * guaranteed to be known, give it another chance.  This is already
+	 * a bit too late - percpu allocation has already happened without
+	 * proper NUMA affinity.
+	 */
+	set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
+			   apic->x86_32_numa_cpu_node(cpu));
 #endif
 
 	/*
-- 
cgit v1.2.3


From 6bd262731bf7559bab8c749786e8652e2df1fb4e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:52 +0200
Subject: x86, NUMA: Unify 32/64bit numa_cpu_node() implementation

Currently, the only meaningful user of apic->x86_32_numa_cpu_node() is
NUMAQ which returns valid mapping only after CPU is initialized during
SMP bringup; thus, the previous patch to set apicid -> node in
setup_local_APIC() makes __apicid_to_node[] always contain the correct
mapping whether custom apic->x86_32_numa_cpu_node() is used or not.

So, there is no reason to keep separate 32bit implementation.  We can
always consult __apicid_to_node[].  Move 64bit implementation from
numa_64.c to numa.c and remove 32bit implementation from numa_32.c.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/numa.h    | 10 ++++++++++
 arch/x86/include/asm/numa_32.h |  6 ------
 arch/x86/include/asm/numa_64.h |  3 ---
 arch/x86/mm/numa.c             |  9 +++++++++
 arch/x86/mm/numa_32.c          |  5 -----
 arch/x86/mm/numa_64.c          |  9 ---------
 6 files changed, 19 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index a50fc9f493b3..5982d418c358 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_NUMA_H
 #define _ASM_X86_NUMA_H
 
+#include <linux/nodemask.h>
+
 #include <asm/topology.h>
 #include <asm/apicdef.h>
 
@@ -22,10 +24,18 @@ static inline void set_apicid_to_node(int apicid, s16 node)
 {
 	__apicid_to_node[apicid] = node;
 }
+
+extern int __cpuinit numa_cpu_node(int cpu);
+
 #else	/* CONFIG_NUMA */
 static inline void set_apicid_to_node(int apicid, s16 node)
 {
 }
+
+static inline int numa_cpu_node(int cpu)
+{
+	return NUMA_NO_NODE;
+}
 #endif	/* CONFIG_NUMA */
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index c6beed1ef103..242522fe9f8d 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -5,12 +5,6 @@ extern int numa_off;
 
 extern int pxm_to_nid(int pxm);
 
-#ifdef CONFIG_NUMA
-extern int __cpuinit numa_cpu_node(int cpu);
-#else	/* CONFIG_NUMA */
-static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
-#endif	/* CONFIG_NUMA */
-
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 344eb1790b46..12461ebec704 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -26,7 +26,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 
 extern nodemask_t numa_nodes_parsed __initdata;
 
-extern int __cpuinit numa_cpu_node(int cpu);
 extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
 extern void __init numa_set_distance(int from, int to, int distance);
 
@@ -35,8 +34,6 @@ extern void __init numa_set_distance(int from, int to, int distance);
 #define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
 void numa_emu_cmdline(char *);
 #endif /* CONFIG_NUMA_EMU */
-#else
-static inline int numa_cpu_node(int cpu)		{ return NUMA_NO_NODE; }
 #endif
 
 #endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 745258dfc4dc..e9005c4ea29a 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -32,6 +32,15 @@ s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
+int __cpuinit numa_cpu_node(int cpu)
+{
+	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+	if (apicid != BAD_APICID)
+		return __apicid_to_node[apicid];
+	return NUMA_NO_NODE;
+}
+
 cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index c757c0a3b529..e0d9716ab382 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -107,11 +107,6 @@ extern unsigned long highend_pfn, highstart_pfn;
 static void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-int __cpuinit numa_cpu_node(int cpu)
-{
-	return apic->x86_32_numa_cpu_node(cpu);
-}
-
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index b4fd25e753cb..7f83adec0482 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -512,15 +512,6 @@ unsigned long __init numa_free_all_bootmem(void)
 	return pages;
 }
 
-int __cpuinit numa_cpu_node(int cpu)
-{
-	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-
-	if (apicid != BAD_APICID)
-		return __apicid_to_node[apicid];
-	return NUMA_NO_NODE;
-}
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 int memory_add_physaddr_to_nid(u64 start)
 {
-- 
cgit v1.2.3


From 84914ed0ec6787d38e84b510f92ad4ca3a572fd8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:52 +0200
Subject: x86-32, NUMA: Make apic->x86_32_numa_cpu_node() optional

NUMAQ is the only meaningful user of this callback and
setup_local_APIC() the only callsite.  Stop torturing everyone else by
making the callback optional and removing all the boilerplate
implementations and assignments.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/apic.h      |  9 ++++++---
 arch/x86/kernel/apic/apic.c      | 20 +++-----------------
 arch/x86/kernel/apic/apic_noop.c |  9 ---------
 arch/x86/kernel/apic/bigsmp_32.c |  1 -
 arch/x86/kernel/apic/es7000_32.c |  7 -------
 arch/x86/kernel/apic/probe_32.c  |  1 -
 arch/x86/kernel/apic/summit_32.c |  1 -
 7 files changed, 9 insertions(+), 39 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 2b7d573be549..a0c46f061210 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -363,7 +363,12 @@ struct apic {
 	 */
 	int (*x86_32_early_logical_apicid)(int cpu);
 
-	/* determine CPU -> NUMA node mapping */
+	/*
+	 * Optional method called from setup_local_APIC() after logical
+	 * apicid is guaranteed to be known to initialize apicid -> node
+	 * mapping if NUMA initialization hasn't done so already.  Don't
+	 * add new users.
+	 */
 	int (*x86_32_numa_cpu_node)(int cpu);
 #endif
 };
@@ -537,8 +542,6 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 	return cpuid_apic >> index_msb;
 }
 
-extern int default_x86_32_numa_cpu_node(int cpu);
-
 #endif
 
 static inline unsigned int
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a6cd02a92683..0c67b4fc25b1 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1245,8 +1245,9 @@ void __cpuinit setup_local_APIC(void)
 	 * a bit too late - percpu allocation has already happened without
 	 * proper NUMA affinity.
 	 */
-	set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
-			   apic->x86_32_numa_cpu_node(cpu));
+	if (apic->x86_32_numa_cpu_node)
+		set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
+				   apic->x86_32_numa_cpu_node(cpu));
 #endif
 
 	/*
@@ -2013,21 +2014,6 @@ void default_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-#ifdef CONFIG_X86_32
-int default_x86_32_numa_cpu_node(int cpu)
-{
-#ifdef CONFIG_NUMA
-	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-
-	if (apicid != BAD_APICID)
-		return __apicid_to_node[apicid];
-	return NUMA_NO_NODE;
-#else
-	return 0;
-#endif
-}
-#endif
-
 /*
  * Power management
  */
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index f1baa2dc087a..775b82bc655c 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -119,14 +119,6 @@ static void noop_apic_write(u32 reg, u32 v)
 	WARN_ON_ONCE(cpu_has_apic && !disable_apic);
 }
 
-#ifdef CONFIG_X86_32
-static int noop_x86_32_numa_cpu_node(int cpu)
-{
-	/* we're always on node 0 */
-	return 0;
-}
-#endif
-
 struct apic apic_noop = {
 	.name				= "noop",
 	.probe				= noop_probe,
@@ -195,6 +187,5 @@ struct apic apic_noop = {
 
 #ifdef CONFIG_X86_32
 	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
-	.x86_32_numa_cpu_node		= noop_x86_32_numa_cpu_node,
 #endif
 };
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 541a2e431659..d84ac5a584b5 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -253,5 +253,4 @@ struct apic apic_bigsmp = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= bigsmp_early_logical_apicid,
-	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 3e9de4854c5b..70533de5bd29 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -510,11 +510,6 @@ static void es7000_setup_apic_routing(void)
 		nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
 }
 
-static int es7000_numa_cpu_node(int cpu)
-{
-	return 0;
-}
-
 static int es7000_cpu_present_to_apicid(int mps_cpu)
 {
 	if (!mps_cpu)
@@ -688,7 +683,6 @@ struct apic __refdata apic_es7000_cluster = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
-	.x86_32_numa_cpu_node		= es7000_numa_cpu_node,
 };
 
 struct apic __refdata apic_es7000 = {
@@ -752,5 +746,4 @@ struct apic __refdata apic_es7000 = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
-	.x86_32_numa_cpu_node		= es7000_numa_cpu_node,
 };
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index fc84c7b61108..6541e471fd91 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -172,7 +172,6 @@ struct apic apic_default = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= default_x86_32_early_logical_apicid,
-	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
 
 extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index e4b8059b414a..35bcd7d995a1 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -551,5 +551,4 @@ struct apic apic_summit = {
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 
 	.x86_32_early_logical_apicid	= summit_early_logical_apicid,
-	.x86_32_numa_cpu_node		= default_x86_32_numa_cpu_node,
 };
-- 
cgit v1.2.3


From 797390d8554b1e07aabea37d0140933b0412dba0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:52 +0200
Subject: x86-32, NUMA: use sparse_memory_present_with_active_regions()

Instead of calling memory_present() for each region from NUMA init,
call sparse_memory_present_with_active_regions() from paging_init()
similarly to x86-64.

For flat and numaq, this results in exactly the same memory_present()
calls.  For srat, if there are multiple memory chunks for a node,
after this change, memory_present() will be called separately for each
chunk instead of being called once to encompass the whole range, which
doesn't cause any harm and actually is the better behavior.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/kernel/apic/numaq_32.c | 2 --
 arch/x86/mm/init_32.c           | 1 +
 arch/x86/mm/numa_32.c           | 1 -
 arch/x86/mm/srat_32.c           | 8 +-------
 4 files changed, 2 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 0aced70815f0..41b8b29d36f5 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -91,8 +91,6 @@ static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
 
 	memblock_x86_register_active_regions(node, node_start_pfn[node],
 						node_end_pfn[node]);
-
-	memory_present(node, node_start_pfn[node], node_end_pfn[node]);
 }
 
 /*
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 80088f994193..2cde0a34bed6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -716,6 +716,7 @@ void __init paging_init(void)
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
 	olpc_dt_build_devicetree();
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
 	zone_sizes_init();
 }
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index e0d9716ab382..f847fa1e02dc 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -119,7 +119,6 @@ int __init get_memcfg_numa_flat(void)
 	node_start_pfn[0] = 0;
 	node_end_pfn[0] = max_pfn;
 	memblock_x86_register_active_regions(0, 0, max_pfn);
-	memory_present(0, 0, max_pfn);
 
         /* Indicate there is one node available. */
 	nodes_clear(node_online_map);
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index ae20046a9e98..6b9bfd78bc35 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -209,7 +209,7 @@ static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_ch
 
 int __init get_memcfg_from_srat(void)
 {
-	int i, j, nid;
+	int i, j;
 
 	if (srat_disabled())
 		goto out_fail;
@@ -273,12 +273,6 @@ int __init get_memcfg_from_srat(void)
 	/* for out of order entries in SRAT */
 	sort_node_map();
 
-	for_each_online_node(nid) {
-		unsigned long start = node_start_pfn[nid];
-		unsigned long end = min(node_end_pfn[nid], max_pfn);
-
-		memory_present(nid, start, end);
-	}
 	return 1;
 out_fail:
 	printk(KERN_DEBUG "failed to get NUMA memory information from SRAT"
-- 
cgit v1.2.3


From 1201e10a092adc9c88a6ce5f27740cc5cd0d26e5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:52 +0200
Subject: x86, NUMA: trivial cleanups

* Kill no longer used struct bootnode.

* Kill dangling declaration of pxm_to_nid() in numa_32.h.

* Make setup_node_bootmem() static.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/acpi.h    | 2 --
 arch/x86/include/asm/amd_nb.h  | 1 -
 arch/x86/include/asm/numa_32.h | 2 --
 arch/x86/include/asm/numa_64.h | 7 -------
 arch/x86/mm/numa_64.c          | 2 +-
 5 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 12e0e7dd869c..416d865eae39 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -183,8 +183,6 @@ static inline void disable_acpi(void) { }
 
 #define ARCH_HAS_POWER_INIT	1
 
-struct bootnode;
-
 #ifdef CONFIG_ACPI_NUMA
 extern int acpi_numa;
 extern int x86_acpi_numa_init(void);
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 331682231bb4..67f87f257611 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -11,7 +11,6 @@ struct amd_nb_bus_dev_range {
 
 extern const struct pci_device_id amd_nb_misc_ids[];
 extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
-struct bootnode;
 
 extern bool early_is_amd_nb(u32 value);
 extern int amd_cache_northbridges(void);
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index 242522fe9f8d..7e54b64623a9 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -3,8 +3,6 @@
 
 extern int numa_off;
 
-extern int pxm_to_nid(int pxm);
-
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 12461ebec704..794da6de6892 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -3,18 +3,11 @@
 
 #include <linux/nodemask.h>
 
-struct bootnode {
-	u64 start;
-	u64 end;
-};
-
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 
 extern int numa_off;
 
 extern unsigned long numa_free_all_bootmem(void);
-extern void setup_node_bootmem(int nodeid, unsigned long start,
-			       unsigned long end);
 
 #ifdef CONFIG_NUMA
 /*
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7f83adec0482..287ae798935f 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -95,7 +95,7 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
 }
 
 /* Initialize bootmem allocator for a node */
-void __init
+static void __init
 setup_node_bootmem(int nid, unsigned long start, unsigned long end)
 {
 	const u64 nd_low = (u64)MAX_DMA_PFN << PAGE_SHIFT;
-- 
cgit v1.2.3


From 7b2600f8ee0536bb738f3387cf2c30e8e334e149 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:52 +0200
Subject: x86, NUMA: rename srat_64.c to srat.c

Rename srat_64.c to srat.c.  This is to prepare for unification of
NUMA init paths between 32 and 64bit.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/mm/Makefile  |   5 +-
 arch/x86/mm/srat.c    | 184 ++++++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/mm/srat_64.c | 184 --------------------------------------------------
 3 files changed, 188 insertions(+), 185 deletions(-)
 create mode 100644 arch/x86/mm/srat.c
 delete mode 100644 arch/x86/mm/srat_64.c

(limited to 'arch')

diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 3e608edf9958..37e7043362a1 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -24,7 +24,10 @@ obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o
 
 obj-$(CONFIG_NUMA)		+= numa.o numa_$(BITS).o
 obj-$(CONFIG_AMD_NUMA)		+= amdtopology_64.o
-obj-$(CONFIG_ACPI_NUMA)		+= srat_$(BITS).o
+ifeq ($(CONFIG_ACPI_NUMA),y)
+obj-$(CONFIG_X86_64)		+= srat.o
+obj-$(CONFIG_X86_32)		+= srat_32.o
+endif
 obj-$(CONFIG_NUMA_EMU)		+= numa_emulation.o
 
 obj-$(CONFIG_HAVE_MEMBLOCK)		+= memblock.o
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
new file mode 100644
index 000000000000..9994d2cacf72
--- /dev/null
+++ b/arch/x86/mm/srat.c
@@ -0,0 +1,184 @@
+/*
+ * ACPI 3.0 based NUMA setup
+ * Copyright 2004 Andi Kleen, SuSE Labs.
+ *
+ * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
+ *
+ * Called from acpi_numa_init while reading the SRAT and SLIT tables.
+ * Assumes all memory regions belonging to a single proximity domain
+ * are in one chunk. Holes between them will be included in the node.
+ */
+
+#include <linux/kernel.h>
+#include <linux/acpi.h>
+#include <linux/mmzone.h>
+#include <linux/bitmap.h>
+#include <linux/module.h>
+#include <linux/topology.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <asm/proto.h>
+#include <asm/numa.h>
+#include <asm/e820.h>
+#include <asm/apic.h>
+#include <asm/uv/uv.h>
+
+int acpi_numa __initdata;
+
+static __init int setup_node(int pxm)
+{
+	return acpi_map_pxm_to_node(pxm);
+}
+
+static __init void bad_srat(void)
+{
+	printk(KERN_ERR "SRAT: SRAT not used.\n");
+	acpi_numa = -1;
+}
+
+static __init inline int srat_disabled(void)
+{
+	return acpi_numa < 0;
+}
+
+/* Callback for SLIT parsing */
+void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
+{
+	int i, j;
+
+	for (i = 0; i < slit->locality_count; i++)
+		for (j = 0; j < slit->locality_count; j++)
+			numa_set_distance(pxm_to_node(i), pxm_to_node(j),
+				slit->entry[slit->locality_count * i + j]);
+}
+
+/* Callback for Proximity Domain -> x2APIC mapping */
+void __init
+acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
+{
+	int pxm, node;
+	int apic_id;
+
+	if (srat_disabled())
+		return;
+	if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
+		bad_srat();
+		return;
+	}
+	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
+		return;
+	pxm = pa->proximity_domain;
+	node = setup_node(pxm);
+	if (node < 0) {
+		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
+		bad_srat();
+		return;
+	}
+
+	apic_id = pa->apic_id;
+	if (apic_id >= MAX_LOCAL_APIC) {
+		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
+		return;
+	}
+	set_apicid_to_node(apic_id, node);
+	node_set(node, numa_nodes_parsed);
+	acpi_numa = 1;
+	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
+	       pxm, apic_id, node);
+}
+
+/* Callback for Proximity Domain -> LAPIC mapping */
+void __init
+acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
+{
+	int pxm, node;
+	int apic_id;
+
+	if (srat_disabled())
+		return;
+	if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
+		bad_srat();
+		return;
+	}
+	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
+		return;
+	pxm = pa->proximity_domain_lo;
+	node = setup_node(pxm);
+	if (node < 0) {
+		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
+		bad_srat();
+		return;
+	}
+
+	if (get_uv_system_type() >= UV_X2APIC)
+		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
+	else
+		apic_id = pa->apic_id;
+
+	if (apic_id >= MAX_LOCAL_APIC) {
+		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
+		return;
+	}
+
+	set_apicid_to_node(apic_id, node);
+	node_set(node, numa_nodes_parsed);
+	acpi_numa = 1;
+	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
+	       pxm, apic_id, node);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static inline int save_add_info(void) {return 1;}
+#else
+static inline int save_add_info(void) {return 0;}
+#endif
+
+/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
+void __init
+acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
+{
+	unsigned long start, end;
+	int node, pxm;
+
+	if (srat_disabled())
+		return;
+	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
+		bad_srat();
+		return;
+	}
+	if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
+		return;
+
+	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
+		return;
+	start = ma->base_address;
+	end = start + ma->length;
+	pxm = ma->proximity_domain;
+	node = setup_node(pxm);
+	if (node < 0) {
+		printk(KERN_ERR "SRAT: Too many proximity domains.\n");
+		bad_srat();
+		return;
+	}
+
+	if (numa_add_memblk(node, start, end) < 0) {
+		bad_srat();
+		return;
+	}
+
+	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
+	       start, end);
+}
+
+void __init acpi_numa_arch_fixup(void) {}
+
+int __init x86_acpi_numa_init(void)
+{
+	int ret;
+
+	ret = acpi_numa_init();
+	if (ret < 0)
+		return ret;
+	return srat_disabled() ? -EINVAL : 0;
+}
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
deleted file mode 100644
index 9994d2cacf72..000000000000
--- a/arch/x86/mm/srat_64.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * ACPI 3.0 based NUMA setup
- * Copyright 2004 Andi Kleen, SuSE Labs.
- *
- * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
- *
- * Called from acpi_numa_init while reading the SRAT and SLIT tables.
- * Assumes all memory regions belonging to a single proximity domain
- * are in one chunk. Holes between them will be included in the node.
- */
-
-#include <linux/kernel.h>
-#include <linux/acpi.h>
-#include <linux/mmzone.h>
-#include <linux/bitmap.h>
-#include <linux/module.h>
-#include <linux/topology.h>
-#include <linux/bootmem.h>
-#include <linux/memblock.h>
-#include <linux/mm.h>
-#include <asm/proto.h>
-#include <asm/numa.h>
-#include <asm/e820.h>
-#include <asm/apic.h>
-#include <asm/uv/uv.h>
-
-int acpi_numa __initdata;
-
-static __init int setup_node(int pxm)
-{
-	return acpi_map_pxm_to_node(pxm);
-}
-
-static __init void bad_srat(void)
-{
-	printk(KERN_ERR "SRAT: SRAT not used.\n");
-	acpi_numa = -1;
-}
-
-static __init inline int srat_disabled(void)
-{
-	return acpi_numa < 0;
-}
-
-/* Callback for SLIT parsing */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
-	int i, j;
-
-	for (i = 0; i < slit->locality_count; i++)
-		for (j = 0; j < slit->locality_count; j++)
-			numa_set_distance(pxm_to_node(i), pxm_to_node(j),
-				slit->entry[slit->locality_count * i + j]);
-}
-
-/* Callback for Proximity Domain -> x2APIC mapping */
-void __init
-acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
-{
-	int pxm, node;
-	int apic_id;
-
-	if (srat_disabled())
-		return;
-	if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
-		bad_srat();
-		return;
-	}
-	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
-		return;
-	pxm = pa->proximity_domain;
-	node = setup_node(pxm);
-	if (node < 0) {
-		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
-		bad_srat();
-		return;
-	}
-
-	apic_id = pa->apic_id;
-	if (apic_id >= MAX_LOCAL_APIC) {
-		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
-		return;
-	}
-	set_apicid_to_node(apic_id, node);
-	node_set(node, numa_nodes_parsed);
-	acpi_numa = 1;
-	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
-	       pxm, apic_id, node);
-}
-
-/* Callback for Proximity Domain -> LAPIC mapping */
-void __init
-acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
-{
-	int pxm, node;
-	int apic_id;
-
-	if (srat_disabled())
-		return;
-	if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
-		bad_srat();
-		return;
-	}
-	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
-		return;
-	pxm = pa->proximity_domain_lo;
-	node = setup_node(pxm);
-	if (node < 0) {
-		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
-		bad_srat();
-		return;
-	}
-
-	if (get_uv_system_type() >= UV_X2APIC)
-		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
-	else
-		apic_id = pa->apic_id;
-
-	if (apic_id >= MAX_LOCAL_APIC) {
-		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
-		return;
-	}
-
-	set_apicid_to_node(apic_id, node);
-	node_set(node, numa_nodes_parsed);
-	acpi_numa = 1;
-	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
-	       pxm, apic_id, node);
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static inline int save_add_info(void) {return 1;}
-#else
-static inline int save_add_info(void) {return 0;}
-#endif
-
-/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
-void __init
-acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
-{
-	unsigned long start, end;
-	int node, pxm;
-
-	if (srat_disabled())
-		return;
-	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
-		bad_srat();
-		return;
-	}
-	if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
-		return;
-
-	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
-		return;
-	start = ma->base_address;
-	end = start + ma->length;
-	pxm = ma->proximity_domain;
-	node = setup_node(pxm);
-	if (node < 0) {
-		printk(KERN_ERR "SRAT: Too many proximity domains.\n");
-		bad_srat();
-		return;
-	}
-
-	if (numa_add_memblk(node, start, end) < 0) {
-		bad_srat();
-		return;
-	}
-
-	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
-	       start, end);
-}
-
-void __init acpi_numa_arch_fixup(void) {}
-
-int __init x86_acpi_numa_init(void)
-{
-	int ret;
-
-	ret = acpi_numa_init();
-	if (ret < 0)
-		return ret;
-	return srat_disabled() ? -EINVAL : 0;
-}
-- 
cgit v1.2.3


From eca9ad313293c41021bfcf23e985a14f6991a121 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:52 +0200
Subject: x86, NUMA: make srat.c 32bit safe

Make srat.c 32bit safe by removing the assumption that unsigned long
is 64bit.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/mm/srat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 9994d2cacf72..81dbfdeb080d 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -138,7 +138,7 @@ static inline int save_add_info(void) {return 0;}
 void __init
 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
-	unsigned long start, end;
+	u64 start, end;
 	int node, pxm;
 
 	if (srat_disabled())
@@ -167,7 +167,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		return;
 	}
 
-	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
+	printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
 	       start, end);
 }
 
-- 
cgit v1.2.3


From daf4f480ae24270bac06db4293908d36b4834e21 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:53 +0200
Subject: x86-32, NUMA: Move get_memcfg_numa() into numa_32.c

There's no reason get_memcfg_numa() to be implemented inline in
mmzone_32.h.  Move it to numa_32.c and also make
get_memcfg_numa_flag() static.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/mmzone_32.h | 18 ------------------
 arch/x86/mm/numa_32.c            | 11 ++++++++++-
 2 files changed, 10 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 91df7c51806c..73e5745aef34 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -16,28 +16,10 @@ extern struct pglist_data *node_data[];
 /* summit or generic arch */
 #include <asm/srat.h>
 
-extern int get_memcfg_numa_flat(void);
-/*
- * This allows any one NUMA architecture to be compiled
- * for, and still fall back to the flat function if it
- * fails.
- */
-static inline void get_memcfg_numa(void)
-{
-
-	if (get_memcfg_numaq())
-		return;
-	if (get_memcfg_from_srat())
-		return;
-	get_memcfg_numa_flat();
-}
-
 extern void resume_map_numa_kva(pgd_t *pgd);
 
 #else /* !CONFIG_NUMA */
 
-#define get_memcfg_numa get_memcfg_numa_flat
-
 static inline void resume_map_numa_kva(pgd_t *pgd) {}
 
 #endif /* CONFIG_NUMA */
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index f847fa1e02dc..abf1247a4c32 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -112,7 +112,7 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
  *        a single node with all available processors in it with a flat
  *        memory map.
  */
-int __init get_memcfg_numa_flat(void)
+static int __init get_memcfg_numa_flat(void)
 {
 	printk(KERN_DEBUG "NUMA - single node, flat memory mode\n");
 
@@ -332,6 +332,15 @@ static __init void init_alloc_remap(int nid)
 	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
 }
 
+static void get_memcfg_numa(void)
+{
+	if (get_memcfg_numaq())
+		return;
+	if (get_memcfg_from_srat())
+		return;
+	get_memcfg_numa_flat();
+}
+
 void __init initmem_init(void)
 {
 	int nid;
-- 
cgit v1.2.3


From e6df595b37c7c033ef7400b4fdd382a2dc4f4131 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:53 +0200
Subject: x86, NUMA: Move numa_nodes_parsed to numa.[hc]

Move numa_nodes_parsed from numa_64.[hc] to numa.[hc] to prepare for
NUMA init path unification.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/numa.h    | 1 +
 arch/x86/include/asm/numa_64.h | 4 ----
 arch/x86/mm/numa.c             | 1 +
 arch/x86/mm/numa_64.c          | 2 --
 4 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 5982d418c358..c24306cd1504 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -19,6 +19,7 @@
  * numa_cpu_node().
  */
 extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+extern nodemask_t numa_nodes_parsed __initdata;
 
 static inline void set_apicid_to_node(int apicid, s16 node)
 {
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 794da6de6892..e84113f8fff3 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -1,8 +1,6 @@
 #ifndef _ASM_X86_NUMA_64_H
 #define _ASM_X86_NUMA_64_H
 
-#include <linux/nodemask.h>
-
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 
 extern int numa_off;
@@ -17,8 +15,6 @@ extern unsigned long numa_free_all_bootmem(void);
  */
 #define NODE_MIN_SIZE (4*1024*1024)
 
-extern nodemask_t numa_nodes_parsed __initdata;
-
 extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
 extern void __init numa_set_distance(int from, int to, int distance);
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index e9005c4ea29a..cce174109ca9 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -6,6 +6,7 @@
 #include <asm/acpi.h>
 
 int __initdata numa_off;
+nodemask_t numa_nodes_parsed __initdata;
 
 static __init int numa_setup(char *opt)
 {
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 287ae798935f..70bd8221f928 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -26,8 +26,6 @@
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
-nodemask_t numa_nodes_parsed __initdata;
-
 static struct numa_meminfo numa_meminfo
 #ifndef CONFIG_MEMORY_HOTPLUG
 __initdata
-- 
cgit v1.2.3


From b0d310801a4c1f95b44357e4ebc22a9903e3bf3d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:53 +0200
Subject: x86-32, NUMA: implement temporary NUMA init shims

To help transition to common NUMA init, implement temporary 32bit
shims for numa_add_memblk() and numa_set_distance().
numa_add_memblk() registers the memblk and adjusts
node_start/end_pfn[].  numa_set_distance() is noop.

These shims will allow using 64bit NUMA init functions on 32bit and
gradual transition to common NUMA init path.

For detailed description, please read description of commits which
make use of the shim functions.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/numa.h    |  3 +++
 arch/x86/include/asm/numa_64.h |  3 ---
 arch/x86/mm/numa_32.c          | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index c24306cd1504..db449c7d89b3 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -21,6 +21,9 @@
 extern s16 __apicid_to_node[MAX_LOCAL_APIC];
 extern nodemask_t numa_nodes_parsed __initdata;
 
+extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+extern void __init numa_set_distance(int from, int to, int distance);
+
 static inline void set_apicid_to_node(int apicid, s16 node)
 {
 	__apicid_to_node[apicid] = node;
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index e84113f8fff3..506dd050ff30 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -15,9 +15,6 @@ extern unsigned long numa_free_all_bootmem(void);
  */
 #define NODE_MIN_SIZE (4*1024*1024)
 
-extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-extern void __init numa_set_distance(int from, int to, int distance);
-
 #ifdef CONFIG_NUMA_EMU
 #define FAKE_NODE_MIN_SIZE	((u64)32 << 20)
 #define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index abf1247a4c32..d0369a56f843 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -414,3 +414,37 @@ int memory_add_physaddr_to_nid(u64 addr)
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
+/* temporary shim, will go away soon */
+int __init numa_add_memblk(int nid, u64 start, u64 end)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = end >> PAGE_SHIFT;
+
+	printk(KERN_DEBUG "nid %d start_pfn %08lx end_pfn %08lx\n",
+	       nid, start_pfn, end_pfn);
+
+	if (start >= (u64)max_pfn << PAGE_SHIFT) {
+		printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
+		       start_pfn, end_pfn);
+		return 0;
+	}
+
+	node_set_online(nid);
+	memblock_x86_register_active_regions(nid, start_pfn,
+					     min(end_pfn, max_pfn));
+
+	if (!node_has_online_mem(nid)) {
+		node_start_pfn[nid] = start_pfn;
+		node_end_pfn[nid] = end_pfn;
+	} else {
+		node_start_pfn[nid] = min(node_start_pfn[nid], start_pfn);
+		node_end_pfn[nid] = max(node_end_pfn[nid], end_pfn);
+	}
+	return 0;
+}
+
+/* temporary shim, will go away soon */
+void __init numa_set_distance(int from, int to, int distance)
+{
+	/* nada */
+}
-- 
cgit v1.2.3


From 5acd91ab837c9d066af7345aea6462dc55695db7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:53 +0200
Subject: x86-32, NUMA: Replace srat_32.c with srat.c

SRAT support implementation in srat_32.c and srat.c are generally
similar; however, there are some differences.

First of all, 64bit implementation supports more types of SRAT
entries.  64bit supports x2apic, affinity, memory and SLIT.  32bit
only supports processor and memory.

Most other differences stem from different initialization protocols
employed by 64bit and 32bit NUMA init paths.

On 64bit,

* Mappings among PXM, node and apicid are directly done in each SRAT
  entry callback.

* Memory affinity information is passed to numa_add_memblk() which
  takes care of all interfacing with NUMA init.

* Doesn't directly initialize NUMA configurations.  All the
  information is recorded in numa_nodes_parsed and memblks.

On 32bit,

* Checks numa_off.

* Things go through one more level of indirection via private tables
  but eventually end up initializing the same mappings.

* node_start/end_pfn[] are initialized and
  memblock_x86_register_active_regions() is called for each memory
  chunk.

* node_set_online() is called for each online node.

* sort_node_map() is called.

There are also other minor differences in sanity checking and messages
but taking 64bit version should be good enough.

This patch drops the 32bit specific implementation and makes the 64bit
implementation common for both 32 and 64bit.

The init protocol differences are dealt with in two places - the
numa_add_memblk() shim added in the previous patch and new temporary
numa_32.c:get_memcfg_from_srat() which wraps invocation of
x86_acpi_numa_init().

The shim numa_add_memblk() handles the folowings.

* node_start/end_pfn[] initialization.

* node_set_online() for memory nodes.

* Invocation of memblock_x86_register_active_regions().

The shim get_memcfg_from_srat() handles the followings.

* numa_off check.

* node_set_online() for CPU nodes.

* sort_node_map() invocation.

* Clearing of numa_nodes_parsed and active_ranges on failure.

The shims are temporary and will be removed as the generic NUMA init
path in 32bit is replaced with 64bit one.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/mmzone_32.h |   2 -
 arch/x86/include/asm/srat.h      |  39 ------
 arch/x86/mm/Makefile             |   5 +-
 arch/x86/mm/numa_32.c            |  23 ++++
 arch/x86/mm/srat_32.c            | 281 ---------------------------------------
 5 files changed, 24 insertions(+), 326 deletions(-)
 delete mode 100644 arch/x86/include/asm/srat.h
 delete mode 100644 arch/x86/mm/srat_32.c

(limited to 'arch')

diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 73e5745aef34..5e83a416eca8 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -13,8 +13,6 @@ extern struct pglist_data *node_data[];
 #define NODE_DATA(nid)	(node_data[nid])
 
 #include <asm/numaq.h>
-/* summit or generic arch */
-#include <asm/srat.h>
 
 extern void resume_map_numa_kva(pgd_t *pgd);
 
diff --git a/arch/x86/include/asm/srat.h b/arch/x86/include/asm/srat.h
deleted file mode 100644
index b508d639d1a7..000000000000
--- a/arch/x86/include/asm/srat.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Some of the code in this file has been gleaned from the 64 bit
- * discontigmem support code base.
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to Pat Gaughen <gone@us.ibm.com>
- */
-
-#ifndef _ASM_X86_SRAT_H
-#define _ASM_X86_SRAT_H
-
-#ifdef CONFIG_ACPI_NUMA
-extern int get_memcfg_from_srat(void);
-#else
-static inline int get_memcfg_from_srat(void)
-{
-	return 0;
-}
-#endif
-
-#endif /* _ASM_X86_SRAT_H */
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 37e7043362a1..62997be33072 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -24,10 +24,7 @@ obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o
 
 obj-$(CONFIG_NUMA)		+= numa.o numa_$(BITS).o
 obj-$(CONFIG_AMD_NUMA)		+= amdtopology_64.o
-ifeq ($(CONFIG_ACPI_NUMA),y)
-obj-$(CONFIG_X86_64)		+= srat.o
-obj-$(CONFIG_X86_32)		+= srat_32.o
-endif
+obj-$(CONFIG_ACPI_NUMA)		+= srat.o
 obj-$(CONFIG_NUMA_EMU)		+= numa_emulation.o
 
 obj-$(CONFIG_HAVE_MEMBLOCK)		+= memblock.o
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index d0369a56f843..8641239a0667 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -332,6 +332,29 @@ static __init void init_alloc_remap(int nid)
 	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
 }
 
+static int get_memcfg_from_srat(void)
+{
+#ifdef CONFIG_ACPI_NUMA
+	int nid;
+
+	if (numa_off)
+		return 0;
+
+	if (x86_acpi_numa_init() < 0) {
+		nodes_clear(numa_nodes_parsed);
+		remove_all_active_ranges();
+		return 0;
+	}
+
+	for_each_node_mask(nid, numa_nodes_parsed)
+		node_set_online(nid);
+	sort_node_map();
+	return 1;
+#else
+	return 0;
+#endif
+}
+
 static void get_memcfg_numa(void)
 {
 	if (get_memcfg_numaq())
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
deleted file mode 100644
index 6b9bfd78bc35..000000000000
--- a/arch/x86/mm/srat_32.c
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Some of the code in this file has been gleaned from the 64 bit 
- * discontigmem support code base.
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.          
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to Pat Gaughen <gone@us.ibm.com>
- */
-#include <linux/mm.h>
-#include <linux/bootmem.h>
-#include <linux/memblock.h>
-#include <linux/mmzone.h>
-#include <linux/acpi.h>
-#include <linux/nodemask.h>
-#include <asm/srat.h>
-#include <asm/topology.h>
-#include <asm/smp.h>
-#include <asm/e820.h>
-
-/*
- * proximity macros and definitions
- */
-#define NODE_ARRAY_INDEX(x)	((x) / 8)	/* 8 bits/char */
-#define NODE_ARRAY_OFFSET(x)	((x) % 8)	/* 8 bits/char */
-#define BMAP_SET(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
-#define BMAP_TEST(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
-/* bitmap length; _PXM is at most 255 */
-#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) 
-static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN];	/* bitmap of proximity domains */
-
-#define MAX_CHUNKS_PER_NODE	3
-#define MAXCHUNKS		(MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
-struct node_memory_chunk_s {
-	unsigned long	start_pfn;
-	unsigned long	end_pfn;
-	u8	pxm;		// proximity domain of node
-	u8	nid;		// which cnode contains this chunk?
-	u8	bank;		// which mem bank on this node
-};
-static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
-
-static int __initdata num_memory_chunks; /* total number of memory chunks */
-static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
-
-int acpi_numa __initdata;
-
-static __init void bad_srat(void)
-{
-        printk(KERN_ERR "SRAT: SRAT not used.\n");
-        acpi_numa = -1;
-	num_memory_chunks = 0;
-}
-
-static __init inline int srat_disabled(void)
-{
-	return numa_off || acpi_numa < 0;
-}
-
-/* Identify CPU proximity domains */
-void __init
-acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
-{
-	if (srat_disabled())
-		return;
-	if (cpu_affinity->header.length !=
-	     sizeof(struct acpi_srat_cpu_affinity)) {
-		bad_srat();
-		return;
-	}
-
-	if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0)
-		return;		/* empty entry */
-
-	/* mark this node as "seen" in node bitmap */
-	BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
-
-	/* don't need to check apic_id here, because it is always 8 bits */
-	apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
-
-	printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
-		cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo);
-}
-
-/*
- * Identify memory proximity domains and hot-remove capabilities.
- * Fill node memory chunk list structure.
- */
-void __init
-acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
-{
-	unsigned long long paddr, size;
-	unsigned long start_pfn, end_pfn;
-	u8 pxm;
-	struct node_memory_chunk_s *p, *q, *pend;
-
-	if (srat_disabled())
-		return;
-	if (memory_affinity->header.length !=
-	     sizeof(struct acpi_srat_mem_affinity)) {
-		bad_srat();
-		return;
-	}
-
-	if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0)
-		return;		/* empty entry */
-
-	pxm = memory_affinity->proximity_domain & 0xff;
-
-	/* mark this node as "seen" in node bitmap */
-	BMAP_SET(pxm_bitmap, pxm);
-
-	/* calculate info for memory chunk structure */
-	paddr = memory_affinity->base_address;
-	size = memory_affinity->length;
-
-	start_pfn = paddr >> PAGE_SHIFT;
-	end_pfn = (paddr + size) >> PAGE_SHIFT;
-
-
-	if (num_memory_chunks >= MAXCHUNKS) {
-		printk(KERN_WARNING "Too many mem chunks in SRAT."
-			" Ignoring %lld MBytes at %llx\n",
-			size/(1024*1024), paddr);
-		return;
-	}
-
-	/* Insertion sort based on base address */
-	pend = &node_memory_chunk[num_memory_chunks];
-	for (p = &node_memory_chunk[0]; p < pend; p++) {
-		if (start_pfn < p->start_pfn)
-			break;
-	}
-	if (p < pend) {
-		for (q = pend; q >= p; q--)
-			*(q + 1) = *q;
-	}
-	p->start_pfn = start_pfn;
-	p->end_pfn = end_pfn;
-	p->pxm = pxm;
-
-	num_memory_chunks++;
-
-	printk(KERN_DEBUG "Memory range %08lx to %08lx"
-			  " in proximity domain %02x %s\n",
-		start_pfn, end_pfn,
-		pxm,
-		((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
-		 "enabled and removable" : "enabled" ) );
-}
-
-/* Callback for SLIT parsing */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
-}
-
-void acpi_numa_arch_fixup(void)
-{
-}
-/*
- * The SRAT table always lists ascending addresses, so can always
- * assume that the first "start" address that you see is the real
- * start of the node, and that the current "end" address is after
- * the previous one.
- */
-static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
-{
-	/*
-	 * Only add present memory as told by the e820.
-	 * There is no guarantee from the SRAT that the memory it
-	 * enumerates is present at boot time because it represents
-	 * *possible* memory hotplug areas the same as normal RAM.
-	 */
-	if (memory_chunk->start_pfn >= max_pfn) {
-		printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
-			memory_chunk->start_pfn, memory_chunk->end_pfn);
-		return -1;
-	}
-	if (memory_chunk->nid != nid)
-		return -1;
-
-	if (!node_has_online_mem(nid))
-		node_start_pfn[nid] = memory_chunk->start_pfn;
-
-	if (node_start_pfn[nid] > memory_chunk->start_pfn)
-		node_start_pfn[nid] = memory_chunk->start_pfn;
-
-	if (node_end_pfn[nid] < memory_chunk->end_pfn)
-		node_end_pfn[nid] = memory_chunk->end_pfn;
-
-	return 0;
-}
-
-int __init get_memcfg_from_srat(void)
-{
-	int i, j;
-
-	if (srat_disabled())
-		goto out_fail;
-
-	if (acpi_numa_init() < 0)
-		goto out_fail;
-
-	if (num_memory_chunks == 0) {
-		printk(KERN_DEBUG
-			 "could not find any ACPI SRAT memory areas.\n");
-		goto out_fail;
-	}
-
-	/* Calculate total number of nodes in system from PXM bitmap and create
-	 * a set of sequential node IDs starting at zero.  (ACPI doesn't seem
-	 * to specify the range of _PXM values.)
-	 */
-	/*
-	 * MCD - we no longer HAVE to number nodes sequentially.  PXM domain
-	 * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically
-	 * 32, so we will continue numbering them in this manner until MAX_NUMNODES
-	 * approaches MAX_PXM_DOMAINS for i386.
-	 */
-	nodes_clear(node_online_map);
-	for (i = 0; i < MAX_PXM_DOMAINS; i++) {
-		if (BMAP_TEST(pxm_bitmap, i)) {
-			int nid = acpi_map_pxm_to_node(i);
-			node_set_online(nid);
-		}
-	}
-	BUG_ON(num_online_nodes() == 0);
-
-	/* set cnode id in memory chunk structure */
-	for (i = 0; i < num_memory_chunks; i++)
-		node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm);
-
-	printk(KERN_DEBUG "pxm bitmap: ");
-	for (i = 0; i < sizeof(pxm_bitmap); i++) {
-		printk(KERN_CONT "%02x ", pxm_bitmap[i]);
-	}
-	printk(KERN_CONT "\n");
-	printk(KERN_DEBUG "Number of logical nodes in system = %d\n",
-			 num_online_nodes());
-	printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
-			 num_memory_chunks);
-
-	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
-
-	for (j = 0; j < num_memory_chunks; j++){
-		struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
-		printk(KERN_DEBUG
-			"chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
-		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
-		if (node_read_chunk(chunk->nid, chunk))
-			continue;
-
-		memblock_x86_register_active_regions(chunk->nid, chunk->start_pfn,
-					     min(chunk->end_pfn, max_pfn));
-	}
-	/* for out of order entries in SRAT */
-	sort_node_map();
-
-	return 1;
-out_fail:
-	printk(KERN_DEBUG "failed to get NUMA memory information from SRAT"
-			" table\n");
-	return 0;
-}
-- 
cgit v1.2.3


From 299a180aec6a8ee3069cf0fe90d722ac20c1f837 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:53 +0200
Subject: x86-32, NUMA: Update numaq to use new NUMA init protocol

Update numaq such that it calls numa_add_memblk() and sets
numa_nodes_parsed instead of directly diddling with NUMA states.  The
original get_memcfg_numaq() is renamed to numaq_numa_init() and new
get_memcfg_numaq() is created in numa_32.c.

The shim numa_add_memblk() implementation handles node_start/end_pfn[]
and node_set_online() for nodes with memory.  The new
get_memcfg_numaq() exactly the same with get_memcfg_from_srat() other
than calling the numaq init function.  Things get_memcfgs_numaq() do
are not strictly necessary for numaq but added for consistency and to
help unifying NUMA init handling.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/numaq.h    |  7 +------
 arch/x86/kernel/apic/numaq_32.c | 28 ++++++++++------------------
 arch/x86/mm/numa_32.c           | 23 +++++++++++++++++++++++
 3 files changed, 34 insertions(+), 24 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h
index 37c516545ec8..c3b3c322fd87 100644
--- a/arch/x86/include/asm/numaq.h
+++ b/arch/x86/include/asm/numaq.h
@@ -29,7 +29,7 @@
 #ifdef CONFIG_X86_NUMAQ
 
 extern int found_numaq;
-extern int get_memcfg_numaq(void);
+extern int numaq_numa_init(void);
 extern int pci_numaq_init(void);
 
 extern void *xquad_portio;
@@ -166,11 +166,6 @@ struct sys_cfg_data {
 
 void numaq_tsc_disable(void);
 
-#else
-static inline int get_memcfg_numaq(void)
-{
-	return 0;
-}
 #endif /* CONFIG_X86_NUMAQ */
 #endif /* _ASM_X86_NUMAQ_H */
 
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 41b8b29d36f5..30f13319e24b 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -48,8 +48,6 @@
 #include <asm/e820.h>
 #include <asm/ipi.h>
 
-#define	MB_TO_PAGES(addr)		((addr) << (20 - PAGE_SHIFT))
-
 int found_numaq;
 
 /*
@@ -79,25 +77,20 @@ int					quad_local_to_mp_bus_id[NR_CPUS/4][4];
 static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
 {
 	struct eachquadmem *eq = scd->eq + node;
+	u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20;
+	u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20;
+	int ret;
 
-	node_set_online(node);
-
-	/* Convert to pages */
-	node_start_pfn[node] =
-		 MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size);
-
-	node_end_pfn[node] =
-		 MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
-
-	memblock_x86_register_active_regions(node, node_start_pfn[node],
-						node_end_pfn[node]);
+	node_set(node, numa_nodes_parsed);
+	ret = numa_add_memblk(node, start, end);
+	BUG_ON(ret < 0);
 }
 
 /*
  * Function: smp_dump_qct()
  *
  * Description: gets memory layout from the quad config table.  This
- * function also updates node_online_map with the nodes (quads) present.
+ * function also updates numa_nodes_parsed with the nodes (quads) present.
  */
 static void __init smp_dump_qct(void)
 {
@@ -106,7 +99,6 @@ static void __init smp_dump_qct(void)
 
 	scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
 
-	nodes_clear(node_online_map);
 	for_each_node(node) {
 		if (scd->quads_present31_0 & (1 << node))
 			numaq_register_node(node, scd);
@@ -276,14 +268,14 @@ static __init void early_check_numaq(void)
 	}
 }
 
-int __init get_memcfg_numaq(void)
+int __init numaq_numa_init(void)
 {
 	early_check_numaq();
 	if (!found_numaq)
-		return 0;
+		return -ENOENT;
 	smp_dump_qct();
 
-	return 1;
+	return 0;
 }
 
 #define NUMAQ_APIC_DFR_VALUE	(APIC_DFR_CLUSTER)
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 8641239a0667..14135e52cef5 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -332,6 +332,29 @@ static __init void init_alloc_remap(int nid)
 	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
 }
 
+static int get_memcfg_numaq(void)
+{
+#ifdef CONFIG_X86_NUMAQ
+	int nid;
+
+	if (numa_off)
+		return 0;
+
+	if (numaq_numa_init() < 0) {
+		nodes_clear(numa_nodes_parsed);
+		remove_all_active_ranges();
+		return 0;
+	}
+
+	for_each_node_mask(nid, numa_nodes_parsed)
+		node_set_online(nid);
+	sort_node_map();
+	return 1;
+#else
+	return 0;
+#endif
+}
+
 static int get_memcfg_from_srat(void)
 {
 #ifdef CONFIG_ACPI_NUMA
-- 
cgit v1.2.3


From a4106eae650a4d5d30fcdd36d998edfa5ccb0ec4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:53 +0200
Subject: x86, NUMA: Move NUMA init logic from numa_64.c to numa.c

Move the generic 64bit NUMA init machinery from numa_64.c to numa.c.

* node_data[], numa_mem_info and numa_distance
* numa_add_memblk[_to](), numa_remove_memblk[_from]()
* numa_set_distance() and friends
* numa_init() and all the numa_meminfo handling helpers called from it
* dummy_numa_init()
* memory_add_physaddr_to_nid()

A new function x86_numa_init() is added and the content of
numa_64.c::initmem_init() is moved into it.  initmem_init() now simply
calls x86_numa_init().

Constants and numa_off declaration are moved from numa_{32|64}.h to
numa.h.

This is code reorganization and doesn't involve any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/numa.h    |  16 ++
 arch/x86/include/asm/numa_32.h |   2 -
 arch/x86/include/asm/numa_64.h |  19 --
 arch/x86/mm/numa.c             | 523 ++++++++++++++++++++++++++++++++++++++++-
 arch/x86/mm/numa_64.c          | 503 +--------------------------------------
 arch/x86/mm/numa_internal.h    |   2 +
 6 files changed, 539 insertions(+), 526 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index db449c7d89b3..74540865dd83 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -9,6 +9,16 @@
 #ifdef CONFIG_NUMA
 
 #define NR_NODE_MEMBLKS		(MAX_NUMNODES*2)
+#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
+
+/*
+ * Too small node sizes may confuse the VM badly. Usually they
+ * result from BIOS bugs. So dont recognize nodes as standalone
+ * NUMA entities that have less than this amount of RAM listed:
+ */
+#define NODE_MIN_SIZE (4*1024*1024)
+
+extern int numa_off;
 
 /*
  * __apicid_to_node[] stores the raw mapping between physical apicid and
@@ -68,4 +78,10 @@ static inline void numa_remove_cpu(int cpu)		{ }
 void debug_cpumask_set_cpu(int cpu, int node, bool enable);
 #endif
 
+#ifdef CONFIG_NUMA_EMU
+#define FAKE_NODE_MIN_SIZE	((u64)32 << 20)
+#define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
+void numa_emu_cmdline(char *);
+#endif /* CONFIG_NUMA_EMU */
+
 #endif	/* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index 7e54b64623a9..e7d6b8254742 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -1,8 +1,6 @@
 #ifndef _ASM_X86_NUMA_32_H
 #define _ASM_X86_NUMA_32_H
 
-extern int numa_off;
-
 #ifdef CONFIG_HIGHMEM
 extern void set_highmem_pages_init(void);
 #else
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 506dd050ff30..0c05f7ae46e8 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -1,25 +1,6 @@
 #ifndef _ASM_X86_NUMA_64_H
 #define _ASM_X86_NUMA_64_H
 
-#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
-
-extern int numa_off;
-
 extern unsigned long numa_free_all_bootmem(void);
 
-#ifdef CONFIG_NUMA
-/*
- * Too small node sizes may confuse the VM badly. Usually they
- * result from BIOS bugs. So dont recognize nodes as standalone
- * NUMA entities that have less than this amount of RAM listed:
- */
-#define NODE_MIN_SIZE (4*1024*1024)
-
-#ifdef CONFIG_NUMA_EMU
-#define FAKE_NODE_MIN_SIZE	((u64)32 << 20)
-#define FAKE_NODE_MIN_HASH_MASK	(~(FAKE_NODE_MIN_SIZE - 1UL))
-void numa_emu_cmdline(char *);
-#endif /* CONFIG_NUMA_EMU */
-#endif
-
 #endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index cce174109ca9..ed1daba54906 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -1,13 +1,42 @@
 /* Common code for 32 and 64-bit NUMA */
-#include <linux/topology.h>
-#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/init.h>
 #include <linux/bootmem.h>
-#include <asm/numa.h>
+#include <linux/memblock.h>
+#include <linux/mmzone.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/nodemask.h>
+#include <linux/sched.h>
+#include <linux/topology.h>
+
+#include <asm/e820.h>
+#include <asm/proto.h>
+#include <asm/dma.h>
 #include <asm/acpi.h>
+#include <asm/amd_nb.h>
+
+#include "numa_internal.h"
 
 int __initdata numa_off;
 nodemask_t numa_nodes_parsed __initdata;
 
+#ifdef CONFIG_X86_64
+struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_data);
+
+static struct numa_meminfo numa_meminfo
+#ifndef CONFIG_MEMORY_HOTPLUG
+__initdata
+#endif
+;
+
+static int numa_distance_cnt;
+static u8 *numa_distance;
+#endif
+
 static __init int numa_setup(char *opt)
 {
 	if (!opt)
@@ -105,6 +134,392 @@ void __init setup_node_to_cpumask_map(void)
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
+#ifdef CONFIG_X86_64
+static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
+				     struct numa_meminfo *mi)
+{
+	/* ignore zero length blks */
+	if (start == end)
+		return 0;
+
+	/* whine about and ignore invalid blks */
+	if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
+		pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
+			   nid, start, end);
+		return 0;
+	}
+
+	if (mi->nr_blks >= NR_NODE_MEMBLKS) {
+		pr_err("NUMA: too many memblk ranges\n");
+		return -EINVAL;
+	}
+
+	mi->blk[mi->nr_blks].start = start;
+	mi->blk[mi->nr_blks].end = end;
+	mi->blk[mi->nr_blks].nid = nid;
+	mi->nr_blks++;
+	return 0;
+}
+
+/**
+ * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
+ * @idx: Index of memblk to remove
+ * @mi: numa_meminfo to remove memblk from
+ *
+ * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
+ * decrementing @mi->nr_blks.
+ */
+void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
+{
+	mi->nr_blks--;
+	memmove(&mi->blk[idx], &mi->blk[idx + 1],
+		(mi->nr_blks - idx) * sizeof(mi->blk[0]));
+}
+
+/**
+ * numa_add_memblk - Add one numa_memblk to numa_meminfo
+ * @nid: NUMA node ID of the new memblk
+ * @start: Start address of the new memblk
+ * @end: End address of the new memblk
+ *
+ * Add a new memblk to the default numa_meminfo.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init numa_add_memblk(int nid, u64 start, u64 end)
+{
+	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
+}
+
+/* Initialize bootmem allocator for a node */
+static void __init
+setup_node_bootmem(int nid, unsigned long start, unsigned long end)
+{
+	const u64 nd_low = (u64)MAX_DMA_PFN << PAGE_SHIFT;
+	const u64 nd_high = (u64)max_pfn_mapped << PAGE_SHIFT;
+	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
+	unsigned long nd_pa;
+	int tnid;
+
+	/*
+	 * Don't confuse VM with a node that doesn't have the
+	 * minimum amount of memory:
+	 */
+	if (end && (end - start) < NODE_MIN_SIZE)
+		return;
+
+	start = roundup(start, ZONE_ALIGN);
+
+	printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n",
+	       nid, start, end);
+
+	/*
+	 * Try to allocate node data on local node and then fall back to
+	 * all nodes.  Never allocate in DMA zone.
+	 */
+	nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high,
+						nd_size, SMP_CACHE_BYTES);
+	if (nd_pa == MEMBLOCK_ERROR)
+		nd_pa = memblock_find_in_range(nd_low, nd_high,
+					       nd_size, SMP_CACHE_BYTES);
+	if (nd_pa == MEMBLOCK_ERROR) {
+		pr_err("Cannot find %lu bytes in node %d\n", nd_size, nid);
+		return;
+	}
+	memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
+
+	/* report and initialize */
+	printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n",
+	       nd_pa, nd_pa + nd_size - 1);
+	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+	if (tnid != nid)
+		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nid, tnid);
+
+	node_data[nid] = __va(nd_pa);
+	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+	NODE_DATA(nid)->node_id = nid;
+	NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT;
+	NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT;
+
+	node_set_online(nid);
+}
+
+/**
+ * numa_cleanup_meminfo - Cleanup a numa_meminfo
+ * @mi: numa_meminfo to clean up
+ *
+ * Sanitize @mi by merging and removing unncessary memblks.  Also check for
+ * conflicts and clear unused memblks.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
+{
+	const u64 low = 0;
+	const u64 high = (u64)max_pfn << PAGE_SHIFT;
+	int i, j, k;
+
+	for (i = 0; i < mi->nr_blks; i++) {
+		struct numa_memblk *bi = &mi->blk[i];
+
+		/* make sure all blocks are inside the limits */
+		bi->start = max(bi->start, low);
+		bi->end = min(bi->end, high);
+
+		/* and there's no empty block */
+		if (bi->start >= bi->end) {
+			numa_remove_memblk_from(i--, mi);
+			continue;
+		}
+
+		for (j = i + 1; j < mi->nr_blks; j++) {
+			struct numa_memblk *bj = &mi->blk[j];
+			unsigned long start, end;
+
+			/*
+			 * See whether there are overlapping blocks.  Whine
+			 * about but allow overlaps of the same nid.  They
+			 * will be merged below.
+			 */
+			if (bi->end > bj->start && bi->start < bj->end) {
+				if (bi->nid != bj->nid) {
+					pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
+					       bi->nid, bi->start, bi->end,
+					       bj->nid, bj->start, bj->end);
+					return -EINVAL;
+				}
+				pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
+					   bi->nid, bi->start, bi->end,
+					   bj->start, bj->end);
+			}
+
+			/*
+			 * Join together blocks on the same node, holes
+			 * between which don't overlap with memory on other
+			 * nodes.
+			 */
+			if (bi->nid != bj->nid)
+				continue;
+			start = max(min(bi->start, bj->start), low);
+			end = min(max(bi->end, bj->end), high);
+			for (k = 0; k < mi->nr_blks; k++) {
+				struct numa_memblk *bk = &mi->blk[k];
+
+				if (bi->nid == bk->nid)
+					continue;
+				if (start < bk->end && end > bk->start)
+					break;
+			}
+			if (k < mi->nr_blks)
+				continue;
+			printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
+			       bi->nid, bi->start, bi->end, bj->start, bj->end,
+			       start, end);
+			bi->start = start;
+			bi->end = end;
+			numa_remove_memblk_from(j--, mi);
+		}
+	}
+
+	for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
+		mi->blk[i].start = mi->blk[i].end = 0;
+		mi->blk[i].nid = NUMA_NO_NODE;
+	}
+
+	return 0;
+}
+
+/*
+ * Set nodes, which have memory in @mi, in *@nodemask.
+ */
+static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
+					      const struct numa_meminfo *mi)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
+		if (mi->blk[i].start != mi->blk[i].end &&
+		    mi->blk[i].nid != NUMA_NO_NODE)
+			node_set(mi->blk[i].nid, *nodemask);
+}
+
+/**
+ * numa_reset_distance - Reset NUMA distance table
+ *
+ * The current table is freed.  The next numa_set_distance() call will
+ * create a new one.
+ */
+void __init numa_reset_distance(void)
+{
+	size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
+
+	/* numa_distance could be 1LU marking allocation failure, test cnt */
+	if (numa_distance_cnt)
+		memblock_x86_free_range(__pa(numa_distance),
+					__pa(numa_distance) + size);
+	numa_distance_cnt = 0;
+	numa_distance = NULL;	/* enable table creation */
+}
+
+static int __init numa_alloc_distance(void)
+{
+	nodemask_t nodes_parsed;
+	size_t size;
+	int i, j, cnt = 0;
+	u64 phys;
+
+	/* size the new table and allocate it */
+	nodes_parsed = numa_nodes_parsed;
+	numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
+
+	for_each_node_mask(i, nodes_parsed)
+		cnt = i;
+	cnt++;
+	size = cnt * cnt * sizeof(numa_distance[0]);
+
+	phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT,
+				      size, PAGE_SIZE);
+	if (phys == MEMBLOCK_ERROR) {
+		pr_warning("NUMA: Warning: can't allocate distance table!\n");
+		/* don't retry until explicitly reset */
+		numa_distance = (void *)1LU;
+		return -ENOMEM;
+	}
+	memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
+
+	numa_distance = __va(phys);
+	numa_distance_cnt = cnt;
+
+	/* fill with the default distances */
+	for (i = 0; i < cnt; i++)
+		for (j = 0; j < cnt; j++)
+			numa_distance[i * cnt + j] = i == j ?
+				LOCAL_DISTANCE : REMOTE_DISTANCE;
+	printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
+
+	return 0;
+}
+
+/**
+ * numa_set_distance - Set NUMA distance from one NUMA to another
+ * @from: the 'from' node to set distance
+ * @to: the 'to'  node to set distance
+ * @distance: NUMA distance
+ *
+ * Set the distance from node @from to @to to @distance.  If distance table
+ * doesn't exist, one which is large enough to accommodate all the currently
+ * known nodes will be created.
+ *
+ * If such table cannot be allocated, a warning is printed and further
+ * calls are ignored until the distance table is reset with
+ * numa_reset_distance().
+ *
+ * If @from or @to is higher than the highest known node at the time of
+ * table creation or @distance doesn't make sense, the call is ignored.
+ * This is to allow simplification of specific NUMA config implementations.
+ */
+void __init numa_set_distance(int from, int to, int distance)
+{
+	if (!numa_distance && numa_alloc_distance() < 0)
+		return;
+
+	if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
+		printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
+			    from, to, distance);
+		return;
+	}
+
+	if ((u8)distance != distance ||
+	    (from == to && distance != LOCAL_DISTANCE)) {
+		pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+			     from, to, distance);
+		return;
+	}
+
+	numa_distance[from * numa_distance_cnt + to] = distance;
+}
+
+int __node_distance(int from, int to)
+{
+	if (from >= numa_distance_cnt || to >= numa_distance_cnt)
+		return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+	return numa_distance[from * numa_distance_cnt + to];
+}
+EXPORT_SYMBOL(__node_distance);
+
+/*
+ * Sanity check to catch more bad NUMA configurations (they are amazingly
+ * common).  Make sure the nodes cover all memory.
+ */
+static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
+{
+	unsigned long numaram, e820ram;
+	int i;
+
+	numaram = 0;
+	for (i = 0; i < mi->nr_blks; i++) {
+		unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
+		unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
+		numaram += e - s;
+		numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
+		if ((long)numaram < 0)
+			numaram = 0;
+	}
+
+	e820ram = max_pfn - (memblock_x86_hole_size(0,
+					max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
+	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
+	if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
+		printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
+		       (numaram << PAGE_SHIFT) >> 20,
+		       (e820ram << PAGE_SHIFT) >> 20);
+		return false;
+	}
+	return true;
+}
+
+static int __init numa_register_memblks(struct numa_meminfo *mi)
+{
+	int i, nid;
+
+	/* Account for nodes with cpus and no memory */
+	node_possible_map = numa_nodes_parsed;
+	numa_nodemask_from_meminfo(&node_possible_map, mi);
+	if (WARN_ON(nodes_empty(node_possible_map)))
+		return -EINVAL;
+
+	for (i = 0; i < mi->nr_blks; i++)
+		memblock_x86_register_active_regions(mi->blk[i].nid,
+					mi->blk[i].start >> PAGE_SHIFT,
+					mi->blk[i].end >> PAGE_SHIFT);
+
+	/* for out of order entries */
+	sort_node_map();
+	if (!numa_meminfo_cover_memory(mi))
+		return -EINVAL;
+
+	/* Finally register nodes. */
+	for_each_node_mask(nid, node_possible_map) {
+		u64 start = (u64)max_pfn << PAGE_SHIFT;
+		u64 end = 0;
+
+		for (i = 0; i < mi->nr_blks; i++) {
+			if (nid != mi->blk[i].nid)
+				continue;
+			start = min(mi->blk[i].start, start);
+			end = max(mi->blk[i].end, end);
+		}
+
+		if (start < end)
+			setup_node_bootmem(nid, start, end);
+	}
+
+	return 0;
+}
+#endif
+
 /*
  * There are unfortunately some poorly designed mainboards around that
  * only connect memory to a single CPU. This breaks the 1:1 cpu->node
@@ -127,6 +542,93 @@ void __init numa_init_array(void)
 	}
 }
 
+#ifdef CONFIG_X86_64
+static int __init numa_init(int (*init_func)(void))
+{
+	int i;
+	int ret;
+
+	for (i = 0; i < MAX_LOCAL_APIC; i++)
+		set_apicid_to_node(i, NUMA_NO_NODE);
+
+	nodes_clear(numa_nodes_parsed);
+	nodes_clear(node_possible_map);
+	nodes_clear(node_online_map);
+	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
+	remove_all_active_ranges();
+	numa_reset_distance();
+
+	ret = init_func();
+	if (ret < 0)
+		return ret;
+	ret = numa_cleanup_meminfo(&numa_meminfo);
+	if (ret < 0)
+		return ret;
+
+	numa_emulation(&numa_meminfo, numa_distance_cnt);
+
+	ret = numa_register_memblks(&numa_meminfo);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < nr_cpu_ids; i++) {
+		int nid = early_cpu_to_node(i);
+
+		if (nid == NUMA_NO_NODE)
+			continue;
+		if (!node_online(nid))
+			numa_clear_node(i);
+	}
+	numa_init_array();
+	return 0;
+}
+
+/**
+ * dummy_numa_init - Fallback dummy NUMA init
+ *
+ * Used if there's no underlying NUMA architecture, NUMA initialization
+ * fails, or NUMA is disabled on the command line.
+ *
+ * Must online at least one node and add memory blocks that cover all
+ * allowed memory.  This function must not fail.
+ */
+static int __init dummy_numa_init(void)
+{
+	printk(KERN_INFO "%s\n",
+	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
+	printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
+	       0LU, max_pfn << PAGE_SHIFT);
+
+	node_set(0, numa_nodes_parsed);
+	numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
+
+	return 0;
+}
+
+/**
+ * x86_numa_init - Initialize NUMA
+ *
+ * Try each configured NUMA initialization method until one succeeds.  The
+ * last fallback is dummy single node config encomapssing whole memory and
+ * never fails.
+ */
+void __init x86_numa_init(void)
+{
+	if (!numa_off) {
+#ifdef CONFIG_ACPI_NUMA
+		if (!numa_init(x86_acpi_numa_init))
+			return;
+#endif
+#ifdef CONFIG_AMD_NUMA
+		if (!numa_init(amd_numa_init))
+			return;
+#endif
+	}
+
+	numa_init(dummy_numa_init);
+}
+#endif
+
 static __init int find_near_online_node(int node)
 {
 	int n, val;
@@ -292,3 +794,18 @@ const struct cpumask *cpumask_of_node(int node)
 EXPORT_SYMBOL(cpumask_of_node);
 
 #endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_MEMORY_HOTPLUG)
+int memory_add_physaddr_to_nid(u64 start)
+{
+	struct numa_meminfo *mi = &numa_meminfo;
+	int nid = mi->blk[0].nid;
+	int i;
+
+	for (i = 0; i < mi->nr_blks; i++)
+		if (mi->blk[i].start <= start && mi->blk[i].end > start)
+			nid = mi->blk[i].nid;
+	return nid;
+}
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 70bd8221f928..dd27f401f0a0 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -2,499 +2,13 @@
  * Generic VM initialization for x86-64 NUMA setups.
  * Copyright 2002,2003 Andi Kleen, SuSE Labs.
  */
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/init.h>
 #include <linux/bootmem.h>
-#include <linux/memblock.h>
-#include <linux/mmzone.h>
-#include <linux/ctype.h>
-#include <linux/module.h>
-#include <linux/nodemask.h>
-#include <linux/sched.h>
-#include <linux/acpi.h>
-
-#include <asm/e820.h>
-#include <asm/proto.h>
-#include <asm/dma.h>
-#include <asm/acpi.h>
-#include <asm/amd_nb.h>
 
 #include "numa_internal.h"
 
-struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_data);
-
-static struct numa_meminfo numa_meminfo
-#ifndef CONFIG_MEMORY_HOTPLUG
-__initdata
-#endif
-;
-
-static int numa_distance_cnt;
-static u8 *numa_distance;
-
-static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
-				     struct numa_meminfo *mi)
-{
-	/* ignore zero length blks */
-	if (start == end)
-		return 0;
-
-	/* whine about and ignore invalid blks */
-	if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
-		pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
-			   nid, start, end);
-		return 0;
-	}
-
-	if (mi->nr_blks >= NR_NODE_MEMBLKS) {
-		pr_err("NUMA: too many memblk ranges\n");
-		return -EINVAL;
-	}
-
-	mi->blk[mi->nr_blks].start = start;
-	mi->blk[mi->nr_blks].end = end;
-	mi->blk[mi->nr_blks].nid = nid;
-	mi->nr_blks++;
-	return 0;
-}
-
-/**
- * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
- * @idx: Index of memblk to remove
- * @mi: numa_meminfo to remove memblk from
- *
- * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
- * decrementing @mi->nr_blks.
- */
-void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
-{
-	mi->nr_blks--;
-	memmove(&mi->blk[idx], &mi->blk[idx + 1],
-		(mi->nr_blks - idx) * sizeof(mi->blk[0]));
-}
-
-/**
- * numa_add_memblk - Add one numa_memblk to numa_meminfo
- * @nid: NUMA node ID of the new memblk
- * @start: Start address of the new memblk
- * @end: End address of the new memblk
- *
- * Add a new memblk to the default numa_meminfo.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init numa_add_memblk(int nid, u64 start, u64 end)
-{
-	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
-}
-
-/* Initialize bootmem allocator for a node */
-static void __init
-setup_node_bootmem(int nid, unsigned long start, unsigned long end)
-{
-	const u64 nd_low = (u64)MAX_DMA_PFN << PAGE_SHIFT;
-	const u64 nd_high = (u64)max_pfn_mapped << PAGE_SHIFT;
-	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
-	unsigned long nd_pa;
-	int tnid;
-
-	/*
-	 * Don't confuse VM with a node that doesn't have the
-	 * minimum amount of memory:
-	 */
-	if (end && (end - start) < NODE_MIN_SIZE)
-		return;
-
-	start = roundup(start, ZONE_ALIGN);
-
-	printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n",
-	       nid, start, end);
-
-	/*
-	 * Try to allocate node data on local node and then fall back to
-	 * all nodes.  Never allocate in DMA zone.
-	 */
-	nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high,
-						nd_size, SMP_CACHE_BYTES);
-	if (nd_pa == MEMBLOCK_ERROR)
-		nd_pa = memblock_find_in_range(nd_low, nd_high,
-					       nd_size, SMP_CACHE_BYTES);
-	if (nd_pa == MEMBLOCK_ERROR) {
-		pr_err("Cannot find %lu bytes in node %d\n", nd_size, nid);
-		return;
-	}
-	memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
-
-	/* report and initialize */
-	printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n",
-	       nd_pa, nd_pa + nd_size - 1);
-	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
-	if (tnid != nid)
-		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nid, tnid);
-
-	node_data[nid] = __va(nd_pa);
-	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
-	NODE_DATA(nid)->node_id = nid;
-	NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT;
-	NODE_DATA(nid)->node_spanned_pages = (end - start) >> PAGE_SHIFT;
-
-	node_set_online(nid);
-}
-
-/**
- * numa_cleanup_meminfo - Cleanup a numa_meminfo
- * @mi: numa_meminfo to clean up
- *
- * Sanitize @mi by merging and removing unncessary memblks.  Also check for
- * conflicts and clear unused memblks.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
-{
-	const u64 low = 0;
-	const u64 high = (u64)max_pfn << PAGE_SHIFT;
-	int i, j, k;
-
-	for (i = 0; i < mi->nr_blks; i++) {
-		struct numa_memblk *bi = &mi->blk[i];
-
-		/* make sure all blocks are inside the limits */
-		bi->start = max(bi->start, low);
-		bi->end = min(bi->end, high);
-
-		/* and there's no empty block */
-		if (bi->start >= bi->end) {
-			numa_remove_memblk_from(i--, mi);
-			continue;
-		}
-
-		for (j = i + 1; j < mi->nr_blks; j++) {
-			struct numa_memblk *bj = &mi->blk[j];
-			unsigned long start, end;
-
-			/*
-			 * See whether there are overlapping blocks.  Whine
-			 * about but allow overlaps of the same nid.  They
-			 * will be merged below.
-			 */
-			if (bi->end > bj->start && bi->start < bj->end) {
-				if (bi->nid != bj->nid) {
-					pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
-					       bi->nid, bi->start, bi->end,
-					       bj->nid, bj->start, bj->end);
-					return -EINVAL;
-				}
-				pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
-					   bi->nid, bi->start, bi->end,
-					   bj->start, bj->end);
-			}
-
-			/*
-			 * Join together blocks on the same node, holes
-			 * between which don't overlap with memory on other
-			 * nodes.
-			 */
-			if (bi->nid != bj->nid)
-				continue;
-			start = max(min(bi->start, bj->start), low);
-			end = min(max(bi->end, bj->end), high);
-			for (k = 0; k < mi->nr_blks; k++) {
-				struct numa_memblk *bk = &mi->blk[k];
-
-				if (bi->nid == bk->nid)
-					continue;
-				if (start < bk->end && end > bk->start)
-					break;
-			}
-			if (k < mi->nr_blks)
-				continue;
-			printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
-			       bi->nid, bi->start, bi->end, bj->start, bj->end,
-			       start, end);
-			bi->start = start;
-			bi->end = end;
-			numa_remove_memblk_from(j--, mi);
-		}
-	}
-
-	for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
-		mi->blk[i].start = mi->blk[i].end = 0;
-		mi->blk[i].nid = NUMA_NO_NODE;
-	}
-
-	return 0;
-}
-
-/*
- * Set nodes, which have memory in @mi, in *@nodemask.
- */
-static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
-					      const struct numa_meminfo *mi)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
-		if (mi->blk[i].start != mi->blk[i].end &&
-		    mi->blk[i].nid != NUMA_NO_NODE)
-			node_set(mi->blk[i].nid, *nodemask);
-}
-
-/**
- * numa_reset_distance - Reset NUMA distance table
- *
- * The current table is freed.  The next numa_set_distance() call will
- * create a new one.
- */
-void __init numa_reset_distance(void)
-{
-	size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
-
-	/* numa_distance could be 1LU marking allocation failure, test cnt */
-	if (numa_distance_cnt)
-		memblock_x86_free_range(__pa(numa_distance),
-					__pa(numa_distance) + size);
-	numa_distance_cnt = 0;
-	numa_distance = NULL;	/* enable table creation */
-}
-
-static int __init numa_alloc_distance(void)
-{
-	nodemask_t nodes_parsed;
-	size_t size;
-	int i, j, cnt = 0;
-	u64 phys;
-
-	/* size the new table and allocate it */
-	nodes_parsed = numa_nodes_parsed;
-	numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
-
-	for_each_node_mask(i, nodes_parsed)
-		cnt = i;
-	cnt++;
-	size = cnt * cnt * sizeof(numa_distance[0]);
-
-	phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT,
-				      size, PAGE_SIZE);
-	if (phys == MEMBLOCK_ERROR) {
-		pr_warning("NUMA: Warning: can't allocate distance table!\n");
-		/* don't retry until explicitly reset */
-		numa_distance = (void *)1LU;
-		return -ENOMEM;
-	}
-	memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
-
-	numa_distance = __va(phys);
-	numa_distance_cnt = cnt;
-
-	/* fill with the default distances */
-	for (i = 0; i < cnt; i++)
-		for (j = 0; j < cnt; j++)
-			numa_distance[i * cnt + j] = i == j ?
-				LOCAL_DISTANCE : REMOTE_DISTANCE;
-	printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
-
-	return 0;
-}
-
-/**
- * numa_set_distance - Set NUMA distance from one NUMA to another
- * @from: the 'from' node to set distance
- * @to: the 'to'  node to set distance
- * @distance: NUMA distance
- *
- * Set the distance from node @from to @to to @distance.  If distance table
- * doesn't exist, one which is large enough to accommodate all the currently
- * known nodes will be created.
- *
- * If such table cannot be allocated, a warning is printed and further
- * calls are ignored until the distance table is reset with
- * numa_reset_distance().
- *
- * If @from or @to is higher than the highest known node at the time of
- * table creation or @distance doesn't make sense, the call is ignored.
- * This is to allow simplification of specific NUMA config implementations.
- */
-void __init numa_set_distance(int from, int to, int distance)
-{
-	if (!numa_distance && numa_alloc_distance() < 0)
-		return;
-
-	if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
-		printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
-			    from, to, distance);
-		return;
-	}
-
-	if ((u8)distance != distance ||
-	    (from == to && distance != LOCAL_DISTANCE)) {
-		pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
-			     from, to, distance);
-		return;
-	}
-
-	numa_distance[from * numa_distance_cnt + to] = distance;
-}
-
-int __node_distance(int from, int to)
-{
-	if (from >= numa_distance_cnt || to >= numa_distance_cnt)
-		return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
-	return numa_distance[from * numa_distance_cnt + to];
-}
-EXPORT_SYMBOL(__node_distance);
-
-/*
- * Sanity check to catch more bad NUMA configurations (they are amazingly
- * common).  Make sure the nodes cover all memory.
- */
-static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
-{
-	unsigned long numaram, e820ram;
-	int i;
-
-	numaram = 0;
-	for (i = 0; i < mi->nr_blks; i++) {
-		unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
-		unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
-		numaram += e - s;
-		numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
-		if ((long)numaram < 0)
-			numaram = 0;
-	}
-
-	e820ram = max_pfn - (memblock_x86_hole_size(0,
-					max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
-	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
-	if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
-		printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
-		       (numaram << PAGE_SHIFT) >> 20,
-		       (e820ram << PAGE_SHIFT) >> 20);
-		return false;
-	}
-	return true;
-}
-
-static int __init numa_register_memblks(struct numa_meminfo *mi)
-{
-	int i, nid;
-
-	/* Account for nodes with cpus and no memory */
-	node_possible_map = numa_nodes_parsed;
-	numa_nodemask_from_meminfo(&node_possible_map, mi);
-	if (WARN_ON(nodes_empty(node_possible_map)))
-		return -EINVAL;
-
-	for (i = 0; i < mi->nr_blks; i++)
-		memblock_x86_register_active_regions(mi->blk[i].nid,
-					mi->blk[i].start >> PAGE_SHIFT,
-					mi->blk[i].end >> PAGE_SHIFT);
-
-	/* for out of order entries */
-	sort_node_map();
-	if (!numa_meminfo_cover_memory(mi))
-		return -EINVAL;
-
-	/* Finally register nodes. */
-	for_each_node_mask(nid, node_possible_map) {
-		u64 start = (u64)max_pfn << PAGE_SHIFT;
-		u64 end = 0;
-
-		for (i = 0; i < mi->nr_blks; i++) {
-			if (nid != mi->blk[i].nid)
-				continue;
-			start = min(mi->blk[i].start, start);
-			end = max(mi->blk[i].end, end);
-		}
-
-		if (start < end)
-			setup_node_bootmem(nid, start, end);
-	}
-
-	return 0;
-}
-
-/**
- * dummy_numma_init - Fallback dummy NUMA init
- *
- * Used if there's no underlying NUMA architecture, NUMA initialization
- * fails, or NUMA is disabled on the command line.
- *
- * Must online at least one node and add memory blocks that cover all
- * allowed memory.  This function must not fail.
- */
-static int __init dummy_numa_init(void)
-{
-	printk(KERN_INFO "%s\n",
-	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
-	printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
-	       0LU, max_pfn << PAGE_SHIFT);
-
-	node_set(0, numa_nodes_parsed);
-	numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
-
-	return 0;
-}
-
-static int __init numa_init(int (*init_func)(void))
-{
-	int i;
-	int ret;
-
-	for (i = 0; i < MAX_LOCAL_APIC; i++)
-		set_apicid_to_node(i, NUMA_NO_NODE);
-
-	nodes_clear(numa_nodes_parsed);
-	nodes_clear(node_possible_map);
-	nodes_clear(node_online_map);
-	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
-	remove_all_active_ranges();
-	numa_reset_distance();
-
-	ret = init_func();
-	if (ret < 0)
-		return ret;
-	ret = numa_cleanup_meminfo(&numa_meminfo);
-	if (ret < 0)
-		return ret;
-
-	numa_emulation(&numa_meminfo, numa_distance_cnt);
-
-	ret = numa_register_memblks(&numa_meminfo);
-	if (ret < 0)
-		return ret;
-
-	for (i = 0; i < nr_cpu_ids; i++) {
-		int nid = early_cpu_to_node(i);
-
-		if (nid == NUMA_NO_NODE)
-			continue;
-		if (!node_online(nid))
-			numa_clear_node(i);
-	}
-	numa_init_array();
-	return 0;
-}
-
 void __init initmem_init(void)
 {
-	if (!numa_off) {
-#ifdef CONFIG_ACPI_NUMA
-		if (!numa_init(x86_acpi_numa_init))
-			return;
-#endif
-#ifdef CONFIG_AMD_NUMA
-		if (!numa_init(amd_numa_init))
-			return;
-#endif
-	}
-
-	numa_init(dummy_numa_init);
+	x86_numa_init();
 }
 
 unsigned long __init numa_free_all_bootmem(void)
@@ -509,18 +23,3 @@ unsigned long __init numa_free_all_bootmem(void)
 
 	return pages;
 }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-int memory_add_physaddr_to_nid(u64 start)
-{
-	struct numa_meminfo *mi = &numa_meminfo;
-	int nid = mi->blk[0].nid;
-	int i;
-
-	for (i = 0; i < mi->nr_blks; i++)
-		if (mi->blk[i].start <= start && mi->blk[i].end > start)
-			nid = mi->blk[i].nid;
-	return nid;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
index ef2d97377d7c..ad86ec91e640 100644
--- a/arch/x86/mm/numa_internal.h
+++ b/arch/x86/mm/numa_internal.h
@@ -19,6 +19,8 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
 int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
 void __init numa_reset_distance(void);
 
+void __init x86_numa_init(void);
+
 #ifdef CONFIG_NUMA_EMU
 void __init numa_emulation(struct numa_meminfo *numa_meminfo,
 			   int numa_dist_cnt);
-- 
cgit v1.2.3


From 744baba0c4072b04664952a89292e4708eaf949a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:53 +0200
Subject: x86, NUMA: Enable build of generic NUMA init code on 32bit

Generic NUMA init code was moved to numa.c from numa_64.c but is still
guaraded by CONFIG_X86_64.  This patch removes the compile guard and
enables compiling on 32bit.

* numa_add_memblk() and numa_set_distance() clash with the shim
  implementation in numa_32.c and are left out.

* memory_add_physaddr_to_nid() clashes with 32bit implementation and
  is left out.

* MAX_DMA_PFN definition in dma.h moved out of !CONFIG_X86_32.

* node_data definition in numa_32.c removed in favor of the one in
  numa.c.

There are places where ulong is assumed to be 64bit.  The next patch
will fix them up.  Note that although the code is compiled it isn't
used yet and this patch doesn't cause any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/dma.h |  6 +++---
 arch/x86/mm/numa.c         | 10 ++++------
 arch/x86/mm/numa_32.c      |  3 ---
 3 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index 057099e5faba..d1a314b610f6 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -69,6 +69,9 @@
 
 #define MAX_DMA_CHANNELS	8
 
+/* 16MB ISA DMA zone */
+#define MAX_DMA_PFN   ((16 * 1024 * 1024) >> PAGE_SHIFT)
+
 #ifdef CONFIG_X86_32
 
 /* The maximum address that we can perform a DMA transfer to on this platform */
@@ -76,9 +79,6 @@
 
 #else
 
-/* 16MB ISA DMA zone */
-#define MAX_DMA_PFN   ((16 * 1024 * 1024) >> PAGE_SHIFT)
-
 /* 4GB broken PCI/AGP hardware bus master zone */
 #define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ed1daba54906..c400f3b2b93e 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -23,7 +23,6 @@
 int __initdata numa_off;
 nodemask_t numa_nodes_parsed __initdata;
 
-#ifdef CONFIG_X86_64
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
@@ -35,7 +34,6 @@ __initdata
 
 static int numa_distance_cnt;
 static u8 *numa_distance;
-#endif
 
 static __init int numa_setup(char *opt)
 {
@@ -134,7 +132,6 @@ void __init setup_node_to_cpumask_map(void)
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
-#ifdef CONFIG_X86_64
 static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
 				     struct numa_meminfo *mi)
 {
@@ -176,6 +173,7 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
 		(mi->nr_blks - idx) * sizeof(mi->blk[0]));
 }
 
+#ifdef CONFIG_X86_64
 /**
  * numa_add_memblk - Add one numa_memblk to numa_meminfo
  * @nid: NUMA node ID of the new memblk
@@ -191,6 +189,7 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
 {
 	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
 }
+#endif
 
 /* Initialize bootmem allocator for a node */
 static void __init
@@ -402,6 +401,7 @@ static int __init numa_alloc_distance(void)
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
 /**
  * numa_set_distance - Set NUMA distance from one NUMA to another
  * @from: the 'from' node to set distance
@@ -440,6 +440,7 @@ void __init numa_set_distance(int from, int to, int distance)
 
 	numa_distance[from * numa_distance_cnt + to] = distance;
 }
+#endif
 
 int __node_distance(int from, int to)
 {
@@ -518,7 +519,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 
 	return 0;
 }
-#endif
 
 /*
  * There are unfortunately some poorly designed mainboards around that
@@ -542,7 +542,6 @@ void __init numa_init_array(void)
 	}
 }
 
-#ifdef CONFIG_X86_64
 static int __init numa_init(int (*init_func)(void))
 {
 	int i;
@@ -627,7 +626,6 @@ void __init x86_numa_init(void)
 
 	numa_init(dummy_numa_init);
 }
-#endif
 
 static __init int find_near_online_node(int node)
 {
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 14135e52cef5..975a76f622ba 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -41,9 +41,6 @@
 #include <asm/bios_ebda.h>
 #include <asm/proto.h>
 
-struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_data);
-
 /*
  * numa interface - we expect the numa architecture specific code to have
  *                  populated the following initialisation.
-- 
cgit v1.2.3


From 38f3e1ca24cc3ec416855e02676f91c898a8a262 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:53 +0200
Subject: x86, NUMA: Remove long 64bit assumption from numa.c

Code moved from numa_64.c has assumption that long is 64bit in several
places.  This patch removes the assumption by using {s|u}64_t
explicity, using PFN_PHYS() for page number -> addr conversions and
adjusting printf formats.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/mm/numa.c | 45 ++++++++++++++++++++++-----------------------
 1 file changed, 22 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index c400f3b2b93e..b45caa39f7cf 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -192,13 +192,12 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
 #endif
 
 /* Initialize bootmem allocator for a node */
-static void __init
-setup_node_bootmem(int nid, unsigned long start, unsigned long end)
+static void __init setup_node_bootmem(int nid, u64 start, u64 end)
 {
-	const u64 nd_low = (u64)MAX_DMA_PFN << PAGE_SHIFT;
-	const u64 nd_high = (u64)max_pfn_mapped << PAGE_SHIFT;
+	const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
+	const u64 nd_high = PFN_PHYS(max_pfn_mapped);
 	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
-	unsigned long nd_pa;
+	u64 nd_pa;
 	int tnid;
 
 	/*
@@ -210,7 +209,7 @@ setup_node_bootmem(int nid, unsigned long start, unsigned long end)
 
 	start = roundup(start, ZONE_ALIGN);
 
-	printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n",
+	printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n",
 	       nid, start, end);
 
 	/*
@@ -223,13 +222,13 @@ setup_node_bootmem(int nid, unsigned long start, unsigned long end)
 		nd_pa = memblock_find_in_range(nd_low, nd_high,
 					       nd_size, SMP_CACHE_BYTES);
 	if (nd_pa == MEMBLOCK_ERROR) {
-		pr_err("Cannot find %lu bytes in node %d\n", nd_size, nid);
+		pr_err("Cannot find %zu bytes in node %d\n", nd_size, nid);
 		return;
 	}
 	memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
 
 	/* report and initialize */
-	printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n",
+	printk(KERN_INFO "  NODE_DATA [%016Lx - %016Lx]\n",
 	       nd_pa, nd_pa + nd_size - 1);
 	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
 	if (tnid != nid)
@@ -257,7 +256,7 @@ setup_node_bootmem(int nid, unsigned long start, unsigned long end)
 int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 {
 	const u64 low = 0;
-	const u64 high = (u64)max_pfn << PAGE_SHIFT;
+	const u64 high = PFN_PHYS(max_pfn);
 	int i, j, k;
 
 	for (i = 0; i < mi->nr_blks; i++) {
@@ -275,7 +274,7 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 
 		for (j = i + 1; j < mi->nr_blks; j++) {
 			struct numa_memblk *bj = &mi->blk[j];
-			unsigned long start, end;
+			u64 start, end;
 
 			/*
 			 * See whether there are overlapping blocks.  Whine
@@ -313,7 +312,7 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 			}
 			if (k < mi->nr_blks)
 				continue;
-			printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
+			printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%Lx,%Lx)\n",
 			       bi->nid, bi->start, bi->end, bj->start, bj->end,
 			       start, end);
 			bi->start = start;
@@ -378,7 +377,7 @@ static int __init numa_alloc_distance(void)
 	cnt++;
 	size = cnt * cnt * sizeof(numa_distance[0]);
 
-	phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT,
+	phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
 				      size, PAGE_SIZE);
 	if (phys == MEMBLOCK_ERROR) {
 		pr_warning("NUMA: Warning: can't allocate distance table!\n");
@@ -456,24 +455,24 @@ EXPORT_SYMBOL(__node_distance);
  */
 static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
 {
-	unsigned long numaram, e820ram;
+	u64 numaram, e820ram;
 	int i;
 
 	numaram = 0;
 	for (i = 0; i < mi->nr_blks; i++) {
-		unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
-		unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
+		u64 s = mi->blk[i].start >> PAGE_SHIFT;
+		u64 e = mi->blk[i].end >> PAGE_SHIFT;
 		numaram += e - s;
 		numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
-		if ((long)numaram < 0)
+		if ((s64)numaram < 0)
 			numaram = 0;
 	}
 
 	e820ram = max_pfn - (memblock_x86_hole_size(0,
-					max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
+					PFN_PHYS(max_pfn)) >> PAGE_SHIFT);
 	/* We seem to lose 3 pages somewhere. Allow 1M of slack. */
-	if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
-		printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
+	if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
+		printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
 		       (numaram << PAGE_SHIFT) >> 20,
 		       (e820ram << PAGE_SHIFT) >> 20);
 		return false;
@@ -503,7 +502,7 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 
 	/* Finally register nodes. */
 	for_each_node_mask(nid, node_possible_map) {
-		u64 start = (u64)max_pfn << PAGE_SHIFT;
+		u64 start = PFN_PHYS(max_pfn);
 		u64 end = 0;
 
 		for (i = 0; i < mi->nr_blks; i++) {
@@ -595,11 +594,11 @@ static int __init dummy_numa_init(void)
 {
 	printk(KERN_INFO "%s\n",
 	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
-	printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
-	       0LU, max_pfn << PAGE_SHIFT);
+	printk(KERN_INFO "Faking a node at %016Lx-%016Lx\n",
+	       0LLU, PFN_PHYS(max_pfn));
 
 	node_set(0, numa_nodes_parsed);
-	numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
+	numa_add_memblk(0, 0, PFN_PHYS(max_pfn));
 
 	return 0;
 }
-- 
cgit v1.2.3


From 99cca492ea8ced305bfd687521ed69fb9e0147aa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:54 +0200
Subject: x86-32, NUMA: Add @start and @end to init_alloc_remap()

Instead of dereferencing node_start/end_pfn[] directly, make
init_alloc_remap() take @start and @end and let the caller be
responsible for making sure the range is sane.  This is to prepare for
use from unified NUMA init code.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/mm/numa_32.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 975a76f622ba..900863204be2 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -265,8 +265,10 @@ void resume_map_numa_kva(pgd_t *pgd_base)
  * opportunistically and the callers will fall back to other memory
  * allocation mechanisms on failure.
  */
-static __init void init_alloc_remap(int nid)
+static __init void init_alloc_remap(int nid, u64 start, u64 end)
 {
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = end >> PAGE_SHIFT;
 	unsigned long size, pfn;
 	u64 node_pa, remap_pa;
 	void *remap_va;
@@ -276,24 +278,15 @@ static __init void init_alloc_remap(int nid)
 	 * memory could be added but not currently present.
 	 */
 	printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n",
-	       nid, node_start_pfn[nid], node_end_pfn[nid]);
-	if (node_start_pfn[nid] > max_pfn)
-		return;
-	if (!node_end_pfn[nid])
-		return;
-	if (node_end_pfn[nid] > max_pfn)
-		node_end_pfn[nid] = max_pfn;
+	       nid, start_pfn, end_pfn);
 
 	/* calculate the necessary space aligned to large page size */
-	size = node_memmap_size_bytes(nid, node_start_pfn[nid],
-				      min(node_end_pfn[nid], max_pfn));
+	size = node_memmap_size_bytes(nid, start_pfn, end_pfn);
 	size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
 	size = ALIGN(size, LARGE_PAGE_BYTES);
 
 	/* allocate node memory and the lowmem remap area */
-	node_pa = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT,
-					 (u64)node_end_pfn[nid] << PAGE_SHIFT,
-					 size, LARGE_PAGE_BYTES);
+	node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES);
 	if (node_pa == MEMBLOCK_ERROR) {
 		pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n",
 			   size, nid);
@@ -391,8 +384,14 @@ void __init initmem_init(void)
 	get_memcfg_numa();
 	numa_init_array();
 
-	for_each_online_node(nid)
-		init_alloc_remap(nid);
+	for_each_online_node(nid) {
+		u64 start = (u64)node_start_pfn[nid] << PAGE_SHIFT;
+		u64 end = min((u64)node_end_pfn[nid] << PAGE_SHIFT,
+			      (u64)max_pfn << PAGE_SHIFT);
+
+		if (start < end)
+			init_alloc_remap(nid, start, end);
+	}
 
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
-- 
cgit v1.2.3


From 7888e96b264fad27f97f58c0f3a4d20326eaf181 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 14:18:54 +0200
Subject: x86, NUMA: Initialize and use remap allocator from
 setup_node_bootmem()

setup_node_bootmem() is taken from 64bit and doesn't use remap
allocator.  It's about to be shared with 32bit so add support for it.
If NODE_DATA is remapped, it's noted in the debug message and node
locality check is skipped as the __pa() of the remapped address
doesn't reflect the actual physical address.

On 64bit, remap allocator becomes noop and doesn't affect the
behavior.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/mm/numa.c          | 41 +++++++++++++++++++++++++++--------------
 arch/x86/mm/numa_32.c       |  2 +-
 arch/x86/mm/numa_internal.h |  6 ++++++
 3 files changed, 34 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index b45caa39f7cf..a72317ae74c5 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -197,7 +197,9 @@ static void __init setup_node_bootmem(int nid, u64 start, u64 end)
 	const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
 	const u64 nd_high = PFN_PHYS(max_pfn_mapped);
 	const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
+	bool remapped = false;
 	u64 nd_pa;
+	void *nd;
 	int tnid;
 
 	/*
@@ -207,34 +209,45 @@ static void __init setup_node_bootmem(int nid, u64 start, u64 end)
 	if (end && (end - start) < NODE_MIN_SIZE)
 		return;
 
+	/* initialize remap allocator before aligning to ZONE_ALIGN */
+	init_alloc_remap(nid, start, end);
+
 	start = roundup(start, ZONE_ALIGN);
 
 	printk(KERN_INFO "Initmem setup node %d %016Lx-%016Lx\n",
 	       nid, start, end);
 
 	/*
-	 * Try to allocate node data on local node and then fall back to
-	 * all nodes.  Never allocate in DMA zone.
+	 * Allocate node data.  Try remap allocator first, node-local
+	 * memory and then any node.  Never allocate in DMA zone.
 	 */
-	nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high,
+	nd = alloc_remap(nid, nd_size);
+	if (nd) {
+		nd_pa = __pa(nd);
+		remapped = true;
+	} else {
+		nd_pa = memblock_x86_find_in_range_node(nid, nd_low, nd_high,
 						nd_size, SMP_CACHE_BYTES);
-	if (nd_pa == MEMBLOCK_ERROR)
-		nd_pa = memblock_find_in_range(nd_low, nd_high,
-					       nd_size, SMP_CACHE_BYTES);
-	if (nd_pa == MEMBLOCK_ERROR) {
-		pr_err("Cannot find %zu bytes in node %d\n", nd_size, nid);
-		return;
+		if (nd_pa == MEMBLOCK_ERROR)
+			nd_pa = memblock_find_in_range(nd_low, nd_high,
+						nd_size, SMP_CACHE_BYTES);
+		if (nd_pa == MEMBLOCK_ERROR) {
+			pr_err("Cannot find %zu bytes in node %d\n",
+			       nd_size, nid);
+			return;
+		}
+		memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
+		nd = __va(nd_pa);
 	}
-	memblock_x86_reserve_range(nd_pa, nd_pa + nd_size, "NODE_DATA");
 
 	/* report and initialize */
-	printk(KERN_INFO "  NODE_DATA [%016Lx - %016Lx]\n",
-	       nd_pa, nd_pa + nd_size - 1);
+	printk(KERN_INFO "  NODE_DATA [%016Lx - %016Lx]%s\n",
+	       nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
 	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
-	if (tnid != nid)
+	if (!remapped && tnid != nid)
 		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nid, tnid);
 
-	node_data[nid] = __va(nd_pa);
+	node_data[nid] = nd;
 	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
 	NODE_DATA(nid)->node_id = nid;
 	NODE_DATA(nid)->node_start_pfn = start >> PAGE_SHIFT;
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 900863204be2..fbd558fe10bc 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -265,7 +265,7 @@ void resume_map_numa_kva(pgd_t *pgd_base)
  * opportunistically and the callers will fall back to other memory
  * allocation mechanisms on failure.
  */
-static __init void init_alloc_remap(int nid, u64 start, u64 end)
+void __init init_alloc_remap(int nid, u64 start, u64 end)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long end_pfn = end >> PAGE_SHIFT;
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
index ad86ec91e640..7178c3afe05e 100644
--- a/arch/x86/mm/numa_internal.h
+++ b/arch/x86/mm/numa_internal.h
@@ -21,6 +21,12 @@ void __init numa_reset_distance(void);
 
 void __init x86_numa_init(void);
 
+#ifdef CONFIG_X86_64
+static inline void init_alloc_remap(int nid, u64 start, u64 end)	{ }
+#else
+void __init init_alloc_remap(int nid, u64 start, u64 end);
+#endif
+
 #ifdef CONFIG_NUMA_EMU
 void __init numa_emulation(struct numa_meminfo *numa_meminfo,
 			   int numa_dist_cnt);
-- 
cgit v1.2.3


From bd6709a91a593d8fe35d08da542e9f93bb74a304 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 17:24:48 +0200
Subject: x86, NUMA: Make 32bit use common NUMA init path

With both _numa_init() methods converted and the rest of init code
adjusted, numa_32.c now can switch from the 32bit only init code to
the common one in numa.c.

* Shim get_memcfg_*()'s are dropped and initmem_init() calls
  x86_numa_init(), which is updated to handle NUMAQ.

* All boilerplate operations including node range limiting, pgdat
  alloc/init are handled by numa_init().  32bit only implementation is
  removed.

* 32bit numa_add_memblk(), numa_set_distance() and
  memory_add_physaddr_to_nid() removed and common versions in
  numa_32.c enabled for 32bit.

This change causes the following behavior changes.

* NODE_DATA()->node_start_pfn/node_spanned_pages properly initialized
  for 32bit too.

* Much more sanity checks and configuration cleanups.

* Proper handling of node distances.

* The same NUMA init messages as 64bit.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/topology.h |   7 --
 arch/x86/mm/numa.c              |  10 +-
 arch/x86/mm/numa_32.c           | 231 +---------------------------------------
 3 files changed, 7 insertions(+), 241 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 8dba76972fd7..c00692476e9f 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -93,18 +93,11 @@ extern void setup_node_to_cpumask_map(void);
 #define pcibus_to_node(bus) __pcibus_to_node(bus)
 
 #ifdef CONFIG_X86_32
-extern unsigned long node_start_pfn[];
-extern unsigned long node_end_pfn[];
-#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
-
 # define SD_CACHE_NICE_TRIES	1
 # define SD_IDLE_IDX		1
-
 #else
-
 # define SD_CACHE_NICE_TRIES	2
 # define SD_IDLE_IDX		2
-
 #endif
 
 /* sched_domains SD_NODE_INIT for NUMA machines */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index a72317ae74c5..56ed714ed24f 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -173,7 +173,6 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
 		(mi->nr_blks - idx) * sizeof(mi->blk[0]));
 }
 
-#ifdef CONFIG_X86_64
 /**
  * numa_add_memblk - Add one numa_memblk to numa_meminfo
  * @nid: NUMA node ID of the new memblk
@@ -189,7 +188,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
 {
 	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
 }
-#endif
 
 /* Initialize bootmem allocator for a node */
 static void __init setup_node_bootmem(int nid, u64 start, u64 end)
@@ -413,7 +411,6 @@ static int __init numa_alloc_distance(void)
 	return 0;
 }
 
-#ifdef CONFIG_X86_64
 /**
  * numa_set_distance - Set NUMA distance from one NUMA to another
  * @from: the 'from' node to set distance
@@ -452,7 +449,6 @@ void __init numa_set_distance(int from, int to, int distance)
 
 	numa_distance[from * numa_distance_cnt + to] = distance;
 }
-#endif
 
 int __node_distance(int from, int to)
 {
@@ -626,6 +622,10 @@ static int __init dummy_numa_init(void)
 void __init x86_numa_init(void)
 {
 	if (!numa_off) {
+#ifdef CONFIG_X86_NUMAQ
+		if (!numa_init(numaq_numa_init))
+			return;
+#endif
 #ifdef CONFIG_ACPI_NUMA
 		if (!numa_init(x86_acpi_numa_init))
 			return;
@@ -805,7 +805,7 @@ EXPORT_SYMBOL(cpumask_of_node);
 
 #endif	/* !CONFIG_DEBUG_PER_CPU_MAPS */
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_MEMORY_HOTPLUG)
+#ifdef CONFIG_MEMORY_HOTPLUG
 int memory_add_physaddr_to_nid(u64 start)
 {
 	struct numa_meminfo *mi = &numa_meminfo;
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index fbd558fe10bc..849a975d3fa0 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -22,36 +22,11 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
-#include <linux/mmzone.h>
-#include <linux/highmem.h>
-#include <linux/initrd.h>
-#include <linux/nodemask.h>
 #include <linux/module.h>
-#include <linux/kexec.h>
-#include <linux/pfn.h>
-#include <linux/swap.h>
-#include <linux/acpi.h>
-
-#include <asm/e820.h>
-#include <asm/setup.h>
-#include <asm/mmzone.h>
-#include <asm/bios_ebda.h>
-#include <asm/proto.h>
-
-/*
- * numa interface - we expect the numa architecture specific code to have
- *                  populated the following initialisation.
- *
- * 1) node_online_map  - the map of all nodes configured (online) in the system
- * 2) node_start_pfn   - the starting page frame number for a node
- * 3) node_end_pfn     - the ending page fram number for a node
- */
-unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly;
-unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
 
+#include "numa_internal.h"
 
 #ifdef CONFIG_DISCONTIGMEM
 /*
@@ -96,7 +71,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 }
 #endif
 
-extern unsigned long find_max_low_pfn(void);
 extern unsigned long highend_pfn, highstart_pfn;
 
 #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
@@ -104,68 +78,6 @@ extern unsigned long highend_pfn, highstart_pfn;
 static void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-/*
- * FLAT - support for basic PC memory model with discontig enabled, essentially
- *        a single node with all available processors in it with a flat
- *        memory map.
- */
-static int __init get_memcfg_numa_flat(void)
-{
-	printk(KERN_DEBUG "NUMA - single node, flat memory mode\n");
-
-	node_start_pfn[0] = 0;
-	node_end_pfn[0] = max_pfn;
-	memblock_x86_register_active_regions(0, 0, max_pfn);
-
-        /* Indicate there is one node available. */
-	nodes_clear(node_online_map);
-	node_set_online(0);
-	return 1;
-}
-
-/*
- * Find the highest page frame number we have available for the node
- */
-static void __init propagate_e820_map_node(int nid)
-{
-	if (node_end_pfn[nid] > max_pfn)
-		node_end_pfn[nid] = max_pfn;
-	/*
-	 * if a user has given mem=XXXX, then we need to make sure 
-	 * that the node _starts_ before that, too, not just ends
-	 */
-	if (node_start_pfn[nid] > max_pfn)
-		node_start_pfn[nid] = max_pfn;
-	BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]);
-}
-
-/* 
- * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
- * method.  For node zero take this from the bottom of memory, for
- * subsequent nodes place them at node_remap_start_vaddr which contains
- * node local data in physically node local memory.  See setup_memory()
- * for details.
- */
-static void __init allocate_pgdat(int nid)
-{
-	char buf[16];
-
-	NODE_DATA(nid) = alloc_remap(nid, ALIGN(sizeof(pg_data_t), PAGE_SIZE));
-	if (!NODE_DATA(nid)) {
-		unsigned long pgdat_phys;
-		pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT,
-				 max_pfn_mapped<<PAGE_SHIFT,
-				 sizeof(pg_data_t),
-				 PAGE_SIZE);
-		NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
-		memset(buf, 0, sizeof(buf));
-		sprintf(buf, "NODE_DATA %d",  nid);
-		memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf);
-	}
-	printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
-		nid, (unsigned long)NODE_DATA(nid));
-}
-
 /*
  * Remap memory allocator
  */
@@ -322,76 +234,9 @@ void __init init_alloc_remap(int nid, u64 start, u64 end)
 	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
 }
 
-static int get_memcfg_numaq(void)
-{
-#ifdef CONFIG_X86_NUMAQ
-	int nid;
-
-	if (numa_off)
-		return 0;
-
-	if (numaq_numa_init() < 0) {
-		nodes_clear(numa_nodes_parsed);
-		remove_all_active_ranges();
-		return 0;
-	}
-
-	for_each_node_mask(nid, numa_nodes_parsed)
-		node_set_online(nid);
-	sort_node_map();
-	return 1;
-#else
-	return 0;
-#endif
-}
-
-static int get_memcfg_from_srat(void)
-{
-#ifdef CONFIG_ACPI_NUMA
-	int nid;
-
-	if (numa_off)
-		return 0;
-
-	if (x86_acpi_numa_init() < 0) {
-		nodes_clear(numa_nodes_parsed);
-		remove_all_active_ranges();
-		return 0;
-	}
-
-	for_each_node_mask(nid, numa_nodes_parsed)
-		node_set_online(nid);
-	sort_node_map();
-	return 1;
-#else
-	return 0;
-#endif
-}
-
-static void get_memcfg_numa(void)
-{
-	if (get_memcfg_numaq())
-		return;
-	if (get_memcfg_from_srat())
-		return;
-	get_memcfg_numa_flat();
-}
-
 void __init initmem_init(void)
 {
-	int nid;
-
-	get_memcfg_numa();
-	numa_init_array();
-
-	for_each_online_node(nid) {
-		u64 start = (u64)node_start_pfn[nid] << PAGE_SHIFT;
-		u64 end = min((u64)node_end_pfn[nid] << PAGE_SHIFT,
-			      (u64)max_pfn << PAGE_SHIFT);
-
-		if (start < end)
-			init_alloc_remap(nid, start, end);
-	}
+	x86_numa_init();
 
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
@@ -412,81 +257,9 @@ void __init initmem_init(void)
 
 	printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(max_low_pfn));
-	for_each_online_node(nid)
-		allocate_pgdat(nid);
 
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
-	for_each_online_node(nid)
-		propagate_e820_map_node(nid);
-
-	for_each_online_node(nid) {
-		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
-		NODE_DATA(nid)->node_id = nid;
-	}
 
 	setup_bootmem_allocator();
 }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int paddr_to_nid(u64 addr)
-{
-	int nid;
-	unsigned long pfn = PFN_DOWN(addr);
-
-	for_each_node(nid)
-		if (node_start_pfn[nid] <= pfn &&
-		    pfn < node_end_pfn[nid])
-			return nid;
-
-	return -1;
-}
-
-/*
- * This function is used to ask node id BEFORE memmap and mem_section's
- * initialization (pfn_to_nid() can't be used yet).
- * If _PXM is not defined on ACPI's DSDT, node id must be found by this.
- */
-int memory_add_physaddr_to_nid(u64 addr)
-{
-	int nid = paddr_to_nid(addr);
-	return (nid >= 0) ? nid : 0;
-}
-
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
-/* temporary shim, will go away soon */
-int __init numa_add_memblk(int nid, u64 start, u64 end)
-{
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	unsigned long end_pfn = end >> PAGE_SHIFT;
-
-	printk(KERN_DEBUG "nid %d start_pfn %08lx end_pfn %08lx\n",
-	       nid, start_pfn, end_pfn);
-
-	if (start >= (u64)max_pfn << PAGE_SHIFT) {
-		printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
-		       start_pfn, end_pfn);
-		return 0;
-	}
-
-	node_set_online(nid);
-	memblock_x86_register_active_regions(nid, start_pfn,
-					     min(end_pfn, max_pfn));
-
-	if (!node_has_online_mem(nid)) {
-		node_start_pfn[nid] = start_pfn;
-		node_end_pfn[nid] = end_pfn;
-	} else {
-		node_start_pfn[nid] = min(node_start_pfn[nid], start_pfn);
-		node_end_pfn[nid] = max(node_end_pfn[nid], end_pfn);
-	}
-	return 0;
-}
-
-/* temporary shim, will go away soon */
-void __init numa_set_distance(int from, int to, int distance)
-{
-	/* nada */
-}
-- 
cgit v1.2.3


From 752d4f372f90a2f6eb562aaffb639957890cbcab Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 17:24:48 +0200
Subject: x86, NUMA: Make numa_init_array() static

numa_init_array() no longer has users outside of numa.c.  Make it
static.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/include/asm/numa.h | 2 --
 arch/x86/mm/numa.c          | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 74540865dd83..bfacd2ccf651 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -61,14 +61,12 @@ static inline int numa_cpu_node(int cpu)
 #ifdef CONFIG_NUMA
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
-extern void __init numa_init_array(void);
 extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_add_cpu(int cpu);
 extern void __cpuinit numa_remove_cpu(int cpu);
 #else	/* CONFIG_NUMA */
 static inline void numa_set_node(int cpu, int node)	{ }
 static inline void numa_clear_node(int cpu)		{ }
-static inline void numa_init_array(void)		{ }
 static inline void init_cpu_to_node(void)		{ }
 static inline void numa_add_cpu(int cpu)		{ }
 static inline void numa_remove_cpu(int cpu)		{ }
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 56ed714ed24f..ecb5685d7d20 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -535,7 +535,7 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
  * as the number of CPUs is not known yet. We round robin the existing
  * nodes.
  */
-void __init numa_init_array(void)
+static void __init numa_init_array(void)
 {
 	int rr, i;
 
-- 
cgit v1.2.3


From c6f58878204b0414e00b43f9bcebf754284f95b4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 17:24:48 +0200
Subject: x86, NUMA: Rename amdtopology_64.c to amdtopology.c

amdtopology is going to be used by 32bit too drop _64 suffix.  This is
pure rename.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/mm/Makefile         |   2 +-
 arch/x86/mm/amdtopology.c    | 196 +++++++++++++++++++++++++++++++++++++++++++
 arch/x86/mm/amdtopology_64.c | 196 -------------------------------------------
 3 files changed, 197 insertions(+), 197 deletions(-)
 create mode 100644 arch/x86/mm/amdtopology.c
 delete mode 100644 arch/x86/mm/amdtopology_64.c

(limited to 'arch')

diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 62997be33072..3d11327c9ab4 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -23,7 +23,7 @@ mmiotrace-y			:= kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o
 
 obj-$(CONFIG_NUMA)		+= numa.o numa_$(BITS).o
-obj-$(CONFIG_AMD_NUMA)		+= amdtopology_64.o
+obj-$(CONFIG_AMD_NUMA)		+= amdtopology.o
 obj-$(CONFIG_ACPI_NUMA)		+= srat.o
 obj-$(CONFIG_NUMA_EMU)		+= numa_emulation.o
 
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
new file mode 100644
index 000000000000..0919c26820d4
--- /dev/null
+++ b/arch/x86/mm/amdtopology.c
@@ -0,0 +1,196 @@
+/*
+ * AMD NUMA support.
+ * Discover the memory map and associated nodes.
+ *
+ * This version reads it directly from the AMD northbridge.
+ *
+ * Copyright 2002,2003 Andi Kleen, SuSE Labs.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/nodemask.h>
+#include <linux/memblock.h>
+
+#include <asm/io.h>
+#include <linux/pci_ids.h>
+#include <linux/acpi.h>
+#include <asm/types.h>
+#include <asm/mmzone.h>
+#include <asm/proto.h>
+#include <asm/e820.h>
+#include <asm/pci-direct.h>
+#include <asm/numa.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <asm/amd_nb.h>
+
+static unsigned char __initdata nodeids[8];
+
+static __init int find_northbridge(void)
+{
+	int num;
+
+	for (num = 0; num < 32; num++) {
+		u32 header;
+
+		header = read_pci_config(0, num, 0, 0x00);
+		if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)) &&
+			header != (PCI_VENDOR_ID_AMD | (0x1200<<16)) &&
+			header != (PCI_VENDOR_ID_AMD | (0x1300<<16)))
+			continue;
+
+		header = read_pci_config(0, num, 1, 0x00);
+		if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)) &&
+			header != (PCI_VENDOR_ID_AMD | (0x1201<<16)) &&
+			header != (PCI_VENDOR_ID_AMD | (0x1301<<16)))
+			continue;
+		return num;
+	}
+
+	return -ENOENT;
+}
+
+static __init void early_get_boot_cpu_id(void)
+{
+	/*
+	 * need to get the APIC ID of the BSP so can use that to
+	 * create apicid_to_node in amd_scan_nodes()
+	 */
+#ifdef CONFIG_X86_MPPARSE
+	/*
+	 * get boot-time SMP configuration:
+	 */
+	if (smp_found_config)
+		early_get_smp_config();
+#endif
+}
+
+int __init amd_numa_init(void)
+{
+	unsigned long start = PFN_PHYS(0);
+	unsigned long end = PFN_PHYS(max_pfn);
+	unsigned numnodes;
+	unsigned long prevbase;
+	int i, j, nb;
+	u32 nodeid, reg;
+	unsigned int bits, cores, apicid_base;
+
+	if (!early_pci_allowed())
+		return -EINVAL;
+
+	nb = find_northbridge();
+	if (nb < 0)
+		return nb;
+
+	pr_info("Scanning NUMA topology in Northbridge %d\n", nb);
+
+	reg = read_pci_config(0, nb, 0, 0x60);
+	numnodes = ((reg >> 4) & 0xF) + 1;
+	if (numnodes <= 1)
+		return -ENOENT;
+
+	pr_info("Number of physical nodes %d\n", numnodes);
+
+	prevbase = 0;
+	for (i = 0; i < 8; i++) {
+		unsigned long base, limit;
+
+		base = read_pci_config(0, nb, 1, 0x40 + i*8);
+		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
+
+		nodeids[i] = nodeid = limit & 7;
+		if ((base & 3) == 0) {
+			if (i < numnodes)
+				pr_info("Skipping disabled node %d\n", i);
+			continue;
+		}
+		if (nodeid >= numnodes) {
+			pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
+				base, limit);
+			continue;
+		}
+
+		if (!limit) {
+			pr_info("Skipping node entry %d (base %lx)\n",
+				i, base);
+			continue;
+		}
+		if ((base >> 8) & 3 || (limit >> 8) & 3) {
+			pr_err("Node %d using interleaving mode %lx/%lx\n",
+			       nodeid, (base >> 8) & 3, (limit >> 8) & 3);
+			return -EINVAL;
+		}
+		if (node_isset(nodeid, numa_nodes_parsed)) {
+			pr_info("Node %d already present, skipping\n",
+				nodeid);
+			continue;
+		}
+
+		limit >>= 16;
+		limit <<= 24;
+		limit |= (1<<24)-1;
+		limit++;
+
+		if (limit > end)
+			limit = end;
+		if (limit <= base)
+			continue;
+
+		base >>= 16;
+		base <<= 24;
+
+		if (base < start)
+			base = start;
+		if (limit > end)
+			limit = end;
+		if (limit == base) {
+			pr_err("Empty node %d\n", nodeid);
+			continue;
+		}
+		if (limit < base) {
+			pr_err("Node %d bogus settings %lx-%lx.\n",
+			       nodeid, base, limit);
+			continue;
+		}
+
+		/* Could sort here, but pun for now. Should not happen anyroads. */
+		if (prevbase > base) {
+			pr_err("Node map not sorted %lx,%lx\n",
+			       prevbase, base);
+			return -EINVAL;
+		}
+
+		pr_info("Node %d MemBase %016lx Limit %016lx\n",
+			nodeid, base, limit);
+
+		prevbase = base;
+		numa_add_memblk(nodeid, base, limit);
+		node_set(nodeid, numa_nodes_parsed);
+	}
+
+	if (!nodes_weight(numa_nodes_parsed))
+		return -ENOENT;
+
+	/*
+	 * We seem to have valid NUMA configuration.  Map apicids to nodes
+	 * using the coreid bits from early_identify_cpu.
+	 */
+	bits = boot_cpu_data.x86_coreid_bits;
+	cores = 1 << bits;
+	apicid_base = 0;
+
+	/* get the APIC ID of the BSP early for systems with apicid lifting */
+	early_get_boot_cpu_id();
+	if (boot_cpu_physical_apicid > 0) {
+		pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
+		apicid_base = boot_cpu_physical_apicid;
+	}
+
+	for_each_node_mask(i, numa_nodes_parsed)
+		for (j = apicid_base; j < cores + apicid_base; j++)
+			set_apicid_to_node((i << bits) + j, i);
+
+	return 0;
+}
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
deleted file mode 100644
index 0919c26820d4..000000000000
--- a/arch/x86/mm/amdtopology_64.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * AMD NUMA support.
- * Discover the memory map and associated nodes.
- *
- * This version reads it directly from the AMD northbridge.
- *
- * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/nodemask.h>
-#include <linux/memblock.h>
-
-#include <asm/io.h>
-#include <linux/pci_ids.h>
-#include <linux/acpi.h>
-#include <asm/types.h>
-#include <asm/mmzone.h>
-#include <asm/proto.h>
-#include <asm/e820.h>
-#include <asm/pci-direct.h>
-#include <asm/numa.h>
-#include <asm/mpspec.h>
-#include <asm/apic.h>
-#include <asm/amd_nb.h>
-
-static unsigned char __initdata nodeids[8];
-
-static __init int find_northbridge(void)
-{
-	int num;
-
-	for (num = 0; num < 32; num++) {
-		u32 header;
-
-		header = read_pci_config(0, num, 0, 0x00);
-		if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)) &&
-			header != (PCI_VENDOR_ID_AMD | (0x1200<<16)) &&
-			header != (PCI_VENDOR_ID_AMD | (0x1300<<16)))
-			continue;
-
-		header = read_pci_config(0, num, 1, 0x00);
-		if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)) &&
-			header != (PCI_VENDOR_ID_AMD | (0x1201<<16)) &&
-			header != (PCI_VENDOR_ID_AMD | (0x1301<<16)))
-			continue;
-		return num;
-	}
-
-	return -ENOENT;
-}
-
-static __init void early_get_boot_cpu_id(void)
-{
-	/*
-	 * need to get the APIC ID of the BSP so can use that to
-	 * create apicid_to_node in amd_scan_nodes()
-	 */
-#ifdef CONFIG_X86_MPPARSE
-	/*
-	 * get boot-time SMP configuration:
-	 */
-	if (smp_found_config)
-		early_get_smp_config();
-#endif
-}
-
-int __init amd_numa_init(void)
-{
-	unsigned long start = PFN_PHYS(0);
-	unsigned long end = PFN_PHYS(max_pfn);
-	unsigned numnodes;
-	unsigned long prevbase;
-	int i, j, nb;
-	u32 nodeid, reg;
-	unsigned int bits, cores, apicid_base;
-
-	if (!early_pci_allowed())
-		return -EINVAL;
-
-	nb = find_northbridge();
-	if (nb < 0)
-		return nb;
-
-	pr_info("Scanning NUMA topology in Northbridge %d\n", nb);
-
-	reg = read_pci_config(0, nb, 0, 0x60);
-	numnodes = ((reg >> 4) & 0xF) + 1;
-	if (numnodes <= 1)
-		return -ENOENT;
-
-	pr_info("Number of physical nodes %d\n", numnodes);
-
-	prevbase = 0;
-	for (i = 0; i < 8; i++) {
-		unsigned long base, limit;
-
-		base = read_pci_config(0, nb, 1, 0x40 + i*8);
-		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
-
-		nodeids[i] = nodeid = limit & 7;
-		if ((base & 3) == 0) {
-			if (i < numnodes)
-				pr_info("Skipping disabled node %d\n", i);
-			continue;
-		}
-		if (nodeid >= numnodes) {
-			pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
-				base, limit);
-			continue;
-		}
-
-		if (!limit) {
-			pr_info("Skipping node entry %d (base %lx)\n",
-				i, base);
-			continue;
-		}
-		if ((base >> 8) & 3 || (limit >> 8) & 3) {
-			pr_err("Node %d using interleaving mode %lx/%lx\n",
-			       nodeid, (base >> 8) & 3, (limit >> 8) & 3);
-			return -EINVAL;
-		}
-		if (node_isset(nodeid, numa_nodes_parsed)) {
-			pr_info("Node %d already present, skipping\n",
-				nodeid);
-			continue;
-		}
-
-		limit >>= 16;
-		limit <<= 24;
-		limit |= (1<<24)-1;
-		limit++;
-
-		if (limit > end)
-			limit = end;
-		if (limit <= base)
-			continue;
-
-		base >>= 16;
-		base <<= 24;
-
-		if (base < start)
-			base = start;
-		if (limit > end)
-			limit = end;
-		if (limit == base) {
-			pr_err("Empty node %d\n", nodeid);
-			continue;
-		}
-		if (limit < base) {
-			pr_err("Node %d bogus settings %lx-%lx.\n",
-			       nodeid, base, limit);
-			continue;
-		}
-
-		/* Could sort here, but pun for now. Should not happen anyroads. */
-		if (prevbase > base) {
-			pr_err("Node map not sorted %lx,%lx\n",
-			       prevbase, base);
-			return -EINVAL;
-		}
-
-		pr_info("Node %d MemBase %016lx Limit %016lx\n",
-			nodeid, base, limit);
-
-		prevbase = base;
-		numa_add_memblk(nodeid, base, limit);
-		node_set(nodeid, numa_nodes_parsed);
-	}
-
-	if (!nodes_weight(numa_nodes_parsed))
-		return -ENOENT;
-
-	/*
-	 * We seem to have valid NUMA configuration.  Map apicids to nodes
-	 * using the coreid bits from early_identify_cpu.
-	 */
-	bits = boot_cpu_data.x86_coreid_bits;
-	cores = 1 << bits;
-	apicid_base = 0;
-
-	/* get the APIC ID of the BSP early for systems with apicid lifting */
-	early_get_boot_cpu_id();
-	if (boot_cpu_physical_apicid > 0) {
-		pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
-		apicid_base = boot_cpu_physical_apicid;
-	}
-
-	for_each_node_mask(i, numa_nodes_parsed)
-		for (j = apicid_base; j < cores + apicid_base; j++)
-			set_apicid_to_node((i << bits) + j, i);
-
-	return 0;
-}
-- 
cgit v1.2.3


From 2706a0bf7b02693ed88752df877f10c2206292ff Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 17:24:48 +0200
Subject: x86, NUMA: Enable CONFIG_AMD_NUMA on 32bit too

Now that NUMA init path is unified, amdtopology can be enabled on
32bit.  Make amdtopology.c safe on 32bit by explicitly using u64 and
drop X86_64 dependency from Kconfig.

Inclusion of bootmem.h is added for max_pfn declaration.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/Kconfig          |  2 +-
 arch/x86/mm/amdtopology.c | 21 +++++++++++----------
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8db4fbf30b59..50cb68d2901d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1174,7 +1174,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
 config AMD_NUMA
 	def_bool y
 	prompt "Old style AMD Opteron NUMA detection"
-	depends on X86_64 && NUMA && PCI
+	depends on NUMA && PCI
 	---help---
 	  Enable AMD NUMA node topology detection.  You should say Y here if
 	  you have a multi processor AMD system. This uses an old method to
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index 0919c26820d4..5247d01329ca 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/nodemask.h>
 #include <linux/memblock.h>
+#include <linux/bootmem.h>
 
 #include <asm/io.h>
 #include <linux/pci_ids.h>
@@ -69,10 +70,10 @@ static __init void early_get_boot_cpu_id(void)
 
 int __init amd_numa_init(void)
 {
-	unsigned long start = PFN_PHYS(0);
-	unsigned long end = PFN_PHYS(max_pfn);
+	u64 start = PFN_PHYS(0);
+	u64 end = PFN_PHYS(max_pfn);
 	unsigned numnodes;
-	unsigned long prevbase;
+	u64 prevbase;
 	int i, j, nb;
 	u32 nodeid, reg;
 	unsigned int bits, cores, apicid_base;
@@ -95,7 +96,7 @@ int __init amd_numa_init(void)
 
 	prevbase = 0;
 	for (i = 0; i < 8; i++) {
-		unsigned long base, limit;
+		u64 base, limit;
 
 		base = read_pci_config(0, nb, 1, 0x40 + i*8);
 		limit = read_pci_config(0, nb, 1, 0x44 + i*8);
@@ -107,18 +108,18 @@ int __init amd_numa_init(void)
 			continue;
 		}
 		if (nodeid >= numnodes) {
-			pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
+			pr_info("Ignoring excess node %d (%Lx:%Lx)\n", nodeid,
 				base, limit);
 			continue;
 		}
 
 		if (!limit) {
-			pr_info("Skipping node entry %d (base %lx)\n",
+			pr_info("Skipping node entry %d (base %Lx)\n",
 				i, base);
 			continue;
 		}
 		if ((base >> 8) & 3 || (limit >> 8) & 3) {
-			pr_err("Node %d using interleaving mode %lx/%lx\n",
+			pr_err("Node %d using interleaving mode %Lx/%Lx\n",
 			       nodeid, (base >> 8) & 3, (limit >> 8) & 3);
 			return -EINVAL;
 		}
@@ -150,19 +151,19 @@ int __init amd_numa_init(void)
 			continue;
 		}
 		if (limit < base) {
-			pr_err("Node %d bogus settings %lx-%lx.\n",
+			pr_err("Node %d bogus settings %Lx-%Lx.\n",
 			       nodeid, base, limit);
 			continue;
 		}
 
 		/* Could sort here, but pun for now. Should not happen anyroads. */
 		if (prevbase > base) {
-			pr_err("Node map not sorted %lx,%lx\n",
+			pr_err("Node map not sorted %Lx,%Lx\n",
 			       prevbase, base);
 			return -EINVAL;
 		}
 
-		pr_info("Node %d MemBase %016lx Limit %016lx\n",
+		pr_info("Node %d MemBase %016Lx Limit %016Lx\n",
 			nodeid, base, limit);
 
 		prevbase = base;
-- 
cgit v1.2.3


From 1b7e03ef7570568d2fb9e6640d7006a0edd728f6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 May 2011 17:24:48 +0200
Subject: x86, NUMA: Enable emulation on 32bit too

Now that NUMA init path is unified, NUMA emulation can be enabled on
32bit.  Make numa_emluation.c safe on 32bit by doing the followings.

* Define MAX_DMA32_PFN on 32bit too.

* Include bootmem.h for max_pfn declaration.

* Use u64 explicitly and always use PFN_PHYS() when converting page
  number to address.

* Avoid __udivdi3() generation on 32bit by doing number of pages
  calculation instead in split_nodes_interleave().

And drop X86_64 dependency from Kconfig.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/Kconfig             |  2 +-
 arch/x86/include/asm/dma.h   | 10 +++-------
 arch/x86/mm/numa_emulation.c | 16 +++++++++++-----
 3 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 50cb68d2901d..648fca42ae6a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1201,7 +1201,7 @@ config NODES_SPAN_OTHER_NODES
 
 config NUMA_EMU
 	bool "NUMA emulation"
-	depends on X86_64 && NUMA
+	depends on NUMA
 	---help---
 	  Enable NUMA emulation. A flat machine will be split
 	  into virtual nodes when booted with "numa=fake=N", where N is the
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h
index d1a314b610f6..0bdb0c54d9a1 100644
--- a/arch/x86/include/asm/dma.h
+++ b/arch/x86/include/asm/dma.h
@@ -72,19 +72,15 @@
 /* 16MB ISA DMA zone */
 #define MAX_DMA_PFN   ((16 * 1024 * 1024) >> PAGE_SHIFT)
 
-#ifdef CONFIG_X86_32
+/* 4GB broken PCI/AGP hardware bus master zone */
+#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
 
+#ifdef CONFIG_X86_32
 /* The maximum address that we can perform a DMA transfer to on this platform */
 #define MAX_DMA_ADDRESS      (PAGE_OFFSET + 0x1000000)
-
 #else
-
-/* 4GB broken PCI/AGP hardware bus master zone */
-#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
-
 /* Compat define for old dma zone */
 #define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
-
 #endif
 
 /* 8237 DMA controllers */
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index de84cc140379..d0ed086b6247 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -5,6 +5,7 @@
 #include <linux/errno.h>
 #include <linux/topology.h>
 #include <linux/memblock.h>
+#include <linux/bootmem.h>
 #include <asm/dma.h>
 
 #include "numa_internal.h"
@@ -84,7 +85,13 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
 		nr_nodes = MAX_NUMNODES;
 	}
 
-	size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
+	/*
+	 * Calculate target node size.  x86_32 freaks on __udivdi3() so do
+	 * the division in ulong number of pages and convert back.
+	 */
+	size = max_addr - addr - memblock_x86_hole_size(addr, max_addr);
+	size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
+
 	/*
 	 * Calculate the number of big nodes that can be allocated as a result
 	 * of consolidating the remainder.
@@ -226,7 +233,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
 	 */
 	while (nodes_weight(physnode_mask)) {
 		for_each_node_mask(i, physnode_mask) {
-			u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
+			u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
 			u64 start, limit, end;
 			int phys_blk;
 
@@ -298,7 +305,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 {
 	static struct numa_meminfo ei __initdata;
 	static struct numa_meminfo pi __initdata;
-	const u64 max_addr = max_pfn << PAGE_SHIFT;
+	const u64 max_addr = PFN_PHYS(max_pfn);
 	u8 *phys_dist = NULL;
 	size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]);
 	int max_emu_nid, dfl_phys_nid;
@@ -342,8 +349,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 	if (numa_dist_cnt) {
 		u64 phys;
 
-		phys = memblock_find_in_range(0,
-					      (u64)max_pfn_mapped << PAGE_SHIFT,
+		phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
 					      phys_size, PAGE_SIZE);
 		if (phys == MEMBLOCK_ERROR) {
 			pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
-- 
cgit v1.2.3


From a56bca80db8903bb557b9ac38da68dc5b98ea672 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 2 May 2011 17:24:49 +0200
Subject: x86, NUMA: Rename setup_node_bootmem() to setup_node_data()

After using memblock to replace bootmem, that function only sets up
node_data now.

Change the name to reflect what it actually does.

tj: Minor adjustment to the patch description.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/mm/numa.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ecb5685d7d20..9a0ed312b830 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -189,8 +189,8 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
 	return numa_add_memblk_to(nid, start, end, &numa_meminfo);
 }
 
-/* Initialize bootmem allocator for a node */
-static void __init setup_node_bootmem(int nid, u64 start, u64 end)
+/* Initialize NODE_DATA for a node on the local memory */
+static void __init setup_node_data(int nid, u64 start, u64 end)
 {
 	const u64 nd_low = PFN_PHYS(MAX_DMA_PFN);
 	const u64 nd_high = PFN_PHYS(max_pfn_mapped);
@@ -522,7 +522,7 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 		}
 
 		if (start < end)
-			setup_node_bootmem(nid, start, end);
+			setup_node_data(nid, start, end);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From e5a10c1bd12a5d71bbb6406c1b0dbbc9d8958397 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 2 May 2011 17:24:49 +0200
Subject: x86, NUMA: Trim numa meminfo with max_pfn in a separate loop

During testing 32bit numa unifying code from tj, found one system with
more than 64g fails to use numa.  It turns out we do not trim numa
meminfo correctly against max_pfn in case start address of a node is
higher than 64GiB.  Bug fix made it to tip tree.

This patch moves the checking and trimming to a separate loop.  So we
don't need to compare low/high in following merge loops.  It makes the
code more readable.

Also it makes the node merge printouts less strange.  On a 512GiB numa
system with 32bit,

before:
> NUMA: Node 0 [0,a0000) + [100000,80000000) -> [0,80000000)
> NUMA: Node 0 [0,80000000) + [100000000,1080000000) -> [0,1000000000)

after:
> NUMA: Node 0 [0,a0000) + [100000,80000000) -> [0,80000000)
> NUMA: Node 0 [0,80000000) + [100000000,1000000000) -> [0,1000000000)

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
[Updated patch description and comment slightly.]
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/mm/numa.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 9a0ed312b830..f5510d889a22 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -270,6 +270,7 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 	const u64 high = PFN_PHYS(max_pfn);
 	int i, j, k;
 
+	/* first, trim all entries */
 	for (i = 0; i < mi->nr_blks; i++) {
 		struct numa_memblk *bi = &mi->blk[i];
 
@@ -278,10 +279,13 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 		bi->end = min(bi->end, high);
 
 		/* and there's no empty block */
-		if (bi->start >= bi->end) {
+		if (bi->start >= bi->end)
 			numa_remove_memblk_from(i--, mi);
-			continue;
-		}
+	}
+
+	/* merge neighboring / overlapping entries */
+	for (i = 0; i < mi->nr_blks; i++) {
+		struct numa_memblk *bi = &mi->blk[i];
 
 		for (j = i + 1; j < mi->nr_blks; j++) {
 			struct numa_memblk *bj = &mi->blk[j];
@@ -311,8 +315,8 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 			 */
 			if (bi->nid != bj->nid)
 				continue;
-			start = max(min(bi->start, bj->start), low);
-			end = min(max(bi->end, bj->end), high);
+			start = min(bi->start, bj->start);
+			end = max(bi->end, bj->end);
 			for (k = 0; k < mi->nr_blks; k++) {
 				struct numa_memblk *bk = &mi->blk[k];
 
@@ -332,6 +336,7 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 		}
 	}
 
+	/* clear unused ones */
 	for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
 		mi->blk[i].start = mi->blk[i].end = 0;
 		mi->blk[i].nid = NUMA_NO_NODE;
-- 
cgit v1.2.3


From a38647837a411f7df79623128421eef2118b5884 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 29 Apr 2011 11:34:00 -0400
Subject: xen/mmu: Add workaround "x86-64, mm: Put early page table high"

As a consequence of the commit:

commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
Author: Yinghai Lu <yinghai@kernel.org>
Date:   Fri Dec 17 16:58:28 2010 -0800

    x86-64, mm: Put early page table high

it causes the Linux kernel to crash under Xen:

mapping kernel into physical memory
Xen: setup ISA identity maps
about to get started...
(XEN) mm.c:2466:d0 Bad type (saw 7400000000000001 != exp 1000000000000000) for mfn b1d89 (pfn bacf7)
(XEN) mm.c:3027:d0 Error while pinning mfn b1d89
(XEN) traps.c:481:d0 Unhandled invalid opcode fault/trap [#6] on VCPU 0 [ec=0000]
(XEN) domain_crash_sync called from entry.S
(XEN) Domain 0 (vcpu#0) crashed on cpu#0:
...

The reason is that at some point init_memory_mapping is going to reach
the pagetable pages area and map those pages too (mapping them as normal
memory that falls in the range of addresses passed to init_memory_mapping
as argument). Some of those pages are already pagetable pages (they are
in the range pgt_buf_start-pgt_buf_end) therefore they are going to be
mapped RO and everything is fine.
Some of these pages are not pagetable pages yet (they fall in the range
pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
are going to be mapped RW.  When these pages become pagetable pages and
are hooked into the pagetable, xen will find that the guest has already
a RW mapping of them somewhere and fail the operation.
The reason Xen requires pagetables to be RO is that the hypervisor needs
to verify that the pagetables are valid before using them. The validation
operations are called "pinning" (more details in arch/x86/xen/mmu.c).

In order to fix the issue we mark all the pages in the entire range
pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation
is completed only the range pgt_buf_start-pgt_buf_end is reserved by
init_memory_mapping. Hence the kernel is going to crash as soon as one
of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those
ranges are RO).

For this reason, this function is introduced which is called _after_
the init_memory_mapping has completed (in a perfect world we would
call this function from init_memory_mapping, but lets ignore that).

Because we are called _after_ init_memory_mapping the pgt_buf_[start,
end,top] have all changed to new values (b/c another init_memory_mapping
is called). Hence, the first time we enter this function, we save
away the pgt_buf_start value and update the pgt_buf_[end,top].

When we detect that the "old" pgt_buf_start through pgt_buf_end
PFNs have been reserved (so memblock_x86_reserve_range has been called),
we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top.

And then we update those "old" pgt_buf_[end|top] with the new ones
so that we can redo this on the next pagetable.

Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Reviewed-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
[v1: Updated with Jeremy's comments]
[v2: Added the crash output]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index aef7af92b28b..1bca25f60ff2 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1463,6 +1463,119 @@ static int xen_pgd_alloc(struct mm_struct *mm)
 	return ret;
 }
 
+#ifdef CONFIG_X86_64
+static __initdata u64 __last_pgt_set_rw = 0;
+static __initdata u64 __pgt_buf_start = 0;
+static __initdata u64 __pgt_buf_end = 0;
+static __initdata u64 __pgt_buf_top = 0;
+/*
+ * As a consequence of the commit:
+ * 
+ * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
+ * Author: Yinghai Lu <yinghai@kernel.org>
+ * Date:   Fri Dec 17 16:58:28 2010 -0800
+ * 
+ *     x86-64, mm: Put early page table high
+ * 
+ * at some point init_memory_mapping is going to reach the pagetable pages
+ * area and map those pages too (mapping them as normal memory that falls
+ * in the range of addresses passed to init_memory_mapping as argument).
+ * Some of those pages are already pagetable pages (they are in the range
+ * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
+ * everything is fine.
+ * Some of these pages are not pagetable pages yet (they fall in the range
+ * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
+ * are going to be mapped RW.  When these pages become pagetable pages and
+ * are hooked into the pagetable, xen will find that the guest has already
+ * a RW mapping of them somewhere and fail the operation.
+ * The reason Xen requires pagetables to be RO is that the hypervisor needs
+ * to verify that the pagetables are valid before using them. The validation
+ * operations are called "pinning".
+ * 
+ * In order to fix the issue we mark all the pages in the entire range
+ * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation
+ * is completed only the range pgt_buf_start-pgt_buf_end is reserved by
+ * init_memory_mapping. Hence the kernel is going to crash as soon as one
+ * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those
+ * ranges are RO).
+ * 
+ * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_
+ * the init_memory_mapping has completed (in a perfect world we would
+ * call this function from init_memory_mapping, but lets ignore that).
+ * 
+ * Because we are called _after_ init_memory_mapping the pgt_buf_[start,
+ * end,top] have all changed to new values (b/c init_memory_mapping
+ * is called and setting up another new page-table). Hence, the first time
+ * we enter this function, we save away the pgt_buf_start value and update
+ * the pgt_buf_[end,top].
+ * 
+ * When we detect that the "old" pgt_buf_start through pgt_buf_end
+ * PFNs have been reserved (so memblock_x86_reserve_range has been called),
+ * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top.
+ * 
+ * And then we update those "old" pgt_buf_[end|top] with the new ones
+ * so that we can redo this on the next pagetable.
+ */
+static __init void mark_rw_past_pgt(void) {
+
+	if (pgt_buf_end > pgt_buf_start) {
+		u64 addr, size;
+
+		/* Save it away. */
+		if (!__pgt_buf_start) {
+			__pgt_buf_start = pgt_buf_start;
+			__pgt_buf_end = pgt_buf_end;
+			__pgt_buf_top = pgt_buf_top;
+			return;
+		}
+		/* If we get the range that starts at __pgt_buf_end that means
+		 * the range is reserved, and that in 'init_memory_mapping'
+		 * the 'memblock_x86_reserve_range' has been called with the
+		 * outdated __pgt_buf_start, __pgt_buf_end (the "new"
+		 * pgt_buf_[start|end|top] refer now to a new pagetable.
+		 * Note: we are called _after_ the pgt_buf_[..] have been
+		 * updated.*/
+
+		addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start),
+						       &size, PAGE_SIZE);
+
+		/* Still not reserved, meaning 'memblock_x86_reserve_range'
+		 * hasn't been called yet. Update the _end and _top.*/
+		if (addr == PFN_PHYS(__pgt_buf_start)) {
+			__pgt_buf_end = pgt_buf_end;
+			__pgt_buf_top = pgt_buf_top;
+			return;
+		}
+
+		/* OK, the area is reserved, meaning it is time for us to
+		 * set RW for the old end->top PFNs. */
+
+		/* ..unless we had already done this. */
+		if (__pgt_buf_end == __last_pgt_set_rw)
+			return;
+
+		addr = PFN_PHYS(__pgt_buf_end);
+		
+		/* set as RW the rest */
+		printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n",
+			PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top));
+		
+		while (addr < PFN_PHYS(__pgt_buf_top)) {
+			make_lowmem_page_readwrite(__va(addr));
+			addr += PAGE_SIZE;
+		}
+		/* And update everything so that we are ready for the next
+		 * pagetable (the one created for regions past 4GB) */
+		__last_pgt_set_rw = __pgt_buf_end;
+		__pgt_buf_start = pgt_buf_start;
+		__pgt_buf_end = pgt_buf_end;
+		__pgt_buf_top = pgt_buf_top;
+	}
+	return;
+}
+#else
+static __init void mark_rw_past_pgt(void) { }
+#endif
 static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 #ifdef CONFIG_X86_64
@@ -1488,6 +1601,14 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 {
 	unsigned long pfn = pte_pfn(pte);
 
+	/*
+	 * A bit of optimization. We do not need to call the workaround
+	 * when xen_set_pte_init is called with a PTE with 0 as PFN.
+	 * That is b/c the pagetable at that point are just being populated
+	 * with empty values and we can save some cycles by not calling
+	 * the 'memblock' code.*/
+	if (pfn)
+		mark_rw_past_pgt();
 	/*
 	 * If the new pfn is within the range of the newly allocated
 	 * kernel pagetable, and it isn't being mapped into an
@@ -1997,6 +2118,8 @@ __init void xen_ident_map_ISA(void)
 
 static __init void xen_post_allocator_init(void)
 {
+	mark_rw_past_pgt();
+
 #ifdef CONFIG_XEN_DEBUG
 	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
 #endif
-- 
cgit v1.2.3


From b9269dc7bfdf8c985971c09f2dcb2aa04ad7986d Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Tue, 12 Apr 2011 12:19:49 +0100
Subject: xen: mask_rw_pte mark RO all pagetable pages up to pgt_buf_top

mask_rw_pte is currently checking if a pfn is a pagetable page if it
falls in the range pgt_buf_start - pgt_buf_end but that is incorrect
because pgt_buf_end is a moving target: pgt_buf_top is the real
boundary.

Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 1bca25f60ff2..55c965b38c27 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1616,7 +1616,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 	 * it is RO.
 	 */
 	if (((!is_early_ioremap_ptep(ptep) &&
-			pfn >= pgt_buf_start && pfn < pgt_buf_end)) ||
+			pfn >= pgt_buf_start && pfn < pgt_buf_top)) ||
 			(is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1)))
 		pte = pte_wrprotect(pte);
 
-- 
cgit v1.2.3


From 7806a49ab625ebeb1709e5e87299b64932b807a7 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 2 May 2011 14:33:24 -0700
Subject: x86, reboot: Fix relocations in reboot_32.S

The use of base for %ebx in this file is arbitrary, *except* that we
also use it to compute the real-mode segment.  Therefore, make it so
that r_base really is the true address to which %ebx points.

This resolves kernel bugzilla 33302.

Reported-and-tested-by: Alexey Zaytsev <alexey.zaytsev@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Link: http://lkml.kernel.org/n/tip-08os5wi3yq1no0y4i5m4z7he@git.kernel.org
---
 arch/x86/kernel/reboot_32.S | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S
index 29092b38d816..1d5c46df0d78 100644
--- a/arch/x86/kernel/reboot_32.S
+++ b/arch/x86/kernel/reboot_32.S
@@ -21,26 +21,26 @@ r_base = .
 	/* Get our own relocated address */
 	call	1f
 1:	popl	%ebx
-	subl	$1b, %ebx
+	subl	$(1b - r_base), %ebx
 
 	/* Compute the equivalent real-mode segment */
 	movl	%ebx, %ecx
 	shrl	$4, %ecx
 	
 	/* Patch post-real-mode segment jump */
-	movw	dispatch_table(%ebx,%eax,2),%ax
-	movw	%ax, 101f(%ebx)
-	movw	%cx, 102f(%ebx)
+	movw	(dispatch_table - r_base)(%ebx,%eax,2),%ax
+	movw	%ax, (101f - r_base)(%ebx)
+	movw	%cx, (102f - r_base)(%ebx)
 
 	/* Set up the IDT for real mode. */
-	lidtl	machine_real_restart_idt(%ebx)
+	lidtl	(machine_real_restart_idt - r_base)(%ebx)
 
 	/*
 	 * Set up a GDT from which we can load segment descriptors for real
 	 * mode.  The GDT is not used in real mode; it is just needed here to
 	 * prepare the descriptors.
 	 */
-	lgdtl	machine_real_restart_gdt(%ebx)
+	lgdtl	(machine_real_restart_gdt - r_base)(%ebx)
 
 	/*
 	 * Load the data segment registers with 16-bit compatible values
-- 
cgit v1.2.3


From 476eb4912601a8c01e6702b9a029f476b4b131d2 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Wed, 4 May 2011 15:02:15 +1000
Subject: powerpc/irq: Stop exporting irq_map

First step in eliminating irq_map[] table entirely

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/irq.h                   | 21 +------
 arch/powerpc/kernel/irq.c                        | 26 +++++++-
 arch/powerpc/platforms/512x/mpc5121_ads_cpld.c   |  4 +-
 arch/powerpc/platforms/52xx/media5200.c          |  4 +-
 arch/powerpc/platforms/52xx/mpc52xx_pic.c        | 79 ++++--------------------
 arch/powerpc/platforms/82xx/pq2ads-pci-pic.c     |  4 +-
 arch/powerpc/platforms/85xx/socrates_fpga_pic.c  | 26 ++------
 arch/powerpc/platforms/86xx/gef_pic.c            | 10 +--
 arch/powerpc/platforms/8xx/m8xx_setup.c          |  2 +-
 arch/powerpc/platforms/cell/axon_msi.c           |  2 +-
 arch/powerpc/platforms/cell/spider-pic.c         | 10 +--
 arch/powerpc/platforms/embedded6xx/flipper-pic.c |  8 +--
 arch/powerpc/platforms/embedded6xx/hlwd-pic.c    |  8 +--
 arch/powerpc/platforms/iseries/irq.c             | 10 +--
 arch/powerpc/platforms/powermac/pic.c            | 12 ++--
 arch/powerpc/platforms/pseries/ras.c             |  4 +-
 arch/powerpc/sysdev/cpm1.c                       |  8 +--
 arch/powerpc/sysdev/cpm2_pic.c                   | 10 +--
 arch/powerpc/sysdev/ipic.c                       | 16 +++--
 arch/powerpc/sysdev/mpc8xx_pic.c                 | 10 +--
 arch/powerpc/sysdev/mpc8xxx_gpio.c               | 12 ++--
 arch/powerpc/sysdev/mpic.c                       | 28 ++++-----
 arch/powerpc/sysdev/mv64x60_pic.c                | 14 ++---
 arch/powerpc/sysdev/qe_lib/qe_ic.c               |  6 +-
 arch/powerpc/sysdev/uic.c                        | 12 ++--
 arch/powerpc/sysdev/xics/icp-hv.c                |  2 +-
 arch/powerpc/sysdev/xics/icp-native.c            |  2 +-
 arch/powerpc/sysdev/xics/ics-rtas.c              |  8 +--
 arch/powerpc/sysdev/xics/xics-common.c           |  4 +-
 arch/powerpc/sysdev/xilinx_intc.c                |  8 +--
 30 files changed, 148 insertions(+), 222 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 47b7905a6369..e1983d577688 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -128,25 +128,10 @@ struct irq_host {
 	struct device_node	*of_node;
 };
 
-/* The main irq map itself is an array of NR_IRQ entries containing the
- * associate host and irq number. An entry with a host of NULL is free.
- * An entry can be allocated if it's free, the allocator always then sets
- * hwirq first to the host's invalid irq number and then fills ops.
- */
-struct irq_map_entry {
-	irq_hw_number_t	hwirq;
-	struct irq_host	*host;
-};
-
-extern struct irq_map_entry irq_map[NR_IRQS];
-
+struct irq_data;
+extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d);
 extern irq_hw_number_t virq_to_hw(unsigned int virq);
-
-/* This will eventually -replace- virq_to_hw if/when we stash the
- * HW number in the irq_data itself. We use a macro so we can inline
- * it as irq_data isn't defined yet
- */
-#define irq_data_to_hw(d) (irq_map[(d)->irq].hwirq)
+extern struct irq_host *virq_to_host(unsigned int virq);
 
 /**
  * irq_alloc_host - Allocate a new irq_host data structure
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4f5d6e751a65..a81dd74414bf 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -481,20 +481,42 @@ void do_softirq(void)
  * IRQ controller and virtual interrupts
  */
 
+/* The main irq map itself is an array of NR_IRQ entries containing the
+ * associate host and irq number. An entry with a host of NULL is free.
+ * An entry can be allocated if it's free, the allocator always then sets
+ * hwirq first to the host's invalid irq number and then fills ops.
+ */
+struct irq_map_entry {
+	irq_hw_number_t	hwirq;
+	struct irq_host	*host;
+};
+
 static LIST_HEAD(irq_hosts);
 static DEFINE_RAW_SPINLOCK(irq_big_lock);
 static unsigned int revmap_trees_allocated;
 static DEFINE_MUTEX(revmap_trees_mutex);
-struct irq_map_entry irq_map[NR_IRQS];
+static struct irq_map_entry irq_map[NR_IRQS];
 static unsigned int irq_virq_count = NR_IRQS;
 static struct irq_host *irq_default_host;
 
+irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
+{
+	return irq_map[d->irq].hwirq;
+}
+EXPORT_SYMBOL_GPL(irqd_to_hwirq);
+
 irq_hw_number_t virq_to_hw(unsigned int virq)
 {
 	return irq_map[virq].hwirq;
 }
 EXPORT_SYMBOL_GPL(virq_to_hw);
 
+struct irq_host *virq_to_host(unsigned int virq)
+{
+	return irq_map[virq].host;
+}
+EXPORT_SYMBOL_GPL(virq_to_host);
+
 static int default_irq_host_match(struct irq_host *h, struct device_node *np)
 {
 	return h->of_node != NULL && h->of_node == np;
@@ -1103,7 +1125,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
 			struct irq_chip *chip;
 
 			seq_printf(m, "%5d  ", i);
-			seq_printf(m, "0x%05lx  ", virq_to_hw(i));
+			seq_printf(m, "0x%05lx  ", irq_map[i].hwirq);
 
 			chip = irq_desc_get_chip(desc);
 			if (chip && chip->name)
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index cfc4b2009982..a8bc0d443934 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -61,7 +61,7 @@ irq_to_pic_bit(unsigned int irq)
 static void
 cpld_mask_irq(struct irq_data *d)
 {
-	unsigned int cpld_irq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d);
 	void __iomem *pic_mask = irq_to_pic_mask(cpld_irq);
 
 	out_8(pic_mask,
@@ -71,7 +71,7 @@ cpld_mask_irq(struct irq_data *d)
 static void
 cpld_unmask_irq(struct irq_data *d)
 {
-	unsigned int cpld_irq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d);
 	void __iomem *pic_mask = irq_to_pic_mask(cpld_irq);
 
 	out_8(pic_mask,
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index 57a6a349e932..96f85e5e0cd3 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -56,7 +56,7 @@ static void media5200_irq_unmask(struct irq_data *d)
 
 	spin_lock_irqsave(&media5200_irq.lock, flags);
 	val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE);
-	val |= 1 << (MEDIA5200_IRQ_SHIFT + irq_map[d->irq].hwirq);
+	val |= 1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d));
 	out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val);
 	spin_unlock_irqrestore(&media5200_irq.lock, flags);
 }
@@ -68,7 +68,7 @@ static void media5200_irq_mask(struct irq_data *d)
 
 	spin_lock_irqsave(&media5200_irq.lock, flags);
 	val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE);
-	val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irq_map[d->irq].hwirq));
+	val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d)));
 	out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val);
 	spin_unlock_irqrestore(&media5200_irq.lock, flags);
 }
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
index 1dd15400f6f0..bb611819b832 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -157,48 +157,30 @@ static inline void io_be_clrbit(u32 __iomem *addr, int bitno)
  */
 static void mpc52xx_extirq_mask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_clrbit(&intr->ctrl, 11 - l2irq);
 }
 
 static void mpc52xx_extirq_unmask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&intr->ctrl, 11 - l2irq);
 }
 
 static void mpc52xx_extirq_ack(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&intr->ctrl, 27-l2irq);
 }
 
 static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type)
 {
 	u32 ctrl_reg, type;
-	int irq;
-	int l2irq;
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	void *handler = handle_level_irq;
 
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
-	pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__, irq, l2irq, flow_type);
+	pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__,
+		(int) irqd_to_hwirq(d), l2irq, flow_type);
 
 	switch (flow_type) {
 	case IRQF_TRIGGER_HIGH: type = 0; break;
@@ -237,23 +219,13 @@ static int mpc52xx_null_set_type(struct irq_data *d, unsigned int flow_type)
 
 static void mpc52xx_main_mask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&intr->main_mask, 16 - l2irq);
 }
 
 static void mpc52xx_main_unmask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_clrbit(&intr->main_mask, 16 - l2irq);
 }
 
@@ -270,23 +242,13 @@ static struct irq_chip mpc52xx_main_irqchip = {
  */
 static void mpc52xx_periph_mask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&intr->per_mask, 31 - l2irq);
 }
 
 static void mpc52xx_periph_unmask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_clrbit(&intr->per_mask, 31 - l2irq);
 }
 
@@ -303,34 +265,19 @@ static struct irq_chip mpc52xx_periph_irqchip = {
  */
 static void mpc52xx_sdma_mask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_setbit(&sdma->IntMask, l2irq);
 }
 
 static void mpc52xx_sdma_unmask(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	io_be_clrbit(&sdma->IntMask, l2irq);
 }
 
 static void mpc52xx_sdma_ack(struct irq_data *d)
 {
-	int irq;
-	int l2irq;
-
-	irq = irq_map[d->irq].hwirq;
-	l2irq = irq & MPC52xx_IRQ_L2_MASK;
-
+	int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
 	out_be32(&sdma->IntPend, 1 << l2irq);
 }
 
diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
index 4a4eb6ffa12f..5d6c34ce0cba 100644
--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
@@ -42,7 +42,7 @@ struct pq2ads_pci_pic {
 static void pq2ads_pci_mask_irq(struct irq_data *d)
 {
 	struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d);
-	int irq = NUM_IRQS - virq_to_hw(d->irq) - 1;
+	int irq = NUM_IRQS - irqd_to_hwirq(d) - 1;
 
 	if (irq != -1) {
 		unsigned long flags;
@@ -58,7 +58,7 @@ static void pq2ads_pci_mask_irq(struct irq_data *d)
 static void pq2ads_pci_unmask_irq(struct irq_data *d)
 {
 	struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d);
-	int irq = NUM_IRQS - virq_to_hw(d->irq) - 1;
+	int irq = NUM_IRQS - irqd_to_hwirq(d) - 1;
 
 	if (irq != -1) {
 		unsigned long flags;
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index db864623b4ae..12cb9bb2cc68 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -48,8 +48,6 @@ static struct socrates_fpga_irq_info fpga_irqs[SOCRATES_FPGA_NUM_IRQS] = {
 	[8] = {0, IRQ_TYPE_LEVEL_HIGH},
 };
 
-#define socrates_fpga_irq_to_hw(virq)    ((unsigned int)irq_map[virq].hwirq)
-
 static DEFINE_RAW_SPINLOCK(socrates_fpga_pic_lock);
 
 static void __iomem *socrates_fpga_pic_iobase;
@@ -110,11 +108,9 @@ void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc)
 static void socrates_fpga_pic_ack(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq, irq_line;
+	unsigned int irq_line, hwirq = irqd_to_hwirq(d);
 	uint32_t mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -127,12 +123,10 @@ static void socrates_fpga_pic_ack(struct irq_data *d)
 static void socrates_fpga_pic_mask(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int irq_line;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -145,12 +139,10 @@ static void socrates_fpga_pic_mask(struct irq_data *d)
 static void socrates_fpga_pic_mask_ack(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int irq_line;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -164,12 +156,10 @@ static void socrates_fpga_pic_mask_ack(struct irq_data *d)
 static void socrates_fpga_pic_unmask(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int irq_line;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -182,12 +172,10 @@ static void socrates_fpga_pic_unmask(struct irq_data *d)
 static void socrates_fpga_pic_eoi(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int irq_line;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	irq_line = fpga_irqs[hwirq].irq_line;
 	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
 	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
@@ -201,12 +189,10 @@ static int socrates_fpga_pic_set_type(struct irq_data *d,
 		unsigned int flow_type)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	int polarity;
 	u32 mask;
 
-	hwirq = socrates_fpga_irq_to_hw(d->irq);
-
 	if (fpga_irqs[hwirq].type != IRQ_TYPE_NONE)
 		return -EINVAL;
 
diff --git a/arch/powerpc/platforms/86xx/gef_pic.c b/arch/powerpc/platforms/86xx/gef_pic.c
index 0beec7d5566b..94594e58594c 100644
--- a/arch/powerpc/platforms/86xx/gef_pic.c
+++ b/arch/powerpc/platforms/86xx/gef_pic.c
@@ -46,8 +46,6 @@
 #define GEF_PIC_CPU0_MCP_MASK	GEF_PIC_MCP_MASK(0)
 #define GEF_PIC_CPU1_MCP_MASK	GEF_PIC_MCP_MASK(1)
 
-#define gef_irq_to_hw(virq)    ((unsigned int)irq_map[virq].hwirq)
-
 
 static DEFINE_RAW_SPINLOCK(gef_pic_lock);
 
@@ -113,11 +111,9 @@ void gef_pic_cascade(unsigned int irq, struct irq_desc *desc)
 static void gef_pic_mask(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	u32 mask;
 
-	hwirq = gef_irq_to_hw(d->irq);
-
 	raw_spin_lock_irqsave(&gef_pic_lock, flags);
 	mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
 	mask &= ~(1 << hwirq);
@@ -136,11 +132,9 @@ static void gef_pic_mask_ack(struct irq_data *d)
 static void gef_pic_unmask(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int hwirq;
+	unsigned int hwirq = irqd_to_hwirq(d);
 	u32 mask;
 
-	hwirq = gef_irq_to_hw(d->irq);
-
 	raw_spin_lock_irqsave(&gef_pic_lock, flags);
 	mask = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_MASK(0));
 	mask |= (1 << hwirq);
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 9ecce995dd4b..1e121088826f 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -150,7 +150,7 @@ void __init mpc8xx_calibrate_decr(void)
 	 */
 	cpu = of_find_node_by_type(NULL, "cpu");
 	virq= irq_of_parse_and_map(cpu, 0);
-	irq = irq_map[virq].hwirq;
+	irq = virq_to_hw(virq);
 
 	sys_tmr2 = immr_map(im_sit);
 	out_be16(&sys_tmr2->sit_tbscr, ((1 << (7 - (irq/2))) << 8) |
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index bb5ebf8fa80b..1e3329e8578b 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -113,7 +113,7 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
 		pr_devel("axon_msi: woff %x roff %x msi %x\n",
 			  write_offset, msic->read_offset, msi);
 
-		if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) {
+		if (msi < NR_IRQS && virq_to_host(msi) == msic->irq_host) {
 			generic_handle_irq(msi);
 			msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
 		} else {
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index c5cf50e6b45a..34d2b99d10c3 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -70,7 +70,7 @@ static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
 
 static struct spider_pic *spider_virq_to_pic(unsigned int virq)
 {
-	return irq_map[virq].host->host_data;
+	return virq_to_host(virq)->host_data;
 }
 
 static void __iomem *spider_get_irq_config(struct spider_pic *pic,
@@ -82,7 +82,7 @@ static void __iomem *spider_get_irq_config(struct spider_pic *pic,
 static void spider_unmask_irq(struct irq_data *d)
 {
 	struct spider_pic *pic = spider_virq_to_pic(d->irq);
-	void __iomem *cfg = spider_get_irq_config(pic, irq_map[d->irq].hwirq);
+	void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
 
 	out_be32(cfg, in_be32(cfg) | 0x30000000u);
 }
@@ -90,7 +90,7 @@ static void spider_unmask_irq(struct irq_data *d)
 static void spider_mask_irq(struct irq_data *d)
 {
 	struct spider_pic *pic = spider_virq_to_pic(d->irq);
-	void __iomem *cfg = spider_get_irq_config(pic, irq_map[d->irq].hwirq);
+	void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
 
 	out_be32(cfg, in_be32(cfg) & ~0x30000000u);
 }
@@ -98,7 +98,7 @@ static void spider_mask_irq(struct irq_data *d)
 static void spider_ack_irq(struct irq_data *d)
 {
 	struct spider_pic *pic = spider_virq_to_pic(d->irq);
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
 
 	/* Reset edge detection logic if necessary
 	 */
@@ -117,7 +117,7 @@ static int spider_set_irq_type(struct irq_data *d, unsigned int type)
 {
 	unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
 	struct spider_pic *pic = spider_virq_to_pic(d->irq);
-	unsigned int hw = irq_map[d->irq].hwirq;
+	unsigned int hw = irqd_to_hwirq(d);
 	void __iomem *cfg = spider_get_irq_config(pic, hw);
 	u32 old_mask;
 	u32 ic;
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index 12aa62b6f227..77cbe4c8f953 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -48,7 +48,7 @@
 
 static void flipper_pic_mask_and_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 	u32 mask = 1 << irq;
 
@@ -59,7 +59,7 @@ static void flipper_pic_mask_and_ack(struct irq_data *d)
 
 static void flipper_pic_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	/* this is at least needed for RSW */
@@ -68,7 +68,7 @@ static void flipper_pic_ack(struct irq_data *d)
 
 static void flipper_pic_mask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	clrbits32(io_base + FLIPPER_IMR, 1 << irq);
@@ -76,7 +76,7 @@ static void flipper_pic_mask(struct irq_data *d)
 
 static void flipper_pic_unmask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	setbits32(io_base + FLIPPER_IMR, 1 << irq);
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 2bdddfc9d520..44b398b0a2fd 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -43,7 +43,7 @@
 
 static void hlwd_pic_mask_and_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 	u32 mask = 1 << irq;
 
@@ -53,7 +53,7 @@ static void hlwd_pic_mask_and_ack(struct irq_data *d)
 
 static void hlwd_pic_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	out_be32(io_base + HW_BROADWAY_ICR, 1 << irq);
@@ -61,7 +61,7 @@ static void hlwd_pic_ack(struct irq_data *d)
 
 static void hlwd_pic_mask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	clrbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
@@ -69,7 +69,7 @@ static void hlwd_pic_mask(struct irq_data *d)
 
 static void hlwd_pic_unmask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void __iomem *io_base = irq_data_get_irq_chip_data(d);
 
 	setbits32(io_base + HW_BROADWAY_IMR, 1 << irq);
diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c
index 52a6889832c7..375c21ca6602 100644
--- a/arch/powerpc/platforms/iseries/irq.c
+++ b/arch/powerpc/platforms/iseries/irq.c
@@ -171,7 +171,7 @@ static void iseries_enable_IRQ(struct irq_data *d)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	/* The IRQ has already been locked by the caller */
 	bus = REAL_IRQ_TO_BUS(rirq);
@@ -188,7 +188,7 @@ static unsigned int iseries_startup_IRQ(struct irq_data *d)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	bus = REAL_IRQ_TO_BUS(rirq);
 	function = REAL_IRQ_TO_FUNC(rirq);
@@ -234,7 +234,7 @@ static void iseries_shutdown_IRQ(struct irq_data *d)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	/* irq should be locked by the caller */
 	bus = REAL_IRQ_TO_BUS(rirq);
@@ -257,7 +257,7 @@ static void iseries_disable_IRQ(struct irq_data *d)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	/* The IRQ has already been locked by the caller */
 	bus = REAL_IRQ_TO_BUS(rirq);
@@ -271,7 +271,7 @@ static void iseries_disable_IRQ(struct irq_data *d)
 
 static void iseries_end_IRQ(struct irq_data *d)
 {
-	unsigned int rirq = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int rirq = (unsigned int)irqd_to_hwirq(d);
 
 	HvCallPci_eoi(REAL_IRQ_TO_BUS(rirq), REAL_IRQ_TO_SUBBUS(rirq),
 		(REAL_IRQ_TO_IDSEL(rirq) << 4) + REAL_IRQ_TO_FUNC(rirq));
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 023f24086a0a..2f34ad04029f 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -84,7 +84,7 @@ static void __pmac_retrigger(unsigned int irq_nr)
 
 static void pmac_mask_and_ack_irq(struct irq_data *d)
 {
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
         unsigned long bit = 1UL << (src & 0x1f);
         int i = src >> 5;
         unsigned long flags;
@@ -106,7 +106,7 @@ static void pmac_mask_and_ack_irq(struct irq_data *d)
 
 static void pmac_ack_irq(struct irq_data *d)
 {
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
         unsigned long bit = 1UL << (src & 0x1f);
         int i = src >> 5;
         unsigned long flags;
@@ -152,7 +152,7 @@ static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
 static unsigned int pmac_startup_irq(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
         unsigned long bit = 1UL << (src & 0x1f);
         int i = src >> 5;
 
@@ -169,7 +169,7 @@ static unsigned int pmac_startup_irq(struct irq_data *d)
 static void pmac_mask_irq(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
 
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
         __clear_bit(src, ppc_cached_irq_mask);
@@ -180,7 +180,7 @@ static void pmac_mask_irq(struct irq_data *d)
 static void pmac_unmask_irq(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int src = irq_map[d->irq].hwirq;
+	unsigned int src = irqd_to_hwirq(d);
 
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
 	__set_bit(src, ppc_cached_irq_mask);
@@ -193,7 +193,7 @@ static int pmac_retrigger(struct irq_data *d)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
-	__pmac_retrigger(irq_map[d->irq].hwirq);
+	__pmac_retrigger(irqd_to_hwirq(d));
 	raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
 	return 1;
 }
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index c55d7ad9c648..164a8eb45923 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -122,7 +122,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
 
 	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
-			   irq_map[irq].hwirq,
+			   virq_to_hw(irq),
 			   RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
 			   critical, __pa(&ras_log_buf),
 				rtas_get_error_log_max());
@@ -157,7 +157,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 
 	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
-			   irq_map[irq].hwirq,
+			   virq_to_hw(irq),
 			   RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
 			   __pa(&ras_log_buf),
 				rtas_get_error_log_max());
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index e0bc944eb23f..350787c83e22 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -58,21 +58,21 @@ static struct irq_host *cpm_pic_host;
 
 static void cpm_mask_irq(struct irq_data *d)
 {
-	unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
 
 	clrbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec));
 }
 
 static void cpm_unmask_irq(struct irq_data *d)
 {
-	unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
 
 	setbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec));
 }
 
 static void cpm_end_irq(struct irq_data *d)
 {
-	unsigned int cpm_vec = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
 
 	out_be32(&cpic_reg->cpic_cisr, (1 << cpm_vec));
 }
@@ -157,7 +157,7 @@ unsigned int cpm_pic_init(void)
 		goto end;
 
 	/* Initialize the CPM interrupt controller. */
-	hwirq = (unsigned int)irq_map[sirq].hwirq;
+	hwirq = (unsigned int)virq_to_hw(sirq);
 	out_be32(&cpic_reg->cpic_cicr,
 	    (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) |
 		((hwirq/2) << 13) | CICR_HP_MASK);
diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c
index 5495c1be472b..bcab50e2a9eb 100644
--- a/arch/powerpc/sysdev/cpm2_pic.c
+++ b/arch/powerpc/sysdev/cpm2_pic.c
@@ -81,7 +81,7 @@ static const u_char irq_to_siubit[] = {
 static void cpm2_mask_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = virq_to_hw(d->irq);
+	unsigned int irq_nr = irqd_to_hwirq(d);
 
 	bit = irq_to_siubit[irq_nr];
 	word = irq_to_siureg[irq_nr];
@@ -93,7 +93,7 @@ static void cpm2_mask_irq(struct irq_data *d)
 static void cpm2_unmask_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = virq_to_hw(d->irq);
+	unsigned int irq_nr = irqd_to_hwirq(d);
 
 	bit = irq_to_siubit[irq_nr];
 	word = irq_to_siureg[irq_nr];
@@ -105,7 +105,7 @@ static void cpm2_unmask_irq(struct irq_data *d)
 static void cpm2_ack(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = virq_to_hw(d->irq);
+	unsigned int irq_nr = irqd_to_hwirq(d);
 
 	bit = irq_to_siubit[irq_nr];
 	word = irq_to_siureg[irq_nr];
@@ -116,7 +116,7 @@ static void cpm2_ack(struct irq_data *d)
 static void cpm2_end_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = virq_to_hw(d->irq);
+	unsigned int irq_nr = irqd_to_hwirq(d);
 
 	bit = irq_to_siubit[irq_nr];
 	word = irq_to_siureg[irq_nr];
@@ -133,7 +133,7 @@ static void cpm2_end_irq(struct irq_data *d)
 
 static int cpm2_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
-	unsigned int src = virq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned int vold, vnew, edibit;
 
 	/* Port C interrupts are either IRQ_TYPE_EDGE_FALLING or
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index fa438be962b7..f0ece79f9be5 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -521,12 +521,10 @@ static inline struct ipic * ipic_from_irq(unsigned int virq)
 	return primary_ipic;
 }
 
-#define ipic_irq_to_hw(virq)	((unsigned int)irq_map[virq].hwirq)
-
 static void ipic_unmask_irq(struct irq_data *d)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -542,7 +540,7 @@ static void ipic_unmask_irq(struct irq_data *d)
 static void ipic_mask_irq(struct irq_data *d)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -562,7 +560,7 @@ static void ipic_mask_irq(struct irq_data *d)
 static void ipic_ack_irq(struct irq_data *d)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -581,7 +579,7 @@ static void ipic_ack_irq(struct irq_data *d)
 static void ipic_mask_irq_and_ack(struct irq_data *d)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -604,7 +602,7 @@ static void ipic_mask_irq_and_ack(struct irq_data *d)
 static int ipic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	struct ipic *ipic = ipic_from_irq(d->irq);
-	unsigned int src = ipic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned int vold, vnew, edibit;
 
 	if (flow_type == IRQ_TYPE_NONE)
@@ -793,7 +791,7 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags)
 int ipic_set_priority(unsigned int virq, unsigned int priority)
 {
 	struct ipic *ipic = ipic_from_irq(virq);
-	unsigned int src = ipic_irq_to_hw(virq);
+	unsigned int src = virq_to_hw(virq);
 	u32 temp;
 
 	if (priority > 7)
@@ -821,7 +819,7 @@ int ipic_set_priority(unsigned int virq, unsigned int priority)
 void ipic_set_highest_priority(unsigned int virq)
 {
 	struct ipic *ipic = ipic_from_irq(virq);
-	unsigned int src = ipic_irq_to_hw(virq);
+	unsigned int src = virq_to_hw(virq);
 	u32 temp;
 
 	temp = ipic_read(ipic->regs, IPIC_SICFR);
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c
index a88800ff4d01..20924f2246f0 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.c
+++ b/arch/powerpc/sysdev/mpc8xx_pic.c
@@ -28,7 +28,7 @@ int cpm_get_irq(struct pt_regs *regs);
 static void mpc8xx_unmask_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
 
 	bit = irq_nr & 0x1f;
 	word = irq_nr >> 5;
@@ -40,7 +40,7 @@ static void mpc8xx_unmask_irq(struct irq_data *d)
 static void mpc8xx_mask_irq(struct irq_data *d)
 {
 	int	bit, word;
-	unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
 
 	bit = irq_nr & 0x1f;
 	word = irq_nr >> 5;
@@ -52,7 +52,7 @@ static void mpc8xx_mask_irq(struct irq_data *d)
 static void mpc8xx_ack(struct irq_data *d)
 {
 	int	bit;
-	unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
 
 	bit = irq_nr & 0x1f;
 	out_be32(&siu_reg->sc_sipend, 1 << (31-bit));
@@ -61,7 +61,7 @@ static void mpc8xx_ack(struct irq_data *d)
 static void mpc8xx_end_irq(struct irq_data *d)
 {
 	int bit, word;
-	unsigned int irq_nr = (unsigned int)irq_map[d->irq].hwirq;
+	unsigned int irq_nr = (unsigned int)irqd_to_hwirq(d);
 
 	bit = irq_nr & 0x1f;
 	word = irq_nr >> 5;
@@ -73,7 +73,7 @@ static void mpc8xx_end_irq(struct irq_data *d)
 static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	if (flow_type & IRQ_TYPE_EDGE_FALLING) {
-		irq_hw_number_t hw = (unsigned int)irq_map[d->irq].hwirq;
+		irq_hw_number_t hw = (unsigned int)irqd_to_hwirq(d);
 		unsigned int siel = in_be32(&siu_reg->sc_siel);
 
 		/* only external IRQ senses are programmable */
diff --git a/arch/powerpc/sysdev/mpc8xxx_gpio.c b/arch/powerpc/sysdev/mpc8xxx_gpio.c
index 0892a2841c2b..fb4963abdf55 100644
--- a/arch/powerpc/sysdev/mpc8xxx_gpio.c
+++ b/arch/powerpc/sysdev/mpc8xxx_gpio.c
@@ -163,7 +163,7 @@ static void mpc8xxx_irq_unmask(struct irq_data *d)
 
 	spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
 
-	setbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+	setbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 
 	spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
 }
@@ -176,7 +176,7 @@ static void mpc8xxx_irq_mask(struct irq_data *d)
 
 	spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
 
-	clrbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+	clrbits32(mm->regs + GPIO_IMR, mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 
 	spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
 }
@@ -186,7 +186,7 @@ static void mpc8xxx_irq_ack(struct irq_data *d)
 	struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d);
 	struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc;
 
-	out_be32(mm->regs + GPIO_IER, mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+	out_be32(mm->regs + GPIO_IER, mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 }
 
 static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type)
@@ -199,14 +199,14 @@ static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type)
 	case IRQ_TYPE_EDGE_FALLING:
 		spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
 		setbits32(mm->regs + GPIO_ICR,
-			  mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+			  mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 		spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
 		break;
 
 	case IRQ_TYPE_EDGE_BOTH:
 		spin_lock_irqsave(&mpc8xxx_gc->lock, flags);
 		clrbits32(mm->regs + GPIO_ICR,
-			  mpc8xxx_gpio2mask(virq_to_hw(d->irq)));
+			  mpc8xxx_gpio2mask(irqd_to_hwirq(d)));
 		spin_unlock_irqrestore(&mpc8xxx_gc->lock, flags);
 		break;
 
@@ -221,7 +221,7 @@ static int mpc512x_irq_set_type(struct irq_data *d, unsigned int flow_type)
 {
 	struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_data_get_irq_chip_data(d);
 	struct of_mm_gpio_chip *mm = &mpc8xxx_gc->mm_gc;
-	unsigned long gpio = virq_to_hw(d->irq);
+	unsigned long gpio = irqd_to_hwirq(d);
 	void __iomem *reg;
 	unsigned int shift;
 	unsigned long flags;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index f91c065bed5a..824a94fc413b 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -607,8 +607,6 @@ static int irq_choose_cpu(const struct cpumask *mask)
 }
 #endif
 
-#define mpic_irq_to_hw(virq)	((unsigned int)irq_map[virq].hwirq)
-
 /* Find an mpic associated with a given linux interrupt */
 static struct mpic *mpic_find(unsigned int irq)
 {
@@ -621,7 +619,7 @@ static struct mpic *mpic_find(unsigned int irq)
 /* Determine if the linux irq is an IPI */
 static unsigned int mpic_is_ipi(struct mpic *mpic, unsigned int irq)
 {
-	unsigned int src = mpic_irq_to_hw(irq);
+	unsigned int src = virq_to_hw(irq);
 
 	return (src >= mpic->ipi_vecs[0] && src <= mpic->ipi_vecs[3]);
 }
@@ -674,7 +672,7 @@ void mpic_unmask_irq(struct irq_data *d)
 {
 	unsigned int loops = 100000;
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, d->irq, src);
 
@@ -695,7 +693,7 @@ void mpic_mask_irq(struct irq_data *d)
 {
 	unsigned int loops = 100000;
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	DBG("%s: disable_irq: %d (src %d)\n", mpic->name, d->irq, src);
 
@@ -733,7 +731,7 @@ void mpic_end_irq(struct irq_data *d)
 static void mpic_unmask_ht_irq(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	mpic_unmask_irq(d);
 
@@ -744,7 +742,7 @@ static void mpic_unmask_ht_irq(struct irq_data *d)
 static unsigned int mpic_startup_ht_irq(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	mpic_unmask_irq(d);
 	mpic_startup_ht_interrupt(mpic, src, irqd_is_level_type(d));
@@ -755,7 +753,7 @@ static unsigned int mpic_startup_ht_irq(struct irq_data *d)
 static void mpic_shutdown_ht_irq(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	mpic_shutdown_ht_interrupt(mpic, src);
 	mpic_mask_irq(d);
@@ -764,7 +762,7 @@ static void mpic_shutdown_ht_irq(struct irq_data *d)
 static void mpic_end_ht_irq(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 #ifdef DEBUG_IRQ
 	DBG("%s: end_irq: %d\n", mpic->name, d->irq);
@@ -785,7 +783,7 @@ static void mpic_end_ht_irq(struct irq_data *d)
 static void mpic_unmask_ipi(struct irq_data *d)
 {
 	struct mpic *mpic = mpic_from_ipi(d);
-	unsigned int src = mpic_irq_to_hw(d->irq) - mpic->ipi_vecs[0];
+	unsigned int src = virq_to_hw(d->irq) - mpic->ipi_vecs[0];
 
 	DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, d->irq, src);
 	mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK);
@@ -816,7 +814,7 @@ int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
 		      bool force)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 
 	if (mpic->flags & MPIC_SINGLE_DEST_CPU) {
 		int cpuid = irq_choose_cpu(cpumask);
@@ -862,7 +860,7 @@ static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
 int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	struct mpic *mpic = mpic_from_irq_data(d);
-	unsigned int src = mpic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned int vecpri, vold, vnew;
 
 	DBG("mpic: set_irq_type(mpic:@%p,virq:%d,src:0x%x,type:0x%x)\n",
@@ -898,7 +896,7 @@ int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 void mpic_set_vector(unsigned int virq, unsigned int vector)
 {
 	struct mpic *mpic = mpic_from_irq(virq);
-	unsigned int src = mpic_irq_to_hw(virq);
+	unsigned int src = virq_to_hw(virq);
 	unsigned int vecpri;
 
 	DBG("mpic: set_vector(mpic:@%p,virq:%d,src:%d,vector:0x%x)\n",
@@ -916,7 +914,7 @@ void mpic_set_vector(unsigned int virq, unsigned int vector)
 void mpic_set_destination(unsigned int virq, unsigned int cpuid)
 {
 	struct mpic *mpic = mpic_from_irq(virq);
-	unsigned int src = mpic_irq_to_hw(virq);
+	unsigned int src = virq_to_hw(virq);
 
 	DBG("mpic: set_destination(mpic:@%p,virq:%d,src:%d,cpuid:0x%x)\n",
 	    mpic, virq, src, cpuid);
@@ -1427,7 +1425,7 @@ void __init mpic_set_serial_int(struct mpic *mpic, int enable)
 void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
 {
 	struct mpic *mpic = mpic_find(irq);
-	unsigned int src = mpic_irq_to_hw(irq);
+	unsigned int src = virq_to_hw(irq);
 	unsigned long flags;
 	u32 reg;
 
diff --git a/arch/powerpc/sysdev/mv64x60_pic.c b/arch/powerpc/sysdev/mv64x60_pic.c
index e9c633c7c083..14d130268e7a 100644
--- a/arch/powerpc/sysdev/mv64x60_pic.c
+++ b/arch/powerpc/sysdev/mv64x60_pic.c
@@ -78,7 +78,7 @@ static struct irq_host *mv64x60_irq_host;
 
 static void mv64x60_mask_low(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -91,7 +91,7 @@ static void mv64x60_mask_low(struct irq_data *d)
 
 static void mv64x60_unmask_low(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -115,7 +115,7 @@ static struct irq_chip mv64x60_chip_low = {
 
 static void mv64x60_mask_high(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -128,7 +128,7 @@ static void mv64x60_mask_high(struct irq_data *d)
 
 static void mv64x60_unmask_high(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -152,7 +152,7 @@ static struct irq_chip mv64x60_chip_high = {
 
 static void mv64x60_mask_gpp(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -165,7 +165,7 @@ static void mv64x60_mask_gpp(struct irq_data *d)
 
 static void mv64x60_mask_ack_gpp(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
@@ -180,7 +180,7 @@ static void mv64x60_mask_ack_gpp(struct irq_data *d)
 
 static void mv64x60_unmask_gpp(struct irq_data *d)
 {
-	int level2 = irq_map[d->irq].hwirq & MV64x60_LEVEL2_MASK;
+	int level2 = irqd_to_hwirq(d) & MV64x60_LEVEL2_MASK;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mv64x60_lock, flags);
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c
index 832d6924ad1c..b2acda07220d 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.c
+++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c
@@ -197,12 +197,10 @@ static inline struct qe_ic *qe_ic_from_irq_data(struct irq_data *d)
 	return irq_data_get_irq_chip_data(d);
 }
 
-#define virq_to_hw(virq)	((unsigned int)irq_map[virq].hwirq)
-
 static void qe_ic_unmask_irq(struct irq_data *d)
 {
 	struct qe_ic *qe_ic = qe_ic_from_irq_data(d);
-	unsigned int src = virq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
@@ -218,7 +216,7 @@ static void qe_ic_unmask_irq(struct irq_data *d)
 static void qe_ic_mask_irq(struct irq_data *d)
 {
 	struct qe_ic *qe_ic = qe_ic_from_irq_data(d);
-	unsigned int src = virq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 temp;
 
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c
index 5d9138516628..984cd2029158 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/sysdev/uic.c
@@ -41,8 +41,6 @@
 #define UIC_VR		0x7
 #define UIC_VCR		0x8
 
-#define uic_irq_to_hw(virq)	(irq_map[virq].hwirq)
-
 struct uic *primary_uic;
 
 struct uic {
@@ -58,7 +56,7 @@ struct uic {
 static void uic_unmask_irq(struct irq_data *d)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 er, sr;
 
@@ -76,7 +74,7 @@ static void uic_unmask_irq(struct irq_data *d)
 static void uic_mask_irq(struct irq_data *d)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 er;
 
@@ -90,7 +88,7 @@ static void uic_mask_irq(struct irq_data *d)
 static void uic_ack_irq(struct irq_data *d)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 
 	spin_lock_irqsave(&uic->lock, flags);
@@ -101,7 +99,7 @@ static void uic_ack_irq(struct irq_data *d)
 static void uic_mask_ack_irq(struct irq_data *d)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	u32 er, sr;
 
@@ -126,7 +124,7 @@ static void uic_mask_ack_irq(struct irq_data *d)
 static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 {
 	struct uic *uic = irq_data_get_irq_chip_data(d);
-	unsigned int src = uic_irq_to_hw(d->irq);
+	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 	int trigger, polarity;
 	u32 tr, pr, mask;
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index b03d348b19a5..76e87245bbfe 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -58,7 +58,7 @@ static inline void icp_hv_set_qirr(int n_cpu , u8 value)
 
 static void icp_hv_eoi(struct irq_data *d)
 {
-	unsigned int hw_irq = (unsigned int)irq_data_to_hw(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 
 	iosync();
 	icp_hv_set_xirr((xics_pop_cppr() << 24) | hw_irq);
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index be5e3d748edb..d9e0515592c4 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -80,7 +80,7 @@ static void icp_native_set_cpu_priority(unsigned char cppr)
 
 static void icp_native_eoi(struct irq_data *d)
 {
-	unsigned int hw_irq = (unsigned int)irq_data_to_hw(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 
 	iosync();
 	icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
index 610c148fedcc..c782f85cf7e4 100644
--- a/arch/powerpc/sysdev/xics/ics-rtas.c
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -38,7 +38,7 @@ static struct ics ics_rtas = {
 
 static void ics_rtas_unmask_irq(struct irq_data *d)
 {
-	unsigned int hw_irq = (unsigned int)irq_data_to_hw(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 	int call_status;
 	int server;
 
@@ -109,7 +109,7 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq)
 
 static void ics_rtas_mask_irq(struct irq_data *d)
 {
-	unsigned int hw_irq = (unsigned int)irq_data_to_hw(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 
 	pr_devel("xics: mask virq %d [hw 0x%x]\n", d->irq, hw_irq);
 
@@ -122,7 +122,7 @@ static int ics_rtas_set_affinity(struct irq_data *d,
 				 const struct cpumask *cpumask,
 				 bool force)
 {
-	unsigned int hw_irq = (unsigned int)irq_data_to_hw(d);
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 	int status;
 	int xics_status[2];
 	int irq_server;
@@ -171,7 +171,7 @@ static struct irq_chip ics_rtas_irq_chip = {
 
 static int ics_rtas_map(struct ics *ics, unsigned int virq)
 {
-	unsigned int hw_irq = (unsigned int)irq_map[virq].hwirq;
+	unsigned int hw_irq = (unsigned int)virq_to_hw(virq);
 	int status[2];
 	int rc;
 
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index c58844d72426..a0576b705ddd 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -240,9 +240,9 @@ void xics_migrate_irqs_away(void)
 		/* We can't set affinity on ISA interrupts */
 		if (virq < NUM_ISA_INTERRUPTS)
 			continue;
-		if (irq_map[virq].host != xics_host)
+		if (virq_to_host(virq) != xics_host)
 			continue;
-		irq = (unsigned int)irq_map[virq].hwirq;
+		irq = (unsigned int)virq_to_hw(virq);
 		/* We need to get IPIs still. */
 		if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
 			continue;
diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c
index 0a13fc19e287..6183799754af 100644
--- a/arch/powerpc/sysdev/xilinx_intc.c
+++ b/arch/powerpc/sysdev/xilinx_intc.c
@@ -71,7 +71,7 @@ static unsigned char xilinx_intc_map_senses[] = {
  */
 static void xilinx_intc_mask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void * regs = irq_data_get_irq_chip_data(d);
 	pr_debug("mask: %d\n", irq);
 	out_be32(regs + XINTC_CIE, 1 << irq);
@@ -87,7 +87,7 @@ static int xilinx_intc_set_type(struct irq_data *d, unsigned int flow_type)
  */
 static void xilinx_intc_level_unmask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void * regs = irq_data_get_irq_chip_data(d);
 	pr_debug("unmask: %d\n", irq);
 	out_be32(regs + XINTC_SIE, 1 << irq);
@@ -112,7 +112,7 @@ static struct irq_chip xilinx_intc_level_irqchip = {
  */
 static void xilinx_intc_edge_unmask(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void *regs = irq_data_get_irq_chip_data(d);
 	pr_debug("unmask: %d\n", irq);
 	out_be32(regs + XINTC_SIE, 1 << irq);
@@ -120,7 +120,7 @@ static void xilinx_intc_edge_unmask(struct irq_data *d)
 
 static void xilinx_intc_edge_ack(struct irq_data *d)
 {
-	int irq = virq_to_hw(d->irq);
+	int irq = irqd_to_hwirq(d);
 	void * regs = irq_data_get_irq_chip_data(d);
 	pr_debug("ack: %d\n", irq);
 	out_be32(regs + XINTC_IAR, 1 << irq);
-- 
cgit v1.2.3


From 1822bc9093e05059e4144d6041b0f5450ad275e1 Mon Sep 17 00:00:00 2001
From: Jan Glauber <jang@linux.vnet.ibm.com>
Date: Tue, 19 Apr 2011 21:29:14 +0200
Subject: crypto: s390 - extend crypto facility check

The specification which crypto facility is required for an algorithm is added
as a parameter to the availability check which is done before an algorithm is
registered. With this change it is easier to add new algorithms that require
different facilities.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/crypto/aes_s390.c    |  6 +++---
 arch/s390/crypto/crypt_s390.h  | 14 +++++++++++---
 arch/s390/crypto/des_s390.c    |  4 ++--
 arch/s390/crypto/prng.c        |  2 +-
 arch/s390/crypto/sha1_s390.c   |  2 +-
 arch/s390/crypto/sha256_s390.c |  2 +-
 arch/s390/crypto/sha512_s390.c |  2 +-
 7 files changed, 20 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 58f46734465f..fc97b949254f 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -508,11 +508,11 @@ static int __init aes_s390_init(void)
 {
 	int ret;
 
-	if (crypt_s390_func_available(KM_AES_128_ENCRYPT))
+	if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA))
 		keylen_flag |= AES_KEYLEN_128;
-	if (crypt_s390_func_available(KM_AES_192_ENCRYPT))
+	if (crypt_s390_func_available(KM_AES_192_ENCRYPT, CRYPT_S390_MSA))
 		keylen_flag |= AES_KEYLEN_192;
-	if (crypt_s390_func_available(KM_AES_256_ENCRYPT))
+	if (crypt_s390_func_available(KM_AES_256_ENCRYPT, CRYPT_S390_MSA))
 		keylen_flag |= AES_KEYLEN_256;
 
 	if (!keylen_flag)
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index 7ee9a1b4ad9f..4b8c96cab252 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -24,6 +24,10 @@
 #define CRYPT_S390_PRIORITY 300
 #define CRYPT_S390_COMPOSITE_PRIORITY 400
 
+#define CRYPT_S390_MSA	0x1
+#define CRYPT_S390_MSA3	0x2
+#define CRYPT_S390_MSA4	0x4
+
 /* s390 cryptographic operations */
 enum crypt_s390_operations {
 	CRYPT_S390_KM   = 0x0100,
@@ -291,13 +295,17 @@ static inline int crypt_s390_kmac(long func, void *param,
  *
  * Returns 1 if func available; 0 if func or op in general not available
  */
-static inline int crypt_s390_func_available(int func)
+static inline int crypt_s390_func_available(int func,
+					    unsigned int facility_mask)
 {
 	unsigned char status[16];
 	int ret;
 
-	/* check if CPACF facility (bit 17) is available */
-	if (!test_facility(17))
+	if (facility_mask & CRYPT_S390_MSA && !test_facility(17))
+		return 0;
+	if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76))
+		return 0;
+	if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77))
 		return 0;
 
 	switch (func & CRYPT_S390_OP_MASK) {
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index cc5420118393..1121e73115af 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -381,8 +381,8 @@ static int des_s390_init(void)
 {
 	int ret;
 
-	if (!crypt_s390_func_available(KM_DEA_ENCRYPT) ||
-	    !crypt_s390_func_available(KM_TDEA_192_ENCRYPT))
+	if (!crypt_s390_func_available(KM_DEA_ENCRYPT, CRYPT_S390_MSA) ||
+	    !crypt_s390_func_available(KM_TDEA_192_ENCRYPT, CRYPT_S390_MSA))
 		return -EOPNOTSUPP;
 
 	ret = crypto_register_alg(&des_alg);
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 975e3ab13cb5..0fc8115b0aea 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -166,7 +166,7 @@ static int __init prng_init(void)
 	int ret;
 
 	/* check if the CPU has a PRNG */
-	if (!crypt_s390_func_available(KMC_PRNG))
+	if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA))
 		return -EOPNOTSUPP;
 
 	if (prng_chunk_size < 8)
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index f6de7826c979..e9868c6e0a08 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -90,7 +90,7 @@ static struct shash_alg alg = {
 
 static int __init sha1_s390_init(void)
 {
-	if (!crypt_s390_func_available(KIMD_SHA_1))
+	if (!crypt_s390_func_available(KIMD_SHA_1, CRYPT_S390_MSA))
 		return -EOPNOTSUPP;
 	return crypto_register_shash(&alg);
 }
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 61a7db372121..5ed8d64fc2ed 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -86,7 +86,7 @@ static struct shash_alg alg = {
 
 static int sha256_s390_init(void)
 {
-	if (!crypt_s390_func_available(KIMD_SHA_256))
+	if (!crypt_s390_func_available(KIMD_SHA_256, CRYPT_S390_MSA))
 		return -EOPNOTSUPP;
 
 	return crypto_register_shash(&alg);
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 4bf73d0dc525..32a81383b69c 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -132,7 +132,7 @@ static int __init init(void)
 {
 	int ret;
 
-	if (!crypt_s390_func_available(KIMD_SHA_512))
+	if (!crypt_s390_func_available(KIMD_SHA_512, CRYPT_S390_MSA))
 		return -EOPNOTSUPP;
 	if ((ret = crypto_register_shash(&sha512_alg)) < 0)
 		goto out;
-- 
cgit v1.2.3


From 98971f8439b1bb9a61682fe24a865ddd25167a6b Mon Sep 17 00:00:00 2001
From: Jan Glauber <jang@linux.vnet.ibm.com>
Date: Tue, 19 Apr 2011 21:29:15 +0200
Subject: crypto: s390 - cleanup DES code

Remove a stale file left over from 1efbd15c3bc2b79d33e033e898211109c32159fa
and and cleanup the DES code a bit to make it easier to add new code.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/crypto/des_check_key.c | 132 --------------------------
 arch/s390/crypto/des_s390.c      | 199 +++++++++++++++++++--------------------
 2 files changed, 96 insertions(+), 235 deletions(-)
 delete mode 100644 arch/s390/crypto/des_check_key.c

(limited to 'arch')

diff --git a/arch/s390/crypto/des_check_key.c b/arch/s390/crypto/des_check_key.c
deleted file mode 100644
index 5706af266442..000000000000
--- a/arch/s390/crypto/des_check_key.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Function for checking keys for the DES and Tripple DES Encryption
- * algorithms.
- *
- * Originally released as descore by Dana L. How <how@isl.stanford.edu>.
- * Modified by Raimar Falke <rf13@inf.tu-dresden.de> for the Linux-Kernel.
- * Derived from Cryptoapi and Nettle implementations, adapted for in-place
- * scatterlist interface.  Changed LGPL to GPL per section 3 of the LGPL.
- *
- * s390 Version:
- *   Copyright IBM Corp. 2003
- *   Author(s): Thomas Spatzier
- *		Jan Glauber (jan.glauber@de.ibm.com)
- *
- * Derived from "crypto/des.c"
- *   Copyright (c) 1992 Dana L. How.
- *   Copyright (c) Raimar Falke <rf13@inf.tu-dresden.de>
- *   Copyright (c) Gisle Sflensminde <gisle@ii.uib.no>
- *   Copyright (C) 2001 Niels Mvller.
- *   Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/crypto.h>
-#include "crypto_des.h"
-
-#define ROR(d,c,o)	((d) = (d) >> (c) | (d) << (o))
-
-static const u8 parity[] = {
-	8,1,0,8,0,8,8,0,0,8,8,0,8,0,2,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,3,
-	0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,
-	0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,
-	8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,
-	0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,
-	8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,
-	8,0,0,8,0,8,8,0,0,8,8,0,8,0,0,8,0,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,
-	4,8,8,0,8,0,0,8,8,0,0,8,0,8,8,0,8,5,0,8,0,8,8,0,0,8,8,0,8,0,6,8,
-};
-
-/*
- * RFC2451: Weak key checks SHOULD be performed.
- */
-int
-crypto_des_check_key(const u8 *key, unsigned int keylen, u32 *flags)
-{
-	u32 n, w;
-
-	n  = parity[key[0]]; n <<= 4;
-	n |= parity[key[1]]; n <<= 4;
-	n |= parity[key[2]]; n <<= 4;
-	n |= parity[key[3]]; n <<= 4;
-	n |= parity[key[4]]; n <<= 4;
-	n |= parity[key[5]]; n <<= 4;
-	n |= parity[key[6]]; n <<= 4;
-	n |= parity[key[7]];
-	w = 0x88888888L;
-
-	if ((*flags & CRYPTO_TFM_REQ_WEAK_KEY)
-	    && !((n - (w >> 3)) & w)) {  /* 1 in 10^10 keys passes this test */
-		if (n < 0x41415151) {
-			if (n < 0x31312121) {
-				if (n < 0x14141515) {
-					/* 01 01 01 01 01 01 01 01 */
-					if (n == 0x11111111) goto weak;
-					/* 01 1F 01 1F 01 0E 01 0E */
-					if (n == 0x13131212) goto weak;
-				} else {
-					/* 01 E0 01 E0 01 F1 01 F1 */
-					if (n == 0x14141515) goto weak;
-					/* 01 FE 01 FE 01 FE 01 FE */
-					if (n == 0x16161616) goto weak;
-				}
-			} else {
-				if (n < 0x34342525) {
-					/* 1F 01 1F 01 0E 01 0E 01 */
-					if (n == 0x31312121) goto weak;
-					/* 1F 1F 1F 1F 0E 0E 0E 0E (?) */
-					if (n == 0x33332222) goto weak;
-				} else {
-					/* 1F E0 1F E0 0E F1 0E F1 */
-					if (n == 0x34342525) goto weak;
-					/* 1F FE 1F FE 0E FE 0E FE */
-					if (n == 0x36362626) goto weak;
-				}
-			}
-		} else {
-			if (n < 0x61616161) {
-				if (n < 0x44445555) {
-					/* E0 01 E0 01 F1 01 F1 01 */
-					if (n == 0x41415151) goto weak;
-					/* E0 1F E0 1F F1 0E F1 0E */
-					if (n == 0x43435252) goto weak;
-				} else {
-					/* E0 E0 E0 E0 F1 F1 F1 F1 (?) */
-					if (n == 0x44445555) goto weak;
-					/* E0 FE E0 FE F1 FE F1 FE */
-					if (n == 0x46465656) goto weak;
-				}
-			} else {
-				if (n < 0x64646565) {
-					/* FE 01 FE 01 FE 01 FE 01 */
-					if (n == 0x61616161) goto weak;
-					/* FE 1F FE 1F FE 0E FE 0E */
-					if (n == 0x63636262) goto weak;
-				} else {
-					/* FE E0 FE E0 FE F1 FE F1 */
-					if (n == 0x64646565) goto weak;
-					/* FE FE FE FE FE FE FE FE */
-					if (n == 0x66666666) goto weak;
-				}
-			}
-		}
-	}
-	return 0;
-weak:
-	*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-	return -EINVAL;
-}
-
-EXPORT_SYMBOL(crypto_des_check_key);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Key Check function for DES &  DES3 Cipher Algorithms");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 1121e73115af..c36a01d70e04 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -22,22 +22,17 @@
 
 #include "crypt_s390.h"
 
-#define DES3_192_KEY_SIZE	(3 * DES_KEY_SIZE)
+#define DES3_KEY_SIZE	(3 * DES_KEY_SIZE)
 
-struct crypt_s390_des_ctx {
+struct s390_des_ctx {
 	u8 iv[DES_BLOCK_SIZE];
-	u8 key[DES_KEY_SIZE];
-};
-
-struct crypt_s390_des3_192_ctx {
-	u8 iv[DES_BLOCK_SIZE];
-	u8 key[DES3_192_KEY_SIZE];
+	u8 key[DES3_KEY_SIZE];
 };
 
 static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
-		      unsigned int keylen)
+		      unsigned int key_len)
 {
-	struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 *flags = &tfm->crt_flags;
 	u32 tmp[DES_EXPKEY_WORDS];
 
@@ -47,22 +42,22 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
 		return -EINVAL;
 	}
 
-	memcpy(dctx->key, key, keylen);
+	memcpy(ctx->key, key, key_len);
 	return 0;
 }
 
 static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypt_s390_km(KM_DEA_ENCRYPT, dctx->key, out, in, DES_BLOCK_SIZE);
+	crypt_s390_km(KM_DEA_ENCRYPT, ctx->key, out, in, DES_BLOCK_SIZE);
 }
 
 static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
-	struct crypt_s390_des_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypt_s390_km(KM_DEA_DECRYPT, dctx->key, out, in, DES_BLOCK_SIZE);
+	crypt_s390_km(KM_DEA_DECRYPT, ctx->key, out, in, DES_BLOCK_SIZE);
 }
 
 static struct crypto_alg des_alg = {
@@ -71,7 +66,7 @@ static struct crypto_alg des_alg = {
 	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des_ctx),
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(des_alg.cra_list),
 	.cra_u			=	{
@@ -86,7 +81,7 @@ static struct crypto_alg des_alg = {
 };
 
 static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
-			    void *param, struct blkcipher_walk *walk)
+			    u8 *key, struct blkcipher_walk *walk)
 {
 	int ret = blkcipher_walk_virt(desc, walk);
 	unsigned int nbytes;
@@ -97,7 +92,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
 		u8 *out = walk->dst.virt.addr;
 		u8 *in = walk->src.virt.addr;
 
-		ret = crypt_s390_km(func, param, out, in, n);
+		ret = crypt_s390_km(func, key, out, in, n);
 		BUG_ON((ret < 0) || (ret != n));
 
 		nbytes &= DES_BLOCK_SIZE - 1;
@@ -108,7 +103,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
 }
 
 static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
-			    void *param, struct blkcipher_walk *walk)
+			    u8 *iv, struct blkcipher_walk *walk)
 {
 	int ret = blkcipher_walk_virt(desc, walk);
 	unsigned int nbytes = walk->nbytes;
@@ -116,20 +111,20 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
 	if (!nbytes)
 		goto out;
 
-	memcpy(param, walk->iv, DES_BLOCK_SIZE);
+	memcpy(iv, walk->iv, DES_BLOCK_SIZE);
 	do {
 		/* only use complete blocks */
 		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
 		u8 *out = walk->dst.virt.addr;
 		u8 *in = walk->src.virt.addr;
 
-		ret = crypt_s390_kmc(func, param, out, in, n);
+		ret = crypt_s390_kmc(func, iv, out, in, n);
 		BUG_ON((ret < 0) || (ret != n));
 
 		nbytes &= DES_BLOCK_SIZE - 1;
 		ret = blkcipher_walk_done(desc, walk, nbytes);
 	} while ((nbytes = walk->nbytes));
-	memcpy(walk->iv, param, DES_BLOCK_SIZE);
+	memcpy(walk->iv, iv, DES_BLOCK_SIZE);
 
 out:
 	return ret;
@@ -139,22 +134,22 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, sctx->key, &walk);
+	return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, ctx->key, &walk);
 }
 
 static int ecb_des_decrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, KM_DEA_DECRYPT, sctx->key, &walk);
+	return ecb_desall_crypt(desc, KM_DEA_DECRYPT, ctx->key, &walk);
 }
 
 static struct crypto_alg ecb_des_alg = {
@@ -163,7 +158,7 @@ static struct crypto_alg ecb_des_alg = {
 	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des_ctx),
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(ecb_des_alg.cra_list),
@@ -182,22 +177,22 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, sctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, ctx->iv, &walk);
 }
 
 static int cbc_des_decrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct crypt_s390_des_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, sctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, ctx->iv, &walk);
 }
 
 static struct crypto_alg cbc_des_alg = {
@@ -206,7 +201,7 @@ static struct crypto_alg cbc_des_alg = {
 	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des_ctx),
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(cbc_des_alg.cra_list),
@@ -235,10 +230,10 @@ static struct crypto_alg cbc_des_alg = {
  *   property.
  *
  */
-static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
-			   unsigned int keylen)
+static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
+		       unsigned int key_len)
 {
-	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 *flags = &tfm->crt_flags;
 
 	if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
@@ -248,136 +243,134 @@ static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key,
 		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
-	memcpy(dctx->key, key, keylen);
+	memcpy(ctx->key, key, key_len);
 	return 0;
 }
 
-static void des3_192_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypt_s390_km(KM_TDEA_192_ENCRYPT, dctx->key, dst, (void*)src,
-		      DES_BLOCK_SIZE);
+	crypt_s390_km(KM_TDEA_192_ENCRYPT, ctx->key, dst, src, DES_BLOCK_SIZE);
 }
 
-static void des3_192_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct crypt_s390_des3_192_ctx *dctx = crypto_tfm_ctx(tfm);
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypt_s390_km(KM_TDEA_192_DECRYPT, dctx->key, dst, (void*)src,
-		      DES_BLOCK_SIZE);
+	crypt_s390_km(KM_TDEA_192_DECRYPT, ctx->key, dst, src, DES_BLOCK_SIZE);
 }
 
-static struct crypto_alg des3_192_alg = {
+static struct crypto_alg des3_alg = {
 	.cra_name		=	"des3_ede",
 	.cra_driver_name	=	"des3_ede-s390",
 	.cra_priority		=	CRYPT_S390_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(des3_192_alg.cra_list),
+	.cra_list		=	LIST_HEAD_INIT(des3_alg.cra_list),
 	.cra_u			=	{
 		.cipher = {
-			.cia_min_keysize	=	DES3_192_KEY_SIZE,
-			.cia_max_keysize	=	DES3_192_KEY_SIZE,
-			.cia_setkey		=	des3_192_setkey,
-			.cia_encrypt		=	des3_192_encrypt,
-			.cia_decrypt		=	des3_192_decrypt,
+			.cia_min_keysize	=	DES3_KEY_SIZE,
+			.cia_max_keysize	=	DES3_KEY_SIZE,
+			.cia_setkey		=	des3_setkey,
+			.cia_encrypt		=	des3_encrypt,
+			.cia_decrypt		=	des3_decrypt,
 		}
 	}
 };
 
-static int ecb_des3_192_encrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src, unsigned int nbytes)
+static int ecb_des3_encrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
 {
-	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, sctx->key, &walk);
+	return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, ctx->key, &walk);
 }
 
-static int ecb_des3_192_decrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src, unsigned int nbytes)
+static int ecb_des3_decrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
 {
-	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, sctx->key, &walk);
+	return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, ctx->key, &walk);
 }
 
-static struct crypto_alg ecb_des3_192_alg = {
+static struct crypto_alg ecb_des3_alg = {
 	.cra_name		=	"ecb(des3_ede)",
 	.cra_driver_name	=	"ecb-des3_ede-s390",
 	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(
-						ecb_des3_192_alg.cra_list),
+						ecb_des3_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
-			.min_keysize		=	DES3_192_KEY_SIZE,
-			.max_keysize		=	DES3_192_KEY_SIZE,
-			.setkey			=	des3_192_setkey,
-			.encrypt		=	ecb_des3_192_encrypt,
-			.decrypt		=	ecb_des3_192_decrypt,
+			.min_keysize		=	DES3_KEY_SIZE,
+			.max_keysize		=	DES3_KEY_SIZE,
+			.setkey			=	des3_setkey,
+			.encrypt		=	ecb_des3_encrypt,
+			.decrypt		=	ecb_des3_decrypt,
 		}
 	}
 };
 
-static int cbc_des3_192_encrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src, unsigned int nbytes)
+static int cbc_des3_encrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
 {
-	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, sctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, ctx->iv, &walk);
 }
 
-static int cbc_des3_192_decrypt(struct blkcipher_desc *desc,
-				struct scatterlist *dst,
-				struct scatterlist *src, unsigned int nbytes)
+static int cbc_des3_decrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
 {
-	struct crypt_s390_des3_192_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, sctx->iv, &walk);
+	return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, ctx->iv, &walk);
 }
 
-static struct crypto_alg cbc_des3_192_alg = {
+static struct crypto_alg cbc_des3_alg = {
 	.cra_name		=	"cbc(des3_ede)",
 	.cra_driver_name	=	"cbc-des3_ede-s390",
 	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct crypt_s390_des3_192_ctx),
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(
-						cbc_des3_192_alg.cra_list),
+						cbc_des3_alg.cra_list),
 	.cra_u			=	{
 		.blkcipher = {
-			.min_keysize		=	DES3_192_KEY_SIZE,
-			.max_keysize		=	DES3_192_KEY_SIZE,
+			.min_keysize		=	DES3_KEY_SIZE,
+			.max_keysize		=	DES3_KEY_SIZE,
 			.ivsize			=	DES_BLOCK_SIZE,
-			.setkey			=	des3_192_setkey,
-			.encrypt		=	cbc_des3_192_encrypt,
-			.decrypt		=	cbc_des3_192_decrypt,
+			.setkey			=	des3_setkey,
+			.encrypt		=	cbc_des3_encrypt,
+			.decrypt		=	cbc_des3_decrypt,
 		}
 	}
 };
 
-static int des_s390_init(void)
+static int __init des_s390_init(void)
 {
 	int ret;
 
@@ -394,23 +387,23 @@ static int des_s390_init(void)
 	ret = crypto_register_alg(&cbc_des_alg);
 	if (ret)
 		goto cbc_des_err;
-	ret = crypto_register_alg(&des3_192_alg);
+	ret = crypto_register_alg(&des3_alg);
 	if (ret)
-		goto des3_192_err;
-	ret = crypto_register_alg(&ecb_des3_192_alg);
+		goto des3_err;
+	ret = crypto_register_alg(&ecb_des3_alg);
 	if (ret)
-		goto ecb_des3_192_err;
-	ret = crypto_register_alg(&cbc_des3_192_alg);
+		goto ecb_des3_err;
+	ret = crypto_register_alg(&cbc_des3_alg);
 	if (ret)
-		goto cbc_des3_192_err;
+		goto cbc_des3_err;
 out:
 	return ret;
 
-cbc_des3_192_err:
-	crypto_unregister_alg(&ecb_des3_192_alg);
-ecb_des3_192_err:
-	crypto_unregister_alg(&des3_192_alg);
-des3_192_err:
+cbc_des3_err:
+	crypto_unregister_alg(&ecb_des3_alg);
+ecb_des3_err:
+	crypto_unregister_alg(&des3_alg);
+des3_err:
 	crypto_unregister_alg(&cbc_des_alg);
 cbc_des_err:
 	crypto_unregister_alg(&ecb_des_alg);
@@ -422,9 +415,9 @@ des_err:
 
 static void __exit des_s390_exit(void)
 {
-	crypto_unregister_alg(&cbc_des3_192_alg);
-	crypto_unregister_alg(&ecb_des3_192_alg);
-	crypto_unregister_alg(&des3_192_alg);
+	crypto_unregister_alg(&cbc_des3_alg);
+	crypto_unregister_alg(&ecb_des3_alg);
+	crypto_unregister_alg(&des3_alg);
 	crypto_unregister_alg(&cbc_des_alg);
 	crypto_unregister_alg(&ecb_des_alg);
 	crypto_unregister_alg(&des_alg);
-- 
cgit v1.2.3


From 99d97222150a24e6096805530e141af94183b9a1 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Tue, 26 Apr 2011 16:12:42 +1000
Subject: crypto: s390 - add System z hardware support for XTS mode

This patch adds System z hardware acceleration support for the AES XTS mode.
The hardware support is available beginning with System z196.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/crypto/aes_s390.c   | 233 ++++++++++++++++++++++++++++++++++++++++++
 arch/s390/crypto/crypt_s390.h |  31 ++++++
 drivers/crypto/Kconfig        |  22 ++--
 3 files changed, 272 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index fc97b949254f..8230e8605deb 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -45,6 +45,24 @@ struct s390_aes_ctx {
 	} fallback;
 };
 
+struct pcc_param {
+	u8 key[32];
+	u8 tweak[16];
+	u8 block[16];
+	u8 bit[16];
+	u8 xts[16];
+};
+
+struct s390_xts_ctx {
+	u8 key[32];
+	u8 xts_param[16];
+	struct pcc_param pcc;
+	long enc;
+	long dec;
+	int key_len;
+	struct crypto_blkcipher *fallback;
+};
+
 /*
  * Check if the key_len is supported by the HW.
  * Returns 0 if it is, a positive number if it is not and software fallback is
@@ -504,8 +522,211 @@ static struct crypto_alg cbc_aes_alg = {
 	}
 };
 
+static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
+				   unsigned int len)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	unsigned int ret;
+
+	xts_ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+	xts_ctx->fallback->base.crt_flags |= (tfm->crt_flags &
+			CRYPTO_TFM_REQ_MASK);
+
+	ret = crypto_blkcipher_setkey(xts_ctx->fallback, key, len);
+	if (ret) {
+		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+		tfm->crt_flags |= (xts_ctx->fallback->base.crt_flags &
+				CRYPTO_TFM_RES_MASK);
+	}
+	return ret;
+}
+
+static int xts_fallback_decrypt(struct blkcipher_desc *desc,
+		struct scatterlist *dst, struct scatterlist *src,
+		unsigned int nbytes)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_blkcipher *tfm;
+	unsigned int ret;
+
+	tfm = desc->tfm;
+	desc->tfm = xts_ctx->fallback;
+
+	ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes);
+
+	desc->tfm = tfm;
+	return ret;
+}
+
+static int xts_fallback_encrypt(struct blkcipher_desc *desc,
+		struct scatterlist *dst, struct scatterlist *src,
+		unsigned int nbytes)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_blkcipher *tfm;
+	unsigned int ret;
+
+	tfm = desc->tfm;
+	desc->tfm = xts_ctx->fallback;
+
+	ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes);
+
+	desc->tfm = tfm;
+	return ret;
+}
+
+static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+
+	switch (key_len) {
+	case 32:
+		xts_ctx->enc = KM_XTS_128_ENCRYPT;
+		xts_ctx->dec = KM_XTS_128_DECRYPT;
+		memcpy(xts_ctx->key + 16, in_key, 16);
+		memcpy(xts_ctx->pcc.key + 16, in_key + 16, 16);
+		break;
+	case 48:
+		xts_ctx->enc = 0;
+		xts_ctx->dec = 0;
+		xts_fallback_setkey(tfm, in_key, key_len);
+		break;
+	case 64:
+		xts_ctx->enc = KM_XTS_256_ENCRYPT;
+		xts_ctx->dec = KM_XTS_256_DECRYPT;
+		memcpy(xts_ctx->key, in_key, 32);
+		memcpy(xts_ctx->pcc.key, in_key + 32, 32);
+		break;
+	default:
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+	xts_ctx->key_len = key_len;
+	return 0;
+}
+
+static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
+			 struct s390_xts_ctx *xts_ctx,
+			 struct blkcipher_walk *walk)
+{
+	unsigned int offset = (xts_ctx->key_len >> 1) & 0x10;
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int nbytes = walk->nbytes;
+	unsigned int n;
+	u8 *in, *out;
+	void *param;
+
+	if (!nbytes)
+		goto out;
+
+	memset(xts_ctx->pcc.block, 0, sizeof(xts_ctx->pcc.block));
+	memset(xts_ctx->pcc.bit, 0, sizeof(xts_ctx->pcc.bit));
+	memset(xts_ctx->pcc.xts, 0, sizeof(xts_ctx->pcc.xts));
+	memcpy(xts_ctx->pcc.tweak, walk->iv, sizeof(xts_ctx->pcc.tweak));
+	param = xts_ctx->pcc.key + offset;
+	ret = crypt_s390_pcc(func, param);
+	BUG_ON(ret < 0);
+
+	memcpy(xts_ctx->xts_param, xts_ctx->pcc.xts, 16);
+	param = xts_ctx->key + offset;
+	do {
+		/* only use complete blocks */
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+
+		ret = crypt_s390_km(func, param, out, in, n);
+		BUG_ON(ret < 0 || ret != n);
+
+		nbytes &= AES_BLOCK_SIZE - 1;
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	} while ((nbytes = walk->nbytes));
+out:
+	return ret;
+}
+
+static int xts_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	if (unlikely(xts_ctx->key_len == 48))
+		return xts_fallback_encrypt(desc, dst, src, nbytes);
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return xts_aes_crypt(desc, xts_ctx->enc, xts_ctx, &walk);
+}
+
+static int xts_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	if (unlikely(xts_ctx->key_len == 48))
+		return xts_fallback_decrypt(desc, dst, src, nbytes);
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return xts_aes_crypt(desc, xts_ctx->dec, xts_ctx, &walk);
+}
+
+static int xts_fallback_init(struct crypto_tfm *tfm)
+{
+	const char *name = tfm->__crt_alg->cra_name;
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+
+	xts_ctx->fallback = crypto_alloc_blkcipher(name, 0,
+			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+
+	if (IS_ERR(xts_ctx->fallback)) {
+		pr_err("Allocating XTS fallback algorithm %s failed\n",
+		       name);
+		return PTR_ERR(xts_ctx->fallback);
+	}
+	return 0;
+}
+
+static void xts_fallback_exit(struct crypto_tfm *tfm)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_blkcipher(xts_ctx->fallback);
+	xts_ctx->fallback = NULL;
+}
+
+static struct crypto_alg xts_aes_alg = {
+	.cra_name		=	"xts(aes)",
+	.cra_driver_name	=	"xts-aes-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
+					CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct s390_xts_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(xts_aes_alg.cra_list),
+	.cra_init		=	xts_fallback_init,
+	.cra_exit		=	xts_fallback_exit,
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	2 * AES_MIN_KEY_SIZE,
+			.max_keysize		=	2 * AES_MAX_KEY_SIZE,
+			.ivsize			=	AES_BLOCK_SIZE,
+			.setkey			=	xts_aes_set_key,
+			.encrypt		=	xts_aes_encrypt,
+			.decrypt		=	xts_aes_decrypt,
+		}
+	}
+};
+
 static int __init aes_s390_init(void)
 {
+	unsigned long long facility_bits[2];
 	int ret;
 
 	if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA))
@@ -535,9 +756,20 @@ static int __init aes_s390_init(void)
 	if (ret)
 		goto cbc_aes_err;
 
+	if (crypt_s390_func_available(KM_XTS_128_ENCRYPT,
+			CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+	    crypt_s390_func_available(KM_XTS_256_ENCRYPT,
+			CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+		ret = crypto_register_alg(&xts_aes_alg);
+		if (ret)
+			goto xts_aes_err;
+	}
+
 out:
 	return ret;
 
+xts_aes_err:
+	crypto_unregister_alg(&cbc_aes_alg);
 cbc_aes_err:
 	crypto_unregister_alg(&ecb_aes_alg);
 ecb_aes_err:
@@ -548,6 +780,7 @@ aes_err:
 
 static void __exit aes_s390_fini(void)
 {
+	crypto_unregister_alg(&xts_aes_alg);
 	crypto_unregister_alg(&cbc_aes_alg);
 	crypto_unregister_alg(&ecb_aes_alg);
 	crypto_unregister_alg(&aes_alg);
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index 4b8c96cab252..7cbfaf080a59 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -55,6 +55,10 @@ enum crypt_s390_km_func {
 	KM_AES_192_DECRYPT  = CRYPT_S390_KM | 0x13 | 0x80,
 	KM_AES_256_ENCRYPT  = CRYPT_S390_KM | 0x14,
 	KM_AES_256_DECRYPT  = CRYPT_S390_KM | 0x14 | 0x80,
+	KM_XTS_128_ENCRYPT  = CRYPT_S390_KM | 0x32,
+	KM_XTS_128_DECRYPT  = CRYPT_S390_KM | 0x32 | 0x80,
+	KM_XTS_256_ENCRYPT  = CRYPT_S390_KM | 0x34,
+	KM_XTS_256_DECRYPT  = CRYPT_S390_KM | 0x34 | 0x80,
 };
 
 /*
@@ -334,4 +338,31 @@ static inline int crypt_s390_func_available(int func,
 	return (status[func >> 3] & (0x80 >> (func & 7))) != 0;
 }
 
+/**
+ * crypt_s390_pcc:
+ * @func: the function code passed to KM; see crypt_s390_km_func
+ * @param: address of parameter block; see POP for details on each func
+ *
+ * Executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for success.
+ */
+static inline int crypt_s390_pcc(long func, void *param)
+{
+	register long __func asm("0") = func & 0x7f; /* encrypt or decrypt */
+	register void *__param asm("1") = param;
+	int ret = -1;
+
+	asm volatile(
+		"0:	.insn	rre,0xb92c0000,0,0 \n" /* PCC opcode */
+		"1:	brc	1,0b \n" /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		: "+d" (ret)
+		: "d" (__func), "a" (__param) : "cc", "memory");
+	return ret;
+}
+
+
 #endif	/* _CRYPTO_ARCH_S390_CRYPT_S390_H */
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 7957acbf76a2..78df36adbe6c 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -131,20 +131,14 @@ config CRYPTO_AES_S390
 	select CRYPTO_BLKCIPHER
 	help
 	  This is the s390 hardware accelerated implementation of the
-	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
-	  algorithm.
-
-	  Rijndael appears to be consistently a very good performer in
-	  both hardware and software across a wide range of computing
-	  environments regardless of its use in feedback or non-feedback
-	  modes. Its key setup time is excellent, and its key agility is
-	  good. Rijndael's very low memory requirements make it very well
-	  suited for restricted-space environments, in which it also
-	  demonstrates excellent performance. Rijndael's operations are
-	  among the easiest to defend against power and timing attacks.
-
-	  On s390 the System z9-109 currently only supports the key size
-	  of 128 bit.
+	  AES cipher algorithms (FIPS-197).
+
+	  As of z9 the ECB and CBC modes are hardware accelerated
+	  for 128 bit keys.
+	  As of z10 the ECB and CBC modes are hardware accelerated
+	  for all AES key sizes.
+	  As of z196 the XTS mode is hardware accelerated for 256 and
+	  512 bit keys.
 
 config S390_PRNG
 	tristate "Pseudo random number generator device driver"
-- 
cgit v1.2.3


From df1309ce955a490eac6697a41159b43e24d35995 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Tue, 19 Apr 2011 21:29:18 +0200
Subject: crypto: s390 - add System z hardware support for GHASH

This patch adds System z hardware acceleration support for the GHASH
algorithm for GCM (Galois/Counter Mode).
The hardware support is available beginning with System z196.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/crypto/Makefile     |   1 +
 arch/s390/crypto/crypt_s390.h |   1 +
 arch/s390/crypto/ghash_s390.c | 162 ++++++++++++++++++++++++++++++++++++++++++
 drivers/crypto/Kconfig        |  10 +++
 4 files changed, 174 insertions(+)
 create mode 100644 arch/s390/crypto/ghash_s390.c

(limited to 'arch')

diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index 1cf81d77c5a5..7f0b7cda6259 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
 obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
 obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
 obj-$(CONFIG_S390_PRNG) += prng.o
+obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index 7cbfaf080a59..eb2e39186b6b 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -91,6 +91,7 @@ enum crypt_s390_kimd_func {
 	KIMD_SHA_1   = CRYPT_S390_KIMD | 1,
 	KIMD_SHA_256 = CRYPT_S390_KIMD | 2,
 	KIMD_SHA_512 = CRYPT_S390_KIMD | 3,
+	KIMD_GHASH   = CRYPT_S390_KIMD | 65,
 };
 
 /*
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
new file mode 100644
index 000000000000..b1bd170f24b1
--- /dev/null
+++ b/arch/s390/crypto/ghash_s390.c
@@ -0,0 +1,162 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the GHASH algorithm for GCM (Galois/Counter Mode).
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/module.h>
+
+#include "crypt_s390.h"
+
+#define GHASH_BLOCK_SIZE	16
+#define GHASH_DIGEST_SIZE	16
+
+struct ghash_ctx {
+	u8 icv[16];
+	u8 key[16];
+};
+
+struct ghash_desc_ctx {
+	u8 buffer[GHASH_BLOCK_SIZE];
+	u32 bytes;
+};
+
+static int ghash_init(struct shash_desc *desc)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	memset(dctx, 0, sizeof(*dctx));
+
+	return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *key, unsigned int keylen)
+{
+	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+
+	if (keylen != GHASH_BLOCK_SIZE) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
+	memset(ctx->icv, 0, GHASH_BLOCK_SIZE);
+
+	return 0;
+}
+
+static int ghash_update(struct shash_desc *desc,
+			 const u8 *src, unsigned int srclen)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+	unsigned int n;
+	u8 *buf = dctx->buffer;
+	int ret;
+
+	if (dctx->bytes) {
+		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+		n = min(srclen, dctx->bytes);
+		dctx->bytes -= n;
+		srclen -= n;
+
+		memcpy(pos, src, n);
+		src += n;
+
+		if (!dctx->bytes) {
+			ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf,
+					      GHASH_BLOCK_SIZE);
+			BUG_ON(ret != GHASH_BLOCK_SIZE);
+		}
+	}
+
+	n = srclen & ~(GHASH_BLOCK_SIZE - 1);
+	if (n) {
+		ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n);
+		BUG_ON(ret != n);
+		src += n;
+		srclen -= n;
+	}
+
+	if (srclen) {
+		dctx->bytes = GHASH_BLOCK_SIZE - srclen;
+		memcpy(buf, src, srclen);
+	}
+
+	return 0;
+}
+
+static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
+{
+	u8 *buf = dctx->buffer;
+	int ret;
+
+	if (dctx->bytes) {
+		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+		memset(pos, 0, dctx->bytes);
+
+		ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE);
+		BUG_ON(ret != GHASH_BLOCK_SIZE);
+	}
+
+	dctx->bytes = 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+
+	ghash_flush(ctx, dctx);
+	memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE);
+
+	return 0;
+}
+
+static struct shash_alg ghash_alg = {
+	.digestsize	= GHASH_DIGEST_SIZE,
+	.init		= ghash_init,
+	.update		= ghash_update,
+	.final		= ghash_final,
+	.setkey		= ghash_setkey,
+	.descsize	= sizeof(struct ghash_desc_ctx),
+	.base		= {
+		.cra_name		= "ghash",
+		.cra_driver_name	= "ghash-s390",
+		.cra_priority		= CRYPT_S390_PRIORITY,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= GHASH_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct ghash_ctx),
+		.cra_module		= THIS_MODULE,
+		.cra_list		= LIST_HEAD_INIT(ghash_alg.base.cra_list),
+	},
+};
+
+static int __init ghash_mod_init(void)
+{
+	if (!crypt_s390_func_available(KIMD_GHASH,
+				       CRYPT_S390_MSA | CRYPT_S390_MSA4))
+		return -EOPNOTSUPP;
+
+	return crypto_register_shash(&ghash_alg);
+}
+
+static void __exit ghash_mod_exit(void)
+{
+	crypto_unregister_shash(&ghash_alg);
+}
+
+module_init(ghash_mod_init);
+module_exit(ghash_mod_exit);
+
+MODULE_ALIAS("ghash");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation");
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 78df36adbe6c..d459cc7894d9 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -151,6 +151,16 @@ config S390_PRNG
 	  ANSI X9.17 standard. The PRNG is usable via the char device
 	  /dev/prandom.
 
+config CRYPTO_GHASH_S390
+	tristate "GHASH digest algorithm"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  This is the s390 hardware accelerated implementation of the
+	  GHASH message digest algorithm for GCM (Galois/Counter Mode).
+
+	  It is available as of z196.
+
 config CRYPTO_DEV_MV_CESA
 	tristate "Marvell's Cryptographic Engine"
 	depends on PLAT_ORION
-- 
cgit v1.2.3


From 0200f3ecc19660bebeabbcbaf212957fcf1dbf8f Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Wed, 4 May 2011 15:09:44 +1000
Subject: crypto: s390 - add System z hardware support for CTR mode

This patch adds System z hardware acceleration support for AES, DES
and 3DES in CTR mode. The hardware support is available starting with
System z196.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/crypto/aes_s390.c   | 146 +++++++++++++++++++++++++++++++++++-
 arch/s390/crypto/crypt_s390.h |  66 ++++++++++++++++-
 arch/s390/crypto/des_s390.c   | 169 +++++++++++++++++++++++++++++++++++++++++-
 drivers/crypto/Kconfig        |   8 +-
 4 files changed, 383 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 8230e8605deb..a9ce135893f8 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -31,7 +31,8 @@
 #define AES_KEYLEN_192		2
 #define AES_KEYLEN_256		4
 
-static char keylen_flag = 0;
+static u8 *ctrblk;
+static char keylen_flag;
 
 struct s390_aes_ctx {
 	u8 iv[AES_BLOCK_SIZE];
@@ -724,9 +725,128 @@ static struct crypto_alg xts_aes_alg = {
 	}
 };
 
+static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	switch (key_len) {
+	case 16:
+		sctx->enc = KMCTR_AES_128_ENCRYPT;
+		sctx->dec = KMCTR_AES_128_DECRYPT;
+		break;
+	case 24:
+		sctx->enc = KMCTR_AES_192_ENCRYPT;
+		sctx->dec = KMCTR_AES_192_DECRYPT;
+		break;
+	case 32:
+		sctx->enc = KMCTR_AES_256_ENCRYPT;
+		sctx->dec = KMCTR_AES_256_DECRYPT;
+		break;
+	}
+
+	return aes_set_key(tfm, in_key, key_len);
+}
+
+static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
+			 struct s390_aes_ctx *sctx, struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
+	unsigned int i, n, nbytes;
+	u8 buf[AES_BLOCK_SIZE];
+	u8 *out, *in;
+
+	if (!walk->nbytes)
+		return ret;
+
+	memcpy(ctrblk, walk->iv, AES_BLOCK_SIZE);
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+		while (nbytes >= AES_BLOCK_SIZE) {
+			/* only use complete blocks, max. PAGE_SIZE */
+			n = (nbytes > PAGE_SIZE) ? PAGE_SIZE :
+						 nbytes & ~(AES_BLOCK_SIZE - 1);
+			for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) {
+				memcpy(ctrblk + i, ctrblk + i - AES_BLOCK_SIZE,
+				       AES_BLOCK_SIZE);
+				crypto_inc(ctrblk + i, AES_BLOCK_SIZE);
+			}
+			ret = crypt_s390_kmctr(func, sctx->key, out, in, n, ctrblk);
+			BUG_ON(ret < 0 || ret != n);
+			if (n > AES_BLOCK_SIZE)
+				memcpy(ctrblk, ctrblk + n - AES_BLOCK_SIZE,
+				       AES_BLOCK_SIZE);
+			crypto_inc(ctrblk, AES_BLOCK_SIZE);
+			out += n;
+			in += n;
+			nbytes -= n;
+		}
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	}
+	/*
+	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
+	 */
+	if (nbytes) {
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+		ret = crypt_s390_kmctr(func, sctx->key, buf, in,
+				       AES_BLOCK_SIZE, ctrblk);
+		BUG_ON(ret < 0 || ret != AES_BLOCK_SIZE);
+		memcpy(out, buf, nbytes);
+		crypto_inc(ctrblk, AES_BLOCK_SIZE);
+		ret = blkcipher_walk_done(desc, walk, 0);
+	}
+	memcpy(walk->iv, ctrblk, AES_BLOCK_SIZE);
+	return ret;
+}
+
+static int ctr_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_aes_crypt(desc, sctx->enc, sctx, &walk);
+}
+
+static int ctr_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_aes_crypt(desc, sctx->dec, sctx, &walk);
+}
+
+static struct crypto_alg ctr_aes_alg = {
+	.cra_name		=	"ctr(aes)",
+	.cra_driver_name	=	"ctr-aes-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	1,
+	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(ctr_aes_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.ivsize			=	AES_BLOCK_SIZE,
+			.setkey			=	ctr_aes_set_key,
+			.encrypt		=	ctr_aes_encrypt,
+			.decrypt		=	ctr_aes_decrypt,
+		}
+	}
+};
+
 static int __init aes_s390_init(void)
 {
-	unsigned long long facility_bits[2];
 	int ret;
 
 	if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA))
@@ -765,9 +885,29 @@ static int __init aes_s390_init(void)
 			goto xts_aes_err;
 	}
 
+	if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT,
+				CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+	    crypt_s390_func_available(KMCTR_AES_192_ENCRYPT,
+				CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+	    crypt_s390_func_available(KMCTR_AES_256_ENCRYPT,
+				CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+		if (!ctrblk) {
+			ret = -ENOMEM;
+			goto ctr_aes_err;
+		}
+		ret = crypto_register_alg(&ctr_aes_alg);
+		if (ret) {
+			free_page((unsigned long) ctrblk);
+			goto ctr_aes_err;
+		}
+	}
+
 out:
 	return ret;
 
+ctr_aes_err:
+	crypto_unregister_alg(&xts_aes_alg);
 xts_aes_err:
 	crypto_unregister_alg(&cbc_aes_alg);
 cbc_aes_err:
@@ -780,6 +920,8 @@ aes_err:
 
 static void __exit aes_s390_fini(void)
 {
+	crypto_unregister_alg(&ctr_aes_alg);
+	free_page((unsigned long) ctrblk);
 	crypto_unregister_alg(&xts_aes_alg);
 	crypto_unregister_alg(&cbc_aes_alg);
 	crypto_unregister_alg(&ecb_aes_alg);
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index eb2e39186b6b..49676771bd66 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -34,7 +34,8 @@ enum crypt_s390_operations {
 	CRYPT_S390_KMC  = 0x0200,
 	CRYPT_S390_KIMD = 0x0300,
 	CRYPT_S390_KLMD = 0x0400,
-	CRYPT_S390_KMAC = 0x0500
+	CRYPT_S390_KMAC = 0x0500,
+	CRYPT_S390_KMCTR = 0x0600
 };
 
 /*
@@ -82,6 +83,26 @@ enum crypt_s390_kmc_func {
 	KMC_PRNG	     = CRYPT_S390_KMC | 0x43,
 };
 
+/*
+ * function codes for KMCTR (CIPHER MESSAGE WITH COUNTER)
+ * instruction
+ */
+enum crypt_s390_kmctr_func {
+	KMCTR_QUERY            = CRYPT_S390_KMCTR | 0x0,
+	KMCTR_DEA_ENCRYPT      = CRYPT_S390_KMCTR | 0x1,
+	KMCTR_DEA_DECRYPT      = CRYPT_S390_KMCTR | 0x1 | 0x80,
+	KMCTR_TDEA_128_ENCRYPT = CRYPT_S390_KMCTR | 0x2,
+	KMCTR_TDEA_128_DECRYPT = CRYPT_S390_KMCTR | 0x2 | 0x80,
+	KMCTR_TDEA_192_ENCRYPT = CRYPT_S390_KMCTR | 0x3,
+	KMCTR_TDEA_192_DECRYPT = CRYPT_S390_KMCTR | 0x3 | 0x80,
+	KMCTR_AES_128_ENCRYPT  = CRYPT_S390_KMCTR | 0x12,
+	KMCTR_AES_128_DECRYPT  = CRYPT_S390_KMCTR | 0x12 | 0x80,
+	KMCTR_AES_192_ENCRYPT  = CRYPT_S390_KMCTR | 0x13,
+	KMCTR_AES_192_DECRYPT  = CRYPT_S390_KMCTR | 0x13 | 0x80,
+	KMCTR_AES_256_ENCRYPT  = CRYPT_S390_KMCTR | 0x14,
+	KMCTR_AES_256_DECRYPT  = CRYPT_S390_KMCTR | 0x14 | 0x80,
+};
+
 /*
  * function codes for KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
  * instruction
@@ -292,6 +313,45 @@ static inline int crypt_s390_kmac(long func, void *param,
 	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
 }
 
+/**
+ * crypt_s390_kmctr:
+ * @func: the function code passed to KMCTR; see crypt_s390_kmctr_func
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ * @counter: address of counter value
+ *
+ * Executes the KMCTR (CIPHER MESSAGE WITH COUNTER) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of processed
+ * bytes for encryption/decryption funcs
+ */
+static inline int crypt_s390_kmctr(long func, void *param, u8 *dest,
+				 const u8 *src, long src_len, u8 *counter)
+{
+	register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
+	register void *__param asm("1") = param;
+	register const u8 *__src asm("2") = src;
+	register long __src_len asm("3") = src_len;
+	register u8 *__dest asm("4") = dest;
+	register u8 *__ctr asm("6") = counter;
+	int ret = -1;
+
+	asm volatile(
+		"0:	.insn	rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */
+		"1:	brc	1,0b \n" /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		: "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest),
+		  "+a" (__ctr)
+		: "d" (__func), "a" (__param) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
+}
+
 /**
  * crypt_s390_func_available:
  * @func: the function code of the specific function; 0 if op in general
@@ -329,6 +389,10 @@ static inline int crypt_s390_func_available(int func,
 	case CRYPT_S390_KMAC:
 		ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0);
 		break;
+	case CRYPT_S390_KMCTR:
+		ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0,
+				       NULL);
+		break;
 	default:
 		return 0;
 	}
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index c36a01d70e04..a52bfd124d86 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -3,7 +3,7 @@
  *
  * s390 implementation of the DES Cipher Algorithm.
  *
- * Copyright IBM Corp. 2003,2007
+ * Copyright IBM Corp. 2003,2011
  * Author(s): Thomas Spatzier
  *	      Jan Glauber (jan.glauber@de.ibm.com)
  *
@@ -24,6 +24,8 @@
 
 #define DES3_KEY_SIZE	(3 * DES_KEY_SIZE)
 
+static u8 *ctrblk;
+
 struct s390_des_ctx {
 	u8 iv[DES_BLOCK_SIZE];
 	u8 key[DES3_KEY_SIZE];
@@ -370,6 +372,143 @@ static struct crypto_alg cbc_des3_alg = {
 	}
 };
 
+static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
+			    struct s390_des_ctx *ctx, struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
+	unsigned int i, n, nbytes;
+	u8 buf[DES_BLOCK_SIZE];
+	u8 *out, *in;
+
+	memcpy(ctrblk, walk->iv, DES_BLOCK_SIZE);
+	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+		while (nbytes >= DES_BLOCK_SIZE) {
+			/* align to block size, max. PAGE_SIZE */
+			n = (nbytes > PAGE_SIZE) ? PAGE_SIZE :
+				nbytes & ~(DES_BLOCK_SIZE - 1);
+			for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
+				memcpy(ctrblk + i, ctrblk + i - DES_BLOCK_SIZE,
+				       DES_BLOCK_SIZE);
+				crypto_inc(ctrblk + i, DES_BLOCK_SIZE);
+			}
+			ret = crypt_s390_kmctr(func, ctx->key, out, in, n, ctrblk);
+			BUG_ON((ret < 0) || (ret != n));
+			if (n > DES_BLOCK_SIZE)
+				memcpy(ctrblk, ctrblk + n - DES_BLOCK_SIZE,
+				       DES_BLOCK_SIZE);
+			crypto_inc(ctrblk, DES_BLOCK_SIZE);
+			out += n;
+			in += n;
+			nbytes -= n;
+		}
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
+	if (nbytes) {
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+		ret = crypt_s390_kmctr(func, ctx->key, buf, in,
+				       DES_BLOCK_SIZE, ctrblk);
+		BUG_ON(ret < 0 || ret != DES_BLOCK_SIZE);
+		memcpy(out, buf, nbytes);
+		crypto_inc(ctrblk, DES_BLOCK_SIZE);
+		ret = blkcipher_walk_done(desc, walk, 0);
+	}
+	memcpy(walk->iv, ctrblk, DES_BLOCK_SIZE);
+	return ret;
+}
+
+static int ctr_des_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_desall_crypt(desc, KMCTR_DEA_ENCRYPT, ctx, &walk);
+}
+
+static int ctr_des_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_desall_crypt(desc, KMCTR_DEA_DECRYPT, ctx, &walk);
+}
+
+static struct crypto_alg ctr_des_alg = {
+	.cra_name		=	"ctr(des)",
+	.cra_driver_name	=	"ctr-des-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	1,
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(ctr_des_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES_KEY_SIZE,
+			.max_keysize		=	DES_KEY_SIZE,
+			.ivsize			=	DES_BLOCK_SIZE,
+			.setkey			=	des_setkey,
+			.encrypt		=	ctr_des_encrypt,
+			.decrypt		=	ctr_des_decrypt,
+		}
+	}
+};
+
+static int ctr_des3_encrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
+{
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_desall_crypt(desc, KMCTR_TDEA_192_ENCRYPT, ctx, &walk);
+}
+
+static int ctr_des3_decrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
+{
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_desall_crypt(desc, KMCTR_TDEA_192_DECRYPT, ctx, &walk);
+}
+
+static struct crypto_alg ctr_des3_alg = {
+	.cra_name		=	"ctr(des3_ede)",
+	.cra_driver_name	=	"ctr-des3_ede-s390",
+	.cra_priority		=	CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		=	1,
+	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(ctr_des3_alg.cra_list),
+	.cra_u			=	{
+		.blkcipher = {
+			.min_keysize		=	DES3_KEY_SIZE,
+			.max_keysize		=	DES3_KEY_SIZE,
+			.ivsize			=	DES_BLOCK_SIZE,
+			.setkey			=	des3_setkey,
+			.encrypt		=	ctr_des3_encrypt,
+			.decrypt		=	ctr_des3_decrypt,
+		}
+	}
+};
+
 static int __init des_s390_init(void)
 {
 	int ret;
@@ -396,9 +535,32 @@ static int __init des_s390_init(void)
 	ret = crypto_register_alg(&cbc_des3_alg);
 	if (ret)
 		goto cbc_des3_err;
+
+	if (crypt_s390_func_available(KMCTR_DEA_ENCRYPT,
+			CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+	    crypt_s390_func_available(KMCTR_TDEA_192_ENCRYPT,
+			CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+		ret = crypto_register_alg(&ctr_des_alg);
+		if (ret)
+			goto ctr_des_err;
+		ret = crypto_register_alg(&ctr_des3_alg);
+		if (ret)
+			goto ctr_des3_err;
+		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+		if (!ctrblk) {
+			ret = -ENOMEM;
+			goto ctr_mem_err;
+		}
+	}
 out:
 	return ret;
 
+ctr_mem_err:
+	crypto_unregister_alg(&ctr_des3_alg);
+ctr_des3_err:
+	crypto_unregister_alg(&ctr_des_alg);
+ctr_des_err:
+	crypto_unregister_alg(&cbc_des3_alg);
 cbc_des3_err:
 	crypto_unregister_alg(&ecb_des3_alg);
 ecb_des3_err:
@@ -415,6 +577,11 @@ des_err:
 
 static void __exit des_s390_exit(void)
 {
+	if (ctrblk) {
+		crypto_unregister_alg(&ctr_des_alg);
+		crypto_unregister_alg(&ctr_des3_alg);
+		free_page((unsigned long) ctrblk);
+	}
 	crypto_unregister_alg(&cbc_des3_alg);
 	crypto_unregister_alg(&ecb_des3_alg);
 	crypto_unregister_alg(&des3_alg);
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index dc601ff4bcfd..c64c3807f516 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -119,9 +119,12 @@ config CRYPTO_DES_S390
 	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
 	help
-	  This us the s390 hardware accelerated implementation of the
+	  This is the s390 hardware accelerated implementation of the
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
 
+	  As of z990 the ECB and CBC mode are hardware accelerated.
+	  As of z196 the CTR mode is hardware accelerated.
+
 config CRYPTO_AES_S390
 	tristate "AES cipher algorithms"
 	depends on S390
@@ -135,7 +138,8 @@ config CRYPTO_AES_S390
 	  for 128 bit keys.
 	  As of z10 the ECB and CBC modes are hardware accelerated
 	  for all AES key sizes.
-	  As of z196 the XTS mode is hardware accelerated for 256 and
+	  As of z196 the CTR mode is hardware accelerated for all AES
+	  key sizes and XTS mode is hardware accelerated for 256 and
 	  512 bit keys.
 
 config S390_PRNG
-- 
cgit v1.2.3


From a32e252f7cdfb3675a4e50215cfac356ed8952c4 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Wed, 6 Apr 2011 18:23:29 +0000
Subject: powerpc: Use new CPU feature bit to select 2.06 tlbie

This removes MMU_FTR_TLBIE_206 as we can now use CPU_FTR_HVMODE_206.  It
also changes the logic to select which tlbie to use to be based on this
new CPU feature bit.

This also duplicates the ASM_FTR_IF/SET/CLR defines for CPU features
(copied from MMU features).

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/feature-fixups.h | 13 +++++++++++++
 arch/powerpc/include/asm/mmu.h            |  8 +-------
 arch/powerpc/mm/hash_native_64.c          | 10 ++++------
 3 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index bdc0d6877bce..9a67a38bf7b9 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -146,6 +146,19 @@ label##5:							\
 
 #ifndef __ASSEMBLY__
 
+#define ASM_FTR_IF(section_if, section_else, msk, val)	\
+	stringify_in_c(BEGIN_FTR_SECTION)			\
+	section_if "; "						\
+	stringify_in_c(FTR_SECTION_ELSE)			\
+	section_else "; "					\
+	stringify_in_c(ALT_FTR_SECTION_END((msk), (val)))
+
+#define ASM_FTR_IFSET(section_if, section_else, msk)	\
+	ASM_FTR_IF(section_if, section_else, (msk), (msk))
+
+#define ASM_FTR_IFCLR(section_if, section_else, msk)	\
+	ASM_FTR_IF(section_if, section_else, (msk), 0)
+
 #define ASM_MMU_FTR_IF(section_if, section_else, msk, val)	\
 	stringify_in_c(BEGIN_MMU_FTR_SECTION)			\
 	section_if "; "						\
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index a39304b74f84..4138b21ae80a 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -56,11 +56,6 @@
  */
 #define MMU_FTR_NEED_DTLB_SW_LRU	ASM_CONST(0x00200000)
 
-/* This indicates that the processor uses the ISA 2.06 server tlbie
- * mnemonics
- */
-#define MMU_FTR_TLBIE_206		ASM_CONST(0x00400000)
-
 /* Enable use of TLB reservation.  Processor should support tlbsrx.
  * instruction and MAS0[WQ].
  */
@@ -105,8 +100,7 @@
 #define MMU_FTRS_PPC970		MMU_FTRS_POWER4
 #define MMU_FTRS_POWER5		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
 #define MMU_FTRS_POWER6		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER7		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | \
-				MMU_FTR_TLBIE_206
+#define MMU_FTRS_POWER7		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
 #define MMU_FTRS_CELL		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
 				MMU_FTR_CI_LARGE_PAGE
 #define MMU_FTRS_PA6T		MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index c23eef2b81a6..dfd764896db0 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -50,9 +50,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 	case MMU_PAGE_4K:
 		va &= ~0xffful;
 		va |= ssize << 8;
-		asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0),
-					       %2)
-			     : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
+		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
+			     : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206)
 			     : "memory");
 		break;
 	default:
@@ -61,9 +60,8 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 		va |= penc << 12;
 		va |= ssize << 8;
 		va |= 1; /* L */
-		asm volatile(ASM_MMU_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0),
-					       %2)
-			     : : "r" (va), "r"(0), "i" (MMU_FTR_TLBIE_206)
+		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
+			     : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206)
 			     : "memory");
 		break;
 	}
-- 
cgit v1.2.3


From 851d2e2fe8dbcbe3afcad6fc4569c881d8ad4ce9 Mon Sep 17 00:00:00 2001
From: "Tseng-Hui (Frank) Lin" <thlin@linux.vnet.ibm.com>
Date: Mon, 2 May 2011 20:43:04 +0000
Subject: powerpc: Add Initiate Coprocessor Store Word (icswx) support

Icswx is a PowerPC instruction to send data to a co-processor. On Book-S
processors the LPAR_ID and process ID (PID) of the owning process are
registered in the window context of the co-processor at initialization
time. When the icswx instruction is executed the L2 generates a cop-reg
transaction on PowerBus. The transaction has no address and the
processor does not perform an MMU access to authenticate the transaction.
The co-processor compares the LPAR_ID and the PID included in the
transaction and the LPAR_ID and PID held in the window context to
determine if the process is authorized to generate the transaction.

The OS needs to assign a 16-bit PID for the process. This cop-PID needs
to be updated during context switch. The cop-PID needs to be destroyed
when the context is destroyed.

Signed-off-by: Sonny Rao <sonnyrao@linux.vnet.ibm.com>
Signed-off-by: Tseng-Hui (Frank) Lin <thlin@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/cputable.h    |   4 +-
 arch/powerpc/include/asm/mmu-hash64.h  |   6 +
 arch/powerpc/include/asm/mmu_context.h |  10 ++
 arch/powerpc/include/asm/reg.h         |   1 +
 arch/powerpc/mm/mmu_context_hash64.c   | 211 +++++++++++++++++++++++++++++++++
 arch/powerpc/platforms/Kconfig.cputype |  18 +++
 6 files changed, 249 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 3db2476704d6..a3e1a9e96a7f 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -197,6 +197,7 @@ extern const char *powerpc_base_platform;
 #define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0200000000000000)
 #define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0400000000000000)
 #define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0800000000000000)
+#define CPU_FTR_ICSWX			LONG_ASM_CONST(0x1000000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -418,7 +419,8 @@ extern const char *powerpc_base_platform;
 	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
 	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
-	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_ICSWX)
 #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index ae7b3efec8e5..d865bd909c7d 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -408,6 +408,7 @@ static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
 
 typedef unsigned long mm_context_id_t;
+struct spinlock;
 
 typedef struct {
 	mm_context_id_t id;
@@ -423,6 +424,11 @@ typedef struct {
 #ifdef CONFIG_PPC_SUBPAGE_PROT
 	struct subpage_prot_table spt;
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
+#ifdef CONFIG_PPC_ICSWX
+	struct spinlock *cop_lockp; /* guard acop and cop_pid */
+	unsigned long acop;	/* mask of enabled coprocessor types */
+	unsigned int cop_pid;	/* pid value used with coprocessors */
+#endif /* CONFIG_PPC_ICSWX */
 } mm_context_t;
 
 
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 8e13f65b498c..a73668a5f30d 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -32,6 +32,10 @@ extern void __destroy_context(unsigned long context_id);
 extern void mmu_context_init(void);
 #endif
 
+extern void switch_cop(struct mm_struct *next);
+extern int use_cop(unsigned long acop, struct mm_struct *mm);
+extern void drop_cop(unsigned long acop, struct mm_struct *mm);
+
 /*
  * switch_mm is the entry point called from the architecture independent
  * code in kernel/sched.c
@@ -55,6 +59,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	if (prev == next)
 		return;
 
+#ifdef CONFIG_PPC_ICSWX
+	/* Switch coprocessor context only if prev or next uses a coprocessor */
+	if (prev->context.acop || next->context.acop)
+		switch_cop(next);
+#endif /* CONFIG_PPC_ICSWX */
+
 	/* We must stop all altivec streams before changing the HW
 	 * context
 	 */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1f9ac12742e6..632e78e14441 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -188,6 +188,7 @@
 
 #define SPRN_CTR	0x009	/* Count Register */
 #define SPRN_DSCR	0x11
+#define SPRN_ACOP	0x1F	/* Available Coprocessor Register */
 #define SPRN_CTRLF	0x088
 #define SPRN_CTRLT	0x098
 #define   CTRL_CT	0xc0000000	/* current thread */
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index c5859448a0ab..c517815404c4 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -20,9 +20,205 @@
 #include <linux/idr.h>
 #include <linux/module.h>
 #include <linux/gfp.h>
+#include <linux/slab.h>
 
 #include <asm/mmu_context.h>
 
+#ifdef CONFIG_PPC_ICSWX
+/*
+ * The processor and its L2 cache cause the icswx instruction to
+ * generate a COP_REQ transaction on PowerBus. The transaction has
+ * no address, and the processor does not perform an MMU access
+ * to authenticate the transaction. The command portion of the
+ * PowerBus COP_REQ transaction includes the LPAR_ID (LPID) and
+ * the coprocessor Process ID (PID), which the coprocessor compares
+ * to the authorized LPID and PID held in the coprocessor, to determine
+ * if the process is authorized to generate the transaction.
+ * The data of the COP_REQ transaction is 128-byte or less and is
+ * placed in cacheable memory on a 128-byte cache line boundary.
+ *
+ * The task to use a coprocessor should use use_cop() to allocate
+ * a coprocessor PID before executing icswx instruction. use_cop()
+ * also enables the coprocessor context switching. Drop_cop() is
+ * used to free the coprocessor PID.
+ *
+ * Example:
+ * Host Fabric Interface (HFI) is a PowerPC network coprocessor.
+ * Each HFI have multiple windows. Each HFI window serves as a
+ * network device sending to and receiving from HFI network.
+ * HFI immediate send function uses icswx instruction. The immediate
+ * send function allows small (single cache-line) packets be sent
+ * without using the regular HFI send FIFO and doorbell, which are
+ * much slower than immediate send.
+ *
+ * For each task intending to use HFI immediate send, the HFI driver
+ * calls use_cop() to obtain a coprocessor PID for the task.
+ * The HFI driver then allocate a free HFI window and save the
+ * coprocessor PID to the HFI window to allow the task to use the
+ * HFI window.
+ *
+ * The HFI driver repeatedly creates immediate send packets and
+ * issues icswx instruction to send data through the HFI window.
+ * The HFI compares the coprocessor PID in the CPU PID register
+ * to the PID held in the HFI window to determine if the transaction
+ * is allowed.
+ *
+ * When the task to release the HFI window, the HFI driver calls
+ * drop_cop() to release the coprocessor PID.
+ */
+
+#define COP_PID_NONE 0
+#define COP_PID_MIN (COP_PID_NONE + 1)
+#define COP_PID_MAX (0xFFFF)
+
+static DEFINE_SPINLOCK(mmu_context_acop_lock);
+static DEFINE_IDA(cop_ida);
+
+void switch_cop(struct mm_struct *next)
+{
+	mtspr(SPRN_PID, next->context.cop_pid);
+	mtspr(SPRN_ACOP, next->context.acop);
+}
+
+static int new_cop_pid(struct ida *ida, int min_id, int max_id,
+		       spinlock_t *lock)
+{
+	int index;
+	int err;
+
+again:
+	if (!ida_pre_get(ida, GFP_KERNEL))
+		return -ENOMEM;
+
+	spin_lock(lock);
+	err = ida_get_new_above(ida, min_id, &index);
+	spin_unlock(lock);
+
+	if (err == -EAGAIN)
+		goto again;
+	else if (err)
+		return err;
+
+	if (index > max_id) {
+		spin_lock(lock);
+		ida_remove(ida, index);
+		spin_unlock(lock);
+		return -ENOMEM;
+	}
+
+	return index;
+}
+
+static void sync_cop(void *arg)
+{
+	struct mm_struct *mm = arg;
+
+	if (mm == current->active_mm)
+		switch_cop(current->active_mm);
+}
+
+/**
+ * Start using a coprocessor.
+ * @acop: mask of coprocessor to be used.
+ * @mm: The mm the coprocessor to associate with. Most likely current mm.
+ *
+ * Return a positive PID if successful. Negative errno otherwise.
+ * The returned PID will be fed to the coprocessor to determine if an
+ * icswx transaction is authenticated.
+ */
+int use_cop(unsigned long acop, struct mm_struct *mm)
+{
+	int ret;
+
+	if (!cpu_has_feature(CPU_FTR_ICSWX))
+		return -ENODEV;
+
+	if (!mm || !acop)
+		return -EINVAL;
+
+	/* We need to make sure mm_users doesn't change */
+	down_read(&mm->mmap_sem);
+	spin_lock(mm->context.cop_lockp);
+
+	if (mm->context.cop_pid == COP_PID_NONE) {
+		ret = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX,
+				  &mmu_context_acop_lock);
+		if (ret < 0)
+			goto out;
+
+		mm->context.cop_pid = ret;
+	}
+	mm->context.acop |= acop;
+
+	sync_cop(mm);
+
+	/*
+	 * If this is a threaded process then there might be other threads
+	 * running. We need to send an IPI to force them to pick up any
+	 * change in PID and ACOP.
+	 */
+	if (atomic_read(&mm->mm_users) > 1)
+		smp_call_function(sync_cop, mm, 1);
+
+	ret = mm->context.cop_pid;
+
+out:
+	spin_unlock(mm->context.cop_lockp);
+	up_read(&mm->mmap_sem);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(use_cop);
+
+/**
+ * Stop using a coprocessor.
+ * @acop: mask of coprocessor to be stopped.
+ * @mm: The mm the coprocessor associated with.
+ */
+void drop_cop(unsigned long acop, struct mm_struct *mm)
+{
+	int free_pid = COP_PID_NONE;
+
+	if (!cpu_has_feature(CPU_FTR_ICSWX))
+		return;
+
+	if (WARN_ON_ONCE(!mm))
+		return;
+
+	/* We need to make sure mm_users doesn't change */
+	down_read(&mm->mmap_sem);
+	spin_lock(mm->context.cop_lockp);
+
+	mm->context.acop &= ~acop;
+
+	if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) {
+		free_pid = mm->context.cop_pid;
+		mm->context.cop_pid = COP_PID_NONE;
+	}
+
+	sync_cop(mm);
+
+	/*
+	 * If this is a threaded process then there might be other threads
+	 * running. We need to send an IPI to force them to pick up any
+	 * change in PID and ACOP.
+	 */
+	if (atomic_read(&mm->mm_users) > 1)
+		smp_call_function(sync_cop, mm, 1);
+
+	if (free_pid != COP_PID_NONE) {
+		spin_lock(&mmu_context_acop_lock);
+		ida_remove(&cop_ida, free_pid);
+		spin_unlock(&mmu_context_acop_lock);
+	}
+
+	spin_unlock(mm->context.cop_lockp);
+	up_read(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(drop_cop);
+
+#endif /* CONFIG_PPC_ICSWX */
+
 static DEFINE_SPINLOCK(mmu_context_lock);
 static DEFINE_IDA(mmu_context_ida);
 
@@ -78,6 +274,16 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 		slice_set_user_psize(mm, mmu_virtual_psize);
 	subpage_prot_init_new_context(mm);
 	mm->context.id = index;
+#ifdef CONFIG_PPC_ICSWX
+	mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+	if (!mm->context.cop_lockp) {
+		__destroy_context(index);
+		subpage_prot_free(mm);
+		mm->context.id = NO_CONTEXT;
+		return -ENOMEM;
+	}
+	spin_lock_init(mm->context.cop_lockp);
+#endif /* CONFIG_PPC_ICSWX */
 
 	return 0;
 }
@@ -92,6 +298,11 @@ EXPORT_SYMBOL_GPL(__destroy_context);
 
 void destroy_context(struct mm_struct *mm)
 {
+#ifdef CONFIG_PPC_ICSWX
+	drop_cop(mm->context.acop, mm);
+	kfree(mm->context.cop_lockp);
+	mm->context.cop_lockp = NULL;
+#endif /* CONFIG_PPC_ICSWX */
 	__destroy_context(mm->context.id);
 	subpage_prot_free(mm);
 	mm->context.id = MMU_NO_CONTEXT;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 7c1e1c64437b..a1e623822a30 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -230,6 +230,24 @@ config VSX
 
 	  If in doubt, say Y here.
 
+config PPC_ICSWX
+	bool "Support for PowerPC icswx coprocessor instruction"
+	depends on POWER4
+	default n
+	---help---
+
+	  This option enables kernel support for the PowerPC Initiate
+	  Coprocessor Store Word (icswx) coprocessor instruction on POWER7
+	  or newer processors.
+
+	  This option is only useful if you have a processor that supports
+	  the icswx coprocessor instruction. It does not have any effect
+	  on processors without the icswx coprocessor instruction.
+
+	  This option slightly increases kernel memory usage.
+
+	  If in doubt, say N here.
+
 config SPE
 	bool "SPE Support"
 	depends on E200 || (E500 && !PPC_E500MC)
-- 
cgit v1.2.3


From 1977b502120d44b9b4897703adfb2e2fab346880 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sun, 1 May 2011 19:46:44 +0000
Subject: powerpc: Save register r9-r13 values accurately on interrupt with bad
 stack

When we take an interrupt or exception from kernel mode and the stack
pointer is obviously not a kernel address (i.e. the top bit is 0), we
switch to an emergency stack, save register values and panic.  However,
on 64-bit server machines, we don't actually save the values of r9 - r13
at the time of the interrupt, but rather values corrupted by the
exception entry code for r12-r13, and nothing at all for r9-r11.

This fixes it by passing a pointer to the register save area in the paca
through to the bad_stack code in r3.  The register values are saved in
one of the paca register save areas (depending on which exception this
is).  Using the pointer in r3, the bad_stack code now retrieves the
saved values of r9 - r13 and stores them in the exception frame on the
emergency stack.  This also stores the normal exception frame marker
("regshere") in the exception frame.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/exception-64s.h |  7 ++++---
 arch/powerpc/kernel/exceptions-64s.S     | 20 +++++++++++++++++---
 2 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index d6b4849df9b1..96ccef136ca9 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -104,10 +104,11 @@
 	beq-	1f;							   \
 	ld	r1,PACAKSAVE(r13);	/* kernel stack to use		*/ \
 1:	cmpdi	cr1,r1,0;		/* check if r1 is in userspace	*/ \
-	bge-	cr1,2f;			/* abort if it is		*/ \
-	b	3f;							   \
-2:	li	r1,(n);			/* will be reloaded later	*/ \
+	blt+	cr1,3f;			/* abort if it is		*/ \
+	li	r1,(n);			/* will be reloaded later	*/ \
 	sth	r1,PACA_TRAP_SAVE(r13);					   \
+	std	r3,area+EX_R3(r13);					   \
+	addi	r3,r13,area;		/* r3 -> where regs are saved*/	   \
 	b	bad_stack;						   \
 3:	std	r9,_CCR(r1);		/* save CR in stackframe	*/ \
 	std	r11,_NIP(r1);		/* save SRR0 in stackframe	*/ \
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 226cc8c62224..27ca8b7c6002 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -461,9 +461,20 @@ bad_stack:
 	std	r12,_XER(r1)
 	SAVE_GPR(0,r1)
 	SAVE_GPR(2,r1)
-	SAVE_4GPRS(3,r1)
-	SAVE_2GPRS(7,r1)
-	SAVE_10GPRS(12,r1)
+	ld	r10,EX_R3(r3)
+	std	r10,GPR3(r1)
+	SAVE_GPR(4,r1)
+	SAVE_4GPRS(5,r1)
+	ld	r9,EX_R9(r3)
+	ld	r10,EX_R10(r3)
+	SAVE_2GPRS(9,r1)
+	ld	r9,EX_R11(r3)
+	ld	r10,EX_R12(r3)
+	ld	r11,EX_R13(r3)
+	std	r9,GPR11(r1)
+	std	r10,GPR12(r1)
+	std	r11,GPR13(r1)
+	SAVE_8GPRS(14,r1)
 	SAVE_10GPRS(22,r1)
 	lhz	r12,PACA_TRAP_SAVE(r13)
 	std	r12,_TRAP(r1)
@@ -472,6 +483,9 @@ bad_stack:
 	li	r12,0
 	std	r12,0(r11)
 	ld	r2,PACATOC(r13)
+	ld	r11,exception_marker@toc(r2)
+	std	r12,RESULT(r1)
+	std	r11,STACK_FRAME_OVERHEAD-16(r1)
 1:	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.kernel_bad_stack
 	b	1b
-- 
cgit v1.2.3


From 48404f2e95ef0ffd8134d89c8abcd1a15e15f1b0 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sun, 1 May 2011 19:48:20 +0000
Subject: powerpc: Save Come-From Address Register (CFAR) in exception frame

Recent 64-bit server processors (POWER6 and POWER7) have a "Come-From
Address Register" (CFAR), that records the address of the most recent
branch or rfid (return from interrupt) instruction for debugging purposes.

This saves the value of the CFAR in the exception entry code and stores
it in the exception frame.  We also make xmon print the CFAR value in
its register dump code.

Rather than extend the pt_regs struct at this time, we steal the orig_gpr3
field, which is only used for system calls, and use it for the CFAR value
for all exceptions/interrupts other than system calls.  This means we
don't save the CFAR on system calls, which is not a great problem since
system calls tend not to happen unexpectedly, and also avoids adding the
overhead of reading the CFAR to the system call entry path.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/cputable.h      | 5 +++--
 arch/powerpc/include/asm/exception-64s.h | 9 +++++++++
 arch/powerpc/include/asm/paca.h          | 6 +++---
 arch/powerpc/include/asm/reg.h           | 1 +
 arch/powerpc/kernel/exceptions-64s.S     | 4 ++++
 arch/powerpc/xmon/xmon.c                 | 4 ++++
 6 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index a3e1a9e96a7f..4efbfb3f3254 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -180,6 +180,7 @@ extern const char *powerpc_base_platform;
 
 
 #define CPU_FTR_HVMODE_206		LONG_ASM_CONST(0x0000000800000000)
+#define CPU_FTR_CFAR			LONG_ASM_CONST(0x0000001000000000)
 #define CPU_FTR_IABR			LONG_ASM_CONST(0x0000002000000000)
 #define CPU_FTR_MMCRA			LONG_ASM_CONST(0x0000004000000000)
 #define CPU_FTR_CTRL			LONG_ASM_CONST(0x0000008000000000)
@@ -412,7 +413,7 @@ extern const char *powerpc_base_platform;
 	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
 	    CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
-	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
+	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR)
 #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_HVMODE_206 |\
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -420,7 +421,7 @@ extern const char *powerpc_base_platform;
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
 	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
-	    CPU_FTR_ICSWX)
+	    CPU_FTR_ICSWX | CPU_FTR_CFAR)
 #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 96ccef136ca9..f5dfe3411f64 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -46,6 +46,7 @@
 #define EX_CCR		60
 #define EX_R3		64
 #define EX_LR		72
+#define EX_CFAR		80
 
 /*
  * We're short on space and time in the exception prolog, so we can't
@@ -66,6 +67,10 @@
 	std	r10,area+EX_R10(r13);					\
 	std	r11,area+EX_R11(r13);					\
 	std	r12,area+EX_R12(r13);					\
+	BEGIN_FTR_SECTION_NESTED(66);					\
+	mfspr	r10,SPRN_CFAR;						\
+	std	r10,area+EX_CFAR(r13);					\
+	END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);		\
 	GET_SCRATCH0(r9);						\
 	std	r9,area+EX_R13(r13);					\
 	mfcr	r9
@@ -130,6 +135,10 @@
 	std	r9,GPR11(r1);						   \
 	std	r10,GPR12(r1);						   \
 	std	r11,GPR13(r1);						   \
+	BEGIN_FTR_SECTION_NESTED(66);					   \
+	ld	r10,area+EX_CFAR(r13);					   \
+	std	r10,ORIG_GPR3(r1);					   \
+	END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);		   \
 	ld	r2,PACATOC(r13);	/* get kernel TOC into r2	*/ \
 	mflr	r9;			/* save LR in stackframe	*/ \
 	std	r9,_LINK(r1);						   \
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 65c13c48db43..74126765106a 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -92,9 +92,9 @@ struct paca_struct {
 	 * Now, starting in cacheline 2, the exception save areas
 	 */
 	/* used for most interrupts/exceptions */
-	u64 exgen[10] __attribute__((aligned(0x80)));
-	u64 exmc[10];		/* used for machine checks */
-	u64 exslb[10];		/* used for SLB/segment table misses
+	u64 exgen[11] __attribute__((aligned(0x80)));
+	u64 exmc[11];		/* used for machine checks */
+	u64 exslb[11];		/* used for SLB/segment table misses
  				 * on the linear mapping */
 	/* SLB related definitions */
 	u16 vmalloc_sllp;
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 632e78e14441..fdec59333053 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -188,6 +188,7 @@
 
 #define SPRN_CTR	0x009	/* Count Register */
 #define SPRN_DSCR	0x11
+#define SPRN_CFAR	0x1c	/* Come From Address Register */
 #define SPRN_ACOP	0x1F	/* Available Coprocessor Register */
 #define SPRN_CTRLF	0x088
 #define SPRN_CTRLT	0x098
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 27ca8b7c6002..0ec3b42717d7 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -474,6 +474,10 @@ bad_stack:
 	std	r9,GPR11(r1)
 	std	r10,GPR12(r1)
 	std	r11,GPR13(r1)
+BEGIN_FTR_SECTION
+	ld	r10,EX_CFAR(r3)
+	std	r10,ORIG_GPR3(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	SAVE_8GPRS(14,r1)
 	SAVE_10GPRS(22,r1)
 	lhz	r12,PACA_TRAP_SAVE(r13)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 60593ad861e8..909804aaeebb 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1497,6 +1497,10 @@ static void prregs(struct pt_regs *fp)
 #endif
 	printf("pc  = ");
 	xmon_print_symbol(fp->nip, " ", "\n");
+	if (TRAP(fp) != 0xc00 && cpu_has_feature(CPU_FTR_CFAR)) {
+		printf("cfar= ");
+		xmon_print_symbol(fp->orig_gpr3, " ", "\n");
+	}
 	printf("lr  = ");
 	xmon_print_symbol(fp->link, " ", "\n");
 	printf("msr = "REG"   cr  = %.8lx\n", fp->msr, fp->ccr);
-- 
cgit v1.2.3


From 104699c0ab473535793b5fea156adaf309afd29b Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Thu, 28 Apr 2011 05:07:23 +0000
Subject: powerpc: Convert old cpumask API into new one

Adapt new API.

Almost change is trivial. Most important change is the below line
because we plan to change task->cpus_allowed implementation.

-       ctx->cpus_allowed = current->cpus_allowed;

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/cputhreads.h        | 12 +++++------
 arch/powerpc/include/asm/kexec.h             |  2 +-
 arch/powerpc/kernel/crash.c                  | 32 ++++++++++++++--------------
 arch/powerpc/kernel/setup-common.c           |  4 ++--
 arch/powerpc/kernel/smp.c                    |  4 ++--
 arch/powerpc/kernel/traps.c                  |  2 +-
 arch/powerpc/mm/numa.c                       |  2 +-
 arch/powerpc/platforms/cell/beat_smp.c       |  2 +-
 arch/powerpc/platforms/cell/cbe_regs.c       | 11 +++++-----
 arch/powerpc/platforms/cell/smp.c            | 13 ++++++-----
 arch/powerpc/platforms/cell/spufs/sched.c    |  2 +-
 arch/powerpc/platforms/pseries/hotplug-cpu.c |  2 +-
 arch/powerpc/xmon/xmon.c                     | 16 +++++++-------
 13 files changed, 52 insertions(+), 52 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index f71bb4c118b4..ce516e5eb0d3 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -37,16 +37,16 @@ extern cpumask_t threads_core_mask;
  * This can typically be used for things like IPI for tlb invalidations
  * since those need to be done only once per core/TLB
  */
-static inline cpumask_t cpu_thread_mask_to_cores(cpumask_t threads)
+static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
 {
 	cpumask_t	tmp, res;
 	int		i;
 
-	res = CPU_MASK_NONE;
+	cpumask_clear(&res);
 	for (i = 0; i < NR_CPUS; i += threads_per_core) {
-		cpus_shift_left(tmp, threads_core_mask, i);
-		if (cpus_intersects(threads, tmp))
-			cpu_set(i, res);
+		cpumask_shift_left(&tmp, &threads_core_mask, i);
+		if (cpumask_intersects(threads, &tmp))
+			cpumask_set_cpu(i, &res);
 	}
 	return res;
 }
@@ -58,7 +58,7 @@ static inline int cpu_nr_cores(void)
 
 static inline cpumask_t cpu_online_cores_map(void)
 {
-	return cpu_thread_mask_to_cores(cpu_online_map);
+	return cpu_thread_mask_to_cores(cpu_online_mask);
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index f54408d995b5..8a33698c61bd 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -76,7 +76,7 @@ extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
 extern cpumask_t cpus_in_sr;
 static inline int kexec_sr_activated(int cpu)
 {
-	return cpu_isset(cpu,cpus_in_sr);
+	return cpumask_test_cpu(cpu, &cpus_in_sr);
 }
 
 struct kimage;
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 5b5e1f002a8e..ccc2198e6b23 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -64,9 +64,9 @@ void crash_ipi_callback(struct pt_regs *regs)
 		return;
 
 	hard_irq_disable();
-	if (!cpu_isset(cpu, cpus_in_crash))
+	if (!cpumask_test_cpu(cpu, &cpus_in_crash))
 		crash_save_cpu(regs, cpu);
-	cpu_set(cpu, cpus_in_crash);
+	cpumask_set_cpu(cpu, &cpus_in_crash);
 
 	/*
 	 * Entered via soft-reset - could be the kdump
@@ -77,8 +77,8 @@ void crash_ipi_callback(struct pt_regs *regs)
 	 * Tell the kexec CPU that entered via soft-reset and ready
 	 * to go down.
 	 */
-	if (cpu_isset(cpu, cpus_in_sr)) {
-		cpu_clear(cpu, cpus_in_sr);
+	if (cpumask_test_cpu(cpu, &cpus_in_sr)) {
+		cpumask_clear_cpu(cpu, &cpus_in_sr);
 		atomic_inc(&enter_on_soft_reset);
 	}
 
@@ -87,7 +87,7 @@ void crash_ipi_callback(struct pt_regs *regs)
 	 * This barrier is needed to make sure that all CPUs are stopped.
 	 * If not, soft-reset will be invoked to bring other CPUs.
 	 */
-	while (!cpu_isset(crashing_cpu, cpus_in_crash))
+	while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash))
 		cpu_relax();
 
 	if (ppc_md.kexec_cpu_down)
@@ -109,7 +109,7 @@ static void crash_soft_reset_check(int cpu)
 {
 	unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
 
-	cpu_clear(cpu, cpus_in_sr);
+	cpumask_clear_cpu(cpu, &cpus_in_sr);
 	while (atomic_read(&enter_on_soft_reset) != ncpus)
 		cpu_relax();
 }
@@ -132,7 +132,7 @@ static void crash_kexec_prepare_cpus(int cpu)
 	 */
 	printk(KERN_EMERG "Sending IPI to other cpus...\n");
 	msecs = 10000;
-	while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) {
+	while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) {
 		cpu_relax();
 		mdelay(1);
 	}
@@ -144,20 +144,20 @@ static void crash_kexec_prepare_cpus(int cpu)
 	 * user to do soft reset such that we get all.
 	 * Soft-reset will be used until better mechanism is implemented.
 	 */
-	if (cpus_weight(cpus_in_crash) < ncpus) {
+	if (cpumask_weight(&cpus_in_crash) < ncpus) {
 		printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n",
-			ncpus - cpus_weight(cpus_in_crash));
+			ncpus - cpumask_weight(&cpus_in_crash));
 		printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n");
-		cpus_in_sr = CPU_MASK_NONE;
+		cpumask_clear(&cpus_in_sr);
 		atomic_set(&enter_on_soft_reset, 0);
-		while (cpus_weight(cpus_in_crash) < ncpus)
+		while (cpumask_weight(&cpus_in_crash) < ncpus)
 			cpu_relax();
 	}
 	/*
 	 * Make sure all CPUs are entered via soft-reset if the kdump is
 	 * invoked using soft-reset.
 	 */
-	if (cpu_isset(cpu, cpus_in_sr))
+	if (cpumask_test_cpu(cpu, &cpus_in_sr))
 		crash_soft_reset_check(cpu);
 	/* Leave the IPI callback set */
 }
@@ -210,7 +210,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
 			 * exited using 'x'(exit and recover) or
 			 * kexec_should_crash() failed for all running tasks.
 			 */
-			cpu_clear(cpu, cpus_in_sr);
+			cpumask_clear_cpu(cpu, &cpus_in_sr);
 			local_irq_restore(flags);
 			return;
 		}
@@ -224,7 +224,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
 		 * then start kexec boot.
 		 */
 		crash_soft_reset_check(cpu);
-		cpu_set(crashing_cpu, cpus_in_crash);
+		cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
 		if (ppc_md.kexec_cpu_down)
 			ppc_md.kexec_cpu_down(1, 0);
 		machine_kexec(kexec_crash_image);
@@ -253,7 +253,7 @@ static void crash_kexec_prepare_cpus(int cpu)
 
 void crash_kexec_secondary(struct pt_regs *regs)
 {
-	cpus_in_sr = CPU_MASK_NONE;
+	cpumask_clear(&cpus_in_sr);
 }
 #endif	/* CONFIG_SMP */
 
@@ -345,7 +345,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
 	crashing_cpu = smp_processor_id();
 	crash_save_cpu(regs, crashing_cpu);
 	crash_kexec_prepare_cpus(crashing_cpu);
-	cpu_set(crashing_cpu, cpus_in_crash);
+	cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
 	crash_kexec_wait_realmode(crashing_cpu);
 
 	machine_kexec_mask_interrupts();
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 21f30cb68077..1475df6e403f 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -381,7 +381,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
 	int i;
 
 	threads_per_core = tpc;
-	threads_core_mask = CPU_MASK_NONE;
+	cpumask_clear(&threads_core_mask);
 
 	/* This implementation only supports power of 2 number of threads
 	 * for simplicity and performance
@@ -390,7 +390,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
 	BUG_ON(tpc != (1 << threads_shift));
 
 	for (i = 0; i < tpc; i++)
-		cpu_set(i, threads_core_mask);
+		cpumask_set_cpu(i, &threads_core_mask);
 
 	printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n",
 	       tpc, tpc > 1 ? "s" : "");
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index b6083f4f39b1..87517ab6d365 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -513,7 +513,7 @@ int cpu_first_thread_of_core(int core)
 }
 EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
 
-/* Must be called when no change can occur to cpu_present_map,
+/* Must be called when no change can occur to cpu_present_mask,
  * i.e. during cpu online or offline.
  */
 static struct device_node *cpu_to_l2cache(int cpu)
@@ -614,7 +614,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 	 * se we pin us down to CPU 0 for a short while
 	 */
 	alloc_cpumask_var(&old_mask, GFP_NOWAIT);
-	cpumask_copy(old_mask, &current->cpus_allowed);
+	cpumask_copy(old_mask, tsk_cpus_allowed(current));
 	set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));
 	
 	if (smp_ops && smp_ops->setup_cpu)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 4a6a109b6816..06b9d457d0a7 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -221,7 +221,7 @@ void system_reset_exception(struct pt_regs *regs)
 	}
 
 #ifdef CONFIG_KEXEC
-	cpu_set(smp_processor_id(), cpus_in_sr);
+	cpumask_set_cpu(smp_processor_id(), &cpus_in_sr);
 #endif
 
 	die("System Reset", regs, SIGABRT);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index e49b799b59a3..2164006fe170 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1452,7 +1452,7 @@ int arch_update_cpu_topology(void)
 	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
 	struct sys_device *sysdev;
 
-	for_each_cpu_mask(cpu, cpu_associativity_changes_mask) {
+	for_each_cpu(cpu,&cpu_associativity_changes_mask) {
 		vphn_get_associativity(cpu, associativity);
 		nid = associativity_to_nid(associativity);
 
diff --git a/arch/powerpc/platforms/cell/beat_smp.c b/arch/powerpc/platforms/cell/beat_smp.c
index 996df33165f1..3e86acbb0fb4 100644
--- a/arch/powerpc/platforms/cell/beat_smp.c
+++ b/arch/powerpc/platforms/cell/beat_smp.c
@@ -85,7 +85,7 @@ static void smp_beatic_message_pass(int target, int msg)
 
 static int __init smp_beatic_probe(void)
 {
-	return cpus_weight(cpu_possible_map);
+	return cpumask_weight(cpu_possible_mask);
 }
 
 static void __devinit smp_beatic_setup_cpu(int cpu)
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index dbc338f187a2..f3917e7a5b44 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -45,8 +45,8 @@ static struct cbe_thread_map
 	unsigned int cbe_id;
 } cbe_thread_map[NR_CPUS];
 
-static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = CPU_MASK_NONE };
-static cpumask_t cbe_first_online_cpu = CPU_MASK_NONE;
+static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} };
+static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE };
 
 static struct cbe_regs_map *cbe_find_map(struct device_node *np)
 {
@@ -159,7 +159,8 @@ EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
 
 u32 cbe_node_to_cpu(int node)
 {
-	return find_first_bit( (unsigned long *) &cbe_local_mask[node], sizeof(cpumask_t));
+	return cpumask_first(&cbe_local_mask[node]);
+
 }
 EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
 
@@ -268,9 +269,9 @@ void __init cbe_regs_init(void)
 				thread->regs = map;
 				thread->cbe_id = cbe_id;
 				map->be_node = thread->be_node;
-				cpu_set(i, cbe_local_mask[cbe_id]);
+				cpumask_set_cpu(i, &cbe_local_mask[cbe_id]);
 				if(thread->thread_id == 0)
-					cpu_set(i, cbe_first_online_cpu);
+					cpumask_set_cpu(i, &cbe_first_online_cpu);
 			}
 		}
 
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index 03d638e2f44f..a2161b91b0bf 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -77,7 +77,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
 	unsigned int pcpu;
 	int start_cpu;
 
-	if (cpu_isset(lcpu, of_spin_map))
+	if (cpumask_test_cpu(lcpu, &of_spin_map))
 		/* Already started by OF and sitting in spin loop */
 		return 1;
 
@@ -123,7 +123,7 @@ static int __init smp_iic_probe(void)
 {
 	iic_request_IPIs();
 
-	return cpus_weight(cpu_possible_map);
+	return cpumask_weight(cpu_possible_mask);
 }
 
 static void __devinit smp_cell_setup_cpu(int cpu)
@@ -188,13 +188,12 @@ void __init smp_init_cell(void)
 	if (cpu_has_feature(CPU_FTR_SMT)) {
 		for_each_present_cpu(i) {
 			if (cpu_thread_in_core(i) == 0)
-				cpu_set(i, of_spin_map);
+				cpumask_set_cpu(i, &of_spin_map);
 		}
-	} else {
-		of_spin_map = cpu_present_map;
-	}
+	} else
+		cpumask_copy(&of_spin_map, cpu_present_mask);
 
-	cpu_clear(boot_cpuid, of_spin_map);
+	cpumask_clear_cpu(boot_cpuid, &of_spin_map);
 
 	/* Non-lpar has additional take/give timebase */
 	if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 65203857b0ce..32cb4e66d2cd 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -141,7 +141,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
 	 * runqueue. The context will be rescheduled on the proper node
 	 * if it is timesliced or preempted.
 	 */
-	ctx->cpus_allowed = current->cpus_allowed;
+	cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current));
 
 	/* Save the current cpu id for spu interrupt routing. */
 	ctx->last_ran = raw_smp_processor_id();
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index ae6c27df4dc4..46f13a3c5d09 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -281,7 +281,7 @@ static int pseries_add_processor(struct device_node *np)
 	}
 
 	for_each_cpu(cpu, tmp) {
-		BUG_ON(cpumask_test_cpu(cpu, cpu_present_mask));
+		BUG_ON(cpu_present(cpu));
 		set_cpu_present(cpu, true);
 		set_hard_smp_processor_id(cpu, *intserv++);
 	}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 909804aaeebb..91309c5c00d7 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -334,7 +334,7 @@ static void release_output_lock(void)
 
 int cpus_are_in_xmon(void)
 {
-	return !cpus_empty(cpus_in_xmon);
+	return !cpumask_empty(&cpus_in_xmon);
 }
 #endif
 
@@ -373,7 +373,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 
 #ifdef CONFIG_SMP
 	cpu = smp_processor_id();
-	if (cpu_isset(cpu, cpus_in_xmon)) {
+	if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
 		get_output_lock();
 		excprint(regs);
 		printf("cpu 0x%x: Exception %lx %s in xmon, "
@@ -396,7 +396,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 	}
 
 	xmon_fault_jmp[cpu] = recurse_jmp;
-	cpu_set(cpu, cpus_in_xmon);
+	cpumask_set_cpu(cpu, &cpus_in_xmon);
 
 	bp = NULL;
 	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT))
@@ -440,7 +440,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 			smp_send_debugger_break(MSG_ALL_BUT_SELF);
 			/* wait for other cpus to come in */
 			for (timeout = 100000000; timeout != 0; --timeout) {
-				if (cpus_weight(cpus_in_xmon) >= ncpus)
+				if (cpumask_weight(&cpus_in_xmon) >= ncpus)
 					break;
 				barrier();
 			}
@@ -484,7 +484,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		}
 	}
  leave:
-	cpu_clear(cpu, cpus_in_xmon);
+	cpumask_clear_cpu(cpu, &cpus_in_xmon);
 	xmon_fault_jmp[cpu] = NULL;
 #else
 	/* UP is simple... */
@@ -630,7 +630,7 @@ static int xmon_iabr_match(struct pt_regs *regs)
 static int xmon_ipi(struct pt_regs *regs)
 {
 #ifdef CONFIG_SMP
-	if (in_xmon && !cpu_isset(smp_processor_id(), cpus_in_xmon))
+	if (in_xmon && !cpumask_test_cpu(smp_processor_id(), &cpus_in_xmon))
 		xmon_core(regs, 1);
 #endif
 	return 0;
@@ -976,7 +976,7 @@ static int cpu_cmd(void)
 		printf("cpus stopped:");
 		count = 0;
 		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
-			if (cpu_isset(cpu, cpus_in_xmon)) {
+			if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
 				if (count == 0)
 					printf(" %x", cpu);
 				++count;
@@ -992,7 +992,7 @@ static int cpu_cmd(void)
 		return 0;
 	}
 	/* try to switch to cpu specified */
-	if (!cpu_isset(cpu, cpus_in_xmon)) {
+	if (!cpumask_test_cpu(cpu, &cpus_in_xmon)) {
 		printf("cpu 0x%x isn't in xmon\n", cpu);
 		return 0;
 	}
-- 
cgit v1.2.3


From 7707e4110e5692fe85e7e6c471c9bb2a9254d313 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sun, 24 Apr 2011 15:04:31 +0000
Subject: powerpc/kexec: Fix build failure on 32-bit SMP

Commit b987812b3fcaf70fdf0037589e5d2f5f2453e6ce left
crash_kexec_wait_realmode() undefined for UP.

Commit 7c7a81b53e581d727d069cc45df5510516faac31 defined it for UP but
left it undefined for 32-bit SMP.

Seems like people are getting confused by nested #ifdef's, so move the
definitions of crash_kexec_wait_realmode() after the #ifdef CONFIG_SMP
section.

Compile-tested with 32-bit UP, 32-bit SMP and 64-bit SMP configurations.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Tested-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/crash.c | 59 +++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 29 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index ccc2198e6b23..21f2c781ded1 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -162,34 +162,6 @@ static void crash_kexec_prepare_cpus(int cpu)
 	/* Leave the IPI callback set */
 }
 
-/* wait for all the CPUs to hit real mode but timeout if they don't come in */
-#ifdef CONFIG_PPC_STD_MMU_64
-static void crash_kexec_wait_realmode(int cpu)
-{
-	unsigned int msecs;
-	int i;
-
-	msecs = 10000;
-	for (i=0; i < NR_CPUS && msecs > 0; i++) {
-		if (i == cpu)
-			continue;
-
-		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
-			barrier();
-			if (!cpu_possible(i)) {
-				break;
-			}
-			if (!cpu_online(i)) {
-				break;
-			}
-			msecs--;
-			mdelay(1);
-		}
-	}
-	mb();
-}
-#endif	/* CONFIG_PPC_STD_MMU_64 */
-
 /*
  * This function will be called by secondary cpus or by kexec cpu
  * if soft-reset is activated to stop some CPUs.
@@ -234,7 +206,6 @@ void crash_kexec_secondary(struct pt_regs *regs)
 }
 
 #else	/* ! CONFIG_SMP */
-static inline void crash_kexec_wait_realmode(int cpu) {}
 
 static void crash_kexec_prepare_cpus(int cpu)
 {
@@ -257,6 +228,36 @@ void crash_kexec_secondary(struct pt_regs *regs)
 }
 #endif	/* CONFIG_SMP */
 
+/* wait for all the CPUs to hit real mode but timeout if they don't come in */
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64)
+static void crash_kexec_wait_realmode(int cpu)
+{
+	unsigned int msecs;
+	int i;
+
+	msecs = 10000;
+	for (i=0; i < NR_CPUS && msecs > 0; i++) {
+		if (i == cpu)
+			continue;
+
+		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+			barrier();
+			if (!cpu_possible(i)) {
+				break;
+			}
+			if (!cpu_online(i)) {
+				break;
+			}
+			msecs--;
+			mdelay(1);
+		}
+	}
+	mb();
+}
+#else
+static inline void crash_kexec_wait_realmode(int cpu) {}
+#endif	/* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */
+
 /*
  * Register a function to be called on shutdown.  Only use this if you
  * can't reset your device in the second kernel.
-- 
cgit v1.2.3


From 9ee820fa005254dfc816330f6654f14dcb2beee1 Mon Sep 17 00:00:00 2001
From: Brian King <brking@linux.vnet.ibm.com>
Date: Wed, 4 May 2011 16:01:20 +1000
Subject: powerpc/pseries: Add page coalescing support

Adds support for page coalescing, which is a feature on IBM Power servers
which allows for coalescing identical pages between logical partitions.
Hint text pages as coalesce candidates, since they are the most likely
pages to be able to be coalesced between partitions. This patch also
exports some page coalescing statistics available from firmware via
lparcfg.

[BenH: Moved a couple of things around to fix compile problems]

Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/firmware.h         |  3 +-
 arch/powerpc/include/asm/hvcall.h           | 12 +++++++
 arch/powerpc/include/asm/pSeries_reconfig.h |  5 +++
 arch/powerpc/kernel/lparcfg.c               | 53 ++++++++++++++---------------
 arch/powerpc/kernel/prom_init.c             |  4 ++-
 arch/powerpc/kernel/rtas.c                  |  2 ++
 arch/powerpc/platforms/pseries/lpar.c       | 46 +++++++++++++++++++++++++
 arch/powerpc/platforms/pseries/setup.c      | 11 ++++++
 8 files changed, 106 insertions(+), 30 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 4ef662e4a31d..3a6c586c4e40 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -47,6 +47,7 @@
 #define FW_FEATURE_BEAT		ASM_CONST(0x0000000001000000)
 #define FW_FEATURE_CMO		ASM_CONST(0x0000000002000000)
 #define FW_FEATURE_VPHN		ASM_CONST(0x0000000004000000)
+#define FW_FEATURE_XCMO		ASM_CONST(0x0000000008000000)
 
 #ifndef __ASSEMBLY__
 
@@ -60,7 +61,7 @@ enum {
 		FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
 		FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR |
 		FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
-		FW_FEATURE_CMO | FW_FEATURE_VPHN,
+		FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
 	FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
 	FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 8edec710cc6d..852b8c1c09db 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -102,6 +102,7 @@
 #define H_ANDCOND		(1UL<<(63-33))
 #define H_ICACHE_INVALIDATE	(1UL<<(63-40))	/* icbi, etc.  (ignored for IO pages) */
 #define H_ICACHE_SYNCHRONIZE	(1UL<<(63-41))	/* dcbst, icbi, etc (ignored for IO pages */
+#define H_COALESCE_CAND	(1UL<<(63-42))	/* page is a good candidate for coalescing */
 #define H_ZERO_PAGE		(1UL<<(63-48))	/* zero the page before mapping (ignored for IO pages) */
 #define H_COPY_PAGE		(1UL<<(63-49))
 #define H_N			(1UL<<(63-61))
@@ -234,6 +235,7 @@
 #define H_GET_MPP		0x2D4
 #define H_HOME_NODE_ASSOCIATIVITY 0x2EC
 #define H_BEST_ENERGY		0x2F4
+#define H_GET_MPP_X		0x314
 #define MAX_HCALL_OPCODE	H_BEST_ENERGY
 
 #ifndef __ASSEMBLY__
@@ -312,6 +314,16 @@ struct hvcall_mpp_data {
 
 int h_get_mpp(struct hvcall_mpp_data *);
 
+struct hvcall_mpp_x_data {
+	unsigned long coalesced_bytes;
+	unsigned long pool_coalesced_bytes;
+	unsigned long pool_purr_cycles;
+	unsigned long pool_spurr_cycles;
+	unsigned long reserved[3];
+};
+
+int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data);
+
 #ifdef CONFIG_PPC_PSERIES
 extern int CMO_PrPSP;
 extern int CMO_SecPSP;
diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h
index d4b4bfa26fb3..89d2f99c1bf4 100644
--- a/arch/powerpc/include/asm/pSeries_reconfig.h
+++ b/arch/powerpc/include/asm/pSeries_reconfig.h
@@ -18,13 +18,18 @@
 extern int pSeries_reconfig_notifier_register(struct notifier_block *);
 extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
 extern struct blocking_notifier_head pSeries_reconfig_chain;
+/* Not the best place to put this, will be fixed when we move some
+ * of the rtas suspend-me stuff to pseries */
+extern void pSeries_coalesce_init(void);
 #else /* !CONFIG_PPC_PSERIES */
 static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
 {
 	return 0;
 }
 static inline void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) { }
+static inline void pSeries_coalesce_init(void) { }
 #endif /* CONFIG_PPC_PSERIES */
 
+
 #endif /* __KERNEL__ */
 #endif /* _PPC64_PSERIES_RECONFIG_H */
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 301db65f05a1..84daabe2fcba 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -132,34 +132,6 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v)
 /*
  * Methods used to fetch LPAR data when running on a pSeries platform.
  */
-/**
- * h_get_mpp
- * H_GET_MPP hcall returns info in 7 parms
- */
-int h_get_mpp(struct hvcall_mpp_data *mpp_data)
-{
-	int rc;
-	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
-
-	rc = plpar_hcall9(H_GET_MPP, retbuf);
-
-	mpp_data->entitled_mem = retbuf[0];
-	mpp_data->mapped_mem = retbuf[1];
-
-	mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
-	mpp_data->pool_num = retbuf[2] & 0xffff;
-
-	mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
-	mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
-	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
-
-	mpp_data->pool_size = retbuf[4];
-	mpp_data->loan_request = retbuf[5];
-	mpp_data->backing_mem = retbuf[6];
-
-	return rc;
-}
-EXPORT_SYMBOL(h_get_mpp);
 
 struct hvcall_ppp_data {
 	u64	entitlement;
@@ -345,6 +317,30 @@ static void parse_mpp_data(struct seq_file *m)
 	seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
 }
 
+/**
+ * parse_mpp_x_data
+ * Parse out data returned from h_get_mpp_x
+ */
+static void parse_mpp_x_data(struct seq_file *m)
+{
+	struct hvcall_mpp_x_data mpp_x_data;
+
+	if (!firmware_has_feature(FW_FEATURE_XCMO))
+		return;
+	if (h_get_mpp_x(&mpp_x_data))
+		return;
+
+	seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes);
+
+	if (mpp_x_data.pool_coalesced_bytes)
+		seq_printf(m, "pool_coalesced_bytes=%ld\n",
+			   mpp_x_data.pool_coalesced_bytes);
+	if (mpp_x_data.pool_purr_cycles)
+		seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles);
+	if (mpp_x_data.pool_spurr_cycles)
+		seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles);
+}
+
 #define SPLPAR_CHARACTERISTICS_TOKEN 20
 #define SPLPAR_MAXLENGTH 1026*(sizeof(char))
 
@@ -520,6 +516,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 		parse_system_parameter_string(m);
 		parse_ppp_data(m);
 		parse_mpp_data(m);
+		parse_mpp_x_data(m);
 		pseries_cmo_data(m);
 		splpar_dispatch_data(m);
 
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 7839bd7bfd15..c016033ba78d 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -700,8 +700,10 @@ static void __init early_cmdline_parse(void)
 #endif /* CONFIG_PCI_MSI */
 #ifdef CONFIG_PPC_SMLPAR
 #define OV5_CMO			0x80	/* Cooperative Memory Overcommitment */
+#define OV5_XCMO			0x40	/* Page Coalescing */
 #else
 #define OV5_CMO			0x00
+#define OV5_XCMO			0x00
 #endif
 #define OV5_TYPE1_AFFINITY	0x80	/* Type 1 NUMA affinity */
 
@@ -756,7 +758,7 @@ static unsigned char ibm_architecture_vec[] = {
 	OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
 	OV5_DONATE_DEDICATE_CPU | OV5_MSI,
 	0,
-	OV5_CMO,
+	OV5_CMO | OV5_XCMO,
 	OV5_TYPE1_AFFINITY,
 	0,
 	0,
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index f48446635c89..271ff6318eda 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -42,6 +42,7 @@
 #include <asm/time.h>
 #include <asm/mmu.h>
 #include <asm/topology.h>
+#include <asm/pSeries_reconfig.h>
 
 struct rtas_t rtas = {
 	.lock = __ARCH_SPIN_LOCK_UNLOCKED
@@ -731,6 +732,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w
 
 	atomic_set(&data->error, rc);
 	start_topology_update();
+	pSeries_coalesce_init();
 
 	if (wake_when_done) {
 		atomic_set(&data->done, 1);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 6f0ed3aac77f..39e6e0a7b2fa 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -329,6 +329,8 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	/* Make pHyp happy */
 	if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU))
 		hpte_r &= ~_PAGE_COHERENT;
+	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
+		flags |= H_COALESCE_CAND;
 
 	lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
 	if (unlikely(lpar_rc == H_PTEG_FULL)) {
@@ -771,3 +773,47 @@ out:
 	local_irq_restore(flags);
 }
 #endif
+
+/**
+ * h_get_mpp
+ * H_GET_MPP hcall returns info in 7 parms
+ */
+int h_get_mpp(struct hvcall_mpp_data *mpp_data)
+{
+	int rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+	rc = plpar_hcall9(H_GET_MPP, retbuf);
+
+	mpp_data->entitled_mem = retbuf[0];
+	mpp_data->mapped_mem = retbuf[1];
+
+	mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+	mpp_data->pool_num = retbuf[2] & 0xffff;
+
+	mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
+	mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
+	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
+
+	mpp_data->pool_size = retbuf[4];
+	mpp_data->loan_request = retbuf[5];
+	mpp_data->backing_mem = retbuf[6];
+
+	return rc;
+}
+EXPORT_SYMBOL(h_get_mpp);
+
+int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
+{
+	int rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };
+
+	rc = plpar_hcall9(H_GET_MPP_X, retbuf);
+
+	mpp_x_data->coalesced_bytes = retbuf[0];
+	mpp_x_data->pool_coalesced_bytes = retbuf[1];
+	mpp_x_data->pool_purr_cycles = retbuf[2];
+	mpp_x_data->pool_spurr_cycles = retbuf[3];
+
+	return rc;
+}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index ab73ad2ff59d..1689adccc6d7 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -405,6 +405,16 @@ static int pseries_set_xdabr(unsigned long dabr)
 #define CMO_CHARACTERISTICS_TOKEN 44
 #define CMO_MAXLENGTH 1026
 
+void pSeries_coalesce_init(void)
+{
+	struct hvcall_mpp_x_data mpp_x_data;
+
+	if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data))
+		powerpc_firmware_features |= FW_FEATURE_XCMO;
+	else
+		powerpc_firmware_features &= ~FW_FEATURE_XCMO;
+}
+
 /**
  * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
  * handle that here. (Stolen from parse_system_parameter_string)
@@ -474,6 +484,7 @@ void pSeries_cmo_feature_init(void)
 		pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
 		         CMO_SecPSP);
 		powerpc_firmware_features |= FW_FEATURE_CMO;
+		pSeries_coalesce_init();
 	} else
 		pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
 		         CMO_SecPSP);
-- 
cgit v1.2.3


From 308fc4f8e10b5239cde46104bb9fca79b46230c8 Mon Sep 17 00:00:00 2001
From: Richard A Lary <rlary@linux.vnet.ibm.com>
Date: Fri, 22 Apr 2011 09:59:47 +0000
Subject: powerpc/pseries/eeh: Propagate needs_freset flag to device at PE

  For multifunction adapters with a PCI bridge or switch as the device
  at the Partitionable Endpoint(PE), if one or more devices below PE
  sets dev->needs_freset, that value will be set for the PE device.

  In other words, if any device below PE requires a fundamental reset
  the PE will request a fundamental reset.

Signed-off-by: Richard A Lary <rlary@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/eeh.c | 50 +++++++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 229373053864..6cb3e7bd175d 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -451,6 +451,39 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag)
 	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
 }
 
+void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
+{
+	struct device_node *dn;
+
+	for_each_child_of_node(parent, dn) {
+		if (PCI_DN(dn)) {
+
+			struct pci_dev *dev = PCI_DN(dn)->pcidev;
+
+			if (dev && dev->driver)
+				*freset |= dev->needs_freset;
+
+			__eeh_set_pe_freset(dn, freset);
+		}
+	}
+}
+
+void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
+{
+	struct pci_dev *dev;
+	dn = find_device_pe(dn);
+
+	/* Back up one, since config addrs might be shared */
+	if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+		dn = dn->parent;
+
+	dev = PCI_DN(dn)->pcidev;
+	if (dev)
+		*freset |= dev->needs_freset;
+
+	__eeh_set_pe_freset(dn, freset);
+}
+
 /**
  * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
  * @dn device node
@@ -739,18 +772,21 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
 /**
  * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
  * @pdn: pci device node to be reset.
- *
- *  Return 0 if success, else a non-zero value.
  */
 
 static void __rtas_set_slot_reset(struct pci_dn *pdn)
 {
-	struct pci_dev *dev = pdn->pcidev;
+	unsigned int freset = 0;
 
-	/* Determine type of EEH reset required by device,
-	 * default hot reset or fundamental reset
-	 */
-	if (dev && dev->needs_freset)
+	/* Determine type of EEH reset required for
+	 * Partitionable Endpoint, a hot-reset (1)
+	 * or a fundamental reset (3).
+	 * A fundamental reset required by any device under
+	 * Partitionable Endpoint trumps hot-reset.
+  	 */
+	eeh_set_pe_freset(pdn->node, &freset);
+
+	if (freset)
 		rtas_pci_slot_reset(pdn, 3);
 	else
 		rtas_pci_slot_reset(pdn, 1);
-- 
cgit v1.2.3


From ecb7390211fe9cb14ff0bae116a3f4f1149c0b6c Mon Sep 17 00:00:00 2001
From: Richard A Lary <rlary@linux.vnet.ibm.com>
Date: Fri, 22 Apr 2011 10:00:05 +0000
Subject: powerpc/pseries/eeh: Handle functional reset on non-PCIe device

  Fundamental reset is an optional reset type supported only by PCIe adapters.
  Handle the unexpected case where a non-PCIe device has requested a
  fundamental reset. Try hot-reset as a fallback to handle this case.

Signed-off-by: Richard A Lary <rlary@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/eeh.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 6cb3e7bd175d..46b55cf563e3 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -728,15 +728,24 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)
 	if (pdn->eeh_pe_config_addr)
 		config_addr = pdn->eeh_pe_config_addr;
 
-	rc = rtas_call(ibm_set_slot_reset,4,1, NULL,
+	rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
 	               config_addr,
 	               BUID_HI(pdn->phb->buid),
 	               BUID_LO(pdn->phb->buid),
 	               state);
-	if (rc)
-		printk (KERN_WARNING "EEH: Unable to reset the failed slot,"
-		        " (%d) #RST=%d dn=%s\n",
-		        rc, state, pdn->node->full_name);
+
+	/* Fundamental-reset not supported on this PE, try hot-reset */
+	if (rc == -8 && state == 3) {
+		rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+			       config_addr,
+			       BUID_HI(pdn->phb->buid),
+			       BUID_LO(pdn->phb->buid), 1);
+		if (rc)
+			printk(KERN_WARNING
+				"EEH: Unable to reset the failed slot,"
+				" #RST=%d dn=%s\n",
+				rc, pdn->node->full_name);
+	}
 }
 
 /**
-- 
cgit v1.2.3


From 904cc1e637a00dba1b58e7752f485f90ebf2a568 Mon Sep 17 00:00:00 2001
From: Naga Chumbalkar <nagananda.chumbalkar@hp.com>
Date: Tue, 26 Apr 2011 17:05:18 +0000
Subject: [CPUFREQ] Fix _OSC UUID in pcc-cpufreq

UUID needs to be written out the way it is described in
Sec 18.5.124 of ACPI 4.0a Specification.

Platform firmware's use of this UUID/_OSC is optional, which is
why we didn't notice this bug earlier.

Signed-off-by: Naga Chumbalkar <nagananda.chumbalkar@hp.com>
Signed-off-by: Dave Jones <davej@redhat.com>
Cc: stable@kernel.org
---
 arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 755a31e0f5b0..907c8e637ef5 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -39,7 +39,7 @@
 
 #include <acpi/processor.h>
 
-#define PCC_VERSION 	"1.00.00"
+#define PCC_VERSION	"1.10.00"
 #define POLL_LOOPS 	300
 
 #define CMD_COMPLETE 	0x1
@@ -102,7 +102,7 @@ static struct acpi_generic_address doorbell;
 static u64 doorbell_preserve;
 static u64 doorbell_write;
 
-static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f,
+static u8 OSC_UUID[16] = {0x9F, 0x2C, 0x9B, 0x63, 0x91, 0x70, 0x1f, 0x49,
 			  0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
 
 struct pcc_cpu {
-- 
cgit v1.2.3


From 2d06d8c49afdcc9bb35a85039fa50f0fe35bd40e Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 27 Mar 2011 15:04:46 +0200
Subject: [CPUFREQ] use dynamic debug instead of custom infrastructure

With dynamic debug having gained the capability to report debug messages
also during the boot process, it offers a far superior interface for
debug messages than the custom cpufreq infrastructure. As a first step,
remove the old cpufreq_debug_printk() function and replace it with a call
to the generic pr_debug() function.

How can dynamic debug be used on cpufreq? You need a kernel which has
CONFIG_DYNAMIC_DEBUG enabled.

To enabled debugging during runtime, mount debugfs and

$ echo -n 'module cpufreq +p' > /sys/kernel/debug/dynamic_debug/control

for debugging the complete "cpufreq" module. To achieve the same goal during
boot, append

	ddebug_query="module cpufreq +p"

as a boot parameter to the kernel of your choice.

For more detailled instructions, please see
Documentation/dynamic-debug-howto.txt

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/arm/mach-davinci/cpufreq.c                  |   4 +-
 arch/blackfin/mach-common/dpmc.c                 |   3 -
 arch/ia64/kernel/cpufreq/acpi-cpufreq.c          |  44 +++---
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c       |  45 +++---
 arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c    |   6 +-
 arch/x86/kernel/cpu/cpufreq/gx-suspmod.c         |  21 ++-
 arch/x86/kernel/cpu/cpufreq/longhaul.c           |  11 +-
 arch/x86/kernel/cpu/cpufreq/longrun.c            |  17 +--
 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c        |  10 +-
 arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c        |  47 +++---
 arch/x86/kernel/cpu/cpufreq/powernow-k7.c        |  33 ++---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c        | 100 ++++++-------
 arch/x86/kernel/cpu/cpufreq/powernow-k8.h        |   2 -
 arch/x86/kernel/cpu/cpufreq/sc520_freq.c         |   6 +-
 arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c |  23 ++-
 arch/x86/kernel/cpu/cpufreq/speedstep-ich.c      |  28 ++--
 arch/x86/kernel/cpu/cpufreq/speedstep-lib.c      |  43 +++---
 arch/x86/kernel/cpu/cpufreq/speedstep-smi.c      |  41 +++---
 drivers/acpi/processor_perflib.c                 |   6 +-
 drivers/cpufreq/Kconfig                          |  13 --
 drivers/cpufreq/cpufreq.c                        | 180 +++++------------------
 drivers/cpufreq/cpufreq_performance.c            |   5 +-
 drivers/cpufreq/cpufreq_powersave.c              |   5 +-
 drivers/cpufreq/cpufreq_userspace.c              |  13 +-
 drivers/cpufreq/freq_table.c                     |  19 +--
 include/linux/cpufreq.h                          |  19 ---
 26 files changed, 270 insertions(+), 474 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-davinci/cpufreq.c b/arch/arm/mach-davinci/cpufreq.c
index 0a95be1512bb..41669ecc1f91 100644
--- a/arch/arm/mach-davinci/cpufreq.c
+++ b/arch/arm/mach-davinci/cpufreq.c
@@ -94,9 +94,7 @@ static int davinci_target(struct cpufreq_policy *policy,
 	if (freqs.old == freqs.new)
 		return ret;
 
-	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER,
-			dev_driver_string(cpufreq.dev),
-			"transition: %u --> %u\n", freqs.old, freqs.new);
+	dev_dbg(&cpufreq.dev, "transition: %u --> %u\n", freqs.old, freqs.new);
 
 	ret = cpufreq_frequency_table_target(policy, pdata->freq_table,
 						freqs.new, relation, &idx);
diff --git a/arch/blackfin/mach-common/dpmc.c b/arch/blackfin/mach-common/dpmc.c
index 382099fd5561..5e4112e518a9 100644
--- a/arch/blackfin/mach-common/dpmc.c
+++ b/arch/blackfin/mach-common/dpmc.c
@@ -19,9 +19,6 @@
 
 #define DRIVER_NAME "bfin dpmc"
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, DRIVER_NAME, msg)
-
 struct bfin_dpmc_platform_data *pdata;
 
 /**
diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
index 22f61526a8e1..f09b174244d5 100644
--- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
+++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
@@ -23,8 +23,6 @@
 #include <linux/acpi.h>
 #include <acpi/processor.h>
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
-
 MODULE_AUTHOR("Venkatesh Pallipadi");
 MODULE_DESCRIPTION("ACPI Processor P-States Driver");
 MODULE_LICENSE("GPL");
@@ -47,12 +45,12 @@ processor_set_pstate (
 {
 	s64 retval;
 
-	dprintk("processor_set_pstate\n");
+	pr_debug("processor_set_pstate\n");
 
 	retval = ia64_pal_set_pstate((u64)value);
 
 	if (retval) {
-		dprintk("Failed to set freq to 0x%x, with error 0x%lx\n",
+		pr_debug("Failed to set freq to 0x%x, with error 0x%lx\n",
 		        value, retval);
 		return -ENODEV;
 	}
@@ -67,14 +65,14 @@ processor_get_pstate (
 	u64	pstate_index = 0;
 	s64 	retval;
 
-	dprintk("processor_get_pstate\n");
+	pr_debug("processor_get_pstate\n");
 
 	retval = ia64_pal_get_pstate(&pstate_index,
 	                             PAL_GET_PSTATE_TYPE_INSTANT);
 	*value = (u32) pstate_index;
 
 	if (retval)
-		dprintk("Failed to get current freq with "
+		pr_debug("Failed to get current freq with "
 			"error 0x%lx, idx 0x%x\n", retval, *value);
 
 	return (int)retval;
@@ -90,7 +88,7 @@ extract_clock (
 {
 	unsigned long i;
 
-	dprintk("extract_clock\n");
+	pr_debug("extract_clock\n");
 
 	for (i = 0; i < data->acpi_data.state_count; i++) {
 		if (value == data->acpi_data.states[i].status)
@@ -110,7 +108,7 @@ processor_get_freq (
 	cpumask_t		saved_mask;
 	unsigned long 		clock_freq;
 
-	dprintk("processor_get_freq\n");
+	pr_debug("processor_get_freq\n");
 
 	saved_mask = current->cpus_allowed;
 	set_cpus_allowed_ptr(current, cpumask_of(cpu));
@@ -148,7 +146,7 @@ processor_set_freq (
 	cpumask_t		saved_mask;
 	int			retval;
 
-	dprintk("processor_set_freq\n");
+	pr_debug("processor_set_freq\n");
 
 	saved_mask = current->cpus_allowed;
 	set_cpus_allowed_ptr(current, cpumask_of(cpu));
@@ -159,16 +157,16 @@ processor_set_freq (
 
 	if (state == data->acpi_data.state) {
 		if (unlikely(data->resume)) {
-			dprintk("Called after resume, resetting to P%d\n", state);
+			pr_debug("Called after resume, resetting to P%d\n", state);
 			data->resume = 0;
 		} else {
-			dprintk("Already at target state (P%d)\n", state);
+			pr_debug("Already at target state (P%d)\n", state);
 			retval = 0;
 			goto migrate_end;
 		}
 	}
 
-	dprintk("Transitioning from P%d to P%d\n",
+	pr_debug("Transitioning from P%d to P%d\n",
 		data->acpi_data.state, state);
 
 	/* cpufreq frequency struct */
@@ -186,7 +184,7 @@ processor_set_freq (
 
 	value = (u32) data->acpi_data.states[state].control;
 
-	dprintk("Transitioning to state: 0x%08x\n", value);
+	pr_debug("Transitioning to state: 0x%08x\n", value);
 
 	ret = processor_set_pstate(value);
 	if (ret) {
@@ -219,7 +217,7 @@ acpi_cpufreq_get (
 {
 	struct cpufreq_acpi_io *data = acpi_io_data[cpu];
 
-	dprintk("acpi_cpufreq_get\n");
+	pr_debug("acpi_cpufreq_get\n");
 
 	return processor_get_freq(data, cpu);
 }
@@ -235,7 +233,7 @@ acpi_cpufreq_target (
 	unsigned int next_state = 0;
 	unsigned int result = 0;
 
-	dprintk("acpi_cpufreq_setpolicy\n");
+	pr_debug("acpi_cpufreq_setpolicy\n");
 
 	result = cpufreq_frequency_table_target(policy,
 			data->freq_table, target_freq, relation, &next_state);
@@ -255,7 +253,7 @@ acpi_cpufreq_verify (
 	unsigned int result = 0;
 	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
 
-	dprintk("acpi_cpufreq_verify\n");
+	pr_debug("acpi_cpufreq_verify\n");
 
 	result = cpufreq_frequency_table_verify(policy,
 			data->freq_table);
@@ -273,7 +271,7 @@ acpi_cpufreq_cpu_init (
 	struct cpufreq_acpi_io	*data;
 	unsigned int		result = 0;
 
-	dprintk("acpi_cpufreq_cpu_init\n");
+	pr_debug("acpi_cpufreq_cpu_init\n");
 
 	data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
 	if (!data)
@@ -288,7 +286,7 @@ acpi_cpufreq_cpu_init (
 
 	/* capability check */
 	if (data->acpi_data.state_count <= 1) {
-		dprintk("No P-States\n");
+		pr_debug("No P-States\n");
 		result = -ENODEV;
 		goto err_unreg;
 	}
@@ -297,7 +295,7 @@ acpi_cpufreq_cpu_init (
 					ACPI_ADR_SPACE_FIXED_HARDWARE) ||
 	    (data->acpi_data.status_register.space_id !=
 					ACPI_ADR_SPACE_FIXED_HARDWARE)) {
-		dprintk("Unsupported address space [%d, %d]\n",
+		pr_debug("Unsupported address space [%d, %d]\n",
 			(u32) (data->acpi_data.control_register.space_id),
 			(u32) (data->acpi_data.status_register.space_id));
 		result = -ENODEV;
@@ -348,7 +346,7 @@ acpi_cpufreq_cpu_init (
 	       "activated.\n", cpu);
 
 	for (i = 0; i < data->acpi_data.state_count; i++)
-		dprintk("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
+		pr_debug("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
 			(i == data->acpi_data.state?'*':' '), i,
 			(u32) data->acpi_data.states[i].core_frequency,
 			(u32) data->acpi_data.states[i].power,
@@ -383,7 +381,7 @@ acpi_cpufreq_cpu_exit (
 {
 	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
 
-	dprintk("acpi_cpufreq_cpu_exit\n");
+	pr_debug("acpi_cpufreq_cpu_exit\n");
 
 	if (data) {
 		cpufreq_frequency_table_put_attr(policy->cpu);
@@ -418,7 +416,7 @@ static struct cpufreq_driver acpi_cpufreq_driver = {
 static int __init
 acpi_cpufreq_init (void)
 {
-	dprintk("acpi_cpufreq_init\n");
+	pr_debug("acpi_cpufreq_init\n");
 
  	return cpufreq_register_driver(&acpi_cpufreq_driver);
 }
@@ -427,7 +425,7 @@ acpi_cpufreq_init (void)
 static void __exit
 acpi_cpufreq_exit (void)
 {
-	dprintk("acpi_cpufreq_exit\n");
+	pr_debug("acpi_cpufreq_exit\n");
 
 	cpufreq_unregister_driver(&acpi_cpufreq_driver);
 	return;
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index a2baafb2fe6d..4e04e1274388 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -47,9 +47,6 @@
 #include <asm/cpufeature.h>
 #include "mperf.h"
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"acpi-cpufreq", msg)
-
 MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
 MODULE_DESCRIPTION("ACPI Processor P-States Driver");
 MODULE_LICENSE("GPL");
@@ -233,7 +230,7 @@ static u32 get_cur_val(const struct cpumask *mask)
 	cmd.mask = mask;
 	drv_read(&cmd);
 
-	dprintk("get_cur_val = %u\n", cmd.val);
+	pr_debug("get_cur_val = %u\n", cmd.val);
 
 	return cmd.val;
 }
@@ -244,7 +241,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	unsigned int freq;
 	unsigned int cached_freq;
 
-	dprintk("get_cur_freq_on_cpu (%d)\n", cpu);
+	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
 
 	if (unlikely(data == NULL ||
 		     data->acpi_data == NULL || data->freq_table == NULL)) {
@@ -261,7 +258,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 		data->resume = 1;
 	}
 
-	dprintk("cur freq = %u\n", freq);
+	pr_debug("cur freq = %u\n", freq);
 
 	return freq;
 }
@@ -293,7 +290,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	unsigned int i;
 	int result = 0;
 
-	dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
+	pr_debug("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
 
 	if (unlikely(data == NULL ||
 	     data->acpi_data == NULL || data->freq_table == NULL)) {
@@ -313,11 +310,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	next_perf_state = data->freq_table[next_state].index;
 	if (perf->state == next_perf_state) {
 		if (unlikely(data->resume)) {
-			dprintk("Called after resume, resetting to P%d\n",
+			pr_debug("Called after resume, resetting to P%d\n",
 				next_perf_state);
 			data->resume = 0;
 		} else {
-			dprintk("Already at target state (P%d)\n",
+			pr_debug("Already at target state (P%d)\n",
 				next_perf_state);
 			goto out;
 		}
@@ -357,7 +354,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 
 	if (acpi_pstate_strict) {
 		if (!check_freqs(cmd.mask, freqs.new, data)) {
-			dprintk("acpi_cpufreq_target failed (%d)\n",
+			pr_debug("acpi_cpufreq_target failed (%d)\n",
 				policy->cpu);
 			result = -EAGAIN;
 			goto out;
@@ -378,7 +375,7 @@ static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
 {
 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 
-	dprintk("acpi_cpufreq_verify\n");
+	pr_debug("acpi_cpufreq_verify\n");
 
 	return cpufreq_frequency_table_verify(policy, data->freq_table);
 }
@@ -433,11 +430,11 @@ static void free_acpi_perf_data(void)
 static int __init acpi_cpufreq_early_init(void)
 {
 	unsigned int i;
-	dprintk("acpi_cpufreq_early_init\n");
+	pr_debug("acpi_cpufreq_early_init\n");
 
 	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
 	if (!acpi_perf_data) {
-		dprintk("Memory allocation error for acpi_perf_data.\n");
+		pr_debug("Memory allocation error for acpi_perf_data.\n");
 		return -ENOMEM;
 	}
 	for_each_possible_cpu(i) {
@@ -519,7 +516,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	static int blacklisted;
 #endif
 
-	dprintk("acpi_cpufreq_cpu_init\n");
+	pr_debug("acpi_cpufreq_cpu_init\n");
 
 #ifdef CONFIG_SMP
 	if (blacklisted)
@@ -566,7 +563,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	/* capability check */
 	if (perf->state_count <= 1) {
-		dprintk("No P-States\n");
+		pr_debug("No P-States\n");
 		result = -ENODEV;
 		goto err_unreg;
 	}
@@ -578,11 +575,11 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	switch (perf->control_register.space_id) {
 	case ACPI_ADR_SPACE_SYSTEM_IO:
-		dprintk("SYSTEM IO addr space\n");
+		pr_debug("SYSTEM IO addr space\n");
 		data->cpu_feature = SYSTEM_IO_CAPABLE;
 		break;
 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
-		dprintk("HARDWARE addr space\n");
+		pr_debug("HARDWARE addr space\n");
 		if (!check_est_cpu(cpu)) {
 			result = -ENODEV;
 			goto err_unreg;
@@ -590,7 +587,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
 		break;
 	default:
-		dprintk("Unknown addr space %d\n",
+		pr_debug("Unknown addr space %d\n",
 			(u32) (perf->control_register.space_id));
 		result = -ENODEV;
 		goto err_unreg;
@@ -661,9 +658,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	if (cpu_has(c, X86_FEATURE_APERFMPERF))
 		acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
 
-	dprintk("CPU%u - ACPI performance management activated.\n", cpu);
+	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
 	for (i = 0; i < perf->state_count; i++)
-		dprintk("     %cP%d: %d MHz, %d mW, %d uS\n",
+		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
 			(i == perf->state ? '*' : ' '), i,
 			(u32) perf->states[i].core_frequency,
 			(u32) perf->states[i].power,
@@ -694,7 +691,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 {
 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 
-	dprintk("acpi_cpufreq_cpu_exit\n");
+	pr_debug("acpi_cpufreq_cpu_exit\n");
 
 	if (data) {
 		cpufreq_frequency_table_put_attr(policy->cpu);
@@ -712,7 +709,7 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
 {
 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 
-	dprintk("acpi_cpufreq_resume\n");
+	pr_debug("acpi_cpufreq_resume\n");
 
 	data->resume = 1;
 
@@ -743,7 +740,7 @@ static int __init acpi_cpufreq_init(void)
 	if (acpi_disabled)
 		return 0;
 
-	dprintk("acpi_cpufreq_init\n");
+	pr_debug("acpi_cpufreq_init\n");
 
 	ret = acpi_cpufreq_early_init();
 	if (ret)
@@ -758,7 +755,7 @@ static int __init acpi_cpufreq_init(void)
 
 static void __exit acpi_cpufreq_exit(void)
 {
-	dprintk("acpi_cpufreq_exit\n");
+	pr_debug("acpi_cpufreq_exit\n");
 
 	cpufreq_unregister_driver(&acpi_cpufreq_driver);
 
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 141abebc4516..7bac808804f3 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -57,8 +57,6 @@ MODULE_PARM_DESC(min_fsb,
 		"Minimum FSB to use, if not defined: current FSB - 50");
 
 #define PFX "cpufreq-nforce2: "
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"cpufreq-nforce2", msg)
 
 /**
  * nforce2_calc_fsb - calculate FSB
@@ -270,7 +268,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
 	if (freqs.old == freqs.new)
 		return 0;
 
-	dprintk("Old CPU frequency %d kHz, new %d kHz\n",
+	pr_debug("Old CPU frequency %d kHz, new %d kHz\n",
 	       freqs.old, freqs.new);
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
@@ -282,7 +280,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
 		printk(KERN_ERR PFX "Changing FSB to %d failed\n",
 			target_fsb);
 	else
-		dprintk("Changed FSB successfully to %d\n",
+		pr_debug("Changed FSB successfully to %d\n",
 			target_fsb);
 
 	/* Enable IRQs */
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index 32974cf84232..ffe1f2c92ed3 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -142,9 +142,6 @@ module_param(max_duration, int, 0444);
 #define POLICY_MIN_DIV 20
 
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"gx-suspmod", msg)
-
 /**
  * we can detect a core multipiler from dir0_lsb
  * from GX1 datasheet p.56,
@@ -191,7 +188,7 @@ static __init struct pci_dev *gx_detect_chipset(void)
 	/* check if CPU is a MediaGX or a Geode. */
 	if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
 	    (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
-		dprintk("error: no MediaGX/Geode processor found!\n");
+		pr_debug("error: no MediaGX/Geode processor found!\n");
 		return NULL;
 	}
 
@@ -201,7 +198,7 @@ static __init struct pci_dev *gx_detect_chipset(void)
 			return gx_pci;
 	}
 
-	dprintk("error: no supported chipset found!\n");
+	pr_debug("error: no supported chipset found!\n");
 	return NULL;
 }
 
@@ -305,14 +302,14 @@ static void gx_set_cpuspeed(unsigned int khz)
 			break;
 		default:
 			local_irq_restore(flags);
-			dprintk("fatal: try to set unknown chipset.\n");
+			pr_debug("fatal: try to set unknown chipset.\n");
 			return;
 		}
 	} else {
 		suscfg = gx_params->pci_suscfg & ~(SUSMOD);
 		gx_params->off_duration = 0;
 		gx_params->on_duration = 0;
-		dprintk("suspend modulation disabled: cpu runs 100%% speed.\n");
+		pr_debug("suspend modulation disabled: cpu runs 100%% speed.\n");
 	}
 
 	gx_write_byte(PCI_MODOFF, gx_params->off_duration);
@@ -327,9 +324,9 @@ static void gx_set_cpuspeed(unsigned int khz)
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 
-	dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
+	pr_debug("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
 		gx_params->on_duration * 32, gx_params->off_duration * 32);
-	dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new);
+	pr_debug("suspend modulation w/ clock speed: %d kHz.\n", freqs.new);
 }
 
 /****************************************************************
@@ -428,8 +425,8 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
 	stock_freq = maxfreq;
 	curfreq = gx_get_cpuspeed(0);
 
-	dprintk("cpu max frequency is %d.\n", maxfreq);
-	dprintk("cpu current frequency is %dkHz.\n", curfreq);
+	pr_debug("cpu max frequency is %d.\n", maxfreq);
+	pr_debug("cpu current frequency is %dkHz.\n", curfreq);
 
 	/* setup basic struct for cpufreq API */
 	policy->cpu = 0;
@@ -475,7 +472,7 @@ static int __init cpufreq_gx_init(void)
 	if (max_duration > 0xff)
 		max_duration = 0xff;
 
-	dprintk("geode suspend modulation available.\n");
+	pr_debug("geode suspend modulation available.\n");
 
 	params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL);
 	if (params == NULL)
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index cf48cdd6907d..f47d26e2a135 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -77,9 +77,6 @@ static int scale_voltage;
 static int disable_acpi_c3;
 static int revid_errata;
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"longhaul", msg)
-
 
 /* Clock ratios multiplied by 10 */
 static int mults[32];
@@ -87,7 +84,6 @@ static int eblcr[32];
 static int longhaul_version;
 static struct cpufreq_frequency_table *longhaul_table;
 
-#ifdef CONFIG_CPU_FREQ_DEBUG
 static char speedbuffer[8];
 
 static char *print_speed(int speed)
@@ -106,7 +102,6 @@ static char *print_speed(int speed)
 
 	return speedbuffer;
 }
-#endif
 
 
 static unsigned int calc_speed(int mult)
@@ -275,7 +270,7 @@ static void longhaul_setstate(unsigned int table_index)
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 
-	dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
+	pr_debug("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
 			fsb, mult/10, mult%10, print_speed(speed/1000));
 retry_loop:
 	preempt_disable();
@@ -460,12 +455,12 @@ static int __cpuinit longhaul_get_ranges(void)
 		break;
 	}
 
-	dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n",
+	pr_debug("MinMult:%d.%dx MaxMult:%d.%dx\n",
 		 minmult/10, minmult%10, maxmult/10, maxmult%10);
 
 	highest_speed = calc_speed(maxmult);
 	lowest_speed = calc_speed(minmult);
-	dprintk("FSB:%dMHz  Lowest speed: %s   Highest speed:%s\n", fsb,
+	pr_debug("FSB:%dMHz  Lowest speed: %s   Highest speed:%s\n", fsb,
 		 print_speed(lowest_speed/1000),
 		 print_speed(highest_speed/1000));
 
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index d9f51367666b..34ea359b370e 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -15,9 +15,6 @@
 #include <asm/msr.h>
 #include <asm/processor.h>
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"longrun", msg)
-
 static struct cpufreq_driver	longrun_driver;
 
 /**
@@ -40,14 +37,14 @@ static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy)
 	u32 msr_lo, msr_hi;
 
 	rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
-	dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi);
+	pr_debug("longrun flags are %x - %x\n", msr_lo, msr_hi);
 	if (msr_lo & 0x01)
 		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
 	else
 		policy->policy = CPUFREQ_POLICY_POWERSAVE;
 
 	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-	dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
+	pr_debug("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
 	msr_lo &= 0x0000007F;
 	msr_hi &= 0x0000007F;
 
@@ -150,7 +147,7 @@ static unsigned int longrun_get(unsigned int cpu)
 		return 0;
 
 	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
-	dprintk("cpuid eax is %u\n", eax);
+	pr_debug("cpuid eax is %u\n", eax);
 
 	return eax * 1000;
 }
@@ -196,7 +193,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
 		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
 		*high_freq = msr_lo * 1000; /* to kHz */
 
-		dprintk("longrun table interface told %u - %u kHz\n",
+		pr_debug("longrun table interface told %u - %u kHz\n",
 				*low_freq, *high_freq);
 
 		if (*low_freq > *high_freq)
@@ -207,7 +204,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
 	/* set the upper border to the value determined during TSC init */
 	*high_freq = (cpu_khz / 1000);
 	*high_freq = *high_freq * 1000;
-	dprintk("high frequency is %u kHz\n", *high_freq);
+	pr_debug("high frequency is %u kHz\n", *high_freq);
 
 	/* get current borders */
 	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
@@ -233,7 +230,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
 		/* restore values */
 		wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi);
 	}
-	dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax);
+	pr_debug("percentage is %u %%, freq is %u MHz\n", ecx, eax);
 
 	/* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
 	 * eqals
@@ -249,7 +246,7 @@ static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
 	edx = ((eax - ebx) * 100) / (100 - ecx);
 	*low_freq = edx * 1000; /* back to kHz */
 
-	dprintk("low frequency is %u kHz\n", *low_freq);
+	pr_debug("low frequency is %u kHz\n", *low_freq);
 
 	if (*low_freq > *high_freq)
 		*low_freq = *high_freq;
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 52c93648e492..6be3e0760c26 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -35,8 +35,6 @@
 #include "speedstep-lib.h"
 
 #define PFX	"p4-clockmod: "
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"p4-clockmod", msg)
 
 /*
  * Duty Cycle (3bits), note DC_DISABLE is not specified in
@@ -66,7 +64,7 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
 	rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
 
 	if (l & 0x01)
-		dprintk("CPU#%d currently thermal throttled\n", cpu);
+		pr_debug("CPU#%d currently thermal throttled\n", cpu);
 
 	if (has_N44_O17_errata[cpu] &&
 	    (newstate == DC_25PT || newstate == DC_DFLT))
@@ -74,10 +72,10 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
 
 	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
 	if (newstate == DC_DISABLE) {
-		dprintk("CPU#%d disabling modulation\n", cpu);
+		pr_debug("CPU#%d disabling modulation\n", cpu);
 		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
 	} else {
-		dprintk("CPU#%d setting duty cycle to %d%%\n",
+		pr_debug("CPU#%d setting duty cycle to %d%%\n",
 			cpu, ((125 * newstate) / 10));
 		/* bits 63 - 5	: reserved
 		 * bit  4	: enable/disable
@@ -217,7 +215,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
 	case 0x0f11:
 	case 0x0f12:
 		has_N44_O17_errata[policy->cpu] = 1;
-		dprintk("has errata -- disabling low frequencies\n");
+		pr_debug("has errata -- disabling low frequencies\n");
 	}
 
 	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 907c8e637ef5..7b0603eb0129 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -48,9 +48,6 @@
 
 #define BUF_SZ		4
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER,	\
-					     "pcc-cpufreq", msg)
-
 struct pcc_register_resource {
 	u8 descriptor;
 	u16 length;
@@ -152,7 +149,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
 
 	spin_lock(&pcc_lock);
 
-	dprintk("get: get_freq for CPU %d\n", cpu);
+	pr_debug("get: get_freq for CPU %d\n", cpu);
 	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
 
 	input_buffer = 0x1;
@@ -170,7 +167,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
 
 	status = ioread16(&pcch_hdr->status);
 	if (status != CMD_COMPLETE) {
-		dprintk("get: FAILED: for CPU %d, status is %d\n",
+		pr_debug("get: FAILED: for CPU %d, status is %d\n",
 			cpu, status);
 		goto cmd_incomplete;
 	}
@@ -178,14 +175,14 @@ static unsigned int pcc_get_freq(unsigned int cpu)
 	curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
 			/ 100) * 1000);
 
-	dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is "
-		"0x%x, contains a value of: 0x%x. Speed is: %d MHz\n",
+	pr_debug("get: SUCCESS: (virtual) output_offset for cpu %d is "
+		"0x%p, contains a value of: 0x%x. Speed is: %d MHz\n",
 		cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
 		output_buffer, curr_freq);
 
 	freq_limit = (output_buffer >> 8) & 0xff;
 	if (freq_limit != 0xff) {
-		dprintk("get: frequency for cpu %d is being temporarily"
+		pr_debug("get: frequency for cpu %d is being temporarily"
 			" capped at %d\n", cpu, curr_freq);
 	}
 
@@ -212,8 +209,8 @@ static int pcc_cpufreq_target(struct cpufreq_policy *policy,
 	cpu = policy->cpu;
 	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
 
-	dprintk("target: CPU %d should go to target freq: %d "
-		"(virtual) input_offset is 0x%x\n",
+	pr_debug("target: CPU %d should go to target freq: %d "
+		"(virtual) input_offset is 0x%p\n",
 		cpu, target_freq,
 		(pcch_virt_addr + pcc_cpu_data->input_offset));
 
@@ -234,14 +231,14 @@ static int pcc_cpufreq_target(struct cpufreq_policy *policy,
 
 	status = ioread16(&pcch_hdr->status);
 	if (status != CMD_COMPLETE) {
-		dprintk("target: FAILED for cpu %d, with status: 0x%x\n",
+		pr_debug("target: FAILED for cpu %d, with status: 0x%x\n",
 			cpu, status);
 		goto cmd_incomplete;
 	}
 	iowrite16(0, &pcch_hdr->status);
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	dprintk("target: was SUCCESSFUL for cpu %d\n", cpu);
+	pr_debug("target: was SUCCESSFUL for cpu %d\n", cpu);
 	spin_unlock(&pcc_lock);
 
 	return 0;
@@ -293,7 +290,7 @@ static int pcc_get_offset(int cpu)
 	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
 	memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
 
-	dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data "
+	pr_debug("pcc_get_offset: for CPU %d: pcc_cpu_data "
 		"input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
 		cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
 out_free:
@@ -410,7 +407,7 @@ static int __init pcc_cpufreq_probe(void)
 	if (ACPI_SUCCESS(status)) {
 		ret = pcc_cpufreq_do_osc(&osc_handle);
 		if (ret)
-			dprintk("probe: _OSC evaluation did not succeed\n");
+			pr_debug("probe: _OSC evaluation did not succeed\n");
 		/* Firmware's use of _OSC is optional */
 		ret = 0;
 	}
@@ -433,7 +430,7 @@ static int __init pcc_cpufreq_probe(void)
 
 	mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
 
-	dprintk("probe: mem_resource descriptor: 0x%x,"
+	pr_debug("probe: mem_resource descriptor: 0x%x,"
 		" length: %d, space_id: %d, resource_usage: %d,"
 		" type_specific: %d, granularity: 0x%llx,"
 		" minimum: 0x%llx, maximum: 0x%llx,"
@@ -453,13 +450,13 @@ static int __init pcc_cpufreq_probe(void)
 	pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
 					mem_resource->address_length);
 	if (pcch_virt_addr == NULL) {
-		dprintk("probe: could not map shared mem region\n");
+		pr_debug("probe: could not map shared mem region\n");
 		goto out_free;
 	}
 	pcch_hdr = pcch_virt_addr;
 
-	dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
-	dprintk("probe: PCCH header is at physical address: 0x%llx,"
+	pr_debug("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
+	pr_debug("probe: PCCH header is at physical address: 0x%llx,"
 		" signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
 		" supported features: 0x%x, command field: 0x%x,"
 		" status field: 0x%x, nominal latency: %d us\n",
@@ -469,7 +466,7 @@ static int __init pcc_cpufreq_probe(void)
 		ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
 		ioread32(&pcch_hdr->latency));
 
-	dprintk("probe: min time between commands: %d us,"
+	pr_debug("probe: min time between commands: %d us,"
 		" max time between commands: %d us,"
 		" nominal CPU frequency: %d MHz,"
 		" minimum CPU frequency: %d MHz,"
@@ -494,7 +491,7 @@ static int __init pcc_cpufreq_probe(void)
 	doorbell.access_width = 64;
 	doorbell.address = reg_resource->address;
 
-	dprintk("probe: doorbell: space_id is %d, bit_width is %d, "
+	pr_debug("probe: doorbell: space_id is %d, bit_width is %d, "
 		"bit_offset is %d, access_width is %d, address is 0x%llx\n",
 		doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
 		doorbell.access_width, reg_resource->address);
@@ -515,7 +512,7 @@ static int __init pcc_cpufreq_probe(void)
 
 	doorbell_write = member->integer.value;
 
-	dprintk("probe: doorbell_preserve: 0x%llx,"
+	pr_debug("probe: doorbell_preserve: 0x%llx,"
 		" doorbell_write: 0x%llx\n",
 		doorbell_preserve, doorbell_write);
 
@@ -550,7 +547,7 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	result = pcc_get_offset(cpu);
 	if (result) {
-		dprintk("init: PCCP evaluation failed\n");
+		pr_debug("init: PCCP evaluation failed\n");
 		goto out;
 	}
 
@@ -561,12 +558,12 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	policy->cur = pcc_get_freq(cpu);
 
 	if (!policy->cur) {
-		dprintk("init: Unable to get current CPU frequency\n");
+		pr_debug("init: Unable to get current CPU frequency\n");
 		result = -EINVAL;
 		goto out;
 	}
 
-	dprintk("init: policy->max is %d, policy->min is %d\n",
+	pr_debug("init: policy->max is %d, policy->min is %d\n",
 		policy->max, policy->min);
 out:
 	return result;
@@ -597,7 +594,7 @@ static int __init pcc_cpufreq_init(void)
 
 	ret = pcc_cpufreq_probe();
 	if (ret) {
-		dprintk("pcc_cpufreq_init: PCCH evaluation failed\n");
+		pr_debug("pcc_cpufreq_init: PCCH evaluation failed\n");
 		return ret;
 	}
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 4a45fd6e41ba..d71d9f372359 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -68,7 +68,6 @@ union powernow_acpi_control_t {
 };
 #endif
 
-#ifdef CONFIG_CPU_FREQ_DEBUG
 /* divide by 1000 to get VCore voltage in V. */
 static const int mobile_vid_table[32] = {
     2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
@@ -76,7 +75,6 @@ static const int mobile_vid_table[32] = {
     1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
     1075, 1050, 1025, 1000, 975, 950, 925, 0,
 };
-#endif
 
 /* divide by 10 to get FID. */
 static const int fid_codes[32] = {
@@ -103,9 +101,6 @@ static unsigned int fsb;
 static unsigned int latency;
 static char have_a0;
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"powernow-k7", msg)
-
 static int check_fsb(unsigned int fsbspeed)
 {
 	int delta;
@@ -209,7 +204,7 @@ static int get_ranges(unsigned char *pst)
 		vid = *pst++;
 		powernow_table[j].index |= (vid << 8); /* upper 8 bits */
 
-		dprintk("   FID: 0x%x (%d.%dx [%dMHz])  "
+		pr_debug("   FID: 0x%x (%d.%dx [%dMHz])  "
 			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
 			 fid_codes[fid] % 10, speed/1000, vid,
 			 mobile_vid_table[vid]/1000,
@@ -367,7 +362,7 @@ static int powernow_acpi_init(void)
 		unsigned int speed, speed_mhz;
 
 		pc.val = (unsigned long) state->control;
-		dprintk("acpi:  P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
+		pr_debug("acpi:  P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
 			 i,
 			 (u32) state->core_frequency,
 			 (u32) state->power,
@@ -401,7 +396,7 @@ static int powernow_acpi_init(void)
 				invalidate_entry(i);
 		}
 
-		dprintk("   FID: 0x%x (%d.%dx [%dMHz])  "
+		pr_debug("   FID: 0x%x (%d.%dx [%dMHz])  "
 			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
 			 fid_codes[fid] % 10, speed_mhz, vid,
 			 mobile_vid_table[vid]/1000,
@@ -409,7 +404,7 @@ static int powernow_acpi_init(void)
 
 		if (state->core_frequency != speed_mhz) {
 			state->core_frequency = speed_mhz;
-			dprintk("   Corrected ACPI frequency to %d\n",
+			pr_debug("   Corrected ACPI frequency to %d\n",
 				speed_mhz);
 		}
 
@@ -453,8 +448,8 @@ static int powernow_acpi_init(void)
 
 static void print_pst_entry(struct pst_s *pst, unsigned int j)
 {
-	dprintk("PST:%d (@%p)\n", j, pst);
-	dprintk(" cpuid: 0x%x  fsb: %d  maxFID: 0x%x  startvid: 0x%x\n",
+	pr_debug("PST:%d (@%p)\n", j, pst);
+	pr_debug(" cpuid: 0x%x  fsb: %d  maxFID: 0x%x  startvid: 0x%x\n",
 		pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
 }
 
@@ -474,20 +469,20 @@ static int powernow_decode_bios(int maxfid, int startvid)
 		p = phys_to_virt(i);
 
 		if (memcmp(p, "AMDK7PNOW!",  10) == 0) {
-			dprintk("Found PSB header at %p\n", p);
+			pr_debug("Found PSB header at %p\n", p);
 			psb = (struct psb_s *) p;
-			dprintk("Table version: 0x%x\n", psb->tableversion);
+			pr_debug("Table version: 0x%x\n", psb->tableversion);
 			if (psb->tableversion != 0x12) {
 				printk(KERN_INFO PFX "Sorry, only v1.2 tables"
 						" supported right now\n");
 				return -ENODEV;
 			}
 
-			dprintk("Flags: 0x%x\n", psb->flags);
+			pr_debug("Flags: 0x%x\n", psb->flags);
 			if ((psb->flags & 1) == 0)
-				dprintk("Mobile voltage regulator\n");
+				pr_debug("Mobile voltage regulator\n");
 			else
-				dprintk("Desktop voltage regulator\n");
+				pr_debug("Desktop voltage regulator\n");
 
 			latency = psb->settlingtime;
 			if (latency < 100) {
@@ -497,9 +492,9 @@ static int powernow_decode_bios(int maxfid, int startvid)
 						"Correcting.\n", latency);
 				latency = 100;
 			}
-			dprintk("Settling Time: %d microseconds.\n",
+			pr_debug("Settling Time: %d microseconds.\n",
 					psb->settlingtime);
-			dprintk("Has %d PST tables. (Only dumping ones "
+			pr_debug("Has %d PST tables. (Only dumping ones "
 					"relevant to this CPU).\n",
 					psb->numpst);
 
@@ -650,7 +645,7 @@ static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy)
 		printk(KERN_WARNING PFX "can not determine bus frequency\n");
 		return -EINVAL;
 	}
-	dprintk("FSB: %3dMHz\n", fsb/1000);
+	pr_debug("FSB: %3dMHz\n", fsb/1000);
 
 	if (dmi_check_system(powernow_dmi_table) || acpi_force) {
 		printk(KERN_INFO PFX "PSB/PST known to be broken.  "
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 2368e38327b3..83479b6fb9a1 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -139,7 +139,7 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
 	}
 	do {
 		if (i++ > 10000) {
-			dprintk("detected change pending stuck\n");
+			pr_debug("detected change pending stuck\n");
 			return 1;
 		}
 		rdmsr(MSR_FIDVID_STATUS, lo, hi);
@@ -176,7 +176,7 @@ static void fidvid_msr_init(void)
 	fid = lo & MSR_S_LO_CURRENT_FID;
 	lo = fid | (vid << MSR_C_LO_VID_SHIFT);
 	hi = MSR_C_HI_STP_GNT_BENIGN;
-	dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
+	pr_debug("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
 	wrmsr(MSR_FIDVID_CTL, lo, hi);
 }
 
@@ -196,7 +196,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
 	lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
 	lo |= MSR_C_LO_INIT_FID_VID;
 
-	dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
+	pr_debug("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
 		fid, lo, data->plllock * PLL_LOCK_CONVERSION);
 
 	do {
@@ -244,7 +244,7 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
 	lo |= (vid << MSR_C_LO_VID_SHIFT);
 	lo |= MSR_C_LO_INIT_FID_VID;
 
-	dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
+	pr_debug("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
 		vid, lo, STOP_GRANT_5NS);
 
 	do {
@@ -325,7 +325,7 @@ static int transition_fid_vid(struct powernow_k8_data *data,
 		return 1;
 	}
 
-	dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
+	pr_debug("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
 		smp_processor_id(), data->currfid, data->currvid);
 
 	return 0;
@@ -339,7 +339,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data,
 	u32 savefid = data->currfid;
 	u32 maxvid, lo, rvomult = 1;
 
-	dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
+	pr_debug("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
 		"reqvid 0x%x, rvo 0x%x\n",
 		smp_processor_id(),
 		data->currfid, data->currvid, reqvid, data->rvo);
@@ -349,12 +349,12 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data,
 	rvosteps *= rvomult;
 	rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
 	maxvid = 0x1f & (maxvid >> 16);
-	dprintk("ph1 maxvid=0x%x\n", maxvid);
+	pr_debug("ph1 maxvid=0x%x\n", maxvid);
 	if (reqvid < maxvid) /* lower numbers are higher voltages */
 		reqvid = maxvid;
 
 	while (data->currvid > reqvid) {
-		dprintk("ph1: curr 0x%x, req vid 0x%x\n",
+		pr_debug("ph1: curr 0x%x, req vid 0x%x\n",
 			data->currvid, reqvid);
 		if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
 			return 1;
@@ -365,7 +365,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data,
 		if (data->currvid == maxvid) {
 			rvosteps = 0;
 		} else {
-			dprintk("ph1: changing vid for rvo, req 0x%x\n",
+			pr_debug("ph1: changing vid for rvo, req 0x%x\n",
 				data->currvid - 1);
 			if (decrease_vid_code_by_step(data, data->currvid-1, 1))
 				return 1;
@@ -382,7 +382,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data,
 		return 1;
 	}
 
-	dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n",
+	pr_debug("ph1 complete, currfid 0x%x, currvid 0x%x\n",
 		data->currfid, data->currvid);
 
 	return 0;
@@ -400,7 +400,7 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
 		return 0;
 	}
 
-	dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
+	pr_debug("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
 		"reqfid 0x%x\n",
 		smp_processor_id(),
 		data->currfid, data->currvid, reqfid);
@@ -457,7 +457,7 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
 		return 1;
 	}
 
-	dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n",
+	pr_debug("ph2 complete, currfid 0x%x, currvid 0x%x\n",
 		data->currfid, data->currvid);
 
 	return 0;
@@ -470,7 +470,7 @@ static int core_voltage_post_transition(struct powernow_k8_data *data,
 	u32 savefid = data->currfid;
 	u32 savereqvid = reqvid;
 
-	dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
+	pr_debug("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
 		smp_processor_id(),
 		data->currfid, data->currvid);
 
@@ -498,17 +498,17 @@ static int core_voltage_post_transition(struct powernow_k8_data *data,
 		return 1;
 
 	if (savereqvid != data->currvid) {
-		dprintk("ph3 failed, currvid 0x%x\n", data->currvid);
+		pr_debug("ph3 failed, currvid 0x%x\n", data->currvid);
 		return 1;
 	}
 
 	if (savefid != data->currfid) {
-		dprintk("ph3 failed, currfid changed 0x%x\n",
+		pr_debug("ph3 failed, currfid changed 0x%x\n",
 			data->currfid);
 		return 1;
 	}
 
-	dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n",
+	pr_debug("ph3 complete, currfid 0x%x, currvid 0x%x\n",
 		data->currfid, data->currvid);
 
 	return 0;
@@ -707,7 +707,7 @@ static int fill_powernow_table(struct powernow_k8_data *data,
 		return -EIO;
 	}
 
-	dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
+	pr_debug("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
 	data->powernow_table = powernow_table;
 	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
 		print_basics(data);
@@ -717,7 +717,7 @@ static int fill_powernow_table(struct powernow_k8_data *data,
 		    (pst[j].vid == data->currvid))
 			return 0;
 
-	dprintk("currfid/vid do not match PST, ignoring\n");
+	pr_debug("currfid/vid do not match PST, ignoring\n");
 	return 0;
 }
 
@@ -739,36 +739,36 @@ static int find_psb_table(struct powernow_k8_data *data)
 		if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
 			continue;
 
-		dprintk("found PSB header at 0x%p\n", psb);
+		pr_debug("found PSB header at 0x%p\n", psb);
 
-		dprintk("table vers: 0x%x\n", psb->tableversion);
+		pr_debug("table vers: 0x%x\n", psb->tableversion);
 		if (psb->tableversion != PSB_VERSION_1_4) {
 			printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n");
 			return -ENODEV;
 		}
 
-		dprintk("flags: 0x%x\n", psb->flags1);
+		pr_debug("flags: 0x%x\n", psb->flags1);
 		if (psb->flags1) {
 			printk(KERN_ERR FW_BUG PFX "unknown flags\n");
 			return -ENODEV;
 		}
 
 		data->vstable = psb->vstable;
-		dprintk("voltage stabilization time: %d(*20us)\n",
+		pr_debug("voltage stabilization time: %d(*20us)\n",
 				data->vstable);
 
-		dprintk("flags2: 0x%x\n", psb->flags2);
+		pr_debug("flags2: 0x%x\n", psb->flags2);
 		data->rvo = psb->flags2 & 3;
 		data->irt = ((psb->flags2) >> 2) & 3;
 		mvs = ((psb->flags2) >> 4) & 3;
 		data->vidmvs = 1 << mvs;
 		data->batps = ((psb->flags2) >> 6) & 3;
 
-		dprintk("ramp voltage offset: %d\n", data->rvo);
-		dprintk("isochronous relief time: %d\n", data->irt);
-		dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
+		pr_debug("ramp voltage offset: %d\n", data->rvo);
+		pr_debug("isochronous relief time: %d\n", data->irt);
+		pr_debug("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
 
-		dprintk("numpst: 0x%x\n", psb->num_tables);
+		pr_debug("numpst: 0x%x\n", psb->num_tables);
 		cpst = psb->num_tables;
 		if ((psb->cpuid == 0x00000fc0) ||
 		    (psb->cpuid == 0x00000fe0)) {
@@ -783,13 +783,13 @@ static int find_psb_table(struct powernow_k8_data *data)
 		}
 
 		data->plllock = psb->plllocktime;
-		dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
-		dprintk("maxfid: 0x%x\n", psb->maxfid);
-		dprintk("maxvid: 0x%x\n", psb->maxvid);
+		pr_debug("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
+		pr_debug("maxfid: 0x%x\n", psb->maxfid);
+		pr_debug("maxvid: 0x%x\n", psb->maxvid);
 		maxvid = psb->maxvid;
 
 		data->numps = psb->numps;
-		dprintk("numpstates: 0x%x\n", data->numps);
+		pr_debug("numpstates: 0x%x\n", data->numps);
 		return fill_powernow_table(data,
 				(struct pst_s *)(psb+1), maxvid);
 	}
@@ -834,13 +834,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	u64 control, status;
 
 	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
-		dprintk("register performance failed: bad ACPI data\n");
+		pr_debug("register performance failed: bad ACPI data\n");
 		return -EIO;
 	}
 
 	/* verify the data contained in the ACPI structures */
 	if (data->acpi_data.state_count <= 1) {
-		dprintk("No ACPI P-States\n");
+		pr_debug("No ACPI P-States\n");
 		goto err_out;
 	}
 
@@ -849,7 +849,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 
 	if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
 	    (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
-		dprintk("Invalid control/status registers (%x - %x)\n",
+		pr_debug("Invalid control/status registers (%llx - %llx)\n",
 			control, status);
 		goto err_out;
 	}
@@ -858,7 +858,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
 		* (data->acpi_data.state_count + 1)), GFP_KERNEL);
 	if (!powernow_table) {
-		dprintk("powernow_table memory alloc failure\n");
+		pr_debug("powernow_table memory alloc failure\n");
 		goto err_out;
 	}
 
@@ -928,7 +928,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
 		}
 		rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
 		if (!(hi & HW_PSTATE_VALID_MASK)) {
-			dprintk("invalid pstate %d, ignoring\n", index);
+			pr_debug("invalid pstate %d, ignoring\n", index);
 			invalidate_entry(powernow_table, i);
 			continue;
 		}
@@ -968,7 +968,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
 			vid = (control >> VID_SHIFT) & VID_MASK;
 		}
 
-		dprintk("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
+		pr_debug("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
 
 		index = fid | (vid<<8);
 		powernow_table[i].index = index;
@@ -978,7 +978,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
 
 		/* verify frequency is OK */
 		if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
-			dprintk("invalid freq %u kHz, ignoring\n", freq);
+			pr_debug("invalid freq %u kHz, ignoring\n", freq);
 			invalidate_entry(powernow_table, i);
 			continue;
 		}
@@ -986,7 +986,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
 		/* verify voltage is OK -
 		 * BIOSs are using "off" to indicate invalid */
 		if (vid == VID_OFF) {
-			dprintk("invalid vid %u, ignoring\n", vid);
+			pr_debug("invalid vid %u, ignoring\n", vid);
 			invalidate_entry(powernow_table, i);
 			continue;
 		}
@@ -1047,7 +1047,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
 	int res, i;
 	struct cpufreq_freqs freqs;
 
-	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
+	pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
 
 	/* fid/vid correctness check for k8 */
 	/* fid are the lower 8 bits of the index we stored into
@@ -1057,18 +1057,18 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
 	fid = data->powernow_table[index].index & 0xFF;
 	vid = (data->powernow_table[index].index & 0xFF00) >> 8;
 
-	dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
+	pr_debug("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
 
 	if (query_current_values_with_pending_wait(data))
 		return 1;
 
 	if ((data->currvid == vid) && (data->currfid == fid)) {
-		dprintk("target matches current values (fid 0x%x, vid 0x%x)\n",
+		pr_debug("target matches current values (fid 0x%x, vid 0x%x)\n",
 			fid, vid);
 		return 0;
 	}
 
-	dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
+	pr_debug("cpu %d, changing to fid 0x%x, vid 0x%x\n",
 		smp_processor_id(), fid, vid);
 	freqs.old = find_khz_freq_from_fid(data->currfid);
 	freqs.new = find_khz_freq_from_fid(fid);
@@ -1096,7 +1096,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
 	int res, i;
 	struct cpufreq_freqs freqs;
 
-	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
+	pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
 
 	/* get MSR index for hardware pstate transition */
 	pstate = index & HW_PSTATE_MASK;
@@ -1156,14 +1156,14 @@ static int powernowk8_target(struct cpufreq_policy *pol,
 		goto err_out;
 	}
 
-	dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
+	pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
 		pol->cpu, targfreq, pol->min, pol->max, relation);
 
 	if (query_current_values_with_pending_wait(data))
 		goto err_out;
 
 	if (cpu_family != CPU_HW_PSTATE) {
-		dprintk("targ: curr fid 0x%x, vid 0x%x\n",
+		pr_debug("targ: curr fid 0x%x, vid 0x%x\n",
 		data->currfid, data->currvid);
 
 		if ((checkvid != data->currvid) ||
@@ -1319,7 +1319,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 				data->currpstate);
 	else
 		pol->cur = find_khz_freq_from_fid(data->currfid);
-	dprintk("policy current frequency %d kHz\n", pol->cur);
+	pr_debug("policy current frequency %d kHz\n", pol->cur);
 
 	/* min/max the cpu is capable of */
 	if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
@@ -1337,10 +1337,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
 
 	if (cpu_family == CPU_HW_PSTATE)
-		dprintk("cpu_init done, current pstate 0x%x\n",
+		pr_debug("cpu_init done, current pstate 0x%x\n",
 				data->currpstate);
 	else
-		dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
+		pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n",
 			data->currfid, data->currvid);
 
 	per_cpu(powernow_data, pol->cpu) = data;
@@ -1586,7 +1586,7 @@ static int __cpuinit powernowk8_init(void)
 /* driver entry point for term */
 static void __exit powernowk8_exit(void)
 {
-	dprintk("exit\n");
+	pr_debug("exit\n");
 
 	if (boot_cpu_has(X86_FEATURE_CPB)) {
 		msrs_free(msrs);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index df3529b1c02d..3744d26cdc2b 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -211,8 +211,6 @@ struct pst_s {
 	u8 vid;
 };
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg)
-
 static int core_voltage_pre_transition(struct powernow_k8_data *data,
 	u32 reqvid, u32 regfid);
 static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
index 435a996a613a..1e205e6b1727 100644
--- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
+++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
@@ -29,8 +29,6 @@
 
 static __u8 __iomem *cpuctl;
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"sc520_freq", msg)
 #define PFX "sc520_freq: "
 
 static struct cpufreq_frequency_table sc520_freq_table[] = {
@@ -66,7 +64,7 @@ static void sc520_freq_set_cpu_state(unsigned int state)
 
 	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 
-	dprintk("attempting to set frequency to %i kHz\n",
+	pr_debug("attempting to set frequency to %i kHz\n",
 			sc520_freq_table[state].frequency);
 
 	local_irq_disable();
@@ -161,7 +159,7 @@ static int __init sc520_freq_init(void)
 	/* Test if we have the right hardware */
 	if (c->x86_vendor != X86_VENDOR_AMD ||
 	    c->x86 != 4 || c->x86_model != 9) {
-		dprintk("no Elan SC520 processor found!\n");
+		pr_debug("no Elan SC520 processor found!\n");
 		return -ENODEV;
 	}
 	cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 9b1ff37de46a..6ea3455def21 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -29,9 +29,6 @@
 #define PFX		"speedstep-centrino: "
 #define MAINTAINER	"cpufreq@vger.kernel.org"
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
-
 #define INTEL_MSR_RANGE	(0xffff)
 
 struct cpu_id
@@ -244,7 +241,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
 
 	if (model->cpu_id == NULL) {
 		/* No match at all */
-		dprintk("no support for CPU model \"%s\": "
+		pr_debug("no support for CPU model \"%s\": "
 		       "send /proc/cpuinfo to " MAINTAINER "\n",
 		       cpu->x86_model_id);
 		return -ENOENT;
@@ -252,15 +249,15 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
 
 	if (model->op_points == NULL) {
 		/* Matched a non-match */
-		dprintk("no table support for CPU model \"%s\"\n",
+		pr_debug("no table support for CPU model \"%s\"\n",
 		       cpu->x86_model_id);
-		dprintk("try using the acpi-cpufreq driver\n");
+		pr_debug("try using the acpi-cpufreq driver\n");
 		return -ENOENT;
 	}
 
 	per_cpu(centrino_model, policy->cpu) = model;
 
-	dprintk("found \"%s\": max frequency: %dkHz\n",
+	pr_debug("found \"%s\": max frequency: %dkHz\n",
 	       model->model_name, model->max_freq);
 
 	return 0;
@@ -369,7 +366,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 		per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i];
 
 	if (!per_cpu(centrino_cpu, policy->cpu)) {
-		dprintk("found unsupported CPU with "
+		pr_debug("found unsupported CPU with "
 		"Enhanced SpeedStep: send /proc/cpuinfo to "
 		MAINTAINER "\n");
 		return -ENODEV;
@@ -385,7 +382,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 
 	if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
 		l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
-		dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
+		pr_debug("trying to enable Enhanced SpeedStep (%x)\n", l);
 		wrmsr(MSR_IA32_MISC_ENABLE, l, h);
 
 		/* check to see if it stuck */
@@ -402,7 +399,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 						/* 10uS transition latency */
 	policy->cur = freq;
 
-	dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur);
+	pr_debug("centrino_cpu_init: cur=%dkHz\n", policy->cur);
 
 	ret = cpufreq_frequency_table_cpuinfo(policy,
 		per_cpu(centrino_model, policy->cpu)->op_points);
@@ -498,7 +495,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 			good_cpu = j;
 
 		if (good_cpu >= nr_cpu_ids) {
-			dprintk("couldn't limit to CPUs in this domain\n");
+			pr_debug("couldn't limit to CPUs in this domain\n");
 			retval = -EAGAIN;
 			if (first_cpu) {
 				/* We haven't started the transition yet. */
@@ -512,7 +509,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 		if (first_cpu) {
 			rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h);
 			if (msr == (oldmsr & 0xffff)) {
-				dprintk("no change needed - msr was and needs "
+				pr_debug("no change needed - msr was and needs "
 					"to be %x\n", oldmsr);
 				retval = 0;
 				goto out;
@@ -521,7 +518,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 			freqs.old = extract_clock(oldmsr, cpu, 0);
 			freqs.new = extract_clock(msr, cpu, 0);
 
-			dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
+			pr_debug("target=%dkHz old=%d new=%d msr=%04x\n",
 				target_freq, freqs.old, freqs.new, msr);
 
 			for_each_cpu(k, policy->cpus) {
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 561758e95180..a748ce782fee 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -53,10 +53,6 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
 };
 
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"speedstep-ich", msg)
-
-
 /**
  * speedstep_find_register - read the PMBASE address
  *
@@ -80,7 +76,7 @@ static int speedstep_find_register(void)
 		return -ENODEV;
 	}
 
-	dprintk("pmbase is 0x%x\n", pmbase);
+	pr_debug("pmbase is 0x%x\n", pmbase);
 	return 0;
 }
 
@@ -106,13 +102,13 @@ static void speedstep_set_state(unsigned int state)
 	/* read state */
 	value = inb(pmbase + 0x50);
 
-	dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
+	pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
 
 	/* write new state */
 	value &= 0xFE;
 	value |= state;
 
-	dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
+	pr_debug("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
 
 	/* Disable bus master arbitration */
 	pm2_blk = inb(pmbase + 0x20);
@@ -132,10 +128,10 @@ static void speedstep_set_state(unsigned int state)
 	/* Enable IRQs */
 	local_irq_restore(flags);
 
-	dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
+	pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
 
 	if (state == (value & 0x1))
-		dprintk("change to %u MHz succeeded\n",
+		pr_debug("change to %u MHz succeeded\n",
 			speedstep_get_frequency(speedstep_processor) / 1000);
 	else
 		printk(KERN_ERR "cpufreq: change failed - I/O error\n");
@@ -165,7 +161,7 @@ static int speedstep_activate(void)
 	pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value);
 	if (!(value & 0x08)) {
 		value |= 0x08;
-		dprintk("activating SpeedStep (TM) registers\n");
+		pr_debug("activating SpeedStep (TM) registers\n");
 		pci_write_config_word(speedstep_chipset_dev, 0x00A0, value);
 	}
 
@@ -218,7 +214,7 @@ static unsigned int speedstep_detect_chipset(void)
 			return 2; /* 2-M */
 
 		if (hostbridge->revision < 5) {
-			dprintk("hostbridge does not support speedstep\n");
+			pr_debug("hostbridge does not support speedstep\n");
 			speedstep_chipset_dev = NULL;
 			pci_dev_put(hostbridge);
 			return 0;
@@ -246,7 +242,7 @@ static unsigned int speedstep_get(unsigned int cpu)
 	if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0)
 		BUG();
 
-	dprintk("detected %u kHz as current frequency\n", speed);
+	pr_debug("detected %u kHz as current frequency\n", speed);
 	return speed;
 }
 
@@ -276,7 +272,7 @@ static int speedstep_target(struct cpufreq_policy *policy,
 	freqs.new = speedstep_freqs[newstate].frequency;
 	freqs.cpu = policy->cpu;
 
-	dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new);
+	pr_debug("transiting from %u to %u kHz\n", freqs.old, freqs.new);
 
 	/* no transition necessary */
 	if (freqs.old == freqs.new)
@@ -351,7 +347,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 	if (!speed)
 		return -EIO;
 
-	dprintk("currently at %s speed setting - %i MHz\n",
+	pr_debug("currently at %s speed setting - %i MHz\n",
 		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
 		? "low" : "high",
 		(speed / 1000));
@@ -405,14 +401,14 @@ static int __init speedstep_init(void)
 	/* detect processor */
 	speedstep_processor = speedstep_detect_processor();
 	if (!speedstep_processor) {
-		dprintk("Intel(R) SpeedStep(TM) capable processor "
+		pr_debug("Intel(R) SpeedStep(TM) capable processor "
 				"not found\n");
 		return -ENODEV;
 	}
 
 	/* detect chipset */
 	if (!speedstep_detect_chipset()) {
-		dprintk("Intel(R) SpeedStep(TM) for this chipset not "
+		pr_debug("Intel(R) SpeedStep(TM) for this chipset not "
 				"(yet) available.\n");
 		return -ENODEV;
 	}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index a94ec6be69fa..8af2d2fd9d51 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -18,9 +18,6 @@
 #include <asm/tsc.h>
 #include "speedstep-lib.h"
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"speedstep-lib", msg)
-
 #define PFX "speedstep-lib: "
 
 #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
@@ -75,7 +72,7 @@ static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
 
 	/* read MSR 0x2a - we only need the low 32 bits */
 	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+	pr_debug("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
 	msr_tmp = msr_lo;
 
 	/* decode the FSB */
@@ -89,7 +86,7 @@ static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
 
 	/* decode the multiplier */
 	if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) {
-		dprintk("workaround for early PIIIs\n");
+		pr_debug("workaround for early PIIIs\n");
 		msr_lo &= 0x03c00000;
 	} else
 		msr_lo &= 0x0bc00000;
@@ -100,7 +97,7 @@ static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
 		j++;
 	}
 
-	dprintk("speed is %u\n",
+	pr_debug("speed is %u\n",
 		(msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
 
 	return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100;
@@ -112,7 +109,7 @@ static unsigned int pentiumM_get_frequency(void)
 	u32 msr_lo, msr_tmp;
 
 	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+	pr_debug("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
 
 	/* see table B-2 of 24547212.pdf */
 	if (msr_lo & 0x00040000) {
@@ -122,7 +119,7 @@ static unsigned int pentiumM_get_frequency(void)
 	}
 
 	msr_tmp = (msr_lo >> 22) & 0x1f;
-	dprintk("bits 22-26 are 0x%x, speed is %u\n",
+	pr_debug("bits 22-26 are 0x%x, speed is %u\n",
 			msr_tmp, (msr_tmp * 100 * 1000));
 
 	return msr_tmp * 100 * 1000;
@@ -160,11 +157,11 @@ static unsigned int pentium_core_get_frequency(void)
 	}
 
 	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
+	pr_debug("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
 			msr_lo, msr_tmp);
 
 	msr_tmp = (msr_lo >> 22) & 0x1f;
-	dprintk("bits 22-26 are 0x%x, speed is %u\n",
+	pr_debug("bits 22-26 are 0x%x, speed is %u\n",
 			msr_tmp, (msr_tmp * fsb));
 
 	ret = (msr_tmp * fsb);
@@ -190,7 +187,7 @@ static unsigned int pentium4_get_frequency(void)
 
 	rdmsr(0x2c, msr_lo, msr_hi);
 
-	dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
+	pr_debug("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
 
 	/* decode the FSB: see IA-32 Intel (C) Architecture Software
 	 * Developer's Manual, Volume 3: System Prgramming Guide,
@@ -217,7 +214,7 @@ static unsigned int pentium4_get_frequency(void)
 	/* Multiplier. */
 	mult = msr_lo >> 24;
 
-	dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
+	pr_debug("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
 			fsb, mult, (fsb * mult));
 
 	ret = (fsb * mult);
@@ -257,7 +254,7 @@ unsigned int speedstep_detect_processor(void)
 	struct cpuinfo_x86 *c = &cpu_data(0);
 	u32 ebx, msr_lo, msr_hi;
 
-	dprintk("x86: %x, model: %x\n", c->x86, c->x86_model);
+	pr_debug("x86: %x, model: %x\n", c->x86, c->x86_model);
 
 	if ((c->x86_vendor != X86_VENDOR_INTEL) ||
 	    ((c->x86 != 6) && (c->x86 != 0xF)))
@@ -272,7 +269,7 @@ unsigned int speedstep_detect_processor(void)
 		ebx = cpuid_ebx(0x00000001);
 		ebx &= 0x000000FF;
 
-		dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
+		pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
 
 		switch (c->x86_mask) {
 		case 4:
@@ -327,7 +324,7 @@ unsigned int speedstep_detect_processor(void)
 		/* cpuid_ebx(1) is 0x04 for desktop PIII,
 		 * 0x06 for mobile PIII-M */
 		ebx = cpuid_ebx(0x00000001);
-		dprintk("ebx is %x\n", ebx);
+		pr_debug("ebx is %x\n", ebx);
 
 		ebx &= 0x000000FF;
 
@@ -344,7 +341,7 @@ unsigned int speedstep_detect_processor(void)
 		/* all mobile PIII Coppermines have FSB 100 MHz
 		 * ==> sort out a few desktop PIIIs. */
 		rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
-		dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
+		pr_debug("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
 				msr_lo, msr_hi);
 		msr_lo &= 0x00c0000;
 		if (msr_lo != 0x0080000)
@@ -357,12 +354,12 @@ unsigned int speedstep_detect_processor(void)
 		 * bit 56 or 57 is set
 		 */
 		rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
-		dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
+		pr_debug("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
 				msr_lo, msr_hi);
 		if ((msr_hi & (1<<18)) &&
 		    (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
 			if (c->x86_mask == 0x01) {
-				dprintk("early PIII version\n");
+				pr_debug("early PIII version\n");
 				return SPEEDSTEP_CPU_PIII_C_EARLY;
 			} else
 				return SPEEDSTEP_CPU_PIII_C;
@@ -393,14 +390,14 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 	if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
 		return -EINVAL;
 
-	dprintk("trying to determine both speeds\n");
+	pr_debug("trying to determine both speeds\n");
 
 	/* get current speed */
 	prev_speed = speedstep_get_frequency(processor);
 	if (!prev_speed)
 		return -EIO;
 
-	dprintk("previous speed is %u\n", prev_speed);
+	pr_debug("previous speed is %u\n", prev_speed);
 
 	local_irq_save(flags);
 
@@ -412,7 +409,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 		goto out;
 	}
 
-	dprintk("low speed is %u\n", *low_speed);
+	pr_debug("low speed is %u\n", *low_speed);
 
 	/* start latency measurement */
 	if (transition_latency)
@@ -431,7 +428,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 		goto out;
 	}
 
-	dprintk("high speed is %u\n", *high_speed);
+	pr_debug("high speed is %u\n", *high_speed);
 
 	if (*low_speed == *high_speed) {
 		ret = -ENODEV;
@@ -445,7 +442,7 @@ unsigned int speedstep_get_freqs(enum speedstep_processor processor,
 	if (transition_latency) {
 		*transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC +
 			tv2.tv_usec - tv1.tv_usec;
-		dprintk("transition latency is %u uSec\n", *transition_latency);
+		pr_debug("transition latency is %u uSec\n", *transition_latency);
 
 		/* convert uSec to nSec and add 20% for safety reasons */
 		*transition_latency *= 1200;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index 91bc25b67bc1..c76ead3490bf 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -55,9 +55,6 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
  * of DMA activity going on? */
 #define SMI_TRIES 5
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
-		"speedstep-smi", msg)
-
 /**
  * speedstep_smi_ownership
  */
@@ -70,7 +67,7 @@ static int speedstep_smi_ownership(void)
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 	magic = virt_to_phys(magic_data);
 
-	dprintk("trying to obtain ownership with command %x at port %x\n",
+	pr_debug("trying to obtain ownership with command %x at port %x\n",
 			command, smi_port);
 
 	__asm__ __volatile__(
@@ -85,7 +82,7 @@ static int speedstep_smi_ownership(void)
 		: "memory"
 	);
 
-	dprintk("result is %x\n", result);
+	pr_debug("result is %x\n", result);
 
 	return result;
 }
@@ -106,13 +103,13 @@ static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
 	u32 function = GET_SPEEDSTEP_FREQS;
 
 	if (!(ist_info.event & 0xFFFF)) {
-		dprintk("bug #1422 -- can't read freqs from BIOS\n");
+		pr_debug("bug #1422 -- can't read freqs from BIOS\n");
 		return -ENODEV;
 	}
 
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 
-	dprintk("trying to determine frequencies with command %x at port %x\n",
+	pr_debug("trying to determine frequencies with command %x at port %x\n",
 			command, smi_port);
 
 	__asm__ __volatile__(
@@ -129,7 +126,7 @@ static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
 		  "d" (smi_port), "S" (0), "D" (0)
 	);
 
-	dprintk("result %x, low_freq %u, high_freq %u\n",
+	pr_debug("result %x, low_freq %u, high_freq %u\n",
 			result, low_mhz, high_mhz);
 
 	/* abort if results are obviously incorrect... */
@@ -154,7 +151,7 @@ static int speedstep_get_state(void)
 
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 
-	dprintk("trying to determine current setting with command %x "
+	pr_debug("trying to determine current setting with command %x "
 		"at port %x\n", command, smi_port);
 
 	__asm__ __volatile__(
@@ -168,7 +165,7 @@ static int speedstep_get_state(void)
 		  "d" (smi_port), "S" (0), "D" (0)
 	);
 
-	dprintk("state is %x, result is %x\n", state, result);
+	pr_debug("state is %x, result is %x\n", state, result);
 
 	return state & 1;
 }
@@ -194,13 +191,13 @@ static void speedstep_set_state(unsigned int state)
 
 	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
 
-	dprintk("trying to set frequency to state %u "
+	pr_debug("trying to set frequency to state %u "
 		"with command %x at port %x\n",
 		state, command, smi_port);
 
 	do {
 		if (retry) {
-			dprintk("retry %u, previous result %u, waiting...\n",
+			pr_debug("retry %u, previous result %u, waiting...\n",
 					retry, result);
 			mdelay(retry * 50);
 		}
@@ -221,7 +218,7 @@ static void speedstep_set_state(unsigned int state)
 	local_irq_restore(flags);
 
 	if (new_state == state)
-		dprintk("change to %u MHz succeeded after %u tries "
+		pr_debug("change to %u MHz succeeded after %u tries "
 			"with result %u\n",
 			(speedstep_freqs[new_state].frequency / 1000),
 			retry, result);
@@ -292,7 +289,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 
 	result = speedstep_smi_ownership();
 	if (result) {
-		dprintk("fails in acquiring ownership of a SMI interface.\n");
+		pr_debug("fails in acquiring ownership of a SMI interface.\n");
 		return -EINVAL;
 	}
 
@@ -304,7 +301,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 	if (result) {
 		/* fall back to speedstep_lib.c dection mechanism:
 		 * try both states out */
-		dprintk("could not detect low and high frequencies "
+		pr_debug("could not detect low and high frequencies "
 				"by SMI call.\n");
 		result = speedstep_get_freqs(speedstep_processor,
 				low, high,
@@ -312,18 +309,18 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 				&speedstep_set_state);
 
 		if (result) {
-			dprintk("could not detect two different speeds"
+			pr_debug("could not detect two different speeds"
 					" -- aborting.\n");
 			return result;
 		} else
-			dprintk("workaround worked.\n");
+			pr_debug("workaround worked.\n");
 	}
 
 	/* get current speed setting */
 	state = speedstep_get_state();
 	speed = speedstep_freqs[state].frequency;
 
-	dprintk("currently at %s speed setting - %i MHz\n",
+	pr_debug("currently at %s speed setting - %i MHz\n",
 		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
 		? "low" : "high",
 		(speed / 1000));
@@ -360,7 +357,7 @@ static int speedstep_resume(struct cpufreq_policy *policy)
 	int result = speedstep_smi_ownership();
 
 	if (result)
-		dprintk("fails in re-acquiring ownership of a SMI interface.\n");
+		pr_debug("fails in re-acquiring ownership of a SMI interface.\n");
 
 	return result;
 }
@@ -403,12 +400,12 @@ static int __init speedstep_init(void)
 	}
 
 	if (!speedstep_processor) {
-		dprintk("No supported Intel CPU detected.\n");
+		pr_debug("No supported Intel CPU detected.\n");
 		return -ENODEV;
 	}
 
-	dprintk("signature:0x%.8lx, command:0x%.8lx, "
-		"event:0x%.8lx, perf_level:0x%.8lx.\n",
+	pr_debug("signature:0x%.8ulx, command:0x%.8ulx, "
+		"event:0x%.8ulx, perf_level:0x%.8ulx.\n",
 		ist_info.signature, ist_info.command,
 		ist_info.event, ist_info.perf_level);
 
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 3a73a93596e8..85b32376dad7 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -49,10 +49,6 @@ ACPI_MODULE_NAME("processor_perflib");
 
 static DEFINE_MUTEX(performance_mutex);
 
-/* Use cpufreq debug layer for _PPC changes. */
-#define cpufreq_printk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
-						"cpufreq-core", msg)
-
 /*
  * _PPC support is implemented as a CPUfreq policy notifier:
  * This means each time a CPUfreq driver registered also with
@@ -145,7 +141,7 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
 		return -ENODEV;
 	}
 
-	cpufreq_printk("CPU %d: _PPC is %d - frequency %s limited\n", pr->id,
+	pr_debug("CPU %d: _PPC is %d - frequency %s limited\n", pr->id,
 		       (int)ppc, ppc ? "" : "not");
 
 	pr->performance_platform_limit = (int)ppc;
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index ca8ee8093d6c..b78baa547ef5 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -18,19 +18,6 @@ if CPU_FREQ
 config CPU_FREQ_TABLE
 	tristate
 
-config CPU_FREQ_DEBUG
-	bool "Enable CPUfreq debugging"
-	help
-	  Say Y here to enable CPUfreq subsystem (including drivers)
-	  debugging. You will need to activate it via the kernel
-	  command line by passing
-	     cpufreq.debug=<value>
-
-	  To get <value>, add 
-	       1 to activate CPUfreq core debugging,
-	       2 to activate CPUfreq drivers debugging, and
-	       4 to activate CPUfreq governor debugging
-
 config CPU_FREQ_STAT
 	tristate "CPU frequency translation statistics"
 	select CPU_FREQ_TABLE
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 7c10f96c5ae9..1e08af43ae72 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -32,9 +32,6 @@
 
 #include <trace/events/power.h>
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
-						"cpufreq-core", msg)
-
 /**
  * The "cpufreq driver" - the arch- or hardware-dependent low
  * level driver of CPUFreq support, and its spinlock. This lock
@@ -180,93 +177,6 @@ void cpufreq_cpu_put(struct cpufreq_policy *data)
 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
 
 
-/*********************************************************************
- *                     UNIFIED DEBUG HELPERS                         *
- *********************************************************************/
-#ifdef CONFIG_CPU_FREQ_DEBUG
-
-/* what part(s) of the CPUfreq subsystem are debugged? */
-static unsigned int debug;
-
-/* is the debug output ratelimit'ed using printk_ratelimit? User can
- * set or modify this value.
- */
-static unsigned int debug_ratelimit = 1;
-
-/* is the printk_ratelimit'ing enabled? It's enabled after a successful
- * loading of a cpufreq driver, temporarily disabled when a new policy
- * is set, and disabled upon cpufreq driver removal
- */
-static unsigned int disable_ratelimit = 1;
-static DEFINE_SPINLOCK(disable_ratelimit_lock);
-
-static void cpufreq_debug_enable_ratelimit(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&disable_ratelimit_lock, flags);
-	if (disable_ratelimit)
-		disable_ratelimit--;
-	spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
-}
-
-static void cpufreq_debug_disable_ratelimit(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&disable_ratelimit_lock, flags);
-	disable_ratelimit++;
-	spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
-}
-
-void cpufreq_debug_printk(unsigned int type, const char *prefix,
-			const char *fmt, ...)
-{
-	char s[256];
-	va_list args;
-	unsigned int len;
-	unsigned long flags;
-
-	WARN_ON(!prefix);
-	if (type & debug) {
-		spin_lock_irqsave(&disable_ratelimit_lock, flags);
-		if (!disable_ratelimit && debug_ratelimit
-					&& !printk_ratelimit()) {
-			spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
-			return;
-		}
-		spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
-
-		len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix);
-
-		va_start(args, fmt);
-		len += vsnprintf(&s[len], (256 - len), fmt, args);
-		va_end(args);
-
-		printk(s);
-
-		WARN_ON(len < 5);
-	}
-}
-EXPORT_SYMBOL(cpufreq_debug_printk);
-
-
-module_param(debug, uint, 0644);
-MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core,"
-			" 2 to debug drivers, and 4 to debug governors.");
-
-module_param(debug_ratelimit, uint, 0644);
-MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:"
-					" set to 0 to disable ratelimiting.");
-
-#else /* !CONFIG_CPU_FREQ_DEBUG */
-
-static inline void cpufreq_debug_enable_ratelimit(void) { return; }
-static inline void cpufreq_debug_disable_ratelimit(void) { return; }
-
-#endif /* CONFIG_CPU_FREQ_DEBUG */
-
-
 /*********************************************************************
  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
  *********************************************************************/
@@ -291,7 +201,7 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
 	if (!l_p_j_ref_freq) {
 		l_p_j_ref = loops_per_jiffy;
 		l_p_j_ref_freq = ci->old;
-		dprintk("saving %lu as reference value for loops_per_jiffy; "
+		pr_debug("saving %lu as reference value for loops_per_jiffy; "
 			"freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
 	}
 	if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
@@ -299,7 +209,7 @@ static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
 	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
 		loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
 								ci->new);
-		dprintk("scaling loops_per_jiffy to %lu "
+		pr_debug("scaling loops_per_jiffy to %lu "
 			"for frequency %u kHz\n", loops_per_jiffy, ci->new);
 	}
 }
@@ -326,7 +236,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 	BUG_ON(irqs_disabled());
 
 	freqs->flags = cpufreq_driver->flags;
-	dprintk("notification %u of frequency transition to %u kHz\n",
+	pr_debug("notification %u of frequency transition to %u kHz\n",
 		state, freqs->new);
 
 	policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
@@ -340,7 +250,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
 			if ((policy) && (policy->cpu == freqs->cpu) &&
 			    (policy->cur) && (policy->cur != freqs->old)) {
-				dprintk("Warning: CPU frequency is"
+				pr_debug("Warning: CPU frequency is"
 					" %u, cpufreq assumed %u kHz.\n",
 					freqs->old, policy->cur);
 				freqs->old = policy->cur;
@@ -353,7 +263,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 
 	case CPUFREQ_POSTCHANGE:
 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
-		dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
+		pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
 			(unsigned long)freqs->cpu);
 		trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
 		trace_cpu_frequency(freqs->new, freqs->cpu);
@@ -753,7 +663,7 @@ no_policy:
 static void cpufreq_sysfs_release(struct kobject *kobj)
 {
 	struct cpufreq_policy *policy = to_policy(kobj);
-	dprintk("last reference is dropped\n");
+	pr_debug("last reference is dropped\n");
 	complete(&policy->kobj_unregister);
 }
 
@@ -788,7 +698,7 @@ static int cpufreq_add_dev_policy(unsigned int cpu,
 	gov = __find_governor(per_cpu(cpufreq_cpu_governor, cpu));
 	if (gov) {
 		policy->governor = gov;
-		dprintk("Restoring governor %s for cpu %d\n",
+		pr_debug("Restoring governor %s for cpu %d\n",
 		       policy->governor->name, cpu);
 	}
 #endif
@@ -824,7 +734,7 @@ static int cpufreq_add_dev_policy(unsigned int cpu,
 			per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
 			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-			dprintk("CPU already managed, adding link\n");
+			pr_debug("CPU already managed, adding link\n");
 			ret = sysfs_create_link(&sys_dev->kobj,
 						&managed_policy->kobj,
 						"cpufreq");
@@ -865,7 +775,7 @@ static int cpufreq_add_dev_symlink(unsigned int cpu,
 		if (!cpu_online(j))
 			continue;
 
-		dprintk("CPU %u already managed, adding link\n", j);
+		pr_debug("CPU %u already managed, adding link\n", j);
 		managed_policy = cpufreq_cpu_get(cpu);
 		cpu_sys_dev = get_cpu_sysdev(j);
 		ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
@@ -941,7 +851,7 @@ static int cpufreq_add_dev_interface(unsigned int cpu,
 	policy->user_policy.governor = policy->governor;
 
 	if (ret) {
-		dprintk("setting policy failed\n");
+		pr_debug("setting policy failed\n");
 		if (cpufreq_driver->exit)
 			cpufreq_driver->exit(policy);
 	}
@@ -977,8 +887,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 	if (cpu_is_offline(cpu))
 		return 0;
 
-	cpufreq_debug_disable_ratelimit();
-	dprintk("adding CPU %u\n", cpu);
+	pr_debug("adding CPU %u\n", cpu);
 
 #ifdef CONFIG_SMP
 	/* check whether a different CPU already registered this
@@ -986,7 +895,6 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 	policy = cpufreq_cpu_get(cpu);
 	if (unlikely(policy)) {
 		cpufreq_cpu_put(policy);
-		cpufreq_debug_enable_ratelimit();
 		return 0;
 	}
 #endif
@@ -1037,7 +945,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 	 */
 	ret = cpufreq_driver->init(policy);
 	if (ret) {
-		dprintk("initialization failed\n");
+		pr_debug("initialization failed\n");
 		goto err_unlock_policy;
 	}
 	policy->user_policy.min = policy->min;
@@ -1063,8 +971,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 
 	kobject_uevent(&policy->kobj, KOBJ_ADD);
 	module_put(cpufreq_driver->owner);
-	dprintk("initialization complete\n");
-	cpufreq_debug_enable_ratelimit();
+	pr_debug("initialization complete\n");
 
 	return 0;
 
@@ -1088,7 +995,6 @@ err_free_policy:
 nomem_out:
 	module_put(cpufreq_driver->owner);
 module_out:
-	cpufreq_debug_enable_ratelimit();
 	return ret;
 }
 
@@ -1112,15 +1018,13 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 	unsigned int j;
 #endif
 
-	cpufreq_debug_disable_ratelimit();
-	dprintk("unregistering CPU %u\n", cpu);
+	pr_debug("unregistering CPU %u\n", cpu);
 
 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
 	data = per_cpu(cpufreq_cpu_data, cpu);
 
 	if (!data) {
 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
-		cpufreq_debug_enable_ratelimit();
 		unlock_policy_rwsem_write(cpu);
 		return -EINVAL;
 	}
@@ -1132,12 +1036,11 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 	 * only need to unlink, put and exit
 	 */
 	if (unlikely(cpu != data->cpu)) {
-		dprintk("removing link\n");
+		pr_debug("removing link\n");
 		cpumask_clear_cpu(cpu, data->cpus);
 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 		kobj = &sys_dev->kobj;
 		cpufreq_cpu_put(data);
-		cpufreq_debug_enable_ratelimit();
 		unlock_policy_rwsem_write(cpu);
 		sysfs_remove_link(kobj, "cpufreq");
 		return 0;
@@ -1170,7 +1073,7 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 		for_each_cpu(j, data->cpus) {
 			if (j == cpu)
 				continue;
-			dprintk("removing link for cpu %u\n", j);
+			pr_debug("removing link for cpu %u\n", j);
 #ifdef CONFIG_HOTPLUG_CPU
 			strncpy(per_cpu(cpufreq_cpu_governor, j),
 				data->governor->name, CPUFREQ_NAME_LEN);
@@ -1199,17 +1102,15 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
 	 * not referenced anymore by anybody before we proceed with
 	 * unloading.
 	 */
-	dprintk("waiting for dropping of refcount\n");
+	pr_debug("waiting for dropping of refcount\n");
 	wait_for_completion(cmp);
-	dprintk("wait complete\n");
+	pr_debug("wait complete\n");
 
 	lock_policy_rwsem_write(cpu);
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(data);
 	unlock_policy_rwsem_write(cpu);
 
-	cpufreq_debug_enable_ratelimit();
-
 #ifdef CONFIG_HOTPLUG_CPU
 	/* when the CPU which is the parent of the kobj is hotplugged
 	 * offline, check for siblings, and create cpufreq sysfs interface
@@ -1255,7 +1156,7 @@ static void handle_update(struct work_struct *work)
 	struct cpufreq_policy *policy =
 		container_of(work, struct cpufreq_policy, update);
 	unsigned int cpu = policy->cpu;
-	dprintk("handle_update for cpu %u called\n", cpu);
+	pr_debug("handle_update for cpu %u called\n", cpu);
 	cpufreq_update_policy(cpu);
 }
 
@@ -1273,7 +1174,7 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
 {
 	struct cpufreq_freqs freqs;
 
-	dprintk("Warning: CPU frequency out of sync: cpufreq and timing "
+	pr_debug("Warning: CPU frequency out of sync: cpufreq and timing "
 	       "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
 
 	freqs.cpu = cpu;
@@ -1376,7 +1277,7 @@ static int cpufreq_bp_suspend(void)
 	int cpu = smp_processor_id();
 	struct cpufreq_policy *cpu_policy;
 
-	dprintk("suspending cpu %u\n", cpu);
+	pr_debug("suspending cpu %u\n", cpu);
 
 	/* If there's no policy for the boot CPU, we have nothing to do. */
 	cpu_policy = cpufreq_cpu_get(cpu);
@@ -1414,7 +1315,7 @@ static void cpufreq_bp_resume(void)
 	int cpu = smp_processor_id();
 	struct cpufreq_policy *cpu_policy;
 
-	dprintk("resuming cpu %u\n", cpu);
+	pr_debug("resuming cpu %u\n", cpu);
 
 	/* If there's no policy for the boot CPU, we have nothing to do. */
 	cpu_policy = cpufreq_cpu_get(cpu);
@@ -1526,7 +1427,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 {
 	int retval = -EINVAL;
 
-	dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
+	pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
 		target_freq, relation);
 	if (cpu_online(policy->cpu) && cpufreq_driver->target)
 		retval = cpufreq_driver->target(policy, target_freq, relation);
@@ -1612,7 +1513,7 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
 	if (!try_module_get(policy->governor->owner))
 		return -EINVAL;
 
-	dprintk("__cpufreq_governor for CPU %u, event %u\n",
+	pr_debug("__cpufreq_governor for CPU %u, event %u\n",
 						policy->cpu, event);
 	ret = policy->governor->governor(policy, event);
 
@@ -1713,8 +1614,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data,
 {
 	int ret = 0;
 
-	cpufreq_debug_disable_ratelimit();
-	dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
+	pr_debug("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
 		policy->min, policy->max);
 
 	memcpy(&policy->cpuinfo, &data->cpuinfo,
@@ -1751,19 +1651,19 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data,
 	data->min = policy->min;
 	data->max = policy->max;
 
-	dprintk("new min and max freqs are %u - %u kHz\n",
+	pr_debug("new min and max freqs are %u - %u kHz\n",
 					data->min, data->max);
 
 	if (cpufreq_driver->setpolicy) {
 		data->policy = policy->policy;
-		dprintk("setting range\n");
+		pr_debug("setting range\n");
 		ret = cpufreq_driver->setpolicy(policy);
 	} else {
 		if (policy->governor != data->governor) {
 			/* save old, working values */
 			struct cpufreq_governor *old_gov = data->governor;
 
-			dprintk("governor switch\n");
+			pr_debug("governor switch\n");
 
 			/* end old governor */
 			if (data->governor)
@@ -1773,7 +1673,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data,
 			data->governor = policy->governor;
 			if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
 				/* new governor failed, so re-start old one */
-				dprintk("starting governor %s failed\n",
+				pr_debug("starting governor %s failed\n",
 							data->governor->name);
 				if (old_gov) {
 					data->governor = old_gov;
@@ -1785,12 +1685,11 @@ static int __cpufreq_set_policy(struct cpufreq_policy *data,
 			}
 			/* might be a policy change, too, so fall through */
 		}
-		dprintk("governor: change or update limits\n");
+		pr_debug("governor: change or update limits\n");
 		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
 	}
 
 error_out:
-	cpufreq_debug_enable_ratelimit();
 	return ret;
 }
 
@@ -1817,7 +1716,7 @@ int cpufreq_update_policy(unsigned int cpu)
 		goto fail;
 	}
 
-	dprintk("updating policy for CPU %u\n", cpu);
+	pr_debug("updating policy for CPU %u\n", cpu);
 	memcpy(&policy, data, sizeof(struct cpufreq_policy));
 	policy.min = data->user_policy.min;
 	policy.max = data->user_policy.max;
@@ -1829,7 +1728,7 @@ int cpufreq_update_policy(unsigned int cpu)
 	if (cpufreq_driver->get) {
 		policy.cur = cpufreq_driver->get(cpu);
 		if (!data->cur) {
-			dprintk("Driver did not initialize current freq");
+			pr_debug("Driver did not initialize current freq");
 			data->cur = policy.cur;
 		} else {
 			if (data->cur != policy.cur)
@@ -1905,7 +1804,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	    ((!driver_data->setpolicy) && (!driver_data->target)))
 		return -EINVAL;
 
-	dprintk("trying to register driver %s\n", driver_data->name);
+	pr_debug("trying to register driver %s\n", driver_data->name);
 
 	if (driver_data->setpolicy)
 		driver_data->flags |= CPUFREQ_CONST_LOOPS;
@@ -1936,15 +1835,14 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 
 		/* if all ->init() calls failed, unregister */
 		if (ret) {
-			dprintk("no CPU initialized for driver %s\n",
+			pr_debug("no CPU initialized for driver %s\n",
 							driver_data->name);
 			goto err_sysdev_unreg;
 		}
 	}
 
 	register_hotcpu_notifier(&cpufreq_cpu_notifier);
-	dprintk("driver %s up and running\n", driver_data->name);
-	cpufreq_debug_enable_ratelimit();
+	pr_debug("driver %s up and running\n", driver_data->name);
 
 	return 0;
 err_sysdev_unreg:
@@ -1971,14 +1869,10 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
 {
 	unsigned long flags;
 
-	cpufreq_debug_disable_ratelimit();
-
-	if (!cpufreq_driver || (driver != cpufreq_driver)) {
-		cpufreq_debug_enable_ratelimit();
+	if (!cpufreq_driver || (driver != cpufreq_driver))
 		return -EINVAL;
-	}
 
-	dprintk("unregistering driver %s\n", driver->name);
+	pr_debug("unregistering driver %s\n", driver->name);
 
 	sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
 	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c
index 7e2e515087f8..f13a8a9af6a1 100644
--- a/drivers/cpufreq/cpufreq_performance.c
+++ b/drivers/cpufreq/cpufreq_performance.c
@@ -15,9 +15,6 @@
 #include <linux/cpufreq.h>
 #include <linux/init.h>
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "performance", msg)
-
 
 static int cpufreq_governor_performance(struct cpufreq_policy *policy,
 					unsigned int event)
@@ -25,7 +22,7 @@ static int cpufreq_governor_performance(struct cpufreq_policy *policy,
 	switch (event) {
 	case CPUFREQ_GOV_START:
 	case CPUFREQ_GOV_LIMITS:
-		dprintk("setting to %u kHz because of event %u\n",
+		pr_debug("setting to %u kHz because of event %u\n",
 						policy->max, event);
 		__cpufreq_driver_target(policy, policy->max,
 						CPUFREQ_RELATION_H);
diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c
index e6db5faf3eb1..4c2eb512f2bc 100644
--- a/drivers/cpufreq/cpufreq_powersave.c
+++ b/drivers/cpufreq/cpufreq_powersave.c
@@ -15,16 +15,13 @@
 #include <linux/cpufreq.h>
 #include <linux/init.h>
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "powersave", msg)
-
 static int cpufreq_governor_powersave(struct cpufreq_policy *policy,
 					unsigned int event)
 {
 	switch (event) {
 	case CPUFREQ_GOV_START:
 	case CPUFREQ_GOV_LIMITS:
-		dprintk("setting to %u kHz because of event %u\n",
+		pr_debug("setting to %u kHz because of event %u\n",
 							policy->min, event);
 		__cpufreq_driver_target(policy, policy->min,
 						CPUFREQ_RELATION_L);
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c
index 66d2d1d6c80f..f231015904c0 100644
--- a/drivers/cpufreq/cpufreq_userspace.c
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -37,9 +37,6 @@ static DEFINE_PER_CPU(unsigned int, cpu_is_managed);
 static DEFINE_MUTEX(userspace_mutex);
 static int cpus_using_userspace_governor;
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "userspace", msg)
-
 /* keep track of frequency transitions */
 static int
 userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
@@ -50,7 +47,7 @@ userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 	if (!per_cpu(cpu_is_managed, freq->cpu))
 		return 0;
 
-	dprintk("saving cpu_cur_freq of cpu %u to be %u kHz\n",
+	pr_debug("saving cpu_cur_freq of cpu %u to be %u kHz\n",
 			freq->cpu, freq->new);
 	per_cpu(cpu_cur_freq, freq->cpu) = freq->new;
 
@@ -73,7 +70,7 @@ static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq)
 {
 	int ret = -EINVAL;
 
-	dprintk("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq);
+	pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq);
 
 	mutex_lock(&userspace_mutex);
 	if (!per_cpu(cpu_is_managed, policy->cpu))
@@ -134,7 +131,7 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
 		per_cpu(cpu_max_freq, cpu) = policy->max;
 		per_cpu(cpu_cur_freq, cpu) = policy->cur;
 		per_cpu(cpu_set_freq, cpu) = policy->cur;
-		dprintk("managing cpu %u started "
+		pr_debug("managing cpu %u started "
 			"(%u - %u kHz, currently %u kHz)\n",
 				cpu,
 				per_cpu(cpu_min_freq, cpu),
@@ -156,12 +153,12 @@ static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
 		per_cpu(cpu_min_freq, cpu) = 0;
 		per_cpu(cpu_max_freq, cpu) = 0;
 		per_cpu(cpu_set_freq, cpu) = 0;
-		dprintk("managing cpu %u stopped\n", cpu);
+		pr_debug("managing cpu %u stopped\n", cpu);
 		mutex_unlock(&userspace_mutex);
 		break;
 	case CPUFREQ_GOV_LIMITS:
 		mutex_lock(&userspace_mutex);
-		dprintk("limit event for cpu %u: %u - %u kHz, "
+		pr_debug("limit event for cpu %u: %u - %u kHz, "
 			"currently %u kHz, last set to %u kHz\n",
 			cpu, policy->min, policy->max,
 			per_cpu(cpu_cur_freq, cpu),
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index 05432216e224..90431cb92804 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -14,9 +14,6 @@
 #include <linux/init.h>
 #include <linux/cpufreq.h>
 
-#define dprintk(msg...) \
-	cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "freq-table", msg)
-
 /*********************************************************************
  *                     FREQUENCY TABLE HELPERS                       *
  *********************************************************************/
@@ -31,11 +28,11 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
 	for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
 		unsigned int freq = table[i].frequency;
 		if (freq == CPUFREQ_ENTRY_INVALID) {
-			dprintk("table entry %u is invalid, skipping\n", i);
+			pr_debug("table entry %u is invalid, skipping\n", i);
 
 			continue;
 		}
-		dprintk("table entry %u: %u kHz, %u index\n",
+		pr_debug("table entry %u: %u kHz, %u index\n",
 					i, freq, table[i].index);
 		if (freq < min_freq)
 			min_freq = freq;
@@ -61,7 +58,7 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
 	unsigned int i;
 	unsigned int count = 0;
 
-	dprintk("request for verification of policy (%u - %u kHz) for cpu %u\n",
+	pr_debug("request for verification of policy (%u - %u kHz) for cpu %u\n",
 					policy->min, policy->max, policy->cpu);
 
 	if (!cpu_online(policy->cpu))
@@ -86,7 +83,7 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
 	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
 				     policy->cpuinfo.max_freq);
 
-	dprintk("verification lead to (%u - %u kHz) for cpu %u\n",
+	pr_debug("verification lead to (%u - %u kHz) for cpu %u\n",
 				policy->min, policy->max, policy->cpu);
 
 	return 0;
@@ -110,7 +107,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 	};
 	unsigned int i;
 
-	dprintk("request for target %u kHz (relation: %u) for cpu %u\n",
+	pr_debug("request for target %u kHz (relation: %u) for cpu %u\n",
 					target_freq, relation, policy->cpu);
 
 	switch (relation) {
@@ -167,7 +164,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 	} else
 		*index = optimal.index;
 
-	dprintk("target is %u (%u kHz, %u)\n", *index, table[*index].frequency,
+	pr_debug("target is %u (%u kHz, %u)\n", *index, table[*index].frequency,
 		table[*index].index);
 
 	return 0;
@@ -216,14 +213,14 @@ EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs);
 void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 				      unsigned int cpu)
 {
-	dprintk("setting show_table for cpu %u to %p\n", cpu, table);
+	pr_debug("setting show_table for cpu %u to %p\n", cpu, table);
 	per_cpu(cpufreq_show_table, cpu) = table;
 }
 EXPORT_SYMBOL_GPL(cpufreq_frequency_table_get_attr);
 
 void cpufreq_frequency_table_put_attr(unsigned int cpu)
 {
-	dprintk("clearing show_table for cpu %u\n", cpu);
+	pr_debug("clearing show_table for cpu %u\n", cpu);
 	per_cpu(cpufreq_show_table, cpu) = NULL;
 }
 EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 9343dd3de858..2845f6e67221 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -397,23 +397,4 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
 
 
-/*********************************************************************
- *                     UNIFIED DEBUG HELPERS                         *
- *********************************************************************/
-
-#define CPUFREQ_DEBUG_CORE	1
-#define CPUFREQ_DEBUG_DRIVER	2
-#define CPUFREQ_DEBUG_GOVERNOR	4
-
-#ifdef CONFIG_CPU_FREQ_DEBUG
-
-extern void cpufreq_debug_printk(unsigned int type, const char *prefix, 
-				 const char *fmt, ...);
-
-#else
-
-#define cpufreq_debug_printk(msg...) do { } while(0)
-
-#endif /* CONFIG_CPU_FREQ_DEBUG */
-
 #endif /* _LINUX_CPUFREQ_H */
-- 
cgit v1.2.3


From 9cddf15f18865c4f7124d829be41799f59aaa5cf Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 4 May 2011 11:06:05 -0700
Subject: x86/net: only select BPF_JIT when NET is enabled

Fix kconfig unmet dependency warning: HAVE_BPF_JIT depends on NET, so
make the "select" of it depend on NET also.

warning: (X86) selects HAVE_BPF_JIT which has unmet direct dependencies (NET)

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 855a1bdc437d..2096cf180648 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -72,7 +72,7 @@ config X86
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select ARCH_NO_SYSDEV_OPS
-	select HAVE_BPF_JIT if X86_64
+	select HAVE_BPF_JIT if (X86_64 && NET)
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
-- 
cgit v1.2.3


From 228e548e602061b08ee8e8966f567c12aa079682 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 2 May 2011 20:21:35 +0000
Subject: net: Add sendmmsg socket system call

This patch adds a multiple message send syscall and is the send
version of the existing recvmmsg syscall. This is heavily
based on the patch by Arnaldo that added recvmmsg.

I wrote a microbenchmark to test the performance gains of using
this new syscall:

http://ozlabs.org/~anton/junkcode/sendmmsg_test.c

The test was run on a ppc64 box with a 10 Gbit network card. The
benchmark can send both UDP and RAW ethernet packets.

64B UDP

batch   pkts/sec
1       804570
2       872800 (+ 8 %)
4       916556 (+14 %)
8       939712 (+17 %)
16      952688 (+18 %)
32      956448 (+19 %)
64      964800 (+20 %)

64B raw socket

batch   pkts/sec
1       1201449
2       1350028 (+12 %)
4       1461416 (+22 %)
8       1513080 (+26 %)
16      1541216 (+28 %)
32      1553440 (+29 %)
64      1557888 (+30 %)

We see a 20% improvement in throughput on UDP send and 30%
on raw socket send.

[ Add sparc syscall entries. -DaveM ]

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/powerpc/include/asm/systbl.h  |   1 +
 arch/powerpc/include/asm/unistd.h  |   3 +-
 arch/sparc/include/asm/unistd.h    |   3 +-
 arch/sparc/kernel/systbls_32.S     |   2 +-
 arch/sparc/kernel/systbls_64.S     |   4 +-
 arch/x86/ia32/ia32entry.S          |   1 +
 arch/x86/include/asm/unistd_32.h   |   3 +-
 arch/x86/include/asm/unistd_64.h   |   2 +
 arch/x86/kernel/syscall_table_32.S |   1 +
 include/linux/net.h                |   1 +
 include/linux/socket.h             |   2 +
 include/linux/syscalls.h           |   2 +
 include/net/compat.h               |   2 +
 kernel/sys_ni.c                    |   2 +
 net/compat.c                       |  16 ++-
 net/socket.c                       | 199 +++++++++++++++++++++++++++++--------
 16 files changed, 192 insertions(+), 52 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 60f64b132bd4..8489d372077f 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at)
 COMPAT_SYS_SPU(open_by_handle_at)
 COMPAT_SYS_SPU(clock_adjtime)
 SYSCALL_SPU(syncfs)
+COMPAT_SYS_SPU(sendmmsg)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 3c215648ce6d..6d23c8193caa 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -371,10 +371,11 @@
 #define __NR_open_by_handle_at	346
 #define __NR_clock_adjtime	347
 #define __NR_syncfs		348
+#define __NR_sendmmsg		349
 
 #ifdef __KERNEL__
 
-#define __NR_syscalls		349
+#define __NR_syscalls		350
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 9d897b6db983..c5387ed0add8 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -404,8 +404,9 @@
 #define __NR_open_by_handle_at	333
 #define __NR_clock_adjtime	334
 #define __NR_syncfs		335
+#define __NR_sendmmsg		336
 
-#define NR_syscalls		336
+#define NR_syscalls		337
 
 #ifdef __32bit_syscall_numbers__
 /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 47ac73c32e88..332c83ff7701 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -84,4 +84,4 @@ sys_call_table:
 /*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
 /*325*/	.long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
 /*330*/	.long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
-/*335*/	.long sys_syncfs
+/*335*/	.long sys_syncfs, sys_sendmmsg
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 4f3170c1ef47..43887ca0be0e 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -85,7 +85,7 @@ sys_call_table32:
 /*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
 	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
 /*330*/	.word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
-	.word sys_syncfs
+	.word sys_syncfs, compat_sys_sendmmsg
 
 #endif /* CONFIG_COMPAT */
 
@@ -162,4 +162,4 @@ sys_call_table:
 /*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
 	.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
 /*330*/	.word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
-	.word sys_syncfs
+	.word sys_syncfs, sys_sendmmsg
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 849a9d23c71d..95f5826be458 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -848,4 +848,5 @@ ia32_sys_call_table:
 	.quad compat_sys_open_by_handle_at
 	.quad compat_sys_clock_adjtime
 	.quad sys_syncfs
+	.quad compat_sys_sendmmsg	/* 345 */
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index a755ef5e5977..fb6a625c99bf 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -350,10 +350,11 @@
 #define __NR_open_by_handle_at  342
 #define __NR_clock_adjtime	343
 #define __NR_syncfs             344
+#define __NR_sendmmsg		345
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 345
+#define NR_syscalls 346
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 160fa76bd578..79f90eb15aad 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
 __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
 #define __NR_syncfs                             306
 __SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_sendmmsg				307
+__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index abce34d5c79d..32cbffb0c494 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -344,3 +344,4 @@ ENTRY(sys_call_table)
 	.long sys_open_by_handle_at
 	.long sys_clock_adjtime
 	.long sys_syncfs
+	.long sys_sendmmsg		/* 345 */
diff --git a/include/linux/net.h b/include/linux/net.h
index 94de83c0f877..1da55e9b6f01 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -42,6 +42,7 @@
 #define SYS_RECVMSG	17		/* sys_recvmsg(2)		*/
 #define SYS_ACCEPT4	18		/* sys_accept4(2)		*/
 #define SYS_RECVMMSG	19		/* sys_recvmmsg(2)		*/
+#define SYS_SENDMMSG	20		/* sys_sendmmsg(2)		*/
 
 typedef enum {
 	SS_FREE = 0,			/* not allocated		*/
diff --git a/include/linux/socket.h b/include/linux/socket.h
index d2b5e982f079..4ef98e422fde 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -333,5 +333,7 @@ struct timespec;
 
 extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 			  unsigned int flags, struct timespec *timeout);
+extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
+			  unsigned int vlen, unsigned int flags);
 #endif /* not kernel and not glibc */
 #endif /* _LINUX_SOCKET_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 83ecc1749ef6..ab71447d0c5a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -610,6 +610,8 @@ asmlinkage long sys_send(int, void __user *, size_t, unsigned);
 asmlinkage long sys_sendto(int, void __user *, size_t, unsigned,
 				struct sockaddr __user *, int);
 asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags);
+asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg,
+			     unsigned int vlen, unsigned flags);
 asmlinkage long sys_recv(int, void __user *, size_t, unsigned);
 asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned,
 				struct sockaddr __user *, int __user *);
diff --git a/include/net/compat.h b/include/net/compat.h
index 28d5428ec6a2..9ee75edcc295 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -43,6 +43,8 @@ extern int compat_sock_get_timestampns(struct sock *, struct timespec __user *);
 extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *);
 extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int);
 extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned);
+extern asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *,
+					   unsigned, unsigned);
 extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned);
 extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *,
 					   unsigned, unsigned,
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 25cc41cd8f33..97e966f171c6 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -46,7 +46,9 @@ cond_syscall(sys_getsockopt);
 cond_syscall(compat_sys_getsockopt);
 cond_syscall(sys_shutdown);
 cond_syscall(sys_sendmsg);
+cond_syscall(sys_sendmmsg);
 cond_syscall(compat_sys_sendmsg);
+cond_syscall(compat_sys_sendmmsg);
 cond_syscall(sys_recvmsg);
 cond_syscall(sys_recvmmsg);
 cond_syscall(compat_sys_recvmsg);
diff --git a/net/compat.c b/net/compat.c
index 3649d5895361..c578d9382e19 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -722,11 +722,11 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
 
 /* Argument list sizes for compat_sys_socketcall */
 #define AL(x) ((x) * sizeof(u32))
-static unsigned char nas[20] = {
+static unsigned char nas[21] = {
 	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
 	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
 	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
-	AL(4), AL(5)
+	AL(4), AL(5), AL(4)
 };
 #undef AL
 
@@ -735,6 +735,13 @@ asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, uns
 	return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
 }
 
+asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
+				    unsigned vlen, unsigned int flags)
+{
+	return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
+			      flags | MSG_CMSG_COMPAT);
+}
+
 asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags)
 {
 	return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
@@ -780,7 +787,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 	u32 a[6];
 	u32 a0, a1;
 
-	if (call < SYS_SOCKET || call > SYS_RECVMMSG)
+	if (call < SYS_SOCKET || call > SYS_SENDMMSG)
 		return -EINVAL;
 	if (copy_from_user(a, args, nas[call]))
 		return -EFAULT;
@@ -839,6 +846,9 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 	case SYS_SENDMSG:
 		ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]);
 		break;
+	case SYS_SENDMMSG:
+		ret = compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]);
+		break;
 	case SYS_RECVMSG:
 		ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
 		break;
diff --git a/net/socket.c b/net/socket.c
index d25f5a9d6fa2..ed50255143d5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -551,11 +551,10 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
 }
 EXPORT_SYMBOL(sock_tx_timestamp);
 
-static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
-				 struct msghdr *msg, size_t size)
+static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
+				       struct msghdr *msg, size_t size)
 {
 	struct sock_iocb *si = kiocb_to_siocb(iocb);
-	int err;
 
 	sock_update_classid(sock->sk);
 
@@ -564,13 +563,17 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	si->msg = msg;
 	si->size = size;
 
-	err = security_socket_sendmsg(sock, msg, size);
-	if (err)
-		return err;
-
 	return sock->ops->sendmsg(iocb, sock, msg, size);
 }
 
+static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
+				 struct msghdr *msg, size_t size)
+{
+	int err = security_socket_sendmsg(sock, msg, size);
+
+	return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
+}
+
 int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	struct kiocb iocb;
@@ -586,6 +589,20 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 }
 EXPORT_SYMBOL(sock_sendmsg);
 
+int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
+{
+	struct kiocb iocb;
+	struct sock_iocb siocb;
+	int ret;
+
+	init_sync_kiocb(&iocb, NULL);
+	iocb.private = &siocb;
+	ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&iocb);
+	return ret;
+}
+
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size)
 {
@@ -1863,57 +1880,47 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how)
 #define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
 #define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
 
-/*
- *	BSD sendmsg interface
- */
-
-SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
+static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
+			 struct msghdr *msg_sys, unsigned flags, int nosec)
 {
 	struct compat_msghdr __user *msg_compat =
 	    (struct compat_msghdr __user *)msg;
-	struct socket *sock;
 	struct sockaddr_storage address;
 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 	unsigned char ctl[sizeof(struct cmsghdr) + 20]
 	    __attribute__ ((aligned(sizeof(__kernel_size_t))));
 	/* 20 is size of ipv6_pktinfo */
 	unsigned char *ctl_buf = ctl;
-	struct msghdr msg_sys;
 	int err, ctl_len, iov_size, total_len;
-	int fput_needed;
 
 	err = -EFAULT;
 	if (MSG_CMSG_COMPAT & flags) {
-		if (get_compat_msghdr(&msg_sys, msg_compat))
+		if (get_compat_msghdr(msg_sys, msg_compat))
 			return -EFAULT;
-	} else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+	} else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
 		return -EFAULT;
 
-	sock = sockfd_lookup_light(fd, &err, &fput_needed);
-	if (!sock)
-		goto out;
-
 	/* do not move before msg_sys is valid */
 	err = -EMSGSIZE;
-	if (msg_sys.msg_iovlen > UIO_MAXIOV)
-		goto out_put;
+	if (msg_sys->msg_iovlen > UIO_MAXIOV)
+		goto out;
 
 	/* Check whether to allocate the iovec area */
 	err = -ENOMEM;
-	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
-	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
+	iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
+	if (msg_sys->msg_iovlen > UIO_FASTIOV) {
 		iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
 		if (!iov)
-			goto out_put;
+			goto out;
 	}
 
 	/* This will also move the address data into kernel space */
 	if (MSG_CMSG_COMPAT & flags) {
-		err = verify_compat_iovec(&msg_sys, iov,
+		err = verify_compat_iovec(msg_sys, iov,
 					  (struct sockaddr *)&address,
 					  VERIFY_READ);
 	} else
-		err = verify_iovec(&msg_sys, iov,
+		err = verify_iovec(msg_sys, iov,
 				   (struct sockaddr *)&address,
 				   VERIFY_READ);
 	if (err < 0)
@@ -1922,17 +1929,17 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
 
 	err = -ENOBUFS;
 
-	if (msg_sys.msg_controllen > INT_MAX)
+	if (msg_sys->msg_controllen > INT_MAX)
 		goto out_freeiov;
-	ctl_len = msg_sys.msg_controllen;
+	ctl_len = msg_sys->msg_controllen;
 	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
 		err =
-		    cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
+		    cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
 						     sizeof(ctl));
 		if (err)
 			goto out_freeiov;
-		ctl_buf = msg_sys.msg_control;
-		ctl_len = msg_sys.msg_controllen;
+		ctl_buf = msg_sys->msg_control;
+		ctl_len = msg_sys->msg_controllen;
 	} else if (ctl_len) {
 		if (ctl_len > sizeof(ctl)) {
 			ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
@@ -1941,21 +1948,22 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
 		}
 		err = -EFAULT;
 		/*
-		 * Careful! Before this, msg_sys.msg_control contains a user pointer.
+		 * Careful! Before this, msg_sys->msg_control contains a user pointer.
 		 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
 		 * checking falls down on this.
 		 */
 		if (copy_from_user(ctl_buf,
-				   (void __user __force *)msg_sys.msg_control,
+				   (void __user __force *)msg_sys->msg_control,
 				   ctl_len))
 			goto out_freectl;
-		msg_sys.msg_control = ctl_buf;
+		msg_sys->msg_control = ctl_buf;
 	}
-	msg_sys.msg_flags = flags;
+	msg_sys->msg_flags = flags;
 
 	if (sock->file->f_flags & O_NONBLOCK)
-		msg_sys.msg_flags |= MSG_DONTWAIT;
-	err = sock_sendmsg(sock, &msg_sys, total_len);
+		msg_sys->msg_flags |= MSG_DONTWAIT;
+	err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys,
+							  total_len);
 
 out_freectl:
 	if (ctl_buf != ctl)
@@ -1963,12 +1971,114 @@ out_freectl:
 out_freeiov:
 	if (iov != iovstack)
 		sock_kfree_s(sock->sk, iov, iov_size);
-out_put:
+out:
+	return err;
+}
+
+/*
+ *	BSD sendmsg interface
+ */
+
+SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
+{
+	int fput_needed, err;
+	struct msghdr msg_sys;
+	struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
+
+	if (!sock)
+		goto out;
+
+	err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0);
+
 	fput_light(sock->file, fput_needed);
 out:
 	return err;
 }
 
+/*
+ *	Linux sendmmsg interface
+ */
+
+int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
+		   unsigned int flags)
+{
+	int fput_needed, err, datagrams;
+	struct socket *sock;
+	struct mmsghdr __user *entry;
+	struct compat_mmsghdr __user *compat_entry;
+	struct msghdr msg_sys;
+
+	datagrams = 0;
+
+	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+	if (!sock)
+		return err;
+
+	err = sock_error(sock->sk);
+	if (err)
+		goto out_put;
+
+	entry = mmsg;
+	compat_entry = (struct compat_mmsghdr __user *)mmsg;
+
+	while (datagrams < vlen) {
+		/*
+		 * No need to ask LSM for more than the first datagram.
+		 */
+		if (MSG_CMSG_COMPAT & flags) {
+			err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
+					    &msg_sys, flags, datagrams);
+			if (err < 0)
+				break;
+			err = __put_user(err, &compat_entry->msg_len);
+			++compat_entry;
+		} else {
+			err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
+					    &msg_sys, flags, datagrams);
+			if (err < 0)
+				break;
+			err = put_user(err, &entry->msg_len);
+			++entry;
+		}
+
+		if (err)
+			break;
+		++datagrams;
+	}
+
+out_put:
+	fput_light(sock->file, fput_needed);
+
+	if (err == 0)
+		return datagrams;
+
+	if (datagrams != 0) {
+		/*
+		 * We may send less entries than requested (vlen) if the
+		 * sock is non blocking...
+		 */
+		if (err != -EAGAIN) {
+			/*
+			 * ... or if sendmsg returns an error after we
+			 * send some datagrams, where we record the
+			 * error to return on the next call or if the
+			 * app asks about it using getsockopt(SO_ERROR).
+			 */
+			sock->sk->sk_err = -err;
+		}
+
+		return datagrams;
+	}
+
+	return err;
+}
+
+SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
+		unsigned int, vlen, unsigned int, flags)
+{
+	return __sys_sendmmsg(fd, mmsg, vlen, flags);
+}
+
 static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
 			 struct msghdr *msg_sys, unsigned flags, int nosec)
 {
@@ -2214,11 +2324,11 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
 #ifdef __ARCH_WANT_SYS_SOCKETCALL
 /* Argument list sizes for sys_socketcall */
 #define AL(x) ((x) * sizeof(unsigned long))
-static const unsigned char nargs[20] = {
+static const unsigned char nargs[21] = {
 	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
 	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
 	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
-	AL(4), AL(5)
+	AL(4), AL(5), AL(4)
 };
 
 #undef AL
@@ -2238,7 +2348,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
 	int err;
 	unsigned int len;
 
-	if (call < 1 || call > SYS_RECVMMSG)
+	if (call < 1 || call > SYS_SENDMMSG)
 		return -EINVAL;
 
 	len = nargs[call];
@@ -2313,6 +2423,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
 	case SYS_SENDMSG:
 		err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
 		break;
+	case SYS_SENDMMSG:
+		err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
+		break;
 	case SYS_RECVMSG:
 		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
 		break;
-- 
cgit v1.2.3


From 79af2187fa27442e89437d8ee637578cdb1a036c Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 6 May 2011 10:39:08 +1000
Subject: powerpc: Fix compile with icwsx support

Due to a collision between NO_CONTEXT->MMU_NO_CONTEXT change and
Anton's patch.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/mm/mmu_context_hash64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index c517815404c4..3bafc3deca6d 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -279,7 +279,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	if (!mm->context.cop_lockp) {
 		__destroy_context(index);
 		subpage_prot_free(mm);
-		mm->context.id = NO_CONTEXT;
+		mm->context.id = MMU_NO_CONTEXT;
 		return -ENOMEM;
 	}
 	spin_lock_init(mm->context.cop_lockp);
-- 
cgit v1.2.3


From 4cb4638079a487627232ffee5b48ca19c127aed8 Mon Sep 17 00:00:00 2001
From: "Tseng-Hui (Frank) Lin" <thlin@linux.vnet.ibm.com>
Date: Tue, 3 May 2011 18:28:43 +0000
Subject: powerpc/pseries: Add RTAS event log v6 definition

This patch adds definitions of non-IBM specific v6 extended log
definitions to rtas.h.

Signed-off-by: Tseng-Hui (Frank) Lin <tsenglin@us.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/rtas.h | 45 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 9a1193e30f26..58625d1e7802 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -158,7 +158,50 @@ struct rtas_error_log {
 	unsigned long target:4;			/* Target of failed operation */
 	unsigned long type:8;			/* General event or error*/
 	unsigned long extended_log_length:32;	/* length in bytes */
-	unsigned char buffer[1];
+	unsigned char buffer[1];		/* Start of extended log */
+						/* Variable length.      */
+};
+
+#define RTAS_V6EXT_LOG_FORMAT_EVENT_LOG	14
+
+#define RTAS_V6EXT_COMPANY_ID_IBM	(('I' << 24) | ('B' << 16) | ('M' << 8))
+
+/* RTAS general extended event log, Version 6. The extended log starts
+ * from "buffer" field of struct rtas_error_log defined above.
+ */
+struct rtas_ext_event_log_v6 {
+	/* Byte 0 */
+	uint32_t log_valid:1;		/* 1:Log valid */
+	uint32_t unrecoverable_error:1;	/* 1:Unrecoverable error */
+	uint32_t recoverable_error:1;	/* 1:recoverable (correctable	*/
+					/*   or successfully retried)	*/
+	uint32_t degraded_operation:1;	/* 1:Unrecoverable err, bypassed*/
+					/*   - degraded operation (e.g.	*/
+					/*   CPU or mem taken off-line)	*/
+	uint32_t predictive_error:1;
+	uint32_t new_log:1;		/* 1:"New" log (Always 1 for	*/
+					/*   data returned from RTAS	*/
+	uint32_t big_endian:1;		/* 1: Big endian */
+	uint32_t :1;			/* reserved */
+	/* Byte 1 */
+	uint32_t :8;			/* reserved */
+	/* Byte 2 */
+	uint32_t powerpc_format:1;	/* Set to 1 (indicating log is	*/
+					/* in PowerPC format		*/
+	uint32_t :3;			/* reserved */
+	uint32_t log_format:4;		/* Log format indicator. Define	*/
+					/* format used for byte 12-2047	*/
+	/* Byte 3 */
+	uint32_t :8;			/* reserved */
+	/* Byte 4-11 */
+	uint8_t reserved[8];		/* reserved */
+	/* Byte 12-15 */
+	uint32_t company_id;		/* Company ID of the company	*/
+					/* that defines the format for	*/
+					/* the vendor specific log type	*/
+	/* Byte 16-end of log */
+	uint8_t vendor_log[1];		/* Start of vendor specific log	*/
+					/* Variable length.		*/
 };
 
 /*
-- 
cgit v1.2.3


From 77eafe101a65b609b0693ee4eda381f60a4a5bab Mon Sep 17 00:00:00 2001
From: "Tseng-Hui (Frank) Lin" <thlin@linux.vnet.ibm.com>
Date: Thu, 5 May 2011 12:32:48 +0000
Subject: powerpc/pseries: Add support for IO event interrupts

This patch adds support for handling IO Event interrupts which come
through at the /event-sources/ibm,io-events device tree node.

The interrupts come through ibm,io-events device tree node are generated
by the firmware to report IO events. The firmware uses the same interrupt
to report multiple types of events for multiple devices. Each device may
have its own event handler. This patch implements a plateform interrupt
handler that is triggered by the IO event interrupts come through
ibm,io-events device tree node, pull in the IO events from RTAS and call
device event handlers registered in the notifier list.

Device event handlers are expected to use atomic_notifier_chain_register()
and atomic_notifier_chain_unregister() to register/unregister their
event handler in pseries_ioei_notifier_list list with IO event interrupt.
Device event handlers are responsible to identify if the event belongs
to the device event handler. The device event handle should return NOTIFY_OK
after the event is handled if the event belongs to the device event handler,
or NOTIFY_DONE otherwise.

Signed-off-by: Tseng-Hui (Frank) Lin <thlin@us.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/io_event_irq.h       |  54 ++++++
 arch/powerpc/platforms/pseries/Kconfig        |  18 ++
 arch/powerpc/platforms/pseries/Makefile       |   1 +
 arch/powerpc/platforms/pseries/io_event_irq.c | 231 ++++++++++++++++++++++++++
 4 files changed, 304 insertions(+)
 create mode 100644 arch/powerpc/include/asm/io_event_irq.h
 create mode 100644 arch/powerpc/platforms/pseries/io_event_irq.c

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/io_event_irq.h b/arch/powerpc/include/asm/io_event_irq.h
new file mode 100644
index 000000000000..b1a9a1be3c21
--- /dev/null
+++ b/arch/powerpc/include/asm/io_event_irq.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2010, 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_IO_EVENT_IRQ_H
+#define _ASM_POWERPC_IO_EVENT_IRQ_H
+
+#include <linux/types.h>
+#include <linux/notifier.h>
+
+#define PSERIES_IOEI_RPC_MAX_LEN 216
+
+#define PSERIES_IOEI_TYPE_ERR_DETECTED		0x01
+#define PSERIES_IOEI_TYPE_ERR_RECOVERED		0x02
+#define PSERIES_IOEI_TYPE_EVENT			0x03
+#define PSERIES_IOEI_TYPE_RPC_PASS_THRU		0x04
+
+#define PSERIES_IOEI_SUBTYPE_NOT_APP		0x00
+#define PSERIES_IOEI_SUBTYPE_REBALANCE_REQ	0x01
+#define PSERIES_IOEI_SUBTYPE_NODE_ONLINE	0x03
+#define PSERIES_IOEI_SUBTYPE_NODE_OFFLINE	0x04
+#define PSERIES_IOEI_SUBTYPE_DUMP_SIZE_CHANGE	0x05
+#define PSERIES_IOEI_SUBTYPE_TORRENT_IRV_UPDATE	0x06
+#define PSERIES_IOEI_SUBTYPE_TORRENT_HFI_CFGED	0x07
+
+#define PSERIES_IOEI_SCOPE_NOT_APP		0x00
+#define PSERIES_IOEI_SCOPE_RIO_HUB		0x36
+#define PSERIES_IOEI_SCOPE_RIO_BRIDGE		0x37
+#define PSERIES_IOEI_SCOPE_PHB			0x38
+#define PSERIES_IOEI_SCOPE_EADS_GLOBAL		0x39
+#define PSERIES_IOEI_SCOPE_EADS_SLOT		0x3A
+#define PSERIES_IOEI_SCOPE_TORRENT_HUB		0x3B
+#define PSERIES_IOEI_SCOPE_SERVICE_PROC		0x51
+
+/* Platform Event Log Format, Version 6, data portition of IO event section */
+struct pseries_io_event {
+	uint8_t event_type;		/* 0x00 IO-Event Type		*/
+	uint8_t rpc_data_len;		/* 0x01 RPC data length		*/
+	uint8_t scope;			/* 0x02 Error/Event Scope	*/
+	uint8_t event_subtype;		/* 0x03 I/O-Event Sub-Type	*/
+	uint32_t drc_index;		/* 0x04 DRC Index		*/
+	uint8_t rpc_data[PSERIES_IOEI_RPC_MAX_LEN];
+					/* 0x08 RPC Data (0-216 bytes,	*/
+					/* padded to 4 bytes alignment)	*/
+};
+
+extern struct atomic_notifier_head pseries_ioei_notifier_list;
+
+#endif /* _ASM_POWERPC_IO_EVENT_IRQ_H */
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index b0449229836e..71af4c5d6c05 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -50,6 +50,24 @@ config SCANLOG
 	tristate "Scanlog dump interface"
 	depends on RTAS_PROC && PPC_PSERIES
 
+config IO_EVENT_IRQ
+	bool "IO Event Interrupt support"
+	depends on PPC_PSERIES
+	default y
+	help
+	  Select this option, if you want to enable support for IO Event
+	  interrupts. IO event interrupt is a mechanism provided by RTAS
+	  to return information about hardware error and non-error events
+	  which may need OS attention. RTAS returns events for multiple
+	  event types and scopes. Device drivers can register their handlers
+	  to receive events.
+
+	  This option will only enable the IO event platform code. You
+	  will still need to enable or compile the actual drivers
+	  that use this infrastruture to handle IO event interrupts.
+
+	  Say Y if you are unsure.
+
 config LPARCFG
 	bool "LPAR Configuration Data"
 	depends on PPC_PSERIES || PPC_ISERIES
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 4cfefbaccd5f..3556e402cbf5 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
 obj-$(CONFIG_PHYP_DUMP)		+= phyp_dump.o
 obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_DTL)		+= dtl.o
+obj-$(CONFIG_IO_EVENT_IRQ)	+= io_event_irq.o
 
 ifeq ($(CONFIG_PPC_PSERIES),y)
 obj-$(CONFIG_SUSPEND)		+= suspend.o
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c
new file mode 100644
index 000000000000..c829e6067d54
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2010 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/irq.h>
+#include <asm/io_event_irq.h>
+
+#include "pseries.h"
+
+/*
+ * IO event interrupt is a mechanism provided by RTAS to return
+ * information about hardware error and non-error events. Device
+ * drivers can register their event handlers to receive events.
+ * Device drivers are expected to use atomic_notifier_chain_register()
+ * and atomic_notifier_chain_unregister() to register and unregister
+ * their event handlers. Since multiple IO event types and scopes
+ * share an IO event interrupt, the event handlers are called one
+ * by one until the IO event is claimed by one of the handlers.
+ * The event handlers are expected to return NOTIFY_OK if the
+ * event is handled by the event handler or NOTIFY_DONE if the
+ * event does not belong to the handler.
+ *
+ * Usage:
+ *
+ * Notifier function:
+ * #include <asm/io_event_irq.h>
+ * int event_handler(struct notifier_block *nb, unsigned long val, void *data) {
+ * 	p = (struct pseries_io_event_sect_data *) data;
+ * 	if (! is_my_event(p->scope, p->event_type)) return NOTIFY_DONE;
+ * 		:
+ * 		:
+ * 	return NOTIFY_OK;
+ * }
+ * struct notifier_block event_nb = {
+ * 	.notifier_call = event_handler,
+ * }
+ *
+ * Registration:
+ * atomic_notifier_chain_register(&pseries_ioei_notifier_list, &event_nb);
+ *
+ * Unregistration:
+ * atomic_notifier_chain_unregister(&pseries_ioei_notifier_list, &event_nb);
+ */
+
+ATOMIC_NOTIFIER_HEAD(pseries_ioei_notifier_list);
+EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list);
+
+static int ioei_check_exception_token;
+
+/* pSeries event log format */
+
+/* Two bytes ASCII section IDs */
+#define PSERIES_ELOG_SECT_ID_PRIV_HDR		(('P' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_HDR		(('U' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_PRIMARY_SRC	(('P' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_EXTENDED_UH	(('E' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FAILING_MTMS	(('M' << 8) | 'T')
+#define PSERIES_ELOG_SECT_ID_SECONDARY_SRC	(('S' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_DUMP_LOCATOR	(('D' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FW_ERROR		(('S' << 8) | 'W')
+#define PSERIES_ELOG_SECT_ID_IMPACT_PART_ID	(('L' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_LOGIC_RESOURCE_ID	(('L' << 8) | 'R')
+#define PSERIES_ELOG_SECT_ID_HMC_ID		(('H' << 8) | 'M')
+#define PSERIES_ELOG_SECT_ID_EPOW		(('E' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_IO_EVENT		(('I' << 8) | 'E')
+#define PSERIES_ELOG_SECT_ID_MANUFACT_INFO	(('M' << 8) | 'I')
+#define PSERIES_ELOG_SECT_ID_CALL_HOME		(('C' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_DEF		(('U' << 8) | 'D')
+
+/* Vendor specific Platform Event Log Format, Version 6, section header */
+struct pseries_elog_section {
+	uint16_t id;			/* 0x00 2-byte ASCII section ID	*/
+	uint16_t length;		/* 0x02 Section length in bytes	*/
+	uint8_t version;		/* 0x04 Section version		*/
+	uint8_t subtype;		/* 0x05 Section subtype		*/
+	uint16_t creator_component;	/* 0x06 Creator component ID	*/
+	uint8_t data[];			/* 0x08 Start of section data	*/
+};
+
+static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned;
+
+/**
+ * Find data portion of a specific section in RTAS extended event log.
+ * @elog: RTAS error/event log.
+ * @sect_id: secsion ID.
+ *
+ * Return:
+ *	pointer to the section data of the specified section
+ *	NULL if not found
+ */
+static struct pseries_elog_section *find_xelog_section(struct rtas_error_log *elog,
+						       uint16_t sect_id)
+{
+	struct rtas_ext_event_log_v6 *xelog =
+		(struct rtas_ext_event_log_v6 *) elog->buffer;
+	struct pseries_elog_section *sect;
+	unsigned char *p, *log_end;
+
+	/* Check that we understand the format */
+	if (elog->extended_log_length < sizeof(struct rtas_ext_event_log_v6) ||
+	    xelog->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG ||
+	    xelog->company_id != RTAS_V6EXT_COMPANY_ID_IBM)
+		return NULL;
+
+	log_end = elog->buffer + elog->extended_log_length;
+	p = xelog->vendor_log;
+	while (p < log_end) {
+		sect = (struct pseries_elog_section *)p;
+		if (sect->id == sect_id)
+			return sect;
+		p += sect->length;
+	}
+	return NULL;
+}
+
+/**
+ * Find the data portion of an IO Event section from event log.
+ * @elog: RTAS error/event log.
+ *
+ * Return:
+ * 	pointer to a valid IO event section data. NULL if not found.
+ */
+static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
+{
+	struct pseries_elog_section *sect;
+
+	/* We should only ever get called for io-event interrupts, but if
+	 * we do get called for another type then something went wrong so
+	 * make some noise about it.
+	 * RTAS_TYPE_IO only exists in extended event log version 6 or later.
+	 * No need to check event log version.
+	 */
+	if (unlikely(elog->type != RTAS_TYPE_IO)) {
+		printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d",
+			    elog->type);
+		return NULL;
+	}
+
+	sect = find_xelog_section(elog, PSERIES_ELOG_SECT_ID_IO_EVENT);
+	if (unlikely(!sect)) {
+		printk_once(KERN_WARNING "io_event_irq: RTAS extended event "
+			    "log does not contain an IO Event section. "
+			    "Could be a bug in system firmware!\n");
+		return NULL;
+	}
+	return (struct pseries_io_event *) &sect->data;
+}
+
+/*
+ * PAPR:
+ * - check-exception returns the first found error or event and clear that
+ *   error or event so it is reported once.
+ * - Each interrupt returns one event. If a plateform chooses to report
+ *   multiple events through a single interrupt, it must ensure that the
+ *   interrupt remains asserted until check-exception has been used to
+ *   process all out-standing events for that interrupt.
+ *
+ * Implementation notes:
+ * - Events must be processed in the order they are returned. Hence,
+ *   sequential in nature.
+ * - The owner of an event is determined by combinations of scope,
+ *   event type, and sub-type. There is no easy way to pre-sort clients
+ *   by scope or event type alone. For example, Torrent ISR route change
+ *   event is reported with scope 0x00 (Not Applicatable) rather than
+ *   0x3B (Torrent-hub). It is better to let the clients to identify
+ *   who owns the the event.
+ */
+
+static irqreturn_t ioei_interrupt(int irq, void *dev_id)
+{
+	struct pseries_io_event *event;
+	int rtas_rc;
+
+	for (;;) {
+		rtas_rc = rtas_call(ioei_check_exception_token, 6, 1, NULL,
+				    RTAS_VECTOR_EXTERNAL_INTERRUPT,
+				    virq_to_hw(irq),
+				    RTAS_IO_EVENTS, 1 /* Time Critical */,
+				    __pa(ioei_rtas_buf),
+				    RTAS_DATA_BUF_SIZE);
+		if (rtas_rc != 0)
+			break;
+
+		event = ioei_find_event((struct rtas_error_log *)ioei_rtas_buf);
+		if (!event)
+			continue;
+
+		atomic_notifier_call_chain(&pseries_ioei_notifier_list,
+					   0, event);
+	}
+	return IRQ_HANDLED;
+}
+
+static int __init ioei_init(void)
+{
+	struct device_node *np;
+
+	ioei_check_exception_token = rtas_token("check-exception");
+	if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE) {
+		pr_warning("IO Event IRQ not supported on this system !\n");
+		return -ENODEV;
+	}
+	np = of_find_node_by_path("/event-sources/ibm,io-events");
+	if (np) {
+		request_event_sources_irqs(np, ioei_interrupt, "IO_EVENT");
+		of_node_put(np);
+	} else {
+		pr_err("io_event_irq: No ibm,io-events on system! "
+		       "IO Event interrupt disabled.\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+machine_subsys_initcall(pseries, ioei_init);
+
-- 
cgit v1.2.3


From 40bd587a88fcd425f489f3d9f0be7daa84014141 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 3 May 2011 14:07:01 +0000
Subject: powerpc: Rename slb0_limit() to safe_stack_limit() and add Book3E
 support

slb0_limit() wasn't a very descriptive name. This changes it along with
a comment explaining what it's used for, and provides a 64-bit BookE
implementation.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/mmu-book3e.h |  4 ++++
 arch/powerpc/kernel/setup_64.c        | 23 ++++++++++++++++++-----
 2 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index ec61e7b998c0..3ea0f9a259d8 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -245,6 +245,10 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 extern int mmu_linear_psize;
 extern int mmu_vmemmap_psize;
 
+#ifdef CONFIG_PPC64
+extern unsigned long linear_map_top;
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 959c63cf62e4..c2ec0a12e14f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -434,17 +434,30 @@ void __init setup_system(void)
 	DBG(" <- setup_system()\n");
 }
 
-static u64 slb0_limit(void)
+/* This returns the limit below which memory accesses to the linear
+ * mapping are guarnateed not to cause a TLB or SLB miss. This is
+ * used to allocate interrupt or emergency stacks for which our
+ * exception entry path doesn't deal with being interrupted.
+ */
+static u64 safe_stack_limit(void)
 {
-	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
+#ifdef CONFIG_PPC_BOOK3E
+	/* Freescale BookE bolts the entire linear mapping */
+	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+		return linear_map_top;
+	/* Other BookE, we assume the first GB is bolted */
+	return 1ul << 30;
+#else
+	/* BookS, the first segment is bolted */
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
 		return 1UL << SID_SHIFT_1T;
-	}
 	return 1UL << SID_SHIFT;
+#endif
 }
 
 static void __init irqstack_early_init(void)
 {
-	u64 limit = slb0_limit();
+	u64 limit = safe_stack_limit();
 	unsigned int i;
 
 	/*
@@ -497,7 +510,7 @@ static void __init emergency_stack_init(void)
 	 * bringup, we need to get at them in real mode. This means they
 	 * must also be within the RMO region.
 	 */
-	limit = min(slb0_limit(), ppc64_rma_size);
+	limit = min(safe_stack_limit(), ppc64_rma_size);
 
 	for_each_possible_cpu(i) {
 		unsigned long sp;
-- 
cgit v1.2.3


From 82578e192bb837b984ed5d8389245ea1fee09dd5 Mon Sep 17 00:00:00 2001
From: Richard A Lary <rlary@linux.vnet.ibm.com>
Date: Wed, 4 May 2011 12:57:18 +0000
Subject: powerpc/eeh: Display eeh error location for bus and device

  For adapters which have devices under a PCIe switch/bridge it is informative
  to display information for both the PCIe switch/bridge and the device on
  which the bus error was detected.

  rebased to powerpc-next

Signed-off-by: Richard A Lary <rlary@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/eeh_driver.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index b8d70f5d9aa9..1b6cb10589e0 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -328,7 +328,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	struct pci_bus *frozen_bus;
 	int rc = 0;
 	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
-	const char *location, *pci_str, *drv_str;
+	const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
 
 	frozen_dn = find_device_pe(event->dn);
 	if (!frozen_dn) {
@@ -364,13 +364,8 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	frozen_pdn = PCI_DN(frozen_dn);
 	frozen_pdn->eeh_freeze_count++;
 
-	if (frozen_pdn->pcidev) {
-		pci_str = pci_name (frozen_pdn->pcidev);
-		drv_str = pcid_name (frozen_pdn->pcidev);
-	} else {
-		pci_str = eeh_pci_name(event->dev);
-		drv_str = pcid_name (event->dev);
-	}
+	pci_str = eeh_pci_name(event->dev);
+	drv_str = pcid_name(event->dev);
 	
 	if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
 		goto excess_failures;
@@ -378,8 +373,17 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
 	printk(KERN_WARNING
 	   "EEH: This PCI device has failed %d times in the last hour:\n",
 		frozen_pdn->eeh_freeze_count);
+
+	if (frozen_pdn->pcidev) {
+		bus_pci_str = pci_name(frozen_pdn->pcidev);
+		bus_drv_str = pcid_name(frozen_pdn->pcidev);
+		printk(KERN_WARNING
+			"EEH: Bus location=%s driver=%s pci addr=%s\n",
+			location, bus_drv_str, bus_pci_str);
+	}
+
 	printk(KERN_WARNING
-		"EEH: location=%s driver=%s pci addr=%s\n",
+		"EEH: Device location=%s driver=%s pci addr=%s\n",
 		location, drv_str, pci_str);
 
 	/* Walk the various device drivers attached to this slot through
-- 
cgit v1.2.3


From a1d0d98daf6ce580d017a43b09fe30a375cde3e8 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Thu, 14 Apr 2011 22:32:06 +0000
Subject: powerpc: Add WSP platform

Add a platform for the Wire Speed Processor, based on the PPC A2.

This includes code for the ICS & OPB interrupt controllers, as well
as a SCOM backend, and SCOM based cpu bringup.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Jack Miller <jack@codezen.org>
Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/wsp.h        |  14 +
 arch/powerpc/kernel/exceptions-64e.S  |  23 ++
 arch/powerpc/platforms/Kconfig        |   1 +
 arch/powerpc/platforms/Makefile       |   1 +
 arch/powerpc/platforms/wsp/Kconfig    |  28 ++
 arch/powerpc/platforms/wsp/Makefile   |   6 +
 arch/powerpc/platforms/wsp/ics.c      | 712 ++++++++++++++++++++++++++++++++++
 arch/powerpc/platforms/wsp/ics.h      |  20 +
 arch/powerpc/platforms/wsp/opb_pic.c  | 332 ++++++++++++++++
 arch/powerpc/platforms/wsp/psr2.c     |  95 +++++
 arch/powerpc/platforms/wsp/scom_smp.c | 427 ++++++++++++++++++++
 arch/powerpc/platforms/wsp/scom_wsp.c |  77 ++++
 arch/powerpc/platforms/wsp/setup.c    |  36 ++
 arch/powerpc/platforms/wsp/smp.c      |  87 +++++
 arch/powerpc/platforms/wsp/wsp.h      |  17 +
 arch/powerpc/sysdev/xics/icp-native.c |   1 +
 16 files changed, 1877 insertions(+)
 create mode 100644 arch/powerpc/include/asm/wsp.h
 create mode 100644 arch/powerpc/platforms/wsp/Kconfig
 create mode 100644 arch/powerpc/platforms/wsp/Makefile
 create mode 100644 arch/powerpc/platforms/wsp/ics.c
 create mode 100644 arch/powerpc/platforms/wsp/ics.h
 create mode 100644 arch/powerpc/platforms/wsp/opb_pic.c
 create mode 100644 arch/powerpc/platforms/wsp/psr2.c
 create mode 100644 arch/powerpc/platforms/wsp/scom_smp.c
 create mode 100644 arch/powerpc/platforms/wsp/scom_wsp.c
 create mode 100644 arch/powerpc/platforms/wsp/setup.c
 create mode 100644 arch/powerpc/platforms/wsp/smp.c
 create mode 100644 arch/powerpc/platforms/wsp/wsp.h

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/wsp.h b/arch/powerpc/include/asm/wsp.h
new file mode 100644
index 000000000000..c7dc83088a33
--- /dev/null
+++ b/arch/powerpc/include/asm/wsp.h
@@ -0,0 +1,14 @@
+/*
+ *  Copyright 2011 Michael Ellerman, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+#ifndef __ASM_POWERPC_WSP_H
+#define __ASM_POWERPC_WSP_H
+
+extern int wsp_get_chip_id(struct device_node *dn);
+
+#endif /* __ASM_POWERPC_WSP_H */
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 23bd83b20be4..c98e9d260621 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -864,6 +864,20 @@ have_hes:
 	 * that will have to be made dependent on whether we are running under
 	 * a hypervisor I suppose.
 	 */
+
+	/* BEWARE, MAGIC
+	 * This code is called as an ordinary function on the boot CPU. But to
+	 * avoid duplication, this code is also used in SCOM bringup of
+	 * secondary CPUs. We read the code between the initial_tlb_code_start
+	 * and initial_tlb_code_end labels one instruction at a time and RAM it
+	 * into the new core via SCOM. That doesn't process branches, so there
+	 * must be none between those two labels. It also means if this code
+	 * ever takes any parameters, the SCOM code must also be updated to
+	 * provide them.
+	 */
+	.globl a2_tlbinit_code_start
+a2_tlbinit_code_start:
+
 	ori	r11,r3,MAS0_WQ_ALLWAYS
 	oris	r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */
 	mtspr	SPRN_MAS0,r11
@@ -880,6 +894,9 @@ have_hes:
 	/* Write the TLB entry */
 	tlbwe
 
+	.globl a2_tlbinit_after_linear_map
+a2_tlbinit_after_linear_map:
+
 	/* Now we branch the new virtual address mapped by this entry */
 	LOAD_REG_IMMEDIATE(r3,1f)
 	mtctr	r3
@@ -931,10 +948,16 @@ have_hes:
 	cmpw	r3,r9
 	blt	2b
 
+	.globl  a2_tlbinit_after_iprot_flush
+a2_tlbinit_after_iprot_flush:
+
 	PPC_TLBILX(0,0,0)
 	sync
 	isync
 
+	.globl a2_tlbinit_code_end
+a2_tlbinit_code_end:
+
 	/* We translate LR and return */
 	mflr	r3
 	tovirt(r3,r3)
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f2352fc5cbbe..6059053e7158 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -20,6 +20,7 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig"
 source "arch/powerpc/platforms/44x/Kconfig"
 source "arch/powerpc/platforms/40x/Kconfig"
 source "arch/powerpc/platforms/amigaone/Kconfig"
+source "arch/powerpc/platforms/wsp/Kconfig"
 
 config KVM_GUEST
 	bool "KVM Guest support"
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index fdb9f0b0d7a8..73e2116cfeed 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -22,3 +22,4 @@ obj-$(CONFIG_PPC_CELL)		+= cell/
 obj-$(CONFIG_PPC_PS3)		+= ps3/
 obj-$(CONFIG_EMBEDDED6xx)	+= embedded6xx/
 obj-$(CONFIG_AMIGAONE)		+= amigaone/
+obj-$(CONFIG_PPC_WSP)		+= wsp/
diff --git a/arch/powerpc/platforms/wsp/Kconfig b/arch/powerpc/platforms/wsp/Kconfig
new file mode 100644
index 000000000000..c3c48eb62cc1
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/Kconfig
@@ -0,0 +1,28 @@
+config PPC_WSP
+	bool
+	default n
+
+menu "WSP platform selection"
+	depends on PPC_BOOK3E_64
+
+config PPC_PSR2
+	bool "PSR-2 platform"
+	select PPC_A2
+	select GENERIC_TBSYNC
+	select PPC_SCOM
+	select EPAPR_BOOT
+	select PPC_WSP
+	select PPC_XICS
+	select PPC_ICP_NATIVE
+	default y
+
+endmenu
+
+config PPC_A2_DD2
+	bool "Support for DD2 based A2/WSP systems"
+	depends on PPC_A2
+
+config WORKAROUND_ERRATUM_463
+	depends on PPC_A2_DD2
+	bool "Workaround erratum 463"
+	default y
diff --git a/arch/powerpc/platforms/wsp/Makefile b/arch/powerpc/platforms/wsp/Makefile
new file mode 100644
index 000000000000..095be73d6cd4
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/Makefile
@@ -0,0 +1,6 @@
+ccflags-y			+= -mno-minimal-toc
+
+obj-y				+= setup.o ics.o
+obj-$(CONFIG_PPC_PSR2)		+= psr2.o opb_pic.o
+obj-$(CONFIG_PPC_WSP)		+= scom_wsp.o
+obj-$(CONFIG_SMP)		+= smp.o scom_smp.o
diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c
new file mode 100644
index 000000000000..e53bd9e7b125
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/ics.c
@@ -0,0 +1,712 @@
+/*
+ * Copyright 2008-2011 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/xics.h>
+
+#include "wsp.h"
+#include "ics.h"
+
+
+/* WSP ICS */
+
+struct wsp_ics {
+	struct ics ics;
+	struct device_node *dn;
+	void __iomem *regs;
+	spinlock_t lock;
+	unsigned long *bitmap;
+	u32 chip_id;
+	u32 lsi_base;
+	u32 lsi_count;
+	u64 hwirq_start;
+	u64 count;
+#ifdef CONFIG_SMP
+	int *hwirq_cpu_map;
+#endif
+};
+
+#define to_wsp_ics(ics)	container_of(ics, struct wsp_ics, ics)
+
+#define INT_SRC_LAYER_BUID_REG(base)	((base) + 0x00)
+#define IODA_TBL_ADDR_REG(base)		((base) + 0x18)
+#define IODA_TBL_DATA_REG(base)		((base) + 0x20)
+#define XIVE_UPDATE_REG(base)		((base) + 0x28)
+#define ICS_INT_CAPS_REG(base)		((base) + 0x30)
+
+#define TBL_AUTO_INCREMENT	((1UL << 63) | (1UL << 15))
+#define TBL_SELECT_XIST		(1UL << 48)
+#define TBL_SELECT_XIVT		(1UL << 49)
+
+#define IODA_IRQ(irq)		((irq) & (0x7FFULL))	/* HRM 5.1.3.4 */
+
+#define XIST_REQUIRED		0x8
+#define XIST_REJECTED		0x4
+#define XIST_PRESENTED		0x2
+#define XIST_PENDING		0x1
+
+#define XIVE_SERVER_SHIFT	42
+#define XIVE_SERVER_MASK	0xFFFFULL
+#define XIVE_PRIORITY_MASK	0xFFULL
+#define XIVE_PRIORITY_SHIFT	32
+#define XIVE_WRITE_ENABLE	(1ULL << 63)
+
+/*
+ * The docs refer to a 6 bit field called ChipID, which consists of a
+ * 3 bit NodeID and a 3 bit ChipID. On WSP the ChipID is always zero
+ * so we ignore it, and every where we use "chip id" in this code we
+ * mean the NodeID.
+ */
+#define WSP_ICS_CHIP_SHIFT		17
+
+
+static struct wsp_ics *ics_list;
+static int num_ics;
+
+/* ICS Source controller accessors */
+
+static u64 wsp_ics_get_xive(struct wsp_ics *ics, unsigned int irq)
+{
+	unsigned long flags;
+	u64 xive;
+
+	spin_lock_irqsave(&ics->lock, flags);
+	out_be64(IODA_TBL_ADDR_REG(ics->regs), TBL_SELECT_XIVT | IODA_IRQ(irq));
+	xive = in_be64(IODA_TBL_DATA_REG(ics->regs));
+	spin_unlock_irqrestore(&ics->lock, flags);
+
+	return xive;
+}
+
+static void wsp_ics_set_xive(struct wsp_ics *ics, unsigned int irq, u64 xive)
+{
+	xive &= ~XIVE_ADDR_MASK;
+	xive |= (irq & XIVE_ADDR_MASK);
+	xive |= XIVE_WRITE_ENABLE;
+
+	out_be64(XIVE_UPDATE_REG(ics->regs), xive);
+}
+
+static u64 xive_set_server(u64 xive, unsigned int server)
+{
+	u64 mask = ~(XIVE_SERVER_MASK << XIVE_SERVER_SHIFT);
+
+	xive &= mask;
+	xive |= (server & XIVE_SERVER_MASK) << XIVE_SERVER_SHIFT;
+
+	return xive;
+}
+
+static u64 xive_set_priority(u64 xive, unsigned int priority)
+{
+	u64 mask = ~(XIVE_PRIORITY_MASK << XIVE_PRIORITY_SHIFT);
+
+	xive &= mask;
+	xive |= (priority & XIVE_PRIORITY_MASK) << XIVE_PRIORITY_SHIFT;
+
+	return xive;
+}
+
+
+#ifdef CONFIG_SMP
+/* Find logical CPUs within mask on a given chip and store result in ret */
+void cpus_on_chip(int chip_id, cpumask_t *mask, cpumask_t *ret)
+{
+	int cpu, chip;
+	struct device_node *cpu_dn, *dn;
+	const u32 *prop;
+
+	cpumask_clear(ret);
+	for_each_cpu(cpu, mask) {
+		cpu_dn = of_get_cpu_node(cpu, NULL);
+		if (!cpu_dn)
+			continue;
+
+		prop = of_get_property(cpu_dn, "at-node", NULL);
+		if (!prop) {
+			of_node_put(cpu_dn);
+			continue;
+		}
+
+		dn = of_find_node_by_phandle(*prop);
+		of_node_put(cpu_dn);
+
+		chip = wsp_get_chip_id(dn);
+		if (chip == chip_id)
+			cpumask_set_cpu(cpu, ret);
+
+		of_node_put(dn);
+	}
+}
+
+/* Store a suitable CPU to handle a hwirq in the ics->hwirq_cpu_map cache */
+static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
+			   const cpumask_t *affinity)
+{
+	cpumask_var_t avail, newmask;
+	int ret = -ENOMEM, cpu, cpu_rover = 0, target;
+	int index = hwirq - ics->hwirq_start;
+	unsigned int nodeid;
+
+	BUG_ON(index < 0 || index >= ics->count);
+
+	if (!ics->hwirq_cpu_map)
+		return -ENOMEM;
+
+	if (!distribute_irqs) {
+		ics->hwirq_cpu_map[hwirq - ics->hwirq_start] = xics_default_server;
+		return 0;
+	}
+
+	/* Allocate needed CPU masks */
+	if (!alloc_cpumask_var(&avail, GFP_KERNEL))
+		goto ret;
+	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
+		goto freeavail;
+
+	/* Find PBus attached to the source of this IRQ */
+	nodeid = (hwirq >> WSP_ICS_CHIP_SHIFT) & 0x3; /* 12:14 */
+
+	/* Find CPUs that could handle this IRQ */
+	if (affinity)
+		cpumask_and(avail, cpu_online_mask, affinity);
+	else
+		cpumask_copy(avail, cpu_online_mask);
+
+	/* Narrow selection down to logical CPUs on the same chip */
+	cpus_on_chip(nodeid, avail, newmask);
+
+	/* Ensure we haven't narrowed it down to 0 */
+	if (unlikely(cpumask_empty(newmask))) {
+		if (unlikely(cpumask_empty(avail))) {
+			ret = -1;
+			goto out;
+		}
+		cpumask_copy(newmask, avail);
+	}
+
+	/* Choose a CPU out of those we narrowed it down to in round robin */
+	target = hwirq % cpumask_weight(newmask);
+	for_each_cpu(cpu, newmask) {
+		if (cpu_rover++ >= target) {
+			ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu);
+			ret = 0;
+			goto out;
+		}
+	}
+
+	/* Shouldn't happen */
+	WARN_ON(1);
+
+out:
+	free_cpumask_var(newmask);
+freeavail:
+	free_cpumask_var(avail);
+ret:
+	if (ret < 0) {
+		ics->hwirq_cpu_map[index] = cpumask_first(cpu_online_mask);
+		pr_warning("Error, falling hwirq 0x%x routing back to CPU %i\n",
+			   hwirq, ics->hwirq_cpu_map[index]);
+	}
+	return ret;
+}
+
+static void alloc_irq_map(struct wsp_ics *ics)
+{
+	int i;
+
+	ics->hwirq_cpu_map = kmalloc(sizeof(int) * ics->count, GFP_KERNEL);
+	if (!ics->hwirq_cpu_map) {
+		pr_warning("Allocate hwirq_cpu_map failed, "
+			   "IRQ balancing disabled\n");
+		return;
+	}
+
+	for (i=0; i < ics->count; i++)
+		ics->hwirq_cpu_map[i] = xics_default_server;
+}
+
+static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
+{
+	int index = hwirq - ics->hwirq_start;
+
+	BUG_ON(index < 0 || index >= ics->count);
+
+	if (!ics->hwirq_cpu_map)
+		return xics_default_server;
+
+	return ics->hwirq_cpu_map[index];
+}
+#else /* !CONFIG_SMP */
+static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq,
+			   const cpumask_t *affinity)
+{
+	return 0;
+}
+
+static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq)
+{
+	return xics_default_server;
+}
+
+static void alloc_irq_map(struct wsp_ics *ics) { }
+#endif
+
+static void wsp_chip_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	struct wsp_ics *ics;
+	int server;
+	u64 xive;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	ics = d->chip_data;
+	if (WARN_ON(!ics))
+		return;
+
+	server = get_irq_server(ics, hw_irq);
+
+	xive = wsp_ics_get_xive(ics, hw_irq);
+	xive = xive_set_server(xive, server);
+	xive = xive_set_priority(xive, DEFAULT_PRIORITY);
+	wsp_ics_set_xive(ics, hw_irq, xive);
+}
+
+static unsigned int wsp_chip_startup(struct irq_data *d)
+{
+	/* unmask it */
+	wsp_chip_unmask_irq(d);
+	return 0;
+}
+
+static void wsp_mask_real_irq(unsigned int hw_irq, struct wsp_ics *ics)
+{
+	u64 xive;
+
+	if (hw_irq == XICS_IPI)
+		return;
+
+	if (WARN_ON(!ics))
+		return;
+	xive = wsp_ics_get_xive(ics, hw_irq);
+	xive = xive_set_server(xive, xics_default_server);
+	xive = xive_set_priority(xive, LOWEST_PRIORITY);
+	wsp_ics_set_xive(ics, hw_irq, xive);
+}
+
+static void wsp_chip_mask_irq(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	struct wsp_ics *ics = d->chip_data;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return;
+
+	wsp_mask_real_irq(hw_irq, ics);
+}
+
+static int wsp_chip_set_affinity(struct irq_data *d,
+				 const struct cpumask *cpumask, bool force)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	struct wsp_ics *ics;
+	int ret;
+	u64 xive;
+
+	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
+		return -1;
+
+	ics = d->chip_data;
+	if (WARN_ON(!ics))
+		return -1;
+	xive = wsp_ics_get_xive(ics, hw_irq);
+
+	/*
+	 * For the moment only implement delivery to all cpus or one cpu.
+	 * Get current irq_server for the given irq
+	 */
+	ret = cache_hwirq_map(ics, d->irq, cpumask);
+	if (ret == -1) {
+		char cpulist[128];
+		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
+		pr_warning("%s: No online cpus in the mask %s for irq %d\n",
+			   __func__, cpulist, d->irq);
+		return -1;
+	} else if (ret == -ENOMEM) {
+		pr_warning("%s: Out of memory\n", __func__);
+		return -1;
+	}
+
+	xive = xive_set_server(xive, get_irq_server(ics, hw_irq));
+	wsp_ics_set_xive(ics, hw_irq, xive);
+
+	return 0;
+}
+
+static struct irq_chip wsp_irq_chip = {
+	.name = "WSP ICS",
+	.irq_startup		= wsp_chip_startup,
+	.irq_mask		= wsp_chip_mask_irq,
+	.irq_unmask		= wsp_chip_unmask_irq,
+	.irq_set_affinity	= wsp_chip_set_affinity
+};
+
+static int wsp_ics_host_match(struct ics *ics, struct device_node *dn)
+{
+	/* All ICSs in the system implement a global irq number space,
+	 * so match against them all. */
+	return of_device_is_compatible(dn, "ibm,ppc-xics");
+}
+
+static int wsp_ics_match_hwirq(struct wsp_ics *wsp_ics, unsigned int hwirq)
+{
+	if (hwirq >= wsp_ics->hwirq_start &&
+	    hwirq <  wsp_ics->hwirq_start + wsp_ics->count)
+		return 1;
+
+	return 0;
+}
+
+static int wsp_ics_map(struct ics *ics, unsigned int virq)
+{
+	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
+	unsigned int hw_irq = virq_to_hw(virq);
+	unsigned long flags;
+
+	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
+		return -ENOENT;
+
+	irq_set_chip_and_handler(virq, &wsp_irq_chip, handle_fasteoi_irq);
+
+	irq_set_chip_data(virq, wsp_ics);
+
+	spin_lock_irqsave(&wsp_ics->lock, flags);
+	bitmap_allocate_region(wsp_ics->bitmap, hw_irq - wsp_ics->hwirq_start, 0);
+	spin_unlock_irqrestore(&wsp_ics->lock, flags);
+
+	return 0;
+}
+
+static void wsp_ics_mask_unknown(struct ics *ics, unsigned long hw_irq)
+{
+	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
+
+	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
+		return;
+
+	pr_err("%s: IRQ %lu (real) is invalid, disabling it.\n", __func__, hw_irq);
+	wsp_mask_real_irq(hw_irq, wsp_ics);
+}
+
+static long wsp_ics_get_server(struct ics *ics, unsigned long hw_irq)
+{
+	struct wsp_ics *wsp_ics = to_wsp_ics(ics);
+
+	if (!wsp_ics_match_hwirq(wsp_ics, hw_irq))
+		return -ENOENT;
+
+	return get_irq_server(wsp_ics, hw_irq);
+}
+
+/* HW Number allocation API */
+
+static struct wsp_ics *wsp_ics_find_dn_ics(struct device_node *dn)
+{
+	struct device_node *iparent;
+	int i;
+
+	iparent = of_irq_find_parent(dn);
+	if (!iparent) {
+		pr_err("wsp_ics: Failed to find interrupt parent!\n");
+		return NULL;
+	}
+
+	for(i = 0; i < num_ics; i++) {
+		if(ics_list[i].dn == iparent)
+			break;
+	}
+
+	if (i >= num_ics) {
+		pr_err("wsp_ics: Unable to find parent bitmap!\n");
+		return NULL;
+	}
+
+	return &ics_list[i];
+}
+
+int wsp_ics_alloc_irq(struct device_node *dn, int num)
+{
+	struct wsp_ics *ics;
+	int order, offset;
+
+	ics = wsp_ics_find_dn_ics(dn);
+	if (!ics)
+		return -ENODEV;
+
+	/* Fast, but overly strict if num isn't a power of two */
+	order = get_count_order(num);
+
+	spin_lock_irq(&ics->lock);
+	offset = bitmap_find_free_region(ics->bitmap, ics->count, order);
+	spin_unlock_irq(&ics->lock);
+
+	if (offset < 0)
+		return offset;
+
+	return offset + ics->hwirq_start;
+}
+
+void wsp_ics_free_irq(struct device_node *dn, unsigned int irq)
+{
+	struct wsp_ics *ics;
+
+	ics = wsp_ics_find_dn_ics(dn);
+	if (WARN_ON(!ics))
+		return;
+
+	spin_lock_irq(&ics->lock);
+	bitmap_release_region(ics->bitmap, irq, 0);
+	spin_unlock_irq(&ics->lock);
+}
+
+/* Initialisation */
+
+static int __init wsp_ics_bitmap_setup(struct wsp_ics *ics,
+				      struct device_node *dn)
+{
+	int len, i, j, size;
+	u32 start, count;
+	const u32 *p;
+
+	size = BITS_TO_LONGS(ics->count) * sizeof(long);
+	ics->bitmap = kzalloc(size, GFP_KERNEL);
+	if (!ics->bitmap) {
+		pr_err("wsp_ics: ENOMEM allocating IRQ bitmap!\n");
+		return -ENOMEM;
+	}
+
+	spin_lock_init(&ics->lock);
+
+	p = of_get_property(dn, "available-ranges", &len);
+	if (!p || !len) {
+		/* FIXME this should be a WARN() once mambo is updated */
+		pr_err("wsp_ics: No available-ranges defined for %s\n",
+			dn->full_name);
+		return 0;
+	}
+
+	if (len % (2 * sizeof(u32)) != 0) {
+		/* FIXME this should be a WARN() once mambo is updated */
+		pr_err("wsp_ics: Invalid available-ranges for %s\n",
+			dn->full_name);
+		return 0;
+	}
+
+	bitmap_fill(ics->bitmap, ics->count);
+
+	for (i = 0; i < len / sizeof(u32); i += 2) {
+		start = of_read_number(p + i, 1);
+		count = of_read_number(p + i + 1, 1);
+
+		pr_devel("%s: start: %d count: %d\n", __func__, start, count);
+
+		if ((start + count) > (ics->hwirq_start + ics->count) ||
+		     start < ics->hwirq_start) {
+			pr_err("wsp_ics: Invalid range! -> %d to %d\n",
+					start, start + count);
+			break;
+		}
+
+		for (j = 0; j < count; j++)
+			bitmap_release_region(ics->bitmap,
+				(start + j) - ics->hwirq_start, 0);
+	}
+
+	/* Ensure LSIs are not available for allocation */
+	bitmap_allocate_region(ics->bitmap, ics->lsi_base,
+			       get_count_order(ics->lsi_count));
+
+	return 0;
+}
+
+static int __init wsp_ics_setup(struct wsp_ics *ics, struct device_node *dn)
+{
+	u32 lsi_buid, msi_buid, msi_base, msi_count;
+	void __iomem *regs;
+	const u32 *p;
+	int rc, len, i;
+	u64 caps, buid;
+
+	p = of_get_property(dn, "interrupt-ranges", &len);
+	if (!p || len < (2 * sizeof(u32))) {
+		pr_err("wsp_ics: No/bad interrupt-ranges found on %s\n",
+			dn->full_name);
+		return -ENOENT;
+	}
+
+	if (len > (2 * sizeof(u32))) {
+		pr_err("wsp_ics: Multiple ics ranges not supported.\n");
+		return -EINVAL;
+	}
+
+	regs = of_iomap(dn, 0);
+	if (!regs) {
+		pr_err("wsp_ics: of_iomap(%s) failed\n", dn->full_name);
+		return -ENXIO;
+	}
+
+	ics->hwirq_start = of_read_number(p, 1);
+	ics->count = of_read_number(p + 1, 1);
+	ics->regs = regs;
+
+	ics->chip_id = wsp_get_chip_id(dn);
+	if (WARN_ON(ics->chip_id < 0))
+		ics->chip_id = 0;
+
+	/* Get some informations about the critter */
+	caps = in_be64(ICS_INT_CAPS_REG(ics->regs));
+	buid = in_be64(INT_SRC_LAYER_BUID_REG(ics->regs));
+	ics->lsi_count = caps >> 56;
+	msi_count = (caps >> 44) & 0x7ff;
+
+	/* Note: LSI BUID is 9 bits, but really only 3 are BUID and the
+	 * rest is mixed in the interrupt number. We store the whole
+	 * thing though
+	 */
+	lsi_buid = (buid >> 48) & 0x1ff;
+	ics->lsi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | lsi_buid << 5;
+	msi_buid = (buid >> 37) & 0x7;
+	msi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | msi_buid << 11;
+
+	pr_info("wsp_ics: Found %s\n", dn->full_name);
+	pr_info("wsp_ics:    irq range : 0x%06llx..0x%06llx\n",
+		ics->hwirq_start, ics->hwirq_start + ics->count - 1);
+	pr_info("wsp_ics:    %4d LSIs : 0x%06x..0x%06x\n",
+		ics->lsi_count, ics->lsi_base,
+		ics->lsi_base + ics->lsi_count - 1);
+	pr_info("wsp_ics:    %4d MSIs : 0x%06x..0x%06x\n",
+		msi_count, msi_base,
+		msi_base + msi_count - 1);
+
+	/* Let's check the HW config is sane */
+	if (ics->lsi_base < ics->hwirq_start ||
+	    (ics->lsi_base + ics->lsi_count) > (ics->hwirq_start + ics->count))
+		pr_warning("wsp_ics: WARNING ! LSIs out of interrupt-ranges !\n");
+	if (msi_base < ics->hwirq_start ||
+	    (msi_base + msi_count) > (ics->hwirq_start + ics->count))
+		pr_warning("wsp_ics: WARNING ! MSIs out of interrupt-ranges !\n");
+
+	/* We don't check for overlap between LSI and MSI, which will happen
+	 * if we use the same BUID, I'm not sure yet how legit that is.
+	 */
+
+	rc = wsp_ics_bitmap_setup(ics, dn);
+	if (rc) {
+		iounmap(regs);
+		return rc;
+	}
+
+	ics->dn = of_node_get(dn);
+	alloc_irq_map(ics);
+
+	for(i = 0; i < ics->count; i++)
+		wsp_mask_real_irq(ics->hwirq_start + i, ics);
+
+	ics->ics.map = wsp_ics_map;
+	ics->ics.mask_unknown = wsp_ics_mask_unknown;
+	ics->ics.get_server = wsp_ics_get_server;
+	ics->ics.host_match = wsp_ics_host_match;
+
+	xics_register_ics(&ics->ics);
+
+	return 0;
+}
+
+static void __init wsp_ics_set_default_server(void)
+{
+	struct device_node *np;
+	u32 hwid;
+
+	/* Find the server number for the boot cpu. */
+	np = of_get_cpu_node(boot_cpuid, NULL);
+	BUG_ON(!np);
+
+	hwid = get_hard_smp_processor_id(boot_cpuid);
+
+	pr_info("wsp_ics: default server is %#x, CPU %s\n", hwid, np->full_name);
+	xics_default_server = hwid;
+
+	of_node_put(np);
+}
+
+static int __init wsp_ics_init(void)
+{
+	struct device_node *dn;
+	struct wsp_ics *ics;
+	int rc, found;
+
+	wsp_ics_set_default_server();
+
+	found = 0;
+	for_each_compatible_node(dn, NULL, "ibm,ppc-xics")
+		found++;
+
+	if (found == 0) {
+		pr_err("wsp_ics: No ICS's found!\n");
+		return -ENODEV;
+	}
+
+	ics_list = kmalloc(sizeof(*ics) * found, GFP_KERNEL);
+	if (!ics_list) {
+		pr_err("wsp_ics: No memory for structs.\n");
+		return -ENOMEM;
+	}
+
+	num_ics = 0;
+	ics = ics_list;
+	for_each_compatible_node(dn, NULL, "ibm,wsp-xics") {
+		rc = wsp_ics_setup(ics, dn);
+		if (rc == 0) {
+			ics++;
+			num_ics++;
+		}
+	}
+
+	if (found != num_ics) {
+		pr_err("wsp_ics: Failed setting up %d ICS's\n",
+			found - num_ics);
+		return -1;
+	}
+
+	return 0;
+}
+
+void __init wsp_init_irq(void)
+{
+	wsp_ics_init();
+	xics_init();
+
+	/* We need to patch our irq chip's EOI to point to the right ICP */
+	wsp_irq_chip.irq_eoi = icp_ops->eoi;
+}
diff --git a/arch/powerpc/platforms/wsp/ics.h b/arch/powerpc/platforms/wsp/ics.h
new file mode 100644
index 000000000000..e34d53102640
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/ics.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2009 IBM Corporation.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ICS_H
+#define __ICS_H
+
+#define XIVE_ADDR_MASK		0x7FFULL
+
+extern void wsp_init_irq(void);
+
+extern int wsp_ics_alloc_irq(struct device_node *dn, int num);
+extern void wsp_ics_free_irq(struct device_node *dn, unsigned int irq);
+
+#endif /* __ICS_H */
diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c
new file mode 100644
index 000000000000..be05631a3c1c
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/opb_pic.c
@@ -0,0 +1,332 @@
+/*
+ * IBM Onboard Peripheral Bus Interrupt Controller
+ *
+ * Copyright 2010 Jack Miller, IBM Corporation.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+
+#include <asm/reg_a2.h>
+#include <asm/irq.h>
+
+#define OPB_NR_IRQS 32
+
+#define OPB_MLSASIER	0x04    /* MLS Accumulated Status IER */
+#define OPB_MLSIR	0x50	/* MLS Interrupt Register */
+#define OPB_MLSIER	0x54	/* MLS Interrupt Enable Register */
+#define OPB_MLSIPR	0x58	/* MLS Interrupt Polarity Register */
+#define OPB_MLSIIR	0x5c	/* MLS Interrupt Inputs Register */
+
+static int opb_index = 0;
+
+struct opb_pic {
+	struct irq_host *host;
+	void *regs;
+	int index;
+	spinlock_t lock;
+};
+
+static u32 opb_in(struct opb_pic *opb, int offset)
+{
+	return in_be32(opb->regs + offset);
+}
+
+static void opb_out(struct opb_pic *opb, int offset, u32 val)
+{
+	out_be32(opb->regs + offset, val);
+}
+
+static void opb_unmask_irq(struct irq_data *d)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	u32 ier, bitset;
+
+	opb = d->chip_data;
+	bitset = (1 << (31 - irqd_to_hwirq(d)));
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	ier = opb_in(opb, OPB_MLSIER);
+	opb_out(opb, OPB_MLSIER, ier | bitset);
+	ier = opb_in(opb, OPB_MLSIER);
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static void opb_mask_irq(struct irq_data *d)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	u32 ier, mask;
+
+	opb = d->chip_data;
+	mask = ~(1 << (31 - irqd_to_hwirq(d)));
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	ier = opb_in(opb, OPB_MLSIER);
+	opb_out(opb, OPB_MLSIER, ier & mask);
+	ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static void opb_ack_irq(struct irq_data *d)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	u32 bitset;
+
+	opb = d->chip_data;
+	bitset = (1 << (31 - irqd_to_hwirq(d)));
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	opb_out(opb, OPB_MLSIR, bitset);
+	opb_in(opb, OPB_MLSIR); // Flush posted writes
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static void opb_mask_ack_irq(struct irq_data *d)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	u32 bitset;
+	u32 ier, ir;
+
+	opb = d->chip_data;
+	bitset = (1 << (31 - irqd_to_hwirq(d)));
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	ier = opb_in(opb, OPB_MLSIER);
+	opb_out(opb, OPB_MLSIER, ier & ~bitset);
+	ier = opb_in(opb, OPB_MLSIER); // Flush posted writes
+
+	opb_out(opb, OPB_MLSIR, bitset);
+	ir = opb_in(opb, OPB_MLSIR); // Flush posted writes
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+}
+
+static int opb_set_irq_type(struct irq_data *d, unsigned int flow)
+{
+	struct opb_pic *opb;
+	unsigned long flags;
+	int invert, ipr, mask, bit;
+
+	opb = d->chip_data;
+
+	/* The only information we're interested in in the type is whether it's
+	 * a high or low trigger. For high triggered interrupts, the polarity
+	 * set for it in the MLS Interrupt Polarity Register is 0, for low
+	 * interrupts it's 1 so that the proper input in the MLS Interrupt Input
+	 * Register is interrupted as asserting the interrupt. */
+
+	switch (flow) {
+		case IRQ_TYPE_NONE:
+			opb_mask_irq(d);
+			return 0;
+
+		case IRQ_TYPE_LEVEL_HIGH:
+			invert = 0;
+			break;
+
+		case IRQ_TYPE_LEVEL_LOW:
+			invert = 1;
+			break;
+
+		default:
+			return -EINVAL;
+	}
+
+	bit = (1 << (31 - irqd_to_hwirq(d)));
+	mask = ~bit;
+
+	spin_lock_irqsave(&opb->lock, flags);
+
+	ipr = opb_in(opb, OPB_MLSIPR);
+	ipr = (ipr & mask) | (invert ? bit : 0);
+	opb_out(opb, OPB_MLSIPR, ipr);
+	ipr = opb_in(opb, OPB_MLSIPR);  // Flush posted writes
+
+	spin_unlock_irqrestore(&opb->lock, flags);
+
+	/* Record the type in the interrupt descriptor */
+	irqd_set_trigger_type(d, flow);
+
+	return 0;
+}
+
+static struct irq_chip opb_irq_chip = {
+	.name		= "OPB",
+	.irq_mask	= opb_mask_irq,
+	.irq_unmask	= opb_unmask_irq,
+	.irq_mask_ack	= opb_mask_ack_irq,
+	.irq_ack	= opb_ack_irq,
+	.irq_set_type	= opb_set_irq_type
+};
+
+static int opb_host_map(struct irq_host *host, unsigned int virq,
+		irq_hw_number_t hwirq)
+{
+	struct opb_pic *opb;
+
+	opb = host->host_data;
+
+	/* Most of the important stuff is handled by the generic host code, like
+	 * the lookup, so just attach some info to the virtual irq */
+
+	irq_set_chip_data(virq, opb);
+	irq_set_chip_and_handler(virq, &opb_irq_chip, handle_level_irq);
+	irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+static int opb_host_xlate(struct irq_host *host, struct device_node *dn,
+		const u32 *intspec, unsigned int intsize,
+		irq_hw_number_t *out_hwirq, unsigned int *out_type)
+{
+	/* Interrupt size must == 2 */
+	BUG_ON(intsize != 2);
+	*out_hwirq = intspec[0];
+	*out_type = intspec[1];
+	return 0;
+}
+
+static struct irq_host_ops opb_host_ops = {
+	.map = opb_host_map,
+	.xlate = opb_host_xlate,
+};
+
+irqreturn_t opb_irq_handler(int irq, void *private)
+{
+	struct opb_pic *opb;
+	u32 ir, src, subvirq;
+
+	opb = (struct opb_pic *) private;
+
+	/* Read the OPB MLS Interrupt Register for
+	 * asserted interrupts */
+	ir = opb_in(opb, OPB_MLSIR);
+	if (!ir)
+		return IRQ_NONE;
+
+	do {
+		/* Get 1 - 32 source, *NOT* bit */
+		src = 32 - ffs(ir);
+
+		/* Translate from the OPB's conception of interrupt number to
+		 * Linux's virtual IRQ */
+
+		subvirq = irq_linear_revmap(opb->host, src);
+
+		generic_handle_irq(subvirq);
+	} while ((ir = opb_in(opb, OPB_MLSIR)));
+
+	return IRQ_HANDLED;
+}
+
+struct opb_pic *opb_pic_init_one(struct device_node *dn)
+{
+	struct opb_pic *opb;
+	struct resource res;
+
+	if (of_address_to_resource(dn, 0, &res)) {
+		printk(KERN_ERR "opb: Couldn't translate resource\n");
+		return  NULL;
+	}
+
+	opb = kzalloc(sizeof(struct opb_pic), GFP_KERNEL);
+	if (!opb) {
+		printk(KERN_ERR "opb: Failed to allocate opb struct!\n");
+		return NULL;
+	}
+
+	/* Get access to the OPB MMIO registers */
+	opb->regs = ioremap(res.start + 0x10000, 0x1000);
+	if (!opb->regs) {
+		printk(KERN_ERR "opb: Failed to allocate register space!\n");
+		goto free_opb;
+	}
+
+	/* Allocate an irq host so that Linux knows that despite only
+	 * having one interrupt to issue, we're the controller for multiple
+	 * hardware IRQs, so later we can lookup their virtual IRQs. */
+
+	opb->host = irq_alloc_host(dn, IRQ_HOST_MAP_LINEAR,
+			OPB_NR_IRQS, &opb_host_ops, -1);
+
+	if (!opb->host) {
+		printk(KERN_ERR "opb: Failed to allocate IRQ host!\n");
+		goto free_regs;
+	}
+
+	opb->index = opb_index++;
+	spin_lock_init(&opb->lock);
+	opb->host->host_data = opb;
+
+	/* Disable all interrupts by default */
+	opb_out(opb, OPB_MLSASIER, 0);
+	opb_out(opb, OPB_MLSIER, 0);
+
+	/* ACK any interrupts left by FW */
+	opb_out(opb, OPB_MLSIR, 0xFFFFFFFF);
+
+	return opb;
+
+free_regs:
+	iounmap(opb->regs);
+free_opb:
+	kfree(opb);
+	return NULL;
+}
+
+void __init opb_pic_init(void)
+{
+	struct device_node *dn;
+	struct opb_pic *opb;
+	int virq;
+	int rc;
+
+	/* Call init_one for each OPB device */
+	for_each_compatible_node(dn, NULL, "ibm,opb") {
+
+		/* Fill in an OPB struct */
+		opb = opb_pic_init_one(dn);
+		if (!opb) {
+			printk(KERN_WARNING "opb: Failed to init node, skipped!\n");
+			continue;
+		}
+
+		/* Map / get opb's hardware virtual irq */
+		virq = irq_of_parse_and_map(dn, 0);
+		if (virq <= 0) {
+			printk("opb: irq_op_parse_and_map failed!\n");
+			continue;
+		}
+
+		/* Attach opb interrupt handler to new virtual IRQ */
+		rc = request_irq(virq, opb_irq_handler, 0, "OPB LS Cascade", opb);
+		if (rc) {
+			printk("opb: request_irq failed: %d\n", rc);
+			continue;
+		}
+
+		printk("OPB%d init with %d IRQs at %p\n", opb->index,
+				OPB_NR_IRQS, opb->regs);
+	}
+}
diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c
new file mode 100644
index 000000000000..40f28916ff6c
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/psr2.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2008-2011, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/smp.h>
+
+#include <asm/machdep.h>
+#include <asm/system.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+
+#include "ics.h"
+#include "wsp.h"
+
+
+static void psr2_spin(void)
+{
+	hard_irq_disable();
+	for (;;) ;
+}
+
+static void psr2_restart(char *cmd)
+{
+	psr2_spin();
+}
+
+static int psr2_probe_devices(void)
+{
+	struct device_node *np;
+
+	/* Our RTC is a ds1500. It seems to be programatically compatible
+	 * with the ds1511 for which we have a driver so let's use that
+	 */
+	np = of_find_compatible_node(NULL, NULL, "dallas,ds1500");
+	if (np != NULL) {
+		struct resource res;
+		if (of_address_to_resource(np, 0, &res) == 0)
+			platform_device_register_simple("ds1511", 0, &res, 1);
+	}
+	return 0;
+}
+machine_arch_initcall(psr2_md, psr2_probe_devices);
+
+static void __init psr2_setup_arch(void)
+{
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000;
+
+	scom_init_wsp();
+
+	/* Setup SMP callback */
+#ifdef CONFIG_SMP
+	a2_setup_smp();
+#endif
+}
+
+static int __init psr2_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	if (!of_flat_dt_is_compatible(root, "ibm,psr2"))
+		return 0;
+
+	return 1;
+}
+
+static void __init psr2_init_irq(void)
+{
+	wsp_init_irq();
+	opb_pic_init();
+}
+
+define_machine(psr2_md) {
+	.name			= "PSR2 A2",
+	.probe			= psr2_probe,
+	.setup_arch		= psr2_setup_arch,
+	.restart		= psr2_restart,
+	.power_off		= psr2_spin,
+	.halt			= psr2_spin,
+	.calibrate_decr		= generic_calibrate_decr,
+	.init_IRQ		= psr2_init_irq,
+	.progress		= udbg_progress,
+	.power_save		= book3e_idle,
+};
diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c
new file mode 100644
index 000000000000..141e78032097
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/scom_smp.c
@@ -0,0 +1,427 @@
+/*
+ * SCOM support for A2 platforms
+ *
+ * Copyright 2007-2011 Benjamin Herrenschmidt, David Gibson,
+ *		       Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/cputhreads.h>
+#include <asm/reg_a2.h>
+#include <asm/scom.h>
+#include <asm/udbg.h>
+
+#include "wsp.h"
+
+#define SCOM_RAMC		0x2a		/* Ram Command */
+#define SCOM_RAMC_TGT1_EXT	0x80000000
+#define SCOM_RAMC_SRC1_EXT	0x40000000
+#define SCOM_RAMC_SRC2_EXT	0x20000000
+#define SCOM_RAMC_SRC3_EXT	0x10000000
+#define SCOM_RAMC_ENABLE	0x00080000
+#define SCOM_RAMC_THREADSEL	0x00060000
+#define SCOM_RAMC_EXECUTE	0x00010000
+#define SCOM_RAMC_MSR_OVERRIDE	0x00008000
+#define SCOM_RAMC_MSR_PR	0x00004000
+#define SCOM_RAMC_MSR_GS	0x00002000
+#define SCOM_RAMC_FORCE		0x00001000
+#define SCOM_RAMC_FLUSH		0x00000800
+#define SCOM_RAMC_INTERRUPT	0x00000004
+#define SCOM_RAMC_ERROR		0x00000002
+#define SCOM_RAMC_DONE		0x00000001
+#define SCOM_RAMI		0x29		/* Ram Instruction */
+#define SCOM_RAMIC		0x28		/* Ram Instruction and Command */
+#define SCOM_RAMIC_INSN		0xffffffff00000000
+#define SCOM_RAMD		0x2d		/* Ram Data */
+#define SCOM_RAMDH		0x2e		/* Ram Data High */
+#define SCOM_RAMDL		0x2f		/* Ram Data Low */
+#define SCOM_PCCR0		0x33		/* PC Configuration Register 0 */
+#define SCOM_PCCR0_ENABLE_DEBUG	0x80000000
+#define SCOM_PCCR0_ENABLE_RAM	0x40000000
+#define SCOM_THRCTL		0x30		/* Thread Control and Status */
+#define SCOM_THRCTL_T0_STOP	0x80000000
+#define SCOM_THRCTL_T1_STOP	0x40000000
+#define SCOM_THRCTL_T2_STOP	0x20000000
+#define SCOM_THRCTL_T3_STOP	0x10000000
+#define SCOM_THRCTL_T0_STEP	0x08000000
+#define SCOM_THRCTL_T1_STEP	0x04000000
+#define SCOM_THRCTL_T2_STEP	0x02000000
+#define SCOM_THRCTL_T3_STEP	0x01000000
+#define SCOM_THRCTL_T0_RUN	0x00800000
+#define SCOM_THRCTL_T1_RUN	0x00400000
+#define SCOM_THRCTL_T2_RUN	0x00200000
+#define SCOM_THRCTL_T3_RUN	0x00100000
+#define SCOM_THRCTL_T0_PM	0x00080000
+#define SCOM_THRCTL_T1_PM	0x00040000
+#define SCOM_THRCTL_T2_PM	0x00020000
+#define SCOM_THRCTL_T3_PM	0x00010000
+#define SCOM_THRCTL_T0_UDE	0x00008000
+#define SCOM_THRCTL_T1_UDE	0x00004000
+#define SCOM_THRCTL_T2_UDE	0x00002000
+#define SCOM_THRCTL_T3_UDE	0x00001000
+#define SCOM_THRCTL_ASYNC_DIS	0x00000800
+#define SCOM_THRCTL_TB_DIS	0x00000400
+#define SCOM_THRCTL_DEC_DIS	0x00000200
+#define SCOM_THRCTL_AND		0x31		/* Thread Control and Status */
+#define SCOM_THRCTL_OR		0x32		/* Thread Control and Status */
+
+
+static DEFINE_PER_CPU(scom_map_t, scom_ptrs);
+
+static scom_map_t get_scom(int cpu, struct device_node *np, int *first_thread)
+{
+	scom_map_t scom = per_cpu(scom_ptrs, cpu);
+	int tcpu;
+
+	if (scom_map_ok(scom)) {
+		*first_thread = 0;
+		return scom;
+	}
+
+	*first_thread = 1;
+
+	scom = scom_map_device(np, 0);
+
+	for (tcpu = cpu_first_thread_sibling(cpu);
+	     tcpu <= cpu_last_thread_sibling(cpu); tcpu++)
+		per_cpu(scom_ptrs, tcpu) = scom;
+
+	/* Hack: for the boot core, this will actually get called on
+	 * the second thread up, not the first so our test above will
+	 * set first_thread incorrectly. */
+	if (cpu_first_thread_sibling(cpu) == 0)
+		*first_thread = 0;
+
+	return scom;
+}
+
+static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask)
+{
+	u64 cmd, mask, val;
+	int n = 0;
+
+	cmd = ((u64)insn << 32) | (((u64)extmask & 0xf) << 28)
+		| ((u64)thread << 17) | SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE;
+	mask = SCOM_RAMC_DONE | SCOM_RAMC_INTERRUPT | SCOM_RAMC_ERROR;
+
+	scom_write(scom, SCOM_RAMIC, cmd);
+
+	while (!((val = scom_read(scom, SCOM_RAMC)) & mask)) {
+		pr_devel("Waiting on RAMC = 0x%llx\n", val);
+		if (++n == 3) {
+			pr_err("RAMC timeout on instruction 0x%08x, thread %d\n",
+			       insn, thread);
+			return -1;
+		}
+	}
+
+	if (val & SCOM_RAMC_INTERRUPT) {
+		pr_err("RAMC interrupt on instruction 0x%08x, thread %d\n",
+		       insn, thread);
+		return -SCOM_RAMC_INTERRUPT;
+	}
+
+	if (val & SCOM_RAMC_ERROR) {
+		pr_err("RAMC error on instruction 0x%08x, thread %d\n",
+		       insn, thread);
+		return -SCOM_RAMC_ERROR;
+	}
+
+	return 0;
+}
+
+static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt,
+			  u64 *out_gpr)
+{
+	int rc;
+
+	/* or rN, rN, rN */
+	u32 insn = 0x7c000378 | (gpr << 21) | (gpr << 16) | (gpr << 11);
+	rc = a2_scom_ram(scom, thread, insn, alt ? 0xf : 0x0);
+	if (rc)
+		return rc;
+
+	*out_gpr = scom_read(scom, SCOM_RAMD);
+
+	return 0;
+}
+
+static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr)
+{
+	int rc, sprhi, sprlo;
+	u32 insn;
+
+	sprhi = spr >> 5;
+	sprlo = spr & 0x1f;
+	insn = 0x7c2002a6 | (sprlo << 16) | (sprhi << 11); /* mfspr r1,spr */
+
+	if (spr == 0x0ff0)
+		insn = 0x7c2000a6; /* mfmsr r1 */
+
+	rc = a2_scom_ram(scom, thread, insn, 0xf);
+	if (rc)
+		return rc;
+	return a2_scom_getgpr(scom, thread, 1, 1, out_spr);
+}
+
+static int a2_scom_setgpr(scom_map_t scom, int thread, int gpr,
+			  int alt, u64 val)
+{
+	u32 lis = 0x3c000000 | (gpr << 21);
+	u32 li = 0x38000000 | (gpr << 21);
+	u32 oris = 0x64000000 | (gpr << 21) | (gpr << 16);
+	u32 ori = 0x60000000 | (gpr << 21) | (gpr << 16);
+	u32 rldicr32 = 0x780007c6 | (gpr << 21) | (gpr << 16);
+	u32 highest = val >> 48;
+	u32 higher = (val >> 32) & 0xffff;
+	u32 high = (val >> 16) & 0xffff;
+	u32 low = val & 0xffff;
+	int lext = alt ? 0x8 : 0x0;
+	int oext = alt ? 0xf : 0x0;
+	int rc = 0;
+
+	if (highest)
+		rc |= a2_scom_ram(scom, thread, lis | highest, lext);
+
+	if (higher) {
+		if (highest)
+			rc |= a2_scom_ram(scom, thread, oris | higher, oext);
+		else
+			rc |= a2_scom_ram(scom, thread, li | higher, lext);
+	}
+
+	if (highest || higher)
+		rc |= a2_scom_ram(scom, thread, rldicr32, oext);
+
+	if (high) {
+		if (highest || higher)
+			rc |= a2_scom_ram(scom, thread, oris | high, oext);
+		else
+			rc |= a2_scom_ram(scom, thread, lis | high, lext);
+	}
+
+	if (highest || higher || high)
+		rc |= a2_scom_ram(scom, thread, ori | low, oext);
+	else
+		rc |= a2_scom_ram(scom, thread, li | low, lext);
+
+	return rc;
+}
+
+static int a2_scom_setspr(scom_map_t scom, int thread, int spr, u64 val)
+{
+	int sprhi = spr >> 5;
+	int sprlo = spr & 0x1f;
+	/* mtspr spr, r1 */
+	u32 insn = 0x7c2003a6 | (sprlo << 16) | (sprhi << 11);
+
+	if (spr == 0x0ff0)
+		insn = 0x7c200124; /* mtmsr r1 */
+
+	if (a2_scom_setgpr(scom, thread, 1, 1, val))
+		return -1;
+
+	return a2_scom_ram(scom, thread, insn, 0xf);
+}
+
+static int a2_scom_initial_tlb(scom_map_t scom, int thread)
+{
+	extern u32 a2_tlbinit_code_start[], a2_tlbinit_code_end[];
+	extern u32 a2_tlbinit_after_iprot_flush[];
+	extern u32 a2_tlbinit_after_linear_map[];
+	u32 assoc, entries, i;
+	u64 epn, tlbcfg;
+	u32 *p;
+	int rc;
+
+	/* Invalidate all entries (including iprot) */
+
+	rc = a2_scom_getspr(scom, thread, SPRN_TLB0CFG, &tlbcfg);
+	if (rc)
+		goto scom_fail;
+	entries = tlbcfg & TLBnCFG_N_ENTRY;
+	assoc = (tlbcfg & TLBnCFG_ASSOC) >> 24;
+	epn = 0;
+
+	/* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */
+	a2_scom_setspr(scom, thread, SPRN_MMUCR2, 0x000a7531);
+	/* Set MMUCR3 to write all thids bit to the TLB */
+	a2_scom_setspr(scom, thread, SPRN_MMUCR3, 0x0000000f);
+
+	/* Set MAS1 for 1G page size, and MAS2 to our initial EPN */
+	a2_scom_setspr(scom, thread, SPRN_MAS1, MAS1_TSIZE(BOOK3E_PAGESZ_1GB));
+	a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
+	for (i = 0; i < entries; i++) {
+
+		a2_scom_setspr(scom, thread, SPRN_MAS0, MAS0_ESEL(i % assoc));
+
+		/* tlbwe */
+		rc = a2_scom_ram(scom, thread, 0x7c0007a4, 0);
+		if (rc)
+			goto scom_fail;
+
+		/* Next entry is new address? */
+		if((i + 1) % assoc == 0) {
+			epn += (1 << 30);
+			a2_scom_setspr(scom, thread, SPRN_MAS2, epn);
+		}
+	}
+
+	/* Setup args for linear mapping */
+	rc = a2_scom_setgpr(scom, thread, 3, 0, MAS0_TLBSEL(0));
+	if (rc)
+		goto scom_fail;
+
+	/* Linear mapping */
+	for (p = a2_tlbinit_code_start; p < a2_tlbinit_after_linear_map; p++) {
+		rc = a2_scom_ram(scom, thread, *p, 0);
+		if (rc)
+			goto scom_fail;
+	}
+
+	/*
+	 * For the boot thread, between the linear mapping and the debug
+	 * mappings there is a loop to flush iprot mappings. Ramming doesn't do
+	 * branches, but the secondary threads don't need to be nearly as smart
+	 * (i.e. we don't need to worry about invalidating the mapping we're
+	 * standing on).
+	 */
+
+	/* Debug mappings. Expects r11 = MAS0 from linear map (set above) */
+	for (p = a2_tlbinit_after_iprot_flush; p < a2_tlbinit_code_end; p++) {
+		rc = a2_scom_ram(scom, thread, *p, 0);
+		if (rc)
+			goto scom_fail;
+	}
+
+scom_fail:
+	if (rc)
+		pr_err("Setting up initial TLB failed, err %d\n", rc);
+
+	if (rc == -SCOM_RAMC_INTERRUPT) {
+		/* Interrupt, dump some status */
+		int rc[10];
+		u64 iar, srr0, srr1, esr, mas0, mas1, mas2, mas7_3, mas8, ccr2;
+		rc[0] = a2_scom_getspr(scom, thread, SPRN_IAR, &iar);
+		rc[1] = a2_scom_getspr(scom, thread, SPRN_SRR0, &srr0);
+		rc[2] = a2_scom_getspr(scom, thread, SPRN_SRR1, &srr1);
+		rc[3] = a2_scom_getspr(scom, thread, SPRN_ESR, &esr);
+		rc[4] = a2_scom_getspr(scom, thread, SPRN_MAS0, &mas0);
+		rc[5] = a2_scom_getspr(scom, thread, SPRN_MAS1, &mas1);
+		rc[6] = a2_scom_getspr(scom, thread, SPRN_MAS2, &mas2);
+		rc[7] = a2_scom_getspr(scom, thread, SPRN_MAS7_MAS3, &mas7_3);
+		rc[8] = a2_scom_getspr(scom, thread, SPRN_MAS8, &mas8);
+		rc[9] = a2_scom_getspr(scom, thread, SPRN_A2_CCR2, &ccr2);
+		pr_err(" -> retreived IAR =0x%llx (err %d)\n", iar, rc[0]);
+		pr_err("    retreived SRR0=0x%llx (err %d)\n", srr0, rc[1]);
+		pr_err("    retreived SRR1=0x%llx (err %d)\n", srr1, rc[2]);
+		pr_err("    retreived ESR =0x%llx (err %d)\n", esr, rc[3]);
+		pr_err("    retreived MAS0=0x%llx (err %d)\n", mas0, rc[4]);
+		pr_err("    retreived MAS1=0x%llx (err %d)\n", mas1, rc[5]);
+		pr_err("    retreived MAS2=0x%llx (err %d)\n", mas2, rc[6]);
+		pr_err("    retreived MS73=0x%llx (err %d)\n", mas7_3, rc[7]);
+		pr_err("    retreived MAS8=0x%llx (err %d)\n", mas8, rc[8]);
+		pr_err("    retreived CCR2=0x%llx (err %d)\n", ccr2, rc[9]);
+	}
+
+	return rc;
+}
+
+int __devinit a2_scom_startup_cpu(unsigned int lcpu, int thr_idx,
+				  struct device_node *np)
+{
+	u64 init_iar, init_msr, init_ccr2;
+	unsigned long start_here;
+	int rc, core_setup;
+	scom_map_t scom;
+	u64 pccr0;
+
+	scom = get_scom(lcpu, np, &core_setup);
+	if (!scom) {
+		printk(KERN_ERR "Couldn't map SCOM for CPU%d\n", lcpu);
+		return -1;
+	}
+
+	pr_devel("Bringing up CPU%d using SCOM...\n", lcpu);
+
+	pccr0 = scom_read(scom, SCOM_PCCR0);
+	scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG |
+				     SCOM_PCCR0_ENABLE_RAM);
+
+	/* Stop the thead with THRCTL. If we are setting up the TLB we stop all
+	 * threads. We also disable asynchronous interrupts while RAMing.
+	 */
+	if (core_setup)
+		scom_write(scom, SCOM_THRCTL_OR,
+			      SCOM_THRCTL_T0_STOP |
+			      SCOM_THRCTL_T1_STOP |
+			      SCOM_THRCTL_T2_STOP |
+			      SCOM_THRCTL_T3_STOP |
+			      SCOM_THRCTL_ASYNC_DIS);
+	else
+		scom_write(scom, SCOM_THRCTL_OR, SCOM_THRCTL_T0_STOP >> thr_idx);
+
+	/* Flush its pipeline just in case */
+	scom_write(scom, SCOM_RAMC, ((u64)thr_idx << 17) |
+		      SCOM_RAMC_FLUSH | SCOM_RAMC_ENABLE);
+
+	a2_scom_getspr(scom, thr_idx, SPRN_IAR, &init_iar);
+	a2_scom_getspr(scom, thr_idx, 0x0ff0, &init_msr);
+	a2_scom_getspr(scom, thr_idx, SPRN_A2_CCR2, &init_ccr2);
+
+	/* Set MSR to MSR_CM (0x0ff0 is magic value for MSR_CM) */
+	rc = a2_scom_setspr(scom, thr_idx, 0x0ff0, MSR_CM);
+	if (rc) {
+		pr_err("Failed to set MSR ! err %d\n", rc);
+		return rc;
+	}
+
+	/* RAM in an sync/isync for the sake of it */
+	a2_scom_ram(scom, thr_idx, 0x7c0004ac, 0);
+	a2_scom_ram(scom, thr_idx, 0x4c00012c, 0);
+
+	if (core_setup) {
+		pr_devel("CPU%d is first thread in core, initializing TLB...\n",
+			 lcpu);
+		rc = a2_scom_initial_tlb(scom, thr_idx);
+		if (rc)
+			goto fail;
+	}
+
+	start_here = *(unsigned long *)(core_setup ? generic_secondary_smp_init
+					: generic_secondary_thread_init);
+	pr_devel("CPU%d entry point at 0x%lx...\n", lcpu, start_here);
+
+	rc |= a2_scom_setspr(scom, thr_idx, SPRN_IAR, start_here);
+	rc |= a2_scom_setgpr(scom, thr_idx, 3, 0,
+			     get_hard_smp_processor_id(lcpu));
+	/*
+	 * Tell book3e_secondary_core_init not to set up the TLB, we've
+	 * already done that.
+	 */
+	rc |= a2_scom_setgpr(scom, thr_idx, 4, 0, 1);
+
+	rc |= a2_scom_setspr(scom, thr_idx, SPRN_TENS, 0x1 << thr_idx);
+
+	scom_write(scom, SCOM_RAMC, 0);
+	scom_write(scom, SCOM_THRCTL_AND, ~(SCOM_THRCTL_T0_STOP >> thr_idx));
+	scom_write(scom, SCOM_PCCR0, pccr0);
+fail:
+	pr_devel("  SCOM initialization %s\n", rc ? "failed" : "succeeded");
+	if (rc) {
+		pr_err("Old IAR=0x%08llx MSR=0x%08llx CCR2=0x%08llx\n",
+		       init_iar, init_msr, init_ccr2);
+	}
+
+	return rc;
+}
diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c
new file mode 100644
index 000000000000..4052e2259f30
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/scom_wsp.c
@@ -0,0 +1,77 @@
+/*
+ *  SCOM backend for WSP
+ *
+ *  Copyright 2010 Benjamin Herrenschmidt, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <asm/cputhreads.h>
+#include <asm/reg_a2.h>
+#include <asm/scom.h>
+#include <asm/udbg.h>
+
+#include "wsp.h"
+
+
+static scom_map_t wsp_scom_map(struct device_node *dev, u64 reg, u64 count)
+{
+	struct resource r;
+	u64 xscom_addr;
+
+	if (!of_get_property(dev, "scom-controller", NULL)) {
+		pr_err("%s: device %s is not a SCOM controller\n",
+			__func__, dev->full_name);
+		return SCOM_MAP_INVALID;
+	}
+
+	if (of_address_to_resource(dev, 0, &r)) {
+		pr_debug("Failed to find SCOM controller address\n");
+		return 0;
+	}
+
+	/* Transform the SCOM address into an XSCOM offset */
+	xscom_addr = ((reg & 0x7f000000) >> 1) | ((reg & 0xfffff) << 3);
+
+	return (scom_map_t)ioremap(r.start + xscom_addr, count << 3);
+}
+
+static void wsp_scom_unmap(scom_map_t map)
+{
+	iounmap((void *)map);
+}
+
+static u64 wsp_scom_read(scom_map_t map, u32 reg)
+{
+	u64 __iomem *addr = (u64 __iomem *)map;
+
+	return in_be64(addr + reg);
+}
+
+static void wsp_scom_write(scom_map_t map, u32 reg, u64 value)
+{
+	u64 __iomem *addr = (u64 __iomem *)map;
+
+	return out_be64(addr + reg, value);
+}
+
+static const struct scom_controller wsp_scom_controller = {
+	.map	= wsp_scom_map,
+	.unmap	= wsp_scom_unmap,
+	.read	= wsp_scom_read,
+	.write	= wsp_scom_write
+};
+
+void scom_init_wsp(void)
+{
+	scom_init(&wsp_scom_controller);
+}
diff --git a/arch/powerpc/platforms/wsp/setup.c b/arch/powerpc/platforms/wsp/setup.c
new file mode 100644
index 000000000000..11ac2f05e01c
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/setup.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2010 Michael Ellerman, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+
+#include "wsp.h"
+
+/*
+ * Find chip-id by walking up device tree looking for ibm,wsp-chip-id property.
+ * Won't work for nodes that are not a descendant of a wsp node.
+ */
+int wsp_get_chip_id(struct device_node *dn)
+{
+	const u32 *p;
+	int rc;
+
+	/* Start looking at the specified node, not its parent */
+	dn = of_node_get(dn);
+	while (dn && !(p = of_get_property(dn, "ibm,wsp-chip-id", NULL)))
+		dn = of_get_next_parent(dn);
+
+	if (!dn)
+		return -1;
+
+	rc = *p;
+	of_node_put(dn);
+
+	return rc;
+}
diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c
new file mode 100644
index 000000000000..c7b8db9ed9b3
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/smp.c
@@ -0,0 +1,87 @@
+/*
+ *  SMP Support for A2 platforms
+ *
+ *  Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/smp.h>
+
+#include <asm/dbell.h>
+#include <asm/machdep.h>
+#include <asm/xics.h>
+
+#include "ics.h"
+#include "wsp.h"
+
+static void __devinit smp_a2_setup_cpu(int cpu)
+{
+	doorbell_setup_this_cpu();
+
+	if (cpu != boot_cpuid)
+		xics_setup_cpu();
+}
+
+int __devinit smp_a2_kick_cpu(int nr)
+{
+	const char *enable_method;
+	struct device_node *np;
+	int thr_idx;
+
+	if (nr < 0 || nr >= NR_CPUS)
+		return -ENOENT;
+
+	np = of_get_cpu_node(nr, &thr_idx);
+	if (!np)
+		return -ENODEV;
+
+	enable_method = of_get_property(np, "enable-method", NULL);
+	pr_devel("CPU%d has enable-method: \"%s\"\n", nr, enable_method);
+
+	if (!enable_method) {
+                printk(KERN_ERR "CPU%d has no enable-method\n", nr);
+		return -ENOENT;
+	} else if (strcmp(enable_method, "ibm,a2-scom") == 0) {
+		if (a2_scom_startup_cpu(nr, thr_idx, np))
+			return -1;
+	} else {
+		printk(KERN_ERR "CPU%d: Don't understand enable-method \"%s\"\n",
+                       nr, enable_method);
+		return -EINVAL;
+	}
+
+	/*
+	 * The processor is currently spinning, waiting for the
+	 * cpu_start field to become non-zero After we set cpu_start,
+	 * the processor will continue on to secondary_start
+	 */
+	paca[nr].cpu_start = 1;
+
+	return 0;
+}
+
+static int __init smp_a2_probe(void)
+{
+	return cpus_weight(cpu_possible_map);
+}
+
+static struct smp_ops_t a2_smp_ops = {
+	.message_pass	= doorbell_message_pass,
+	.probe		= smp_a2_probe,
+	.kick_cpu	= smp_a2_kick_cpu,
+	.setup_cpu	= smp_a2_setup_cpu,
+};
+
+void __init a2_setup_smp(void)
+{
+	smp_ops = &a2_smp_ops;
+}
diff --git a/arch/powerpc/platforms/wsp/wsp.h b/arch/powerpc/platforms/wsp/wsp.h
new file mode 100644
index 000000000000..7c3e087fd2f2
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/wsp.h
@@ -0,0 +1,17 @@
+#ifndef __WSP_H
+#define __WSP_H
+
+#include <asm/wsp.h>
+
+extern void wsp_setup_pci(void);
+extern void scom_init_wsp(void);
+
+extern void a2_setup_smp(void);
+extern int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx,
+			       struct device_node *np);
+int smp_a2_cpu_bootable(unsigned int nr);
+int __devinit smp_a2_kick_cpu(int nr);
+
+void opb_pic_init(void);
+
+#endif /*  __WSP_H */
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index d9e0515592c4..3508321c4501 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -7,6 +7,7 @@
  *  2 of the License, or (at your option) any later version.
  *
  */
+
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/irq.h>
-- 
cgit v1.2.3


From a0496d450ab8c17f6c4d86979b1f6ba486fe9365 Mon Sep 17 00:00:00 2001
From: Jack Miller <jack@codezen.org>
Date: Thu, 14 Apr 2011 22:32:08 +0000
Subject: powerpc: Add early debug for WSP platforms

Signed-off-by: Jack Miller <jack@codezen.org>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/Kconfig.debug           |  5 ++++
 arch/powerpc/include/asm/reg_a2.h    |  9 +++++++
 arch/powerpc/include/asm/udbg.h      |  1 +
 arch/powerpc/kernel/exceptions-64e.S | 17 ++++++++++++
 arch/powerpc/kernel/udbg.c           |  2 ++
 arch/powerpc/kernel/udbg_16550.c     | 51 ++++++++++++++++++++++++++++++++++++
 6 files changed, 85 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 2d38a50e66ba..a597dd77b903 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -267,6 +267,11 @@ config PPC_EARLY_DEBUG_USBGECKO
 	  Select this to enable early debugging for Nintendo GameCube/Wii
 	  consoles via an external USB Gecko adapter.
 
+config PPC_EARLY_DEBUG_WSP
+	bool "Early debugging via WSP's internal UART"
+	depends on PPC_WSP
+	select PPC_UDBG_16550
+
 endchoice
 
 config PPC_EARLY_DEBUG_44x_PHYSLOW
diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h
index 3ba9c6f096fc..3d52a1132f3d 100644
--- a/arch/powerpc/include/asm/reg_a2.h
+++ b/arch/powerpc/include/asm/reg_a2.h
@@ -110,6 +110,15 @@
 #define TLB1_UR			ASM_CONST(0x0000000000000002)
 #define TLB1_SR			ASM_CONST(0x0000000000000001)
 
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+#define WSP_UART_PHYS	0xffc000c000
+/* This needs to be careful chosen to hit a !0 congruence class
+ * in the TLB since we bolt it in way 3, which is already occupied
+ * by our linear mapping primary bolted entry in CC 0.
+ */
+#define WSP_UART_VIRT	0xf000000000001000
+#endif
+
 /* A2 erativax attributes definitions */
 #define ERATIVAX_RS_IS_ALL		0x000
 #define ERATIVAX_RS_IS_TID		0x040
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index 11ae699135ba..58580e94a2bb 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -52,6 +52,7 @@ extern void __init udbg_init_44x_as1(void);
 extern void __init udbg_init_40x_realmode(void);
 extern void __init udbg_init_cpm(void);
 extern void __init udbg_init_usbgecko(void);
+extern void __init udbg_init_wsp(void);
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_UDBG_H */
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index c98e9d260621..4d0abb4930a1 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -17,6 +17,7 @@
 #include <asm/cputable.h>
 #include <asm/setup.h>
 #include <asm/thread_info.h>
+#include <asm/reg_a2.h>
 #include <asm/exception-64e.h>
 #include <asm/bug.h>
 #include <asm/irqflags.h>
@@ -951,6 +952,22 @@ a2_tlbinit_after_linear_map:
 	.globl  a2_tlbinit_after_iprot_flush
 a2_tlbinit_after_iprot_flush:
 
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+	/* Now establish early debug mappings if applicable */
+	/* Restore the MAS0 we used for linear mapping load */
+	mtspr	SPRN_MAS0,r11
+
+	lis	r3,(MAS1_VALID | MAS1_IPROT)@h
+	ori	r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT)
+	mtspr	SPRN_MAS1,r3
+	LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G)
+	mtspr	SPRN_MAS2,r3
+	LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW)
+	mtspr	SPRN_MAS7_MAS3,r3
+	/* re-use the MAS8 value from the linear mapping */
+	tlbwe
+#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
+
 	PPC_TLBILX(0,0,0)
 	sync
 	isync
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index e39cad83c884..23d65abbedce 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -62,6 +62,8 @@ void __init udbg_early_init(void)
 	udbg_init_cpm();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO)
 	udbg_init_usbgecko();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP)
+	udbg_init_wsp();
 #endif
 
 #ifdef CONFIG_PPC_EARLY_DEBUG
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index baa33a7517bc..6837f839ab78 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <asm/udbg.h>
 #include <asm/io.h>
+#include <asm/reg_a2.h>
 
 extern u8 real_readb(volatile u8 __iomem  *addr);
 extern void real_writeb(u8 data, volatile u8 __iomem *addr);
@@ -298,3 +299,53 @@ void __init udbg_init_40x_realmode(void)
 	udbg_getc_poll = NULL;
 }
 #endif /* CONFIG_PPC_EARLY_DEBUG_40x */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_WSP
+static void udbg_wsp_flush(void)
+{
+	if (udbg_comport) {
+		while ((readb(&udbg_comport->lsr) & LSR_THRE) == 0)
+			/* wait for idle */;
+	}
+}
+
+static void udbg_wsp_putc(char c)
+{
+	if (udbg_comport) {
+		if (c == '\n')
+			udbg_wsp_putc('\r');
+		udbg_wsp_flush();
+		writeb(c, &udbg_comport->thr); eieio();
+	}
+}
+
+static int udbg_wsp_getc(void)
+{
+	if (udbg_comport) {
+		while ((readb(&udbg_comport->lsr) & LSR_DR) == 0)
+			; /* wait for char */
+		return readb(&udbg_comport->rbr);
+	}
+	return -1;
+}
+
+static int udbg_wsp_getc_poll(void)
+{
+	if (udbg_comport)
+		if (readb(&udbg_comport->lsr) & LSR_DR)
+			return readb(&udbg_comport->rbr);
+	return -1;
+}
+
+void __init udbg_init_wsp(void)
+{
+	udbg_comport = (struct NS16550 __iomem *)WSP_UART_VIRT;
+
+	udbg_init_uart(udbg_comport, 57600, 50000000);
+
+	udbg_putc = udbg_wsp_putc;
+	udbg_flush = udbg_wsp_flush;
+	udbg_getc = udbg_wsp_getc;
+	udbg_getc_poll = udbg_wsp_getc_poll;
+}
+#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
-- 
cgit v1.2.3


From e04d1b23f9706186187dcb0be1a752e48dcc540b Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Fri, 6 May 2011 07:14:02 +0000
Subject: perf events, x86: Add SandyBridge stalled-cycles-frontend/backend
 events

Extend the Intel SandyBridge PMU driver with definitions
for generic front-end and back-end stall events.

( As commit 3011203 "perf events, x86: Add Westmere stalled-cycles-frontend/backend
  events" says, these are only approximations. )

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1304666042-17577-1-git-send-email-ming.m.lin@intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index e61539b07d2c..7cf2ec59c813 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1474,6 +1474,12 @@ static __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_events;
+
+		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
+
 		pr_cont("SandyBridge events, ");
 		break;
 
-- 
cgit v1.2.3


From 925f83c085e1bb08435556c5b4844a60de002e31 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 6 May 2011 01:53:18 +0200
Subject: hw_breakpoints, powerpc: Fix CONFIG_HAVE_HW_BREAKPOINT off-case in
 ptrace_set_debugreg()

We make use of ptrace_get_breakpoints() / ptrace_put_breakpoints() to
protect ptrace_set_debugreg() even if CONFIG_HAVE_HW_BREAKPOINT if off.
However in this case, these APIs are not implemented.

To fix this, push the protection down inside the relevant ifdef.
Best would be to export the code inside
CONFIG_HAVE_HW_BREAKPOINT into a standalone function to cleanup
the ifdefury there and call the breakpoint ref API inside. But
as it is more invasive, this should be rather made in an -rc1.

Fixes this build error:

  arch/powerpc/kernel/ptrace.c:1594: error: implicit declaration of function 'ptrace_get_breakpoints' make[2]: ***

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: LPPC <linuxppc-dev@lists.ozlabs.org>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: v2.6.33.. <stable@kernel.org>
Link: http://lkml.kernel.org/r/1304639598-4707-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/kernel/ptrace.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 4edeeb325429..a6ae1cfad86c 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -933,12 +933,16 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	if (data && !(data & DABR_TRANSLATION))
 		return -EIO;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
+	if (ptrace_get_breakpoints(task) < 0)
+		return -ESRCH;
+
 	bp = thread->ptrace_bps[0];
 	if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) {
 		if (bp) {
 			unregister_hw_breakpoint(bp);
 			thread->ptrace_bps[0] = NULL;
 		}
+		ptrace_put_breakpoints(task);
 		return 0;
 	}
 	if (bp) {
@@ -948,9 +952,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 					(DABR_DATA_WRITE | DABR_DATA_READ),
 							&attr.bp_type);
 		ret =  modify_user_hw_breakpoint(bp, &attr);
-		if (ret)
+		if (ret) {
+			ptrace_put_breakpoints(task);
 			return ret;
+		}
 		thread->ptrace_bps[0] = bp;
+		ptrace_put_breakpoints(task);
 		thread->dabr = data;
 		return 0;
 	}
@@ -965,9 +972,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 							ptrace_triggered, task);
 	if (IS_ERR(bp)) {
 		thread->ptrace_bps[0] = NULL;
+		ptrace_put_breakpoints(task);
 		return PTR_ERR(bp);
 	}
 
+	ptrace_put_breakpoints(task);
+
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
 	/* Move contents to the DABR register */
@@ -1591,10 +1601,7 @@ long arch_ptrace(struct task_struct *child, long request,
 	}
 
 	case PTRACE_SET_DEBUGREG:
-		if (ptrace_get_breakpoints(child) < 0)
-			return -ESRCH;
 		ret = ptrace_set_debugreg(child, addr, data);
-		ptrace_put_breakpoints(child);
 		break;
 
 #ifdef CONFIG_PPC64
-- 
cgit v1.2.3


From 63b6a6758eede2f9283c3594265b6e32e75d7456 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sat, 23 Apr 2011 00:57:42 +0200
Subject: perf events, x86: Fix Intel Nehalem and Westmere last level cache
 event definitions

The Intel Nehalem offcore bits implemented in:

  e994d7d23a0b: perf: Fix LLC-* events on Intel Nehalem/Westmere

... are wrong: they implemented _ACCESS as _HIT and counted OTHER_CORE_HIT* as
MISS even though its clearly documented as an L3 hit ...

Fix them and the Westmere definitions as well.

Cc: Andi Kleen <ak@linux.intel.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1299119690-13991-3-git-send-email-ming.m.lin@intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 87 ++++++++++++++++++++--------------
 1 file changed, 52 insertions(+), 35 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index e61539b07d2c..447a28de6f09 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids
 	},
  },
  [ C(LL  ) ] = {
-	/*
-	 * TBD: Need Off-core Response Performance Monitoring support
-	 */
 	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_WRITE) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
  },
  [ C(DTLB) ] = {
@@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
  },
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	/*
 	 * Use RFO, not WRITEBACK, because a write miss would typically occur
 	 * on RFO.
 	 */
 	[ C(OP_WRITE) ] = {
-		/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
-		[ C(RESULT_ACCESS) ] = 0x01bb,
-		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
 		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01bb,
+		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x01b7,
 	},
  },
  [ C(DTLB) ] = {
@@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids
 };
 
 /*
- * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
+ * See IA32 SDM Vol 3B 30.6.1.3
  */
 
-#define DMND_DATA_RD     (1 << 0)
-#define DMND_RFO         (1 << 1)
-#define DMND_WB          (1 << 3)
-#define PF_DATA_RD       (1 << 4)
-#define PF_DATA_RFO      (1 << 5)
-#define RESP_UNCORE_HIT  (1 << 8)
-#define RESP_MISS        (0xf600) /* non uncore hit */
+#define NHM_DMND_DATA_RD	(1 << 0)
+#define NHM_DMND_RFO		(1 << 1)
+#define NHM_DMND_IFETCH		(1 << 2)
+#define NHM_DMND_WB		(1 << 3)
+#define NHM_PF_DATA_RD		(1 << 4)
+#define NHM_PF_DATA_RFO		(1 << 5)
+#define NHM_PF_IFETCH		(1 << 6)
+#define NHM_OFFCORE_OTHER	(1 << 7)
+#define NHM_UNCORE_HIT		(1 << 8)
+#define NHM_OTHER_CORE_HIT_SNP	(1 << 9)
+#define NHM_OTHER_CORE_HITM	(1 << 10)
+        			/* reserved */
+#define NHM_REMOTE_CACHE_FWD	(1 << 12)
+#define NHM_REMOTE_DRAM		(1 << 13)
+#define NHM_LOCAL_DRAM		(1 << 14)
+#define NHM_NON_DRAM		(1 << 15)
+
+#define NHM_ALL_DRAM		(NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
+
+#define NHM_DMND_READ		(NHM_DMND_DATA_RD)
+#define NHM_DMND_WRITE		(NHM_DMND_RFO|NHM_DMND_WB)
+#define NHM_DMND_PREFETCH	(NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
+
+#define NHM_L3_HIT	(NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
+#define NHM_L3_MISS	(NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_ACCESS	(NHM_L3_HIT|NHM_L3_MISS)
 
 static __initconst const u64 nehalem_hw_cache_extra_regs
 				[PERF_COUNT_HW_CACHE_MAX]
@@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
 {
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = DMND_DATA_RD|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = DMND_RFO|DMND_WB|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
-		[ C(RESULT_MISS)   ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
+		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
 	},
  }
 };
-- 
cgit v1.2.3


From 61516587513c84ac26e68e3ab008dc6e965d0378 Mon Sep 17 00:00:00 2001
From: Rob Landley <rob@landley.net>
Date: Fri, 6 May 2011 09:27:36 -0700
Subject: Correct occurrences of - Documentation/kvm/ to
 Documentation/virtual/kvm - Documentation/uml/ to Documentation/virtual/uml -
 Documentation/lguest/ to Documentation/virtual/lguest throughout the kernel
 source tree.

Signed-off-by: Rob Landley <rob@landley.net>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
---
 Documentation/virtual/kvm/review-checklist.txt | 2 +-
 Documentation/virtual/lguest/lguest.txt        | 3 ++-
 MAINTAINERS                                    | 4 ++--
 arch/x86/lguest/boot.c                         | 2 +-
 drivers/lguest/Kconfig                         | 6 ++++--
 drivers/lguest/Makefile                        | 2 +-
 drivers/vhost/vhost.c                          | 2 +-
 7 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/Documentation/virtual/kvm/review-checklist.txt b/Documentation/virtual/kvm/review-checklist.txt
index 730475ae1b8d..a850986ed684 100644
--- a/Documentation/virtual/kvm/review-checklist.txt
+++ b/Documentation/virtual/kvm/review-checklist.txt
@@ -7,7 +7,7 @@ Review checklist for kvm patches
 2.  Patches should be against kvm.git master branch.
 
 3.  If the patch introduces or modifies a new userspace API:
-    - the API must be documented in Documentation/kvm/api.txt
+    - the API must be documented in Documentation/virtual/kvm/api.txt
     - the API must be discoverable using KVM_CHECK_EXTENSION
 
 4.  New state must include support for save/restore.
diff --git a/Documentation/virtual/lguest/lguest.txt b/Documentation/virtual/lguest/lguest.txt
index dad99978a6a8..bff0c554485d 100644
--- a/Documentation/virtual/lguest/lguest.txt
+++ b/Documentation/virtual/lguest/lguest.txt
@@ -74,7 +74,8 @@ Running Lguest:
 
 - Run an lguest as root:
 
-      Documentation/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 --block=rootfile root=/dev/vda
+      Documentation/virtual/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \
+        --block=rootfile root=/dev/vda
 
    Explanation:
     64: the amount of memory to use, in MB.
diff --git a/MAINTAINERS b/MAINTAINERS
index 16a5c5f2c6a6..aa9bbd1c0e2b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3807,7 +3807,7 @@ M:	Rusty Russell <rusty@rustcorp.com.au>
 L:	lguest@lists.ozlabs.org
 W:	http://lguest.ozlabs.org/
 S:	Odd Fixes
-F:	Documentation/lguest/
+F:	Documentation/virtual/lguest/
 F:	arch/x86/lguest/
 F:	drivers/lguest/
 F:	include/linux/lguest*.h
@@ -6618,7 +6618,7 @@ L:	user-mode-linux-devel@lists.sourceforge.net
 L:	user-mode-linux-user@lists.sourceforge.net
 W:	http://user-mode-linux.sourceforge.net
 S:	Maintained
-F:	Documentation/uml/
+F:	Documentation/virtual/uml/
 F:	arch/um/
 F:	fs/hostfs/
 F:	fs/hppfs/
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 1cd608973ce5..395bf0114aad 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -7,7 +7,7 @@
  * kernel and insert a module (lg.ko) which allows us to run other Linux
  * kernels the same way we'd run processes.  We call the first kernel the Host,
  * and the others the Guests.  The program which sets up and configures Guests
- * (such as the example in Documentation/lguest/lguest.c) is called the
+ * (such as the example in Documentation/virtual/lguest/lguest.c) is called the
  * Launcher.
  *
  * Secondly, we only run specially modified Guests, not normal kernels: setting
diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index 0aaa0597a622..34ae49dc557c 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -5,8 +5,10 @@ config LGUEST
 	---help---
 	  This is a very simple module which allows you to run
 	  multiple instances of the same Linux kernel, using the
-	  "lguest" command found in the Documentation/lguest directory.
+	  "lguest" command found in the Documentation/virtual/lguest
+	  directory.
+
 	  Note that "lguest" is pronounced to rhyme with "fell quest",
-	  not "rustyvisor".  See Documentation/lguest/lguest.txt.
+	  not "rustyvisor". See Documentation/virtual/lguest/lguest.txt.
 
 	  If unsure, say N.  If curious, say M.  If masochistic, say Y.
diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile
index 7d463c26124f..8ac947c7e7c7 100644
--- a/drivers/lguest/Makefile
+++ b/drivers/lguest/Makefile
@@ -18,7 +18,7 @@ Mastery: PREFIX=M
 Beer:
 	@for f in Preparation Guest Drivers Launcher Host Switcher Mastery; do echo "{==- $$f -==}"; make -s $$f; done; echo "{==-==}"
 Preparation Preparation! Guest Drivers Launcher Host Switcher Mastery:
-	@sh ../../Documentation/lguest/extract $(PREFIX) `find ../../* -name '*.[chS]' -wholename '*lguest*'`
+	@sh ../../Documentation/virtual/lguest/extract $(PREFIX) `find ../../* -name '*.[chS]' -wholename '*lguest*'`
 Puppy:
 	@clear
 	@printf "      __  \n (___()'\`;\n /,    /\`\n \\\\\\\"--\\\\\\   \n"
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 2ab291241635..7aa4eea930f1 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -4,7 +4,7 @@
  * Author: Michael S. Tsirkin <mst@redhat.com>
  *
  * Inspiration, some code, and most witty comments come from
- * Documentation/lguest/lguest.c, by Rusty Russell
+ * Documentation/virtual/lguest/lguest.c, by Rusty Russell
  *
  * This work is licensed under the terms of the GNU GPL, version 2.
  *
-- 
cgit v1.2.3


From 3bd2cbb95543acf44fe123eb9f038de54e655eb4 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Thu, 21 Apr 2011 21:45:08 -0400
Subject: ARM: zImage: make sure the stack is 64-bit aligned

With ARMv5+ and EABI, the compiler expects a 64-bit aligned stack so
instructions like STRD and LDRD can be used.  Without this, mysterious
boot failures were seen semi randomly with the LZMA decompressor.

While at it, let's align .bss as well.

Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Tested-by: Shawn Guo <shawn.guo@linaro.org>
Acked-by: Tony Lindgren <tony@atomide.com>
CC: stable@kernel.org
---
 arch/arm/boot/compressed/Makefile       | 2 +-
 arch/arm/boot/compressed/vmlinux.lds.in | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 8ebbb511c783..0c6852d93506 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -74,7 +74,7 @@ ZTEXTADDR	:= $(CONFIG_ZBOOT_ROM_TEXT)
 ZBSSADDR	:= $(CONFIG_ZBOOT_ROM_BSS)
 else
 ZTEXTADDR	:= 0
-ZBSSADDR	:= ALIGN(4)
+ZBSSADDR	:= ALIGN(8)
 endif
 
 SEDFLAGS	= s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
index 5309909d7282..ea80abe78844 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.in
@@ -54,6 +54,7 @@ SECTIONS
   .bss			: { *(.bss) }
   _end = .;
 
+  . = ALIGN(8);		/* the stack must be 64-bit aligned */
   .stack		: { *(.stack) }
 
   .stab 0		: { *(.stab) }
-- 
cgit v1.2.3


From 7c2527f0c4bf6bd096f58296597e1373387d69fd Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 26 Apr 2011 05:37:46 -0700
Subject: ARM: zImage: Fix bad SP address after relocating kernel

Otherwise cache_clean_flush can overwrite some of the relocated
area depending on where the kernel image gets loaded. This fixes
booting on n900 after commit 6d7d0ae51574943bf571d269da3243257a2d15db
(ARM: 6750/1: improvements to compressed/head.S).

Thanks to Aaro Koskinen <aaro.koskinen@nokia.com> for debugging
the address of the relocated area that gets corrupted, and to
Nicolas Pitre <nicolas.pitre@linaro.org> for the other uncompress
related fixes.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
 arch/arm/boot/compressed/head.S | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 84ac4d656310..55a5bcb82ba0 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -253,6 +253,11 @@ restart:	adr	r0, LC0
 		/* Preserve offset to relocated code. */
 		sub	r6, r9, r6
 
+#ifndef CONFIG_ZBOOT_ROM
+		/* cache_clean_flush may use the stack, so relocate it */
+		add	sp, sp, r6
+#endif
+
 		bl	cache_clean_flush
 
 		adr	r0, BSYM(restart)
-- 
cgit v1.2.3


From adcc25915b98e5752d51d66774ec4a61e50af3c5 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Wed, 27 Apr 2011 16:15:11 -0400
Subject: ARM: zImage: make sure not to relocate on top of the relocation code

If the zImage load address is slightly below the relocation address,
there is a risk for the copied data to overwrite the copy loop or
cache flush code that the relocation process requires.  Always
bump the relocation address by the size of that code to avoid this
issue.

Noticed by Tony Lindgren <tony@atomide.com>.

While at it, let's start the copy from the restart symbol which makes
the above code size computation possible by the assembler directly
(same sections), given that we don't need to preserve the code before
that point anyway. And therefore we don't need to carry the _start
pointer in r5 anymore.

Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Tested-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/compressed/head.S | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 55a5bcb82ba0..53dd5da84f8a 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -187,15 +187,14 @@ not_angel:
 		bl	cache_on
 
 restart:	adr	r0, LC0
-		ldmia	r0, {r1, r2, r3, r5, r6, r9, r11, r12}
-		ldr	sp, [r0, #32]
+		ldmia	r0, {r1, r2, r3, r6, r9, r11, r12}
+		ldr	sp, [r0, #28]
 
 		/*
 		 * We might be running at a different address.  We need
 		 * to fix up various pointers.
 		 */
 		sub	r0, r0, r1		@ calculate the delta offset
-		add	r5, r5, r0		@ _start
 		add	r6, r6, r0		@ _edata
 
 #ifndef CONFIG_ZBOOT_ROM
@@ -214,31 +213,39 @@ restart:	adr	r0, LC0
 /*
  * Check to see if we will overwrite ourselves.
  *   r4  = final kernel address
- *   r5  = start of this image
  *   r9  = size of decompressed image
  *   r10 = end of this image, including  bss/stack/malloc space if non XIP
  * We basically want:
  *   r4 >= r10 -> OK
- *   r4 + image length <= r5 -> OK
+ *   r4 + image length <= current position (pc) -> OK
  */
 		cmp	r4, r10
 		bhs	wont_overwrite
 		add	r10, r4, r9
-		cmp	r10, r5
+   ARM(		cmp	r10, pc		)
+ THUMB(		mov	lr, pc		)
+ THUMB(		cmp	r10, lr		)
 		bls	wont_overwrite
 
 /*
  * Relocate ourselves past the end of the decompressed kernel.
- *   r5  = start of this image
  *   r6  = _edata
  *   r10 = end of the decompressed kernel
  * Because we always copy ahead, we need to do it from the end and go
  * backward in case the source and destination overlap.
  */
-		/* Round up to next 256-byte boundary. */
-		add	r10, r10, #256
+		/*
+		 * Bump to the next 256-byte boundary with the size of
+		 * the relocation code added. This avoids overwriting
+		 * ourself when the offset is small.
+		 */
+		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
 		bic	r10, r10, #255
 
+		/* Get start of code we want to copy and align it down. */
+		adr	r5, restart
+		bic	r5, r5, #31
+
 		sub	r9, r6, r5		@ size to copy
 		add	r9, r9, #31		@ rounded up to a multiple
 		bic	r9, r9, #31		@ ... of 32 bytes
@@ -346,7 +353,6 @@ not_relocated:	mov	r0, #0
 LC0:		.word	LC0			@ r1
 		.word	__bss_start		@ r2
 		.word	_end			@ r3
-		.word	_start			@ r5
 		.word	_edata			@ r6
 		.word	_image_size		@ r9
 		.word	_got_start		@ r11
@@ -1075,6 +1081,7 @@ memdump:	mov	r12, r0
 #endif
 
 		.ltorg
+reloc_code_end:
 
 		.align
 		.section ".stack", "aw", %nobits
-- 
cgit v1.2.3


From ea9df3b168e641e87dbf889afae16390119e4179 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Thu, 21 Apr 2011 22:52:06 -0400
Subject: ARM: zImage: the page table memory must be considered before
 relocation

For correctness, the initial page table located right before the
decompressed kernel should be considered when determining if relocation
is required.

Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Tested-by: Shawn Guo <shawn.guo@linaro.org>
Acked-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/compressed/head.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 53dd5da84f8a..d1fd1cfca9c2 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -216,9 +216,10 @@ restart:	adr	r0, LC0
  *   r9  = size of decompressed image
  *   r10 = end of this image, including  bss/stack/malloc space if non XIP
  * We basically want:
- *   r4 >= r10 -> OK
+ *   r4 - 16k page directory >= r10 -> OK
  *   r4 + image length <= current position (pc) -> OK
  */
+		add	r10, r10, #16384
 		cmp	r4, r10
 		bhs	wont_overwrite
 		add	r10, r4, r9
-- 
cgit v1.2.3


From 8fab6af2156c0100f953fd61f4e0b2f82c9776dc Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Fri, 6 May 2011 15:00:09 -0700
Subject: x86: Fix mrst sparse complaints

Fix these Sparse complaints:

  CHECK   arch/x86/platform/mrst/mrst.c
  arch/x86/platform/mrst/mrst.c:197:13: warning: symbol 'mrst_time_init' was not declared. Should it be static?
  arch/x86/platform/mrst/mrst.c:219:16: warning: symbol 'mrst_arch_setup' was not declared. Should it be static?

Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Roman Gezikov <roman.gezikov@atheros.com>
Cc: Joonas Viskari <joonas.viskari@atheros.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Allen Kao <allen.kao@atheros.com>
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Link: http://lkml.kernel.org/r/1304719209-26913-1-git-send-email-lrodriguez@atheros.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/platform/mrst/mrst.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 275dbc19e2cf..7000e74b3087 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -194,7 +194,7 @@ static unsigned long __init mrst_calibrate_tsc(void)
 	return 0;
 }
 
-void __init mrst_time_init(void)
+static void __init mrst_time_init(void)
 {
 	sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
 	switch (mrst_timer_options) {
@@ -216,7 +216,7 @@ void __init mrst_time_init(void)
 	apbt_time_init();
 }
 
-void __cpuinit mrst_arch_setup(void)
+static void __cpuinit mrst_arch_setup(void)
 {
 	if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
 		__mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
-- 
cgit v1.2.3


From 2b5e8ef35bc89eee944c0627edacbb1fea5a1b84 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 5 May 2011 15:19:42 -0400
Subject: x86, efi: Remove virtual-mode SetVirtualAddressMap call

The spec says that SetVirtualAddressMap doesn't work once you're in
virtual mode, so there's no point in having infrastructure for calling
it from there.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Link: http://lkml.kernel.org/r/1304623186-18261-1-git-send-email-mjg@redhat.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/efi/efi.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 0fe27d7c6258..f2e4fe9e92a1 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -145,17 +145,6 @@ static void virt_efi_reset_system(int reset_type,
 		       data_size, data);
 }
 
-static efi_status_t virt_efi_set_virtual_address_map(
-	unsigned long memory_map_size,
-	unsigned long descriptor_size,
-	u32 descriptor_version,
-	efi_memory_desc_t *virtual_map)
-{
-	return efi_call_virt4(set_virtual_address_map,
-			      memory_map_size, descriptor_size,
-			      descriptor_version, virtual_map);
-}
-
 static efi_status_t __init phys_efi_set_virtual_address_map(
 	unsigned long memory_map_size,
 	unsigned long descriptor_size,
@@ -572,7 +561,7 @@ void __init efi_enter_virtual_mode(void)
 	efi.set_variable = virt_efi_set_variable;
 	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
 	efi.reset_system = virt_efi_reset_system;
-	efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
+	efi.set_virtual_address_map = NULL;
 	if (__supported_pte_mask & _PAGE_NX)
 		runtime_code_page_mkexec();
 	early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
-- 
cgit v1.2.3


From 9cd2b07c197e3ff594fc04f5fb3d86efbeab6ad8 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 5 May 2011 15:19:43 -0400
Subject: x86, efi: Consolidate EFI nx control

The core EFI code and 64-bit EFI code currently have independent
implementations of code for setting memory regions as executable or not.
Let's consolidate them.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Link: http://lkml.kernel.org/r/1304623186-18261-2-git-send-email-mjg@redhat.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/efi.h     |  1 +
 arch/x86/platform/efi/efi.c    | 21 ++++++++++++++++-----
 arch/x86/platform/efi/efi_64.c | 28 +++++-----------------------
 3 files changed, 22 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 8e4a16508d4e..7093e4a6a0bc 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
 #endif /* CONFIG_X86_32 */
 
 extern int add_efi_memmap;
+extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern void efi_memblock_x86_reserve_range(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f2e4fe9e92a1..7daae27e975e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -457,11 +457,25 @@ void __init efi_init(void)
 #endif
 }
 
+void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
+{
+	u64 addr, npages;
+
+	addr = md->virt_addr;
+	npages = md->num_pages;
+
+	memrange_efi_to_native(&addr, &npages);
+
+	if (executable)
+		set_memory_x(addr, npages);
+	else
+		set_memory_nx(addr, npages);
+}
+
 static void __init runtime_code_page_mkexec(void)
 {
 	efi_memory_desc_t *md;
 	void *p;
-	u64 addr, npages;
 
 	/* Make EFI runtime service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -470,10 +484,7 @@ static void __init runtime_code_page_mkexec(void)
 		if (md->type != EFI_RUNTIME_SERVICES_CODE)
 			continue;
 
-		addr = md->virt_addr;
-		npages = md->num_pages;
-		memrange_efi_to_native(&addr, &npages);
-		set_memory_x(addr, npages);
+		efi_set_executable(md, true);
 	}
 }
 
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac0621a7ac3d..94d6b394b654 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -41,22 +41,7 @@
 static pgd_t save_pgd __initdata;
 static unsigned long efi_flags __initdata;
 
-static void __init early_mapping_set_exec(unsigned long start,
-					  unsigned long end,
-					  int executable)
-{
-	unsigned long num_pages;
-
-	start &= PMD_MASK;
-	end = (end + PMD_SIZE - 1) & PMD_MASK;
-	num_pages = (end - start) >> PAGE_SHIFT;
-	if (executable)
-		set_memory_x((unsigned long)__va(start), num_pages);
-	else
-		set_memory_nx((unsigned long)__va(start), num_pages);
-}
-
-static void __init early_runtime_code_mapping_set_exec(int executable)
+static void __init early_code_mapping_set_exec(int executable)
 {
 	efi_memory_desc_t *md;
 	void *p;
@@ -67,11 +52,8 @@ static void __init early_runtime_code_mapping_set_exec(int executable)
 	/* Make EFI runtime service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
-		if (md->type == EFI_RUNTIME_SERVICES_CODE) {
-			unsigned long end;
-			end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
-			early_mapping_set_exec(md->phys_addr, end, executable);
-		}
+		if (md->type == EFI_RUNTIME_SERVICES_CODE)
+			efi_set_executable(md, executable);
 	}
 }
 
@@ -79,7 +61,7 @@ void __init efi_call_phys_prelog(void)
 {
 	unsigned long vaddress;
 
-	early_runtime_code_mapping_set_exec(1);
+	early_code_mapping_set_exec(1);
 	local_irq_save(efi_flags);
 	vaddress = (unsigned long)__va(0x0UL);
 	save_pgd = *pgd_offset_k(0x0UL);
@@ -95,7 +77,7 @@ void __init efi_call_phys_epilog(void)
 	set_pgd(pgd_offset_k(0x0UL), save_pgd);
 	__flush_tlb_all();
 	local_irq_restore(efi_flags);
-	early_runtime_code_mapping_set_exec(0);
+	early_code_mapping_set_exec(0);
 }
 
 void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
-- 
cgit v1.2.3


From 202f9d0a41809e3424af5f61489b48b622824aed Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 5 May 2011 15:19:44 -0400
Subject: x86, efi: Merge contiguous memory regions of the same type and
 attribute

Some firmware implementations assume that physically contiguous regions
will be contiguous in virtual address space. This assumption is, obviously,
entirely unjustifiable. Said firmware implementations lack the good grace
to handle their failings in a measured and reasonable manner, instead
tending to shit all over address space and oopsing the kernel.

In an ideal universe these firmware implementations would simultaneously
catch fire and cease to be a problem, but since some of them are present
in attractively thin and shiny metal devices vanity wins out and some
poor developer spends an extended period of time surrounded by a
growing array of empty bottles until the underlying reason becomes
apparent. Said developer presents this patch, which simply merges
adjacent regions if they happen to be contiguous and have the same EFI
memory type and caching attributes.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Link: http://lkml.kernel.org/r/1304623186-18261-3-git-send-email-mjg@redhat.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/efi/efi.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 7daae27e975e..a46a73ecc5f3 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -498,13 +498,41 @@ static void __init runtime_code_page_mkexec(void)
  */
 void __init efi_enter_virtual_mode(void)
 {
-	efi_memory_desc_t *md;
+	efi_memory_desc_t *md, *prev_md = NULL;
 	efi_status_t status;
 	unsigned long size;
 	u64 end, systab, addr, npages, end_pfn;
 	void *p, *va;
 
 	efi.systab = NULL;
+
+	/* Merge contiguous regions of the same type and attribute */
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		u64 prev_size;
+		md = p;
+
+		if (!prev_md) {
+			prev_md = md;
+			continue;
+		}
+
+		if (prev_md->type != md->type ||
+		    prev_md->attribute != md->attribute) {
+			prev_md = md;
+			continue;
+		}
+
+		prev_size = prev_md->num_pages << EFI_PAGE_SHIFT;
+
+		if (md->phys_addr == (prev_md->phys_addr + prev_size)) {
+			prev_md->num_pages += md->num_pages;
+			md->type = EFI_RESERVED_TYPE;
+			md->attribute = 0;
+			continue;
+		}
+		prev_md = md;
+	}
+
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
 		if (!(md->attribute & EFI_MEMORY_RUNTIME))
-- 
cgit v1.2.3


From 7cb00b72876ea2451eb79d468da0e8fb9134aa8a Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 5 May 2011 15:19:45 -0400
Subject: x86, efi: Pass a minimal map to SetVirtualAddressMap()

Experimentation with various EFI implementations has shown that functions
outside runtime services will still update their pointers if
SetVirtualAddressMap() is called with memory descriptors outside the
runtime area. This is obviously insane, and therefore is unsurprising.
Evidence from instrumenting another EFI implementation suggests that it
only passes the set of descriptors covering runtime regions, so let's
avoid any problems by doing the same. Runtime descriptors are copied to
a separate memory map, and only that map is passed back to the firmware.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Link: http://lkml.kernel.org/r/1304623186-18261-4-git-send-email-mjg@redhat.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/efi/efi.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index a46a73ecc5f3..b30aa26a8df2 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -502,7 +502,8 @@ void __init efi_enter_virtual_mode(void)
 	efi_status_t status;
 	unsigned long size;
 	u64 end, systab, addr, npages, end_pfn;
-	void *p, *va;
+	void *p, *va, *new_memmap = NULL;
+	int count = 0;
 
 	efi.systab = NULL;
 
@@ -569,15 +570,21 @@ void __init efi_enter_virtual_mode(void)
 			systab += md->virt_addr - md->phys_addr;
 			efi.systab = (efi_system_table_t *) (unsigned long) systab;
 		}
+		new_memmap = krealloc(new_memmap,
+				      (count + 1) * memmap.desc_size,
+				      GFP_KERNEL);
+		memcpy(new_memmap + (count * memmap.desc_size), md,
+		       memmap.desc_size);
+		count++;
 	}
 
 	BUG_ON(!efi.systab);
 
 	status = phys_efi_set_virtual_address_map(
-		memmap.desc_size * memmap.nr_map,
+		memmap.desc_size * count,
 		memmap.desc_size,
 		memmap.desc_version,
-		memmap.phys_map);
+		(efi_memory_desc_t *)__pa(new_memmap));
 
 	if (status != EFI_SUCCESS) {
 		printk(KERN_ALERT "Unable to switch EFI into virtual mode "
@@ -605,6 +612,7 @@ void __init efi_enter_virtual_mode(void)
 		runtime_code_page_mkexec();
 	early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
 	memmap.map = NULL;
+	kfree(new_memmap);
 }
 
 /*
-- 
cgit v1.2.3


From 935a638241b0658b9749edd060f972575f9d4a78 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 5 May 2011 15:19:46 -0400
Subject: x86, efi: Ensure that the entirity of a region is mapped

It's possible for init_memory_mapping() to fail to map the entire region
if it crosses a boundary, so ensure that we complete the mapping.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Link: http://lkml.kernel.org/r/1304623186-18261-5-git-send-email-mjg@redhat.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/efi/efi_64.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 94d6b394b654..2649426a7905 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -89,8 +89,10 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
 		return ioremap(phys_addr, size);
 
 	last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
-	if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
-		return NULL;
+	if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
+		unsigned long top = last_map_pfn << PAGE_SHIFT;
+		efi_ioremap(top, size - (top - phys_addr), type);
+	}
 
 	return (void __iomem *)__va(phys_addr);
 }
-- 
cgit v1.2.3


From 1d44e8288a0557c28c447d7e511f50d06ff93a34 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Mon, 9 May 2011 11:35:19 -0500
Subject: x86, UV: Fix NMI handler for UV platforms

This fixes problems seen on UV systems handling NMIs from the
node controller.

I isolated the "dazed..." messages that I saw earlier to a bug in
the BMC on our platform. It was sending NMIs w/o properly setting
a register that indicated the source of NMI.

So rather than _assuming_ any unhandled NMI came from the UV system
maintenance console (SMC), add a check to verify that the SMC actually
sent the NMI.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: gorcunov@gmail.com
Cc: dzickus@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uv/uv_hub.h   |  2 ++
 arch/x86/include/asm/uv/uv_mmrs.h  | 16 ++++++++++++-
 arch/x86/kernel/apic/x2apic_uv_x.c | 48 ++++++++++++++++++++++++++++++++++----
 3 files changed, 60 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index a501741c2335..4298002d0c83 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -398,6 +398,8 @@ struct uv_blade_info {
 	unsigned short	nr_online_cpus;
 	unsigned short	pnode;
 	short		memory_nid;
+	spinlock_t	nmi_lock;
+	unsigned long	nmi_count;
 };
 extern struct uv_blade_info *uv_blade_info;
 extern short *uv_node_to_blade;
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 20cafeac7455..f5bb64a823d7 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
  *
  * SGI UV MMR definitions
  *
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_X86_UV_UV_MMRS_H
@@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u {
     } s;
 };
 
+/* ========================================================================= */
+/*                               UVH_SCRATCH5                                */
+/* ========================================================================= */
+#define UVH_SCRATCH5 0x2d0200UL
+#define UVH_SCRATCH5_32 0x00778
+
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+union uvh_scratch5_u {
+    unsigned long	v;
+    struct uvh_scratch5_s {
+	unsigned long	scratch5 : 64;  /* RW, W1CS */
+    } s;
+};
 
 #endif /* __ASM_UV_MMRS_X86_H__ */
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 33b10a0fc095..7acd2d2ac965 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -37,6 +37,13 @@
 #include <asm/smp.h>
 #include <asm/x86_init.h>
 #include <asm/emergency-restart.h>
+#include <asm/nmi.h>
+
+/* BMC sets a bit this MMR non-zero before sending an NMI */
+#define UVH_NMI_MMR				UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR			(UVH_NMI_MMR + 8)
+#define UV_NMI_PENDING_MASK			(1UL << 63)
+DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
@@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void)
  */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
+	unsigned long real_uv_nmi;
+	int bid;
+
 	if (reason != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	if (in_crash_kexec)
 		/* do nothing if entering the crash kernel */
 		return NOTIFY_OK;
+
 	/*
-	 * Use a lock so only one cpu prints at a time
-	 * to prevent intermixed output.
+	 * Each blade has an MMR that indicates when an NMI has been sent
+	 * to cpus on the blade. If an NMI is detected, atomically
+	 * clear the MMR and update a per-blade NMI count used to
+	 * cause each cpu on the blade to notice a new NMI.
+	 */
+	bid = uv_numa_blade_id();
+	real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+
+	if (unlikely(real_uv_nmi)) {
+		spin_lock(&uv_blade_info[bid].nmi_lock);
+		real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+		if (real_uv_nmi) {
+			uv_blade_info[bid].nmi_count++;
+			uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+		}
+		spin_unlock(&uv_blade_info[bid].nmi_lock);
+	}
+
+	if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
+		return NOTIFY_DONE;
+
+	__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
+
+	/*
+	 * Use a lock so only one cpu prints at a time.
+	 * This prevents intermixed output.
 	 */
 	spin_lock(&uv_nmi_lock);
-	pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
+	pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
 	dump_stack();
 	spin_unlock(&uv_nmi_lock);
 
@@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 }
 
 static struct notifier_block uv_dump_stack_nmi_nb = {
-	.notifier_call	= uv_handle_nmi
+	.notifier_call	= uv_handle_nmi,
+	.priority = NMI_LOCAL_LOW_PRIOR - 1,
 };
 
 void uv_register_nmi_notifier(void)
@@ -720,8 +756,9 @@ void __init uv_system_init(void)
 	printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
 
 	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
 	BUG_ON(!uv_blade_info);
+
 	for (blade = 0; blade < uv_num_possible_blades(); blade++)
 		uv_blade_info[blade].memory_nid = -1;
 
@@ -747,6 +784,7 @@ void __init uv_system_init(void)
 			uv_blade_info[blade].pnode = pnode;
 			uv_blade_info[blade].nr_possible_cpus = 0;
 			uv_blade_info[blade].nr_online_cpus = 0;
+			spin_lock_init(&uv_blade_info[blade].nmi_lock);
 			max_pnode = max(pnode, max_pnode);
 			blade++;
 		}
-- 
cgit v1.2.3


From e969687595c27e02e02be0c9363261826123ba77 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 5 Nov 2010 16:12:35 -0700
Subject: arch/x86/kernel/pci-iommu_table.c: Convert sprintf_symbol to %pS

Coalesce format as well.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/pci-iommu_table.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
index 55d745ec1181..35ccf75696eb 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -50,20 +50,14 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
 				struct iommu_table_entry *finish)
 {
 	struct iommu_table_entry *p, *q, *x;
-	char sym_p[KSYM_SYMBOL_LEN];
-	char sym_q[KSYM_SYMBOL_LEN];
 
 	/* Simple cyclic dependency checker. */
 	for (p = start; p < finish; p++) {
 		q = find_dependents_of(start, finish, p);
 		x = find_dependents_of(start, finish, q);
 		if (p == x) {
-			sprint_symbol(sym_p, (unsigned long)p->detect);
-			sprint_symbol(sym_q, (unsigned long)q->detect);
-
-			printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %s depends" \
-					" on %s and vice-versa. BREAKING IT.\n",
-					sym_p, sym_q);
+			printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n",
+			       p->detect, q->detect);
 			/* Heavy handed way..*/
 			x->depend = 0;
 		}
@@ -72,12 +66,8 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
 	for (p = start; p < finish; p++) {
 		q = find_dependents_of(p, finish, p);
 		if (q && q > p) {
-			sprint_symbol(sym_p, (unsigned long)p->detect);
-			sprint_symbol(sym_q, (unsigned long)q->detect);
-
-			printk(KERN_ERR "EXECUTION ORDER INVALID! %s "\
-					"should be called before %s!\n",
-					sym_p, sym_q);
+			printk(KERN_ERR "EXECUTION ORDER INVALID! %pS should be called before %pS!\n",
+			       p->detect, q->detect);
 		}
 	}
 }
-- 
cgit v1.2.3


From 72fe00f01f9a3240a1073be27aeaf4fc476cc662 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 10 May 2011 10:50:42 +0200
Subject: x86/amd-iommu: Use threaded interupt handler

Move the interupt handling for the iommu into the interupt
thread to reduce latencies and prepare interupt handling for
pri handling.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_proto.h | 1 +
 arch/x86/kernel/amd_iommu.c            | 7 ++++++-
 arch/x86/kernel/amd_iommu_init.c       | 9 +++++----
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index a4ae6c3875eb..55d95eb789b3 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -23,6 +23,7 @@
 
 extern int amd_iommu_init_dma_ops(void);
 extern int amd_iommu_init_passthrough(void);
+extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
 extern void amd_iommu_apply_erratum_63(u16 devid);
 extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index dc5dddafe5c2..873e7e1ead7b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -366,7 +366,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
-irqreturn_t amd_iommu_int_handler(int irq, void *data)
+irqreturn_t amd_iommu_int_thread(int irq, void *data)
 {
 	struct amd_iommu *iommu;
 
@@ -376,6 +376,11 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+irqreturn_t amd_iommu_int_handler(int irq, void *data)
+{
+	return IRQ_WAKE_THREAD;
+}
+
 /****************************************************************************
  *
  * IOMMU command queuing functions
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 28b078133688..9179c21120a8 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1034,10 +1034,11 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
 	if (pci_enable_msi(iommu->dev))
 		return 1;
 
-	r = request_irq(iommu->dev->irq, amd_iommu_int_handler,
-			IRQF_SAMPLE_RANDOM,
-			"AMD-Vi",
-			NULL);
+	r = request_threaded_irq(iommu->dev->irq,
+				 amd_iommu_int_handler,
+				 amd_iommu_int_thread,
+				 0, "AMD-Vi",
+				 iommu->dev);
 
 	if (r) {
 		pci_disable_msi(iommu->dev);
-- 
cgit v1.2.3


From 8eb4bd666ffdca7171cd8118138a91842012b028 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Tue, 10 May 2011 17:13:39 +0200
Subject: [S390] kernel: Initialize register 14 when starting new CPU

When starting a new CPU we currently jump to start_secondary() without
setting register 14 (the return address) correctly. Therefore on the stack
frame for start_secondary an invalid return address is stored. This leads
to wrong stack back traces in kernel dumps.

Example:

 #00 [1f33fe48] cpu_idle at 10614a
 #01 [1f33fe90] start_secondary at 54fa88
 #02 [1f33feb8] (null) at 0                 <--- invalid

To fix this start_secondary() is called now with basr/brasl that sets
register 14 correctly. The output of the stack backtrace looks then
like the following:

 #00 [1f33fe48] cpu_idle at 10614a
 #01 [1f33fe90] start_secondary at 54fa88
 #02 [1f33feb8] restart_base at 54f41e      <--- correct

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry.S   | 2 +-
 arch/s390/kernel/entry64.S | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 648f64239a9d..1b67fc6ebdc2 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -836,7 +836,7 @@ restart_base:
 	stosm	__SF_EMPTY(%r15),0x04	# now we can turn dat on
 	basr	%r14,0
 	l	%r14,restart_addr-.(%r14)
-	br	%r14			# branch to start_secondary
+	basr	%r14,%r14		# branch to start_secondary
 restart_addr:
 	.long	start_secondary
 	.align	8
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9d3603d6c511..9fd864563499 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -841,7 +841,7 @@ restart_base:
 	mvc	__LC_SYSTEM_TIMER(8),__TI_system_timer(%r1)
 	xc	__LC_STEAL_TIMER(8),__LC_STEAL_TIMER
 	stosm	__SF_EMPTY(%r15),0x04	# now we can turn dat on
-	jg	start_secondary
+	brasl	%r14,start_secondary
 	.align	8
 restart_vtime:
 	.long	0x7fffffff,0xffffffff
-- 
cgit v1.2.3


From 91d378088b104f8e31baba8c518f32a7a219d58c Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 10 May 2011 17:13:40 +0200
Subject: [S390] disassembler: handle b280/spp instruction

arch/s390/kvm/sie64a.S uses the b280 instruction. Tell the builtin
disassembler to handle that code.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/dis.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index c83726c9fe03..3d4a78fc1adc 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -672,6 +672,7 @@ static struct insn opcode_b2[] = {
 	{ "rp", 0x77, INSTR_S_RD },
 	{ "stcke", 0x78, INSTR_S_RD },
 	{ "sacf", 0x79, INSTR_S_RD },
+	{ "spp", 0x80, INSTR_S_RD },
 	{ "stsi", 0x7d, INSTR_S_RD },
 	{ "srnm", 0x99, INSTR_S_RD },
 	{ "stfpc", 0x9c, INSTR_S_RD },
-- 
cgit v1.2.3


From 83ace2701b81be549cca7af33c5b0499cb2602d6 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Tue, 10 May 2011 17:13:41 +0200
Subject: [S390] replace diag10() with diag10_range() function

Currently the diag10() function can only release one page. For exploiters
that have to call diag10 on a contiguous memory region this is suboptimal.
This patch replaces the diag10() function with diag10_range() that is
able to release multiple pages. In addition to that the new function now
allows to release memory with addresses higher than 2047 MiB. This was
due to a restriction of the diagnose implementation under z/VM prior to
release 5.2.

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/diag.h | 17 +++++++++++++++--
 arch/s390/kernel/diag.c      | 21 ---------------------
 arch/s390/mm/cmm.c           |  2 +-
 3 files changed, 16 insertions(+), 24 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 72b2e2f2d32d..7e91c58072e2 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -9,9 +9,22 @@
 #define _ASM_S390_DIAG_H
 
 /*
- * Diagnose 10: Release pages
+ * Diagnose 10: Release page range
  */
-extern void diag10(unsigned long addr);
+static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
+{
+	unsigned long start_addr, end_addr;
+
+	start_addr = start_pfn << PAGE_SHIFT;
+	end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT;
+
+	asm volatile(
+		"0:	diag	%0,%1,0x10\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		EX_TABLE(1b, 1b)
+		: : "a" (start_addr), "a" (end_addr));
+}
 
 /*
  * Diagnose 14: Input spool file manipulation
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index c032d11da8a1..8237fc07ac79 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -8,27 +8,6 @@
 #include <linux/module.h>
 #include <asm/diag.h>
 
-/*
- * Diagnose 10: Release pages
- */
-void diag10(unsigned long addr)
-{
-	if (addr >= 0x7ff00000)
-		return;
-	asm volatile(
-#ifdef CONFIG_64BIT
-		"	sam31\n"
-		"	diag	%0,%0,0x10\n"
-		"0:	sam64\n"
-#else
-		"	diag	%0,%0,0x10\n"
-		"0:\n"
-#endif
-		EX_TABLE(0b, 0b)
-		: : "a" (addr));
-}
-EXPORT_SYMBOL(diag10);
-
 /*
  * Diagnose 14: Input spool file manipulation
  */
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index c66ffd8dbbb7..1f1dba9dcf58 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -91,7 +91,7 @@ static long cmm_alloc_pages(long nr, long *counter,
 			} else
 				free_page((unsigned long) npa);
 		}
-		diag10(addr);
+		diag10_range(addr >> PAGE_SHIFT, 1);
 		pa->pages[pa->index++] = addr;
 		(*counter)++;
 		spin_unlock(&cmm_lock);
-- 
cgit v1.2.3


From 3d8dcb3c76bb2930798f61675c33cce8945ab988 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 10 May 2011 17:13:42 +0200
Subject: [S390] oprofile: fix min/max interval query checks

oprofile_min_interval and oprofile_max_interval are unsigned, checking
for negative values doesn't work. Change hwsampler_query_min_interval
and hwsampler_query_max_interval to return an unsigned long and
check for a zero value instead.

Reported-by: Nicolas Kaiser <nikai@nikai.net>
Acked-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/oprofile/hwsampler.c | 14 ++++----------
 arch/s390/oprofile/hwsampler.h |  4 ++--
 arch/s390/oprofile/init.c      |  8 ++------
 3 files changed, 8 insertions(+), 18 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index 4952872d6f0a..33cbd373cce4 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -1021,20 +1021,14 @@ deallocate_exit:
 	return rc;
 }
 
-long hwsampler_query_min_interval(void)
+unsigned long hwsampler_query_min_interval(void)
 {
-	if (min_sampler_rate)
-		return min_sampler_rate;
-	else
-		return -EINVAL;
+	return min_sampler_rate;
 }
 
-long hwsampler_query_max_interval(void)
+unsigned long hwsampler_query_max_interval(void)
 {
-	if (max_sampler_rate)
-		return max_sampler_rate;
-	else
-		return -EINVAL;
+	return max_sampler_rate;
 }
 
 unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu)
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
index 8c72b59316b5..1912f3bb190c 100644
--- a/arch/s390/oprofile/hwsampler.h
+++ b/arch/s390/oprofile/hwsampler.h
@@ -102,8 +102,8 @@ int hwsampler_setup(void);
 int hwsampler_shutdown(void);
 int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
 int hwsampler_deallocate(void);
-long hwsampler_query_min_interval(void);
-long hwsampler_query_max_interval(void);
+unsigned long hwsampler_query_min_interval(void);
+unsigned long hwsampler_query_max_interval(void);
 int hwsampler_start_all(unsigned long interval);
 int hwsampler_stop_all(void);
 int hwsampler_deactivate(unsigned int cpu);
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index c63d7e58352b..5995e9bc72d9 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -145,15 +145,11 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
 	 * create hwsampler files only if hwsampler_setup() succeeds.
 	 */
 	oprofile_min_interval = hwsampler_query_min_interval();
-	if (oprofile_min_interval < 0) {
-		oprofile_min_interval = 0;
+	if (oprofile_min_interval == 0)
 		return -ENODEV;
-	}
 	oprofile_max_interval = hwsampler_query_max_interval();
-	if (oprofile_max_interval < 0) {
-		oprofile_max_interval = 0;
+	if (oprofile_max_interval == 0)
 		return -ENODEV;
-	}
 
 	if (oprofile_timer_init(ops))
 		return -ENODEV;
-- 
cgit v1.2.3


From badb8bb983e9cf5b7a872e0a4f6ebeac2b1ce133 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 10 May 2011 17:13:43 +0200
Subject: [S390] fix alloc_pgste check in init_new_context

Processes started with kernel_execve from a kernel thread will have
current->mm==NULL. Reading current->mm->context.alloc_pgste will
read a more or less random bit from lowcore in this case. If the
bit turns out to be set the whole process tree started this way
will allocate page table extensions although they have no need
for it.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/mmu_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index a6f0e7cc9cde..8c277caa8d3a 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -23,7 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
 #ifdef CONFIG_64BIT
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #endif
-	if (current->mm->context.alloc_pgste) {
+	if (current->mm && current->mm->context.alloc_pgste) {
 		/*
 		 * alloc_pgste indicates, that any NEW context will be created
 		 * with extended page tables. The old context is unchanged. The
-- 
cgit v1.2.3


From fffcda1183e93df84ad73ba7eb7782a5c354e2b3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 10 May 2011 17:22:06 +0200
Subject: x86, gart: Rename pci-gart_64.c to amd_gart_64.c

This file only contains code relevant for the northbridge
gart in AMD processors. This patch renames the file to
represent this fact in the filename.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 Documentation/x86/x86_64/boot-options.txt |   2 +-
 arch/x86/kernel/Makefile                  |   2 +-
 arch/x86/kernel/amd_gart_64.c             | 898 ++++++++++++++++++++++++++++++
 arch/x86/kernel/pci-gart_64.c             | 898 ------------------------------
 4 files changed, 900 insertions(+), 900 deletions(-)
 create mode 100644 arch/x86/kernel/amd_gart_64.c
 delete mode 100644 arch/x86/kernel/pci-gart_64.c

(limited to 'arch')

diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 092e596a1301..c54b4f503e2a 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -206,7 +206,7 @@ IOMMU (input/output memory management unit)
       (e.g. because you have < 3 GB memory).
       Kernel boot message: "PCI-DMA: Disabling IOMMU"
 
-   2. <arch/x86_64/kernel/pci-gart.c>: AMD GART based hardware IOMMU.
+   2. <arch/x86/kernel/amd_gart_64.c>: AMD GART based hardware IOMMU.
       Kernel boot message: "PCI-DMA: using GART IOMMU"
 
    3. <arch/x86_64/kernel/pci-swiotlb.c> : Software IOMMU implementation. Used
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7338ef2218bc..97ebf82e0b7f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -117,7 +117,7 @@ obj-$(CONFIG_OF)			+= devicetree.o
 ifeq ($(CONFIG_X86_64),y)
 	obj-$(CONFIG_AUDIT)		+= audit_64.o
 
-	obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
+	obj-$(CONFIG_GART_IOMMU)	+= amd_gart_64.o aperture_64.o
 	obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary_64.o tce_64.o
 	obj-$(CONFIG_AMD_IOMMU)		+= amd_iommu_init.o amd_iommu.o
 
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
new file mode 100644
index 000000000000..b117efd24f71
--- /dev/null
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -0,0 +1,898 @@
+/*
+ * Dynamic DMA mapping support for AMD Hammer.
+ *
+ * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
+ * This allows to use PCI devices that only support 32bit addresses on systems
+ * with more than 4GB.
+ *
+ * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification.
+ *
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ * Subject to the GNU General Public License v2 only.
+ */
+
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/agp_backend.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/topology.h>
+#include <linux/interrupt.h>
+#include <linux/bitmap.h>
+#include <linux/kdebug.h>
+#include <linux/scatterlist.h>
+#include <linux/iommu-helper.h>
+#include <linux/syscore_ops.h>
+#include <linux/io.h>
+#include <linux/gfp.h>
+#include <asm/atomic.h>
+#include <asm/mtrr.h>
+#include <asm/pgtable.h>
+#include <asm/proto.h>
+#include <asm/iommu.h>
+#include <asm/gart.h>
+#include <asm/cacheflush.h>
+#include <asm/swiotlb.h>
+#include <asm/dma.h>
+#include <asm/amd_nb.h>
+#include <asm/x86_init.h>
+#include <asm/iommu_table.h>
+
+static unsigned long iommu_bus_base;	/* GART remapping area (physical) */
+static unsigned long iommu_size;	/* size of remapping area bytes */
+static unsigned long iommu_pages;	/* .. and in pages */
+
+static u32 *iommu_gatt_base;		/* Remapping table */
+
+static dma_addr_t bad_dma_addr;
+
+/*
+ * If this is disabled the IOMMU will use an optimized flushing strategy
+ * of only flushing when an mapping is reused. With it true the GART is
+ * flushed for every mapping. Problem is that doing the lazy flush seems
+ * to trigger bugs with some popular PCI cards, in particular 3ware (but
+ * has been also also seen with Qlogic at least).
+ */
+static int iommu_fullflush = 1;
+
+/* Allocation bitmap for the remapping area: */
+static DEFINE_SPINLOCK(iommu_bitmap_lock);
+/* Guarded by iommu_bitmap_lock: */
+static unsigned long *iommu_gart_bitmap;
+
+static u32 gart_unmapped_entry;
+
+#define GPTE_VALID    1
+#define GPTE_COHERENT 2
+#define GPTE_ENCODE(x) \
+	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
+#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
+
+#define EMERGENCY_PAGES 32 /* = 128KB */
+
+#ifdef CONFIG_AGP
+#define AGPEXTERN extern
+#else
+#define AGPEXTERN
+#endif
+
+/* GART can only remap to physical addresses < 1TB */
+#define GART_MAX_PHYS_ADDR	(1ULL << 40)
+
+/* backdoor interface to AGP driver */
+AGPEXTERN int agp_memory_reserved;
+AGPEXTERN __u32 *agp_gatt_table;
+
+static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
+static bool need_flush;		/* global flush state. set for each gart wrap */
+
+static unsigned long alloc_iommu(struct device *dev, int size,
+				 unsigned long align_mask)
+{
+	unsigned long offset, flags;
+	unsigned long boundary_size;
+	unsigned long base_index;
+
+	base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
+			   PAGE_SIZE) >> PAGE_SHIFT;
+	boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1,
+			      PAGE_SIZE) >> PAGE_SHIFT;
+
+	spin_lock_irqsave(&iommu_bitmap_lock, flags);
+	offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
+				  size, base_index, boundary_size, align_mask);
+	if (offset == -1) {
+		need_flush = true;
+		offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
+					  size, base_index, boundary_size,
+					  align_mask);
+	}
+	if (offset != -1) {
+		next_bit = offset+size;
+		if (next_bit >= iommu_pages) {
+			next_bit = 0;
+			need_flush = true;
+		}
+	}
+	if (iommu_fullflush)
+		need_flush = true;
+	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
+
+	return offset;
+}
+
+static void free_iommu(unsigned long offset, int size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&iommu_bitmap_lock, flags);
+	bitmap_clear(iommu_gart_bitmap, offset, size);
+	if (offset >= next_bit)
+		next_bit = offset + size;
+	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
+}
+
+/*
+ * Use global flush state to avoid races with multiple flushers.
+ */
+static void flush_gart(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&iommu_bitmap_lock, flags);
+	if (need_flush) {
+		amd_flush_garts();
+		need_flush = false;
+	}
+	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
+}
+
+#ifdef CONFIG_IOMMU_LEAK
+/* Debugging aid for drivers that don't free their IOMMU tables */
+static int leak_trace;
+static int iommu_leak_pages = 20;
+
+static void dump_leak(void)
+{
+	static int dump;
+
+	if (dump)
+		return;
+	dump = 1;
+
+	show_stack(NULL, NULL);
+	debug_dma_dump_mappings(NULL);
+}
+#endif
+
+static void iommu_full(struct device *dev, size_t size, int dir)
+{
+	/*
+	 * Ran out of IOMMU space for this operation. This is very bad.
+	 * Unfortunately the drivers cannot handle this operation properly.
+	 * Return some non mapped prereserved space in the aperture and
+	 * let the Northbridge deal with it. This will result in garbage
+	 * in the IO operation. When the size exceeds the prereserved space
+	 * memory corruption will occur or random memory will be DMAed
+	 * out. Hopefully no network devices use single mappings that big.
+	 */
+
+	dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size);
+
+	if (size > PAGE_SIZE*EMERGENCY_PAGES) {
+		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
+			panic("PCI-DMA: Memory would be corrupted\n");
+		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
+			panic(KERN_ERR
+				"PCI-DMA: Random memory would be DMAed\n");
+	}
+#ifdef CONFIG_IOMMU_LEAK
+	dump_leak();
+#endif
+}
+
+static inline int
+need_iommu(struct device *dev, unsigned long addr, size_t size)
+{
+	return force_iommu || !dma_capable(dev, addr, size);
+}
+
+static inline int
+nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
+{
+	return !dma_capable(dev, addr, size);
+}
+
+/* Map a single continuous physical area into the IOMMU.
+ * Caller needs to check if the iommu is needed and flush.
+ */
+static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
+				size_t size, int dir, unsigned long align_mask)
+{
+	unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE);
+	unsigned long iommu_page;
+	int i;
+
+	if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR))
+		return bad_dma_addr;
+
+	iommu_page = alloc_iommu(dev, npages, align_mask);
+	if (iommu_page == -1) {
+		if (!nonforced_iommu(dev, phys_mem, size))
+			return phys_mem;
+		if (panic_on_overflow)
+			panic("dma_map_area overflow %lu bytes\n", size);
+		iommu_full(dev, size, dir);
+		return bad_dma_addr;
+	}
+
+	for (i = 0; i < npages; i++) {
+		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
+		phys_mem += PAGE_SIZE;
+	}
+	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
+}
+
+/* Map a single area into the IOMMU */
+static dma_addr_t gart_map_page(struct device *dev, struct page *page,
+				unsigned long offset, size_t size,
+				enum dma_data_direction dir,
+				struct dma_attrs *attrs)
+{
+	unsigned long bus;
+	phys_addr_t paddr = page_to_phys(page) + offset;
+
+	if (!dev)
+		dev = &x86_dma_fallback_dev;
+
+	if (!need_iommu(dev, paddr, size))
+		return paddr;
+
+	bus = dma_map_area(dev, paddr, size, dir, 0);
+	flush_gart();
+
+	return bus;
+}
+
+/*
+ * Free a DMA mapping.
+ */
+static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
+			    size_t size, enum dma_data_direction dir,
+			    struct dma_attrs *attrs)
+{
+	unsigned long iommu_page;
+	int npages;
+	int i;
+
+	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
+	    dma_addr >= iommu_bus_base + iommu_size)
+		return;
+
+	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
+	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
+	for (i = 0; i < npages; i++) {
+		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
+	}
+	free_iommu(iommu_page, npages);
+}
+
+/*
+ * Wrapper for pci_unmap_single working with scatterlists.
+ */
+static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+			  enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		if (!s->dma_length || !s->length)
+			break;
+		gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL);
+	}
+}
+
+/* Fallback for dma_map_sg in case of overflow */
+static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
+			       int nents, int dir)
+{
+	struct scatterlist *s;
+	int i;
+
+#ifdef CONFIG_IOMMU_DEBUG
+	pr_debug("dma_map_sg overflow\n");
+#endif
+
+	for_each_sg(sg, s, nents, i) {
+		unsigned long addr = sg_phys(s);
+
+		if (nonforced_iommu(dev, addr, s->length)) {
+			addr = dma_map_area(dev, addr, s->length, dir, 0);
+			if (addr == bad_dma_addr) {
+				if (i > 0)
+					gart_unmap_sg(dev, sg, i, dir, NULL);
+				nents = 0;
+				sg[0].dma_length = 0;
+				break;
+			}
+		}
+		s->dma_address = addr;
+		s->dma_length = s->length;
+	}
+	flush_gart();
+
+	return nents;
+}
+
+/* Map multiple scatterlist entries continuous into the first. */
+static int __dma_map_cont(struct device *dev, struct scatterlist *start,
+			  int nelems, struct scatterlist *sout,
+			  unsigned long pages)
+{
+	unsigned long iommu_start = alloc_iommu(dev, pages, 0);
+	unsigned long iommu_page = iommu_start;
+	struct scatterlist *s;
+	int i;
+
+	if (iommu_start == -1)
+		return -1;
+
+	for_each_sg(start, s, nelems, i) {
+		unsigned long pages, addr;
+		unsigned long phys_addr = s->dma_address;
+
+		BUG_ON(s != start && s->offset);
+		if (s == start) {
+			sout->dma_address = iommu_bus_base;
+			sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
+			sout->dma_length = s->length;
+		} else {
+			sout->dma_length += s->length;
+		}
+
+		addr = phys_addr;
+		pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
+		while (pages--) {
+			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
+			addr += PAGE_SIZE;
+			iommu_page++;
+		}
+	}
+	BUG_ON(iommu_page - iommu_start != pages);
+
+	return 0;
+}
+
+static inline int
+dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
+	     struct scatterlist *sout, unsigned long pages, int need)
+{
+	if (!need) {
+		BUG_ON(nelems != 1);
+		sout->dma_address = start->dma_address;
+		sout->dma_length = start->length;
+		return 0;
+	}
+	return __dma_map_cont(dev, start, nelems, sout, pages);
+}
+
+/*
+ * DMA map all entries in a scatterlist.
+ * Merge chunks that have page aligned sizes into a continuous mapping.
+ */
+static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+		       enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	struct scatterlist *s, *ps, *start_sg, *sgmap;
+	int need = 0, nextneed, i, out, start;
+	unsigned long pages = 0;
+	unsigned int seg_size;
+	unsigned int max_seg_size;
+
+	if (nents == 0)
+		return 0;
+
+	if (!dev)
+		dev = &x86_dma_fallback_dev;
+
+	out		= 0;
+	start		= 0;
+	start_sg	= sg;
+	sgmap		= sg;
+	seg_size	= 0;
+	max_seg_size	= dma_get_max_seg_size(dev);
+	ps		= NULL; /* shut up gcc */
+
+	for_each_sg(sg, s, nents, i) {
+		dma_addr_t addr = sg_phys(s);
+
+		s->dma_address = addr;
+		BUG_ON(s->length == 0);
+
+		nextneed = need_iommu(dev, addr, s->length);
+
+		/* Handle the previous not yet processed entries */
+		if (i > start) {
+			/*
+			 * Can only merge when the last chunk ends on a
+			 * page boundary and the new one doesn't have an
+			 * offset.
+			 */
+			if (!iommu_merge || !nextneed || !need || s->offset ||
+			    (s->length + seg_size > max_seg_size) ||
+			    (ps->offset + ps->length) % PAGE_SIZE) {
+				if (dma_map_cont(dev, start_sg, i - start,
+						 sgmap, pages, need) < 0)
+					goto error;
+				out++;
+
+				seg_size	= 0;
+				sgmap		= sg_next(sgmap);
+				pages		= 0;
+				start		= i;
+				start_sg	= s;
+			}
+		}
+
+		seg_size += s->length;
+		need = nextneed;
+		pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE);
+		ps = s;
+	}
+	if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
+		goto error;
+	out++;
+	flush_gart();
+	if (out < nents) {
+		sgmap = sg_next(sgmap);
+		sgmap->dma_length = 0;
+	}
+	return out;
+
+error:
+	flush_gart();
+	gart_unmap_sg(dev, sg, out, dir, NULL);
+
+	/* When it was forced or merged try again in a dumb way */
+	if (force_iommu || iommu_merge) {
+		out = dma_map_sg_nonforce(dev, sg, nents, dir);
+		if (out > 0)
+			return out;
+	}
+	if (panic_on_overflow)
+		panic("dma_map_sg: overflow on %lu pages\n", pages);
+
+	iommu_full(dev, pages << PAGE_SHIFT, dir);
+	for_each_sg(sg, s, nents, i)
+		s->dma_address = bad_dma_addr;
+	return 0;
+}
+
+/* allocate and map a coherent mapping */
+static void *
+gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
+		    gfp_t flag)
+{
+	dma_addr_t paddr;
+	unsigned long align_mask;
+	struct page *page;
+
+	if (force_iommu && !(flag & GFP_DMA)) {
+		flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+		page = alloc_pages(flag | __GFP_ZERO, get_order(size));
+		if (!page)
+			return NULL;
+
+		align_mask = (1UL << get_order(size)) - 1;
+		paddr = dma_map_area(dev, page_to_phys(page), size,
+				     DMA_BIDIRECTIONAL, align_mask);
+
+		flush_gart();
+		if (paddr != bad_dma_addr) {
+			*dma_addr = paddr;
+			return page_address(page);
+		}
+		__free_pages(page, get_order(size));
+	} else
+		return dma_generic_alloc_coherent(dev, size, dma_addr, flag);
+
+	return NULL;
+}
+
+/* free a coherent mapping */
+static void
+gart_free_coherent(struct device *dev, size_t size, void *vaddr,
+		   dma_addr_t dma_addr)
+{
+	gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
+	free_pages((unsigned long)vaddr, get_order(size));
+}
+
+static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return (dma_addr == bad_dma_addr);
+}
+
+static int no_agp;
+
+static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
+{
+	unsigned long a;
+
+	if (!iommu_size) {
+		iommu_size = aper_size;
+		if (!no_agp)
+			iommu_size /= 2;
+	}
+
+	a = aper + iommu_size;
+	iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
+
+	if (iommu_size < 64*1024*1024) {
+		pr_warning(
+			"PCI-DMA: Warning: Small IOMMU %luMB."
+			" Consider increasing the AGP aperture in BIOS\n",
+				iommu_size >> 20);
+	}
+
+	return iommu_size;
+}
+
+static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
+{
+	unsigned aper_size = 0, aper_base_32, aper_order;
+	u64 aper_base;
+
+	pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32);
+	pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order);
+	aper_order = (aper_order >> 1) & 7;
+
+	aper_base = aper_base_32 & 0x7fff;
+	aper_base <<= 25;
+
+	aper_size = (32 * 1024 * 1024) << aper_order;
+	if (aper_base + aper_size > 0x100000000UL || !aper_size)
+		aper_base = 0;
+
+	*size = aper_size;
+	return aper_base;
+}
+
+static void enable_gart_translations(void)
+{
+	int i;
+
+	if (!amd_nb_has_feature(AMD_NB_GART))
+		return;
+
+	for (i = 0; i < amd_nb_num(); i++) {
+		struct pci_dev *dev = node_to_amd_nb(i)->misc;
+
+		enable_gart_translation(dev, __pa(agp_gatt_table));
+	}
+
+	/* Flush the GART-TLB to remove stale entries */
+	amd_flush_garts();
+}
+
+/*
+ * If fix_up_north_bridges is set, the north bridges have to be fixed up on
+ * resume in the same way as they are handled in gart_iommu_hole_init().
+ */
+static bool fix_up_north_bridges;
+static u32 aperture_order;
+static u32 aperture_alloc;
+
+void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
+{
+	fix_up_north_bridges = true;
+	aperture_order = aper_order;
+	aperture_alloc = aper_alloc;
+}
+
+static void gart_fixup_northbridges(void)
+{
+	int i;
+
+	if (!fix_up_north_bridges)
+		return;
+
+	if (!amd_nb_has_feature(AMD_NB_GART))
+		return;
+
+	pr_info("PCI-DMA: Restoring GART aperture settings\n");
+
+	for (i = 0; i < amd_nb_num(); i++) {
+		struct pci_dev *dev = node_to_amd_nb(i)->misc;
+
+		/*
+		 * Don't enable translations just yet.  That is the next
+		 * step.  Restore the pre-suspend aperture settings.
+		 */
+		gart_set_size_and_enable(dev, aperture_order);
+		pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25);
+	}
+}
+
+static void gart_resume(void)
+{
+	pr_info("PCI-DMA: Resuming GART IOMMU\n");
+
+	gart_fixup_northbridges();
+
+	enable_gart_translations();
+}
+
+static struct syscore_ops gart_syscore_ops = {
+	.resume		= gart_resume,
+
+};
+
+/*
+ * Private Northbridge GATT initialization in case we cannot use the
+ * AGP driver for some reason.
+ */
+static __init int init_amd_gatt(struct agp_kern_info *info)
+{
+	unsigned aper_size, gatt_size, new_aper_size;
+	unsigned aper_base, new_aper_base;
+	struct pci_dev *dev;
+	void *gatt;
+	int i;
+
+	pr_info("PCI-DMA: Disabling AGP.\n");
+
+	aper_size = aper_base = info->aper_size = 0;
+	dev = NULL;
+	for (i = 0; i < amd_nb_num(); i++) {
+		dev = node_to_amd_nb(i)->misc;
+		new_aper_base = read_aperture(dev, &new_aper_size);
+		if (!new_aper_base)
+			goto nommu;
+
+		if (!aper_base) {
+			aper_size = new_aper_size;
+			aper_base = new_aper_base;
+		}
+		if (aper_size != new_aper_size || aper_base != new_aper_base)
+			goto nommu;
+	}
+	if (!aper_base)
+		goto nommu;
+
+	info->aper_base = aper_base;
+	info->aper_size = aper_size >> 20;
+
+	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
+	gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					get_order(gatt_size));
+	if (!gatt)
+		panic("Cannot allocate GATT table");
+	if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
+		panic("Could not set GART PTEs to uncacheable pages");
+
+	agp_gatt_table = gatt;
+
+	register_syscore_ops(&gart_syscore_ops);
+
+	flush_gart();
+
+	pr_info("PCI-DMA: aperture base @ %x size %u KB\n",
+	       aper_base, aper_size>>10);
+
+	return 0;
+
+ nommu:
+	/* Should not happen anymore */
+	pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n"
+	       "falling back to iommu=soft.\n");
+	return -1;
+}
+
+static struct dma_map_ops gart_dma_ops = {
+	.map_sg				= gart_map_sg,
+	.unmap_sg			= gart_unmap_sg,
+	.map_page			= gart_map_page,
+	.unmap_page			= gart_unmap_page,
+	.alloc_coherent			= gart_alloc_coherent,
+	.free_coherent			= gart_free_coherent,
+	.mapping_error			= gart_mapping_error,
+};
+
+static void gart_iommu_shutdown(void)
+{
+	struct pci_dev *dev;
+	int i;
+
+	/* don't shutdown it if there is AGP installed */
+	if (!no_agp)
+		return;
+
+	if (!amd_nb_has_feature(AMD_NB_GART))
+		return;
+
+	for (i = 0; i < amd_nb_num(); i++) {
+		u32 ctl;
+
+		dev = node_to_amd_nb(i)->misc;
+		pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
+
+		ctl &= ~GARTEN;
+
+		pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
+	}
+}
+
+int __init gart_iommu_init(void)
+{
+	struct agp_kern_info info;
+	unsigned long iommu_start;
+	unsigned long aper_base, aper_size;
+	unsigned long start_pfn, end_pfn;
+	unsigned long scratch;
+	long i;
+
+	if (!amd_nb_has_feature(AMD_NB_GART))
+		return 0;
+
+#ifndef CONFIG_AGP_AMD64
+	no_agp = 1;
+#else
+	/* Makefile puts PCI initialization via subsys_initcall first. */
+	/* Add other AMD AGP bridge drivers here */
+	no_agp = no_agp ||
+		(agp_amd64_init() < 0) ||
+		(agp_copy_info(agp_bridge, &info) < 0);
+#endif
+
+	if (no_iommu ||
+	    (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
+	    !gart_iommu_aperture ||
+	    (no_agp && init_amd_gatt(&info) < 0)) {
+		if (max_pfn > MAX_DMA32_PFN) {
+			pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
+			pr_warning("falling back to iommu=soft.\n");
+		}
+		return 0;
+	}
+
+	/* need to map that range */
+	aper_size	= info.aper_size << 20;
+	aper_base	= info.aper_base;
+	end_pfn		= (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
+
+	if (end_pfn > max_low_pfn_mapped) {
+		start_pfn = (aper_base>>PAGE_SHIFT);
+		init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+	}
+
+	pr_info("PCI-DMA: using GART IOMMU.\n");
+	iommu_size = check_iommu_size(info.aper_base, aper_size);
+	iommu_pages = iommu_size >> PAGE_SHIFT;
+
+	iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+						      get_order(iommu_pages/8));
+	if (!iommu_gart_bitmap)
+		panic("Cannot allocate iommu bitmap\n");
+
+#ifdef CONFIG_IOMMU_LEAK
+	if (leak_trace) {
+		int ret;
+
+		ret = dma_debug_resize_entries(iommu_pages);
+		if (ret)
+			pr_debug("PCI-DMA: Cannot trace all the entries\n");
+	}
+#endif
+
+	/*
+	 * Out of IOMMU space handling.
+	 * Reserve some invalid pages at the beginning of the GART.
+	 */
+	bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
+
+	pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
+	       iommu_size >> 20);
+
+	agp_memory_reserved	= iommu_size;
+	iommu_start		= aper_size - iommu_size;
+	iommu_bus_base		= info.aper_base + iommu_start;
+	bad_dma_addr		= iommu_bus_base;
+	iommu_gatt_base		= agp_gatt_table + (iommu_start>>PAGE_SHIFT);
+
+	/*
+	 * Unmap the IOMMU part of the GART. The alias of the page is
+	 * always mapped with cache enabled and there is no full cache
+	 * coherency across the GART remapping. The unmapping avoids
+	 * automatic prefetches from the CPU allocating cache lines in
+	 * there. All CPU accesses are done via the direct mapping to
+	 * the backing memory. The GART address is only used by PCI
+	 * devices.
+	 */
+	set_memory_np((unsigned long)__va(iommu_bus_base),
+				iommu_size >> PAGE_SHIFT);
+	/*
+	 * Tricky. The GART table remaps the physical memory range,
+	 * so the CPU wont notice potential aliases and if the memory
+	 * is remapped to UC later on, we might surprise the PCI devices
+	 * with a stray writeout of a cacheline. So play it sure and
+	 * do an explicit, full-scale wbinvd() _after_ having marked all
+	 * the pages as Not-Present:
+	 */
+	wbinvd();
+
+	/*
+	 * Now all caches are flushed and we can safely enable
+	 * GART hardware.  Doing it early leaves the possibility
+	 * of stale cache entries that can lead to GART PTE
+	 * errors.
+	 */
+	enable_gart_translations();
+
+	/*
+	 * Try to workaround a bug (thanks to BenH):
+	 * Set unmapped entries to a scratch page instead of 0.
+	 * Any prefetches that hit unmapped entries won't get an bus abort
+	 * then. (P2P bridge may be prefetching on DMA reads).
+	 */
+	scratch = get_zeroed_page(GFP_KERNEL);
+	if (!scratch)
+		panic("Cannot allocate iommu scratch page");
+	gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
+	for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
+		iommu_gatt_base[i] = gart_unmapped_entry;
+
+	flush_gart();
+	dma_ops = &gart_dma_ops;
+	x86_platform.iommu_shutdown = gart_iommu_shutdown;
+	swiotlb = 0;
+
+	return 0;
+}
+
+void __init gart_parse_options(char *p)
+{
+	int arg;
+
+#ifdef CONFIG_IOMMU_LEAK
+	if (!strncmp(p, "leak", 4)) {
+		leak_trace = 1;
+		p += 4;
+		if (*p == '=')
+			++p;
+		if (isdigit(*p) && get_option(&p, &arg))
+			iommu_leak_pages = arg;
+	}
+#endif
+	if (isdigit(*p) && get_option(&p, &arg))
+		iommu_size = arg;
+	if (!strncmp(p, "fullflush", 9))
+		iommu_fullflush = 1;
+	if (!strncmp(p, "nofullflush", 11))
+		iommu_fullflush = 0;
+	if (!strncmp(p, "noagp", 5))
+		no_agp = 1;
+	if (!strncmp(p, "noaperture", 10))
+		fix_aperture = 0;
+	/* duplicated from pci-dma.c */
+	if (!strncmp(p, "force", 5))
+		gart_iommu_aperture_allowed = 1;
+	if (!strncmp(p, "allowed", 7))
+		gart_iommu_aperture_allowed = 1;
+	if (!strncmp(p, "memaper", 7)) {
+		fallback_aper_force = 1;
+		p += 7;
+		if (*p == '=') {
+			++p;
+			if (get_option(&p, &arg))
+				fallback_aper_order = arg;
+		}
+	}
+}
+IOMMU_INIT_POST(gart_iommu_hole_init);
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
deleted file mode 100644
index b117efd24f71..000000000000
--- a/arch/x86/kernel/pci-gart_64.c
+++ /dev/null
@@ -1,898 +0,0 @@
-/*
- * Dynamic DMA mapping support for AMD Hammer.
- *
- * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
- * This allows to use PCI devices that only support 32bit addresses on systems
- * with more than 4GB.
- *
- * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification.
- *
- * Copyright 2002 Andi Kleen, SuSE Labs.
- * Subject to the GNU General Public License v2 only.
- */
-
-#include <linux/types.h>
-#include <linux/ctype.h>
-#include <linux/agp_backend.h>
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <linux/topology.h>
-#include <linux/interrupt.h>
-#include <linux/bitmap.h>
-#include <linux/kdebug.h>
-#include <linux/scatterlist.h>
-#include <linux/iommu-helper.h>
-#include <linux/syscore_ops.h>
-#include <linux/io.h>
-#include <linux/gfp.h>
-#include <asm/atomic.h>
-#include <asm/mtrr.h>
-#include <asm/pgtable.h>
-#include <asm/proto.h>
-#include <asm/iommu.h>
-#include <asm/gart.h>
-#include <asm/cacheflush.h>
-#include <asm/swiotlb.h>
-#include <asm/dma.h>
-#include <asm/amd_nb.h>
-#include <asm/x86_init.h>
-#include <asm/iommu_table.h>
-
-static unsigned long iommu_bus_base;	/* GART remapping area (physical) */
-static unsigned long iommu_size;	/* size of remapping area bytes */
-static unsigned long iommu_pages;	/* .. and in pages */
-
-static u32 *iommu_gatt_base;		/* Remapping table */
-
-static dma_addr_t bad_dma_addr;
-
-/*
- * If this is disabled the IOMMU will use an optimized flushing strategy
- * of only flushing when an mapping is reused. With it true the GART is
- * flushed for every mapping. Problem is that doing the lazy flush seems
- * to trigger bugs with some popular PCI cards, in particular 3ware (but
- * has been also also seen with Qlogic at least).
- */
-static int iommu_fullflush = 1;
-
-/* Allocation bitmap for the remapping area: */
-static DEFINE_SPINLOCK(iommu_bitmap_lock);
-/* Guarded by iommu_bitmap_lock: */
-static unsigned long *iommu_gart_bitmap;
-
-static u32 gart_unmapped_entry;
-
-#define GPTE_VALID    1
-#define GPTE_COHERENT 2
-#define GPTE_ENCODE(x) \
-	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
-#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
-
-#define EMERGENCY_PAGES 32 /* = 128KB */
-
-#ifdef CONFIG_AGP
-#define AGPEXTERN extern
-#else
-#define AGPEXTERN
-#endif
-
-/* GART can only remap to physical addresses < 1TB */
-#define GART_MAX_PHYS_ADDR	(1ULL << 40)
-
-/* backdoor interface to AGP driver */
-AGPEXTERN int agp_memory_reserved;
-AGPEXTERN __u32 *agp_gatt_table;
-
-static unsigned long next_bit;  /* protected by iommu_bitmap_lock */
-static bool need_flush;		/* global flush state. set for each gart wrap */
-
-static unsigned long alloc_iommu(struct device *dev, int size,
-				 unsigned long align_mask)
-{
-	unsigned long offset, flags;
-	unsigned long boundary_size;
-	unsigned long base_index;
-
-	base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev),
-			   PAGE_SIZE) >> PAGE_SHIFT;
-	boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1,
-			      PAGE_SIZE) >> PAGE_SHIFT;
-
-	spin_lock_irqsave(&iommu_bitmap_lock, flags);
-	offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
-				  size, base_index, boundary_size, align_mask);
-	if (offset == -1) {
-		need_flush = true;
-		offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
-					  size, base_index, boundary_size,
-					  align_mask);
-	}
-	if (offset != -1) {
-		next_bit = offset+size;
-		if (next_bit >= iommu_pages) {
-			next_bit = 0;
-			need_flush = true;
-		}
-	}
-	if (iommu_fullflush)
-		need_flush = true;
-	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
-
-	return offset;
-}
-
-static void free_iommu(unsigned long offset, int size)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&iommu_bitmap_lock, flags);
-	bitmap_clear(iommu_gart_bitmap, offset, size);
-	if (offset >= next_bit)
-		next_bit = offset + size;
-	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
-}
-
-/*
- * Use global flush state to avoid races with multiple flushers.
- */
-static void flush_gart(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&iommu_bitmap_lock, flags);
-	if (need_flush) {
-		amd_flush_garts();
-		need_flush = false;
-	}
-	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
-}
-
-#ifdef CONFIG_IOMMU_LEAK
-/* Debugging aid for drivers that don't free their IOMMU tables */
-static int leak_trace;
-static int iommu_leak_pages = 20;
-
-static void dump_leak(void)
-{
-	static int dump;
-
-	if (dump)
-		return;
-	dump = 1;
-
-	show_stack(NULL, NULL);
-	debug_dma_dump_mappings(NULL);
-}
-#endif
-
-static void iommu_full(struct device *dev, size_t size, int dir)
-{
-	/*
-	 * Ran out of IOMMU space for this operation. This is very bad.
-	 * Unfortunately the drivers cannot handle this operation properly.
-	 * Return some non mapped prereserved space in the aperture and
-	 * let the Northbridge deal with it. This will result in garbage
-	 * in the IO operation. When the size exceeds the prereserved space
-	 * memory corruption will occur or random memory will be DMAed
-	 * out. Hopefully no network devices use single mappings that big.
-	 */
-
-	dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size);
-
-	if (size > PAGE_SIZE*EMERGENCY_PAGES) {
-		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
-			panic("PCI-DMA: Memory would be corrupted\n");
-		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
-			panic(KERN_ERR
-				"PCI-DMA: Random memory would be DMAed\n");
-	}
-#ifdef CONFIG_IOMMU_LEAK
-	dump_leak();
-#endif
-}
-
-static inline int
-need_iommu(struct device *dev, unsigned long addr, size_t size)
-{
-	return force_iommu || !dma_capable(dev, addr, size);
-}
-
-static inline int
-nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
-{
-	return !dma_capable(dev, addr, size);
-}
-
-/* Map a single continuous physical area into the IOMMU.
- * Caller needs to check if the iommu is needed and flush.
- */
-static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
-				size_t size, int dir, unsigned long align_mask)
-{
-	unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE);
-	unsigned long iommu_page;
-	int i;
-
-	if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR))
-		return bad_dma_addr;
-
-	iommu_page = alloc_iommu(dev, npages, align_mask);
-	if (iommu_page == -1) {
-		if (!nonforced_iommu(dev, phys_mem, size))
-			return phys_mem;
-		if (panic_on_overflow)
-			panic("dma_map_area overflow %lu bytes\n", size);
-		iommu_full(dev, size, dir);
-		return bad_dma_addr;
-	}
-
-	for (i = 0; i < npages; i++) {
-		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
-		phys_mem += PAGE_SIZE;
-	}
-	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
-}
-
-/* Map a single area into the IOMMU */
-static dma_addr_t gart_map_page(struct device *dev, struct page *page,
-				unsigned long offset, size_t size,
-				enum dma_data_direction dir,
-				struct dma_attrs *attrs)
-{
-	unsigned long bus;
-	phys_addr_t paddr = page_to_phys(page) + offset;
-
-	if (!dev)
-		dev = &x86_dma_fallback_dev;
-
-	if (!need_iommu(dev, paddr, size))
-		return paddr;
-
-	bus = dma_map_area(dev, paddr, size, dir, 0);
-	flush_gart();
-
-	return bus;
-}
-
-/*
- * Free a DMA mapping.
- */
-static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
-			    size_t size, enum dma_data_direction dir,
-			    struct dma_attrs *attrs)
-{
-	unsigned long iommu_page;
-	int npages;
-	int i;
-
-	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
-	    dma_addr >= iommu_bus_base + iommu_size)
-		return;
-
-	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
-	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
-	for (i = 0; i < npages; i++) {
-		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
-	}
-	free_iommu(iommu_page, npages);
-}
-
-/*
- * Wrapper for pci_unmap_single working with scatterlists.
- */
-static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
-			  enum dma_data_direction dir, struct dma_attrs *attrs)
-{
-	struct scatterlist *s;
-	int i;
-
-	for_each_sg(sg, s, nents, i) {
-		if (!s->dma_length || !s->length)
-			break;
-		gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL);
-	}
-}
-
-/* Fallback for dma_map_sg in case of overflow */
-static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
-			       int nents, int dir)
-{
-	struct scatterlist *s;
-	int i;
-
-#ifdef CONFIG_IOMMU_DEBUG
-	pr_debug("dma_map_sg overflow\n");
-#endif
-
-	for_each_sg(sg, s, nents, i) {
-		unsigned long addr = sg_phys(s);
-
-		if (nonforced_iommu(dev, addr, s->length)) {
-			addr = dma_map_area(dev, addr, s->length, dir, 0);
-			if (addr == bad_dma_addr) {
-				if (i > 0)
-					gart_unmap_sg(dev, sg, i, dir, NULL);
-				nents = 0;
-				sg[0].dma_length = 0;
-				break;
-			}
-		}
-		s->dma_address = addr;
-		s->dma_length = s->length;
-	}
-	flush_gart();
-
-	return nents;
-}
-
-/* Map multiple scatterlist entries continuous into the first. */
-static int __dma_map_cont(struct device *dev, struct scatterlist *start,
-			  int nelems, struct scatterlist *sout,
-			  unsigned long pages)
-{
-	unsigned long iommu_start = alloc_iommu(dev, pages, 0);
-	unsigned long iommu_page = iommu_start;
-	struct scatterlist *s;
-	int i;
-
-	if (iommu_start == -1)
-		return -1;
-
-	for_each_sg(start, s, nelems, i) {
-		unsigned long pages, addr;
-		unsigned long phys_addr = s->dma_address;
-
-		BUG_ON(s != start && s->offset);
-		if (s == start) {
-			sout->dma_address = iommu_bus_base;
-			sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
-			sout->dma_length = s->length;
-		} else {
-			sout->dma_length += s->length;
-		}
-
-		addr = phys_addr;
-		pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
-		while (pages--) {
-			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
-			addr += PAGE_SIZE;
-			iommu_page++;
-		}
-	}
-	BUG_ON(iommu_page - iommu_start != pages);
-
-	return 0;
-}
-
-static inline int
-dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
-	     struct scatterlist *sout, unsigned long pages, int need)
-{
-	if (!need) {
-		BUG_ON(nelems != 1);
-		sout->dma_address = start->dma_address;
-		sout->dma_length = start->length;
-		return 0;
-	}
-	return __dma_map_cont(dev, start, nelems, sout, pages);
-}
-
-/*
- * DMA map all entries in a scatterlist.
- * Merge chunks that have page aligned sizes into a continuous mapping.
- */
-static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-		       enum dma_data_direction dir, struct dma_attrs *attrs)
-{
-	struct scatterlist *s, *ps, *start_sg, *sgmap;
-	int need = 0, nextneed, i, out, start;
-	unsigned long pages = 0;
-	unsigned int seg_size;
-	unsigned int max_seg_size;
-
-	if (nents == 0)
-		return 0;
-
-	if (!dev)
-		dev = &x86_dma_fallback_dev;
-
-	out		= 0;
-	start		= 0;
-	start_sg	= sg;
-	sgmap		= sg;
-	seg_size	= 0;
-	max_seg_size	= dma_get_max_seg_size(dev);
-	ps		= NULL; /* shut up gcc */
-
-	for_each_sg(sg, s, nents, i) {
-		dma_addr_t addr = sg_phys(s);
-
-		s->dma_address = addr;
-		BUG_ON(s->length == 0);
-
-		nextneed = need_iommu(dev, addr, s->length);
-
-		/* Handle the previous not yet processed entries */
-		if (i > start) {
-			/*
-			 * Can only merge when the last chunk ends on a
-			 * page boundary and the new one doesn't have an
-			 * offset.
-			 */
-			if (!iommu_merge || !nextneed || !need || s->offset ||
-			    (s->length + seg_size > max_seg_size) ||
-			    (ps->offset + ps->length) % PAGE_SIZE) {
-				if (dma_map_cont(dev, start_sg, i - start,
-						 sgmap, pages, need) < 0)
-					goto error;
-				out++;
-
-				seg_size	= 0;
-				sgmap		= sg_next(sgmap);
-				pages		= 0;
-				start		= i;
-				start_sg	= s;
-			}
-		}
-
-		seg_size += s->length;
-		need = nextneed;
-		pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE);
-		ps = s;
-	}
-	if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0)
-		goto error;
-	out++;
-	flush_gart();
-	if (out < nents) {
-		sgmap = sg_next(sgmap);
-		sgmap->dma_length = 0;
-	}
-	return out;
-
-error:
-	flush_gart();
-	gart_unmap_sg(dev, sg, out, dir, NULL);
-
-	/* When it was forced or merged try again in a dumb way */
-	if (force_iommu || iommu_merge) {
-		out = dma_map_sg_nonforce(dev, sg, nents, dir);
-		if (out > 0)
-			return out;
-	}
-	if (panic_on_overflow)
-		panic("dma_map_sg: overflow on %lu pages\n", pages);
-
-	iommu_full(dev, pages << PAGE_SHIFT, dir);
-	for_each_sg(sg, s, nents, i)
-		s->dma_address = bad_dma_addr;
-	return 0;
-}
-
-/* allocate and map a coherent mapping */
-static void *
-gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
-		    gfp_t flag)
-{
-	dma_addr_t paddr;
-	unsigned long align_mask;
-	struct page *page;
-
-	if (force_iommu && !(flag & GFP_DMA)) {
-		flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-		page = alloc_pages(flag | __GFP_ZERO, get_order(size));
-		if (!page)
-			return NULL;
-
-		align_mask = (1UL << get_order(size)) - 1;
-		paddr = dma_map_area(dev, page_to_phys(page), size,
-				     DMA_BIDIRECTIONAL, align_mask);
-
-		flush_gart();
-		if (paddr != bad_dma_addr) {
-			*dma_addr = paddr;
-			return page_address(page);
-		}
-		__free_pages(page, get_order(size));
-	} else
-		return dma_generic_alloc_coherent(dev, size, dma_addr, flag);
-
-	return NULL;
-}
-
-/* free a coherent mapping */
-static void
-gart_free_coherent(struct device *dev, size_t size, void *vaddr,
-		   dma_addr_t dma_addr)
-{
-	gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
-	free_pages((unsigned long)vaddr, get_order(size));
-}
-
-static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	return (dma_addr == bad_dma_addr);
-}
-
-static int no_agp;
-
-static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
-{
-	unsigned long a;
-
-	if (!iommu_size) {
-		iommu_size = aper_size;
-		if (!no_agp)
-			iommu_size /= 2;
-	}
-
-	a = aper + iommu_size;
-	iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
-
-	if (iommu_size < 64*1024*1024) {
-		pr_warning(
-			"PCI-DMA: Warning: Small IOMMU %luMB."
-			" Consider increasing the AGP aperture in BIOS\n",
-				iommu_size >> 20);
-	}
-
-	return iommu_size;
-}
-
-static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
-{
-	unsigned aper_size = 0, aper_base_32, aper_order;
-	u64 aper_base;
-
-	pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32);
-	pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order);
-	aper_order = (aper_order >> 1) & 7;
-
-	aper_base = aper_base_32 & 0x7fff;
-	aper_base <<= 25;
-
-	aper_size = (32 * 1024 * 1024) << aper_order;
-	if (aper_base + aper_size > 0x100000000UL || !aper_size)
-		aper_base = 0;
-
-	*size = aper_size;
-	return aper_base;
-}
-
-static void enable_gart_translations(void)
-{
-	int i;
-
-	if (!amd_nb_has_feature(AMD_NB_GART))
-		return;
-
-	for (i = 0; i < amd_nb_num(); i++) {
-		struct pci_dev *dev = node_to_amd_nb(i)->misc;
-
-		enable_gart_translation(dev, __pa(agp_gatt_table));
-	}
-
-	/* Flush the GART-TLB to remove stale entries */
-	amd_flush_garts();
-}
-
-/*
- * If fix_up_north_bridges is set, the north bridges have to be fixed up on
- * resume in the same way as they are handled in gart_iommu_hole_init().
- */
-static bool fix_up_north_bridges;
-static u32 aperture_order;
-static u32 aperture_alloc;
-
-void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
-{
-	fix_up_north_bridges = true;
-	aperture_order = aper_order;
-	aperture_alloc = aper_alloc;
-}
-
-static void gart_fixup_northbridges(void)
-{
-	int i;
-
-	if (!fix_up_north_bridges)
-		return;
-
-	if (!amd_nb_has_feature(AMD_NB_GART))
-		return;
-
-	pr_info("PCI-DMA: Restoring GART aperture settings\n");
-
-	for (i = 0; i < amd_nb_num(); i++) {
-		struct pci_dev *dev = node_to_amd_nb(i)->misc;
-
-		/*
-		 * Don't enable translations just yet.  That is the next
-		 * step.  Restore the pre-suspend aperture settings.
-		 */
-		gart_set_size_and_enable(dev, aperture_order);
-		pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25);
-	}
-}
-
-static void gart_resume(void)
-{
-	pr_info("PCI-DMA: Resuming GART IOMMU\n");
-
-	gart_fixup_northbridges();
-
-	enable_gart_translations();
-}
-
-static struct syscore_ops gart_syscore_ops = {
-	.resume		= gart_resume,
-
-};
-
-/*
- * Private Northbridge GATT initialization in case we cannot use the
- * AGP driver for some reason.
- */
-static __init int init_amd_gatt(struct agp_kern_info *info)
-{
-	unsigned aper_size, gatt_size, new_aper_size;
-	unsigned aper_base, new_aper_base;
-	struct pci_dev *dev;
-	void *gatt;
-	int i;
-
-	pr_info("PCI-DMA: Disabling AGP.\n");
-
-	aper_size = aper_base = info->aper_size = 0;
-	dev = NULL;
-	for (i = 0; i < amd_nb_num(); i++) {
-		dev = node_to_amd_nb(i)->misc;
-		new_aper_base = read_aperture(dev, &new_aper_size);
-		if (!new_aper_base)
-			goto nommu;
-
-		if (!aper_base) {
-			aper_size = new_aper_size;
-			aper_base = new_aper_base;
-		}
-		if (aper_size != new_aper_size || aper_base != new_aper_base)
-			goto nommu;
-	}
-	if (!aper_base)
-		goto nommu;
-
-	info->aper_base = aper_base;
-	info->aper_size = aper_size >> 20;
-
-	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
-	gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-					get_order(gatt_size));
-	if (!gatt)
-		panic("Cannot allocate GATT table");
-	if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
-		panic("Could not set GART PTEs to uncacheable pages");
-
-	agp_gatt_table = gatt;
-
-	register_syscore_ops(&gart_syscore_ops);
-
-	flush_gart();
-
-	pr_info("PCI-DMA: aperture base @ %x size %u KB\n",
-	       aper_base, aper_size>>10);
-
-	return 0;
-
- nommu:
-	/* Should not happen anymore */
-	pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n"
-	       "falling back to iommu=soft.\n");
-	return -1;
-}
-
-static struct dma_map_ops gart_dma_ops = {
-	.map_sg				= gart_map_sg,
-	.unmap_sg			= gart_unmap_sg,
-	.map_page			= gart_map_page,
-	.unmap_page			= gart_unmap_page,
-	.alloc_coherent			= gart_alloc_coherent,
-	.free_coherent			= gart_free_coherent,
-	.mapping_error			= gart_mapping_error,
-};
-
-static void gart_iommu_shutdown(void)
-{
-	struct pci_dev *dev;
-	int i;
-
-	/* don't shutdown it if there is AGP installed */
-	if (!no_agp)
-		return;
-
-	if (!amd_nb_has_feature(AMD_NB_GART))
-		return;
-
-	for (i = 0; i < amd_nb_num(); i++) {
-		u32 ctl;
-
-		dev = node_to_amd_nb(i)->misc;
-		pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
-
-		ctl &= ~GARTEN;
-
-		pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
-	}
-}
-
-int __init gart_iommu_init(void)
-{
-	struct agp_kern_info info;
-	unsigned long iommu_start;
-	unsigned long aper_base, aper_size;
-	unsigned long start_pfn, end_pfn;
-	unsigned long scratch;
-	long i;
-
-	if (!amd_nb_has_feature(AMD_NB_GART))
-		return 0;
-
-#ifndef CONFIG_AGP_AMD64
-	no_agp = 1;
-#else
-	/* Makefile puts PCI initialization via subsys_initcall first. */
-	/* Add other AMD AGP bridge drivers here */
-	no_agp = no_agp ||
-		(agp_amd64_init() < 0) ||
-		(agp_copy_info(agp_bridge, &info) < 0);
-#endif
-
-	if (no_iommu ||
-	    (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
-	    !gart_iommu_aperture ||
-	    (no_agp && init_amd_gatt(&info) < 0)) {
-		if (max_pfn > MAX_DMA32_PFN) {
-			pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
-			pr_warning("falling back to iommu=soft.\n");
-		}
-		return 0;
-	}
-
-	/* need to map that range */
-	aper_size	= info.aper_size << 20;
-	aper_base	= info.aper_base;
-	end_pfn		= (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
-
-	if (end_pfn > max_low_pfn_mapped) {
-		start_pfn = (aper_base>>PAGE_SHIFT);
-		init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
-	}
-
-	pr_info("PCI-DMA: using GART IOMMU.\n");
-	iommu_size = check_iommu_size(info.aper_base, aper_size);
-	iommu_pages = iommu_size >> PAGE_SHIFT;
-
-	iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-						      get_order(iommu_pages/8));
-	if (!iommu_gart_bitmap)
-		panic("Cannot allocate iommu bitmap\n");
-
-#ifdef CONFIG_IOMMU_LEAK
-	if (leak_trace) {
-		int ret;
-
-		ret = dma_debug_resize_entries(iommu_pages);
-		if (ret)
-			pr_debug("PCI-DMA: Cannot trace all the entries\n");
-	}
-#endif
-
-	/*
-	 * Out of IOMMU space handling.
-	 * Reserve some invalid pages at the beginning of the GART.
-	 */
-	bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
-
-	pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
-	       iommu_size >> 20);
-
-	agp_memory_reserved	= iommu_size;
-	iommu_start		= aper_size - iommu_size;
-	iommu_bus_base		= info.aper_base + iommu_start;
-	bad_dma_addr		= iommu_bus_base;
-	iommu_gatt_base		= agp_gatt_table + (iommu_start>>PAGE_SHIFT);
-
-	/*
-	 * Unmap the IOMMU part of the GART. The alias of the page is
-	 * always mapped with cache enabled and there is no full cache
-	 * coherency across the GART remapping. The unmapping avoids
-	 * automatic prefetches from the CPU allocating cache lines in
-	 * there. All CPU accesses are done via the direct mapping to
-	 * the backing memory. The GART address is only used by PCI
-	 * devices.
-	 */
-	set_memory_np((unsigned long)__va(iommu_bus_base),
-				iommu_size >> PAGE_SHIFT);
-	/*
-	 * Tricky. The GART table remaps the physical memory range,
-	 * so the CPU wont notice potential aliases and if the memory
-	 * is remapped to UC later on, we might surprise the PCI devices
-	 * with a stray writeout of a cacheline. So play it sure and
-	 * do an explicit, full-scale wbinvd() _after_ having marked all
-	 * the pages as Not-Present:
-	 */
-	wbinvd();
-
-	/*
-	 * Now all caches are flushed and we can safely enable
-	 * GART hardware.  Doing it early leaves the possibility
-	 * of stale cache entries that can lead to GART PTE
-	 * errors.
-	 */
-	enable_gart_translations();
-
-	/*
-	 * Try to workaround a bug (thanks to BenH):
-	 * Set unmapped entries to a scratch page instead of 0.
-	 * Any prefetches that hit unmapped entries won't get an bus abort
-	 * then. (P2P bridge may be prefetching on DMA reads).
-	 */
-	scratch = get_zeroed_page(GFP_KERNEL);
-	if (!scratch)
-		panic("Cannot allocate iommu scratch page");
-	gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
-	for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
-		iommu_gatt_base[i] = gart_unmapped_entry;
-
-	flush_gart();
-	dma_ops = &gart_dma_ops;
-	x86_platform.iommu_shutdown = gart_iommu_shutdown;
-	swiotlb = 0;
-
-	return 0;
-}
-
-void __init gart_parse_options(char *p)
-{
-	int arg;
-
-#ifdef CONFIG_IOMMU_LEAK
-	if (!strncmp(p, "leak", 4)) {
-		leak_trace = 1;
-		p += 4;
-		if (*p == '=')
-			++p;
-		if (isdigit(*p) && get_option(&p, &arg))
-			iommu_leak_pages = arg;
-	}
-#endif
-	if (isdigit(*p) && get_option(&p, &arg))
-		iommu_size = arg;
-	if (!strncmp(p, "fullflush", 9))
-		iommu_fullflush = 1;
-	if (!strncmp(p, "nofullflush", 11))
-		iommu_fullflush = 0;
-	if (!strncmp(p, "noagp", 5))
-		no_agp = 1;
-	if (!strncmp(p, "noaperture", 10))
-		fix_aperture = 0;
-	/* duplicated from pci-dma.c */
-	if (!strncmp(p, "force", 5))
-		gart_iommu_aperture_allowed = 1;
-	if (!strncmp(p, "allowed", 7))
-		gart_iommu_aperture_allowed = 1;
-	if (!strncmp(p, "memaper", 7)) {
-		fallback_aper_force = 1;
-		p += 7;
-		if (*p == '=') {
-			++p;
-			if (get_option(&p, &arg))
-				fallback_aper_order = arg;
-		}
-	}
-}
-IOMMU_INIT_POST(gart_iommu_hole_init);
-- 
cgit v1.2.3


From c54794d19e61472156e37263c074225574c80df1 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Tue, 28 Dec 2010 13:21:37 -0800
Subject: MIPS: Mask jump target in ftrace_dyn_arch_init_insns().

The current code is abusing the uasm interface by passing jump target
addresses with high bits set.  Mask the addresses to avoid annoying
messages at boot time.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Wu Zhangjin <wuzhangjin@gmail.com>
Patchwork: https://patchwork.linux-mips.org/patch/1922/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/ftrace.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 94ca2b018af7..feb8021a305f 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -23,6 +23,7 @@
 
 #define JAL 0x0c000000		/* jump & link: ip --> ra, jump to target */
 #define ADDR_MASK 0x03ffffff	/*  op_code|addr : 31...26|25 ....0 */
+#define JUMP_RANGE_MASK ((1UL << 28) - 1)
 
 #define INSN_NOP 0x00000000	/* nop */
 #define INSN_JAL(addr)	\
@@ -44,12 +45,12 @@ static inline void ftrace_dyn_arch_init_insns(void)
 
 	/* jal (ftrace_caller + 8), jump over the first two instruction */
 	buf = (u32 *)&insn_jal_ftrace_caller;
-	uasm_i_jal(&buf, (FTRACE_ADDR + 8));
+	uasm_i_jal(&buf, (FTRACE_ADDR + 8) & JUMP_RANGE_MASK);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	/* j ftrace_graph_caller */
 	buf = (u32 *)&insn_j_ftrace_graph_caller;
-	uasm_i_j(&buf, (unsigned long)ftrace_graph_caller);
+	uasm_i_j(&buf, (unsigned long)ftrace_graph_caller & JUMP_RANGE_MASK);
 #endif
 }
 
-- 
cgit v1.2.3


From 71271aab8cbdeb9612761db3230fe8dadb9a01c3 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 29 Mar 2011 10:50:38 +0200
Subject: MIPS: c-r4k: Fix GCC 4.6.0 build error

  CC      arch/mips/mm/c-r4k.o
arch/mips/mm/c-r4k.c: In function 'probe_scache':
arch/mips/mm/c-r4k.c:1078:6: error: variable 'tmp' set but not used [-Werror=unused-but-set-variable]
cc1: all warnings being treated as errors

Older GCC versions didn't warn about the unused variable tmp because it was
getting initialized.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/c-r4k.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index b4923a75cb4b..71bddf8f7d25 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1075,7 +1075,6 @@ static int __cpuinit probe_scache(void)
 	unsigned long flags, addr, begin, end, pow2;
 	unsigned int config = read_c0_config();
 	struct cpuinfo_mips *c = &current_cpu_data;
-	int tmp;
 
 	if (config & CONF_SC)
 		return 0;
@@ -1108,7 +1107,6 @@ static int __cpuinit probe_scache(void)
 
 	/* Now search for the wrap around point. */
 	pow2 = (128 * 1024);
-	tmp = 0;
 	for (addr = begin + (128 * 1024); addr < end; addr = begin + pow2) {
 		cache_op(Index_Load_Tag_SD, addr);
 		__asm__ __volatile__("nop; nop; nop; nop;"); /* hazard... */
-- 
cgit v1.2.3


From 4a9040f451c32cd62971ecda1cb5bc4aed444c78 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 29 Mar 2011 10:54:54 +0200
Subject: MIPS: tlbex: Fix GCC 4.6.0 build error

  CC      arch/mips/mm/tlbex.o
arch/mips/mm/tlbex.c: In function 'build_r4000_tlb_refill_handler':
arch/mips/mm/tlbex.c:1155:22: error: variable 'vmalloc_mode' set but not used [-Werror=unused-but-set-variable]
arch/mips/mm/tlbex.c:1154:28: error: variable 'htlb_info' set but not used [-Werror=unused-but-set-variable]
cc1: all warnings being treated as errors

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/tlbex.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 5ef294fbb6e7..f5734c2c8097 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -1151,8 +1151,8 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
 	struct uasm_reloc *r = relocs;
 	u32 *f;
 	unsigned int final_len;
-	struct mips_huge_tlb_info htlb_info;
-	enum vmalloc64_mode vmalloc_mode;
+	struct mips_huge_tlb_info htlb_info __maybe_unused;
+	enum vmalloc64_mode vmalloc_mode __maybe_unused;
 
 	memset(tlb_handler, 0, sizeof(tlb_handler));
 	memset(labels, 0, sizeof(labels));
-- 
cgit v1.2.3


From 6fd78fc1fa3ed1e70501c978c2d0bef94320252f Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 29 Mar 2011 11:00:44 +0200
Subject: MIPS: IP22: Fix GCC 4.6.0 build error

  CC      arch/mips/sgi-ip22/ip22-time.o
arch/mips/sgi-ip22/ip22-time.c: In function 'dosample':
arch/mips/sgi-ip22/ip22-time.c:35:10: error: variable 'lsb' set but not used [-Werror=unused-but-set-variable]
cc1: all warnings being treated as errors

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/sgi-ip22/ip22-time.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c
index 603fc91c1030..1a94c9894188 100644
--- a/arch/mips/sgi-ip22/ip22-time.c
+++ b/arch/mips/sgi-ip22/ip22-time.c
@@ -32,7 +32,7 @@
 static unsigned long dosample(void)
 {
 	u32 ct0, ct1;
-	u8 msb, lsb;
+	u8 msb;
 
 	/* Start the counter. */
 	sgint->tcword = (SGINT_TCWORD_CNT2 | SGINT_TCWORD_CALL |
@@ -46,7 +46,7 @@ static unsigned long dosample(void)
 	/* Latch and spin until top byte of counter2 is zero */
 	do {
 		writeb(SGINT_TCWORD_CNT2 | SGINT_TCWORD_CLAT, &sgint->tcword);
-		lsb = readb(&sgint->tcnt2);
+		(void) readb(&sgint->tcnt2);
 		msb = readb(&sgint->tcnt2);
 		ct1 = read_c0_count();
 	} while (msb);
-- 
cgit v1.2.3


From 3be1afc8f64742552325d9f03c2b96339e822f9e Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 29 Mar 2011 11:06:49 +0200
Subject: MIPS: IP22: Fix GCC 4.6.0 build error

  CC      arch/mips/sgi-ip22/ip22-platform.o
arch/mips/sgi-ip22/ip22-platform.c: In function 'sgiseeq_devinit':
arch/mips/sgi-ip22/ip22-platform.c:135:15: error: variable 'tmp' set but not used [-Werror=unused-but-set-variable]
cc1: all warnings being treated as errors

While at it rename the variable to pbdma for readability; there is a
local variable tmp of different type being used in two nested blocks.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/sgi-ip22/ip22-platform.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/sgi-ip22/ip22-platform.c b/arch/mips/sgi-ip22/ip22-platform.c
index deddbf0ebe5c..698904daf901 100644
--- a/arch/mips/sgi-ip22/ip22-platform.c
+++ b/arch/mips/sgi-ip22/ip22-platform.c
@@ -132,7 +132,7 @@ static struct platform_device eth1_device = {
  */
 static int __init sgiseeq_devinit(void)
 {
-	unsigned int tmp;
+	unsigned int pbdma __maybe_unused;
 	int res, i;
 
 	eth0_pd.hpc = hpc3c0;
@@ -151,7 +151,7 @@ static int __init sgiseeq_devinit(void)
 
 	/* Second HPC is missing? */
 	if (ip22_is_fullhouse() ||
-	    get_dbe(tmp, (unsigned int *)&hpc3c1->pbdma[1]))
+	    get_dbe(pbdma, (unsigned int *)&hpc3c1->pbdma[1]))
 		return 0;
 
 	sgimc->giopar |= SGIMC_GIOPAR_MASTEREXP1 | SGIMC_GIOPAR_EXP164 |
-- 
cgit v1.2.3


From af3a1f6f4813907e143f87030cde67a9971db533 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 29 Mar 2011 11:43:19 +0200
Subject: MIPS: Malta: Fix GCC 4.6.0 build error

  CC      arch/mips/mti-malta/malta-init.o
arch/mips/mti-malta/malta-init.c: In function 'prom_init':
arch/mips/mti-malta/malta-init.c:196:6: error: variable 'result' set but not used [-Werror=unused-but-set-variable]
cc1: all warnings being treated as errors

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mti-malta/malta-init.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c
index 414f0c99b196..31180c321a1a 100644
--- a/arch/mips/mti-malta/malta-init.c
+++ b/arch/mips/mti-malta/malta-init.c
@@ -193,8 +193,6 @@ extern struct plat_smp_ops msmtc_smp_ops;
 
 void __init prom_init(void)
 {
-	int result;
-
 	prom_argc = fw_arg0;
 	_prom_argv = (int *) fw_arg1;
 	_prom_envp = (int *) fw_arg2;
@@ -360,20 +358,14 @@ void __init prom_init(void)
 #ifdef CONFIG_SERIAL_8250_CONSOLE
 	console_config();
 #endif
-	/* Early detection of CMP support */
-	result = gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ);
-
 #ifdef CONFIG_MIPS_CMP
-	if (result)
+	/* Early detection of CMP support */
+	if (gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ))
 		register_smp_ops(&cmp_smp_ops);
+	else
 #endif
 #ifdef CONFIG_MIPS_MT_SMP
-#ifdef CONFIG_MIPS_CMP
-	if (!result)
 		register_smp_ops(&vsmp_smp_ops);
-#else
-	register_smp_ops(&vsmp_smp_ops);
-#endif
 #endif
 #ifdef CONFIG_MIPS_MT_SMTC
 	register_smp_ops(&msmtc_smp_ops);
-- 
cgit v1.2.3


From 6be63bbbdab66b9185dc6f67c8b1bacb6f37f946 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 29 Mar 2011 11:48:22 +0200
Subject: MIPS: Malta: Fix GCC 4.6.0 build error

  CC      arch/mips/mti-malta/malta-int.o
arch/mips/mti-malta/malta-int.c: In function 'mips_pcibios_iack':
arch/mips/mti-malta/malta-int.c:59:6: error: variable 'dummy' set but not used [-Werror=unused-but-set-variable]
cc1: all warnings being treated as errors

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mti-malta/malta-int.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index 9027061f0ead..e85c977328da 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -56,7 +56,6 @@ static DEFINE_RAW_SPINLOCK(mips_irq_lock);
 static inline int mips_pcibios_iack(void)
 {
 	int irq;
-	u32 dummy;
 
 	/*
 	 * Determine highest priority pending interrupt by performing
@@ -83,7 +82,7 @@ static inline int mips_pcibios_iack(void)
 		BONITO_PCIMAP_CFG = 0x20000;
 
 		/* Flush Bonito register block */
-		dummy = BONITO_PCIMAP_CFG;
+		(void) BONITO_PCIMAP_CFG;
 		iob();    /* sync */
 
 		irq = __raw_readl((u32 *)_pcictrl_bonito_pcicfg);
-- 
cgit v1.2.3


From 11b9d0eca559d087f3d49282033f2865cceacedd Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 29 Mar 2011 11:57:11 +0200
Subject: MIPS: SNI: Fix GCC 4.6.0 build error

  CC      arch/mips/sni/time.o
arch/mips/sni/time.c: In function 'dosample':
arch/mips/sni/time.c:98:19: error: variable 'lsb' set but not used [-Werror=unused-but-set-variable]
cc1: all warnings being treated as errors

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/sni/time.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index c76151b56568..0904d4d30cb3 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -95,7 +95,7 @@ static void __init sni_a20r_timer_setup(void)
 static __init unsigned long dosample(void)
 {
 	u32 ct0, ct1;
-	volatile u8 msb, lsb;
+	volatile u8 msb;
 
 	/* Start the counter. */
 	outb_p(0x34, 0x43);
@@ -108,7 +108,7 @@ static __init unsigned long dosample(void)
 	/* Latch and spin until top byte of counter0 is zero */
 	do {
 		outb(0x00, 0x43);
-		lsb = inb(0x40);
+		(void) inb(0x40);
 		msb = inb(0x40);
 		ct1 = read_c0_count();
 	} while (msb);
-- 
cgit v1.2.3


From 84d3b0dbac103fc1b3aff1e71cb723b5456a849c Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 29 Mar 2011 12:09:51 +0200
Subject: MIPS: Jazz: Fix GCC 4.6.0 build error

  CC      arch/mips/jazz/jazzdma.o
arch/mips/jazz/jazzdma.c: In function 'vdma_remap':
arch/mips/jazz/jazzdma.c:214:20: error: variable 'npages' set but not used [-Werror=unused-but-set-variable]
cc1: all warnings being treated as errors

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/jazz/jazzdma.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index 9ce9f64cb76f..2d8e447cb828 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -211,7 +211,7 @@ EXPORT_SYMBOL(vdma_free);
  */
 int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size)
 {
-	int first, pages, npages;
+	int first, pages;
 
 	if (laddr > 0xffffff) {
 		if (vdma_debug)
@@ -228,8 +228,7 @@ int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size)
 		return -EINVAL;	/* invalid physical address */
 	}
 
-	npages = pages =
-	    (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1;
+	pages = (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1;
 	first = laddr >> 12;
 	if (vdma_debug)
 		printk("vdma_remap: first=%x, pages=%x\n", first, pages);
-- 
cgit v1.2.3


From c87444af6fc853dd5571a830efff7e07c46a544e Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 29 Mar 2011 12:32:55 +0200
Subject: MIPS: Loongson: Fix GCC 2.6.0 build error.

  CC      arch/mips/loongson/common/env.o
arch/mips/loongson/common/env.c: In function 'prom_init_env':
arch/mips/loongson/common/env.c:50:12: error: variable 'ret' set but not used [-Werror=unused-but-set-variable]
arch/mips/loongson/common/env.c:51:12: error: variable 'ret' set but not used [-Werror=unused-but-set-variable]
arch/mips/loongson/common/env.c:52:12: error: variable 'ret' set but not used [-Werror=unused-but-set-variable]
arch/mips/loongson/common/env.c:53:12: error: variable 'ret' set but not used [-Werror=unused-but-set-variable]
cc1: all warnings being treated as errors

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/loongson/common/env.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson/common/env.c
index 11b193f848f8..d93830ad6113 100644
--- a/arch/mips/loongson/common/env.c
+++ b/arch/mips/loongson/common/env.c
@@ -29,9 +29,10 @@ unsigned long memsize, highmemsize;
 
 #define parse_even_earlier(res, option, p)				\
 do {									\
-	int ret;							\
+	unsigned int tmp __maybe_unused;				\
+									\
 	if (strncmp(option, (char *)p, strlen(option)) == 0)		\
-		ret = strict_strtol((char *)p + strlen(option"="), 10, &res); \
+		tmp = strict_strtol((char *)p + strlen(option"="), 10, &res); \
 } while (0)
 
 void __init prom_init_env(void)
-- 
cgit v1.2.3


From 088a42acc4f0e28fc6d8b823cafb03a00ff61aec Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yuasa@linux-mips.org>
Date: Tue, 29 Mar 2011 15:53:56 +0900
Subject: MIPS: MSP71xx: Fix typo in msp_per_irq_controller

  CC      arch/mips/pmc-sierra/msp71xx/msp_irq_per.o
arch/mips/pmc-sierra/msp71xx/msp_irq_per.c:101:2: error: expected identifier before '.' token
make[2]: *** [arch/mips/pmc-sierra/msp71xx/msp_irq_per.o] Error 1

Signed-off-by: Yoichi Yuasa <yuasa@linux-mips.org>
Patchwork: https://patchwork.linux-mips.org/patch/2246/
Cc: linux-mips <linux-mips@linux-mips.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/pmc-sierra/msp71xx/msp_irq_per.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c b/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c
index f9b9dcdfa9dd..98fd0099d964 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c
@@ -97,7 +97,7 @@ static int msp_per_irq_set_affinity(struct irq_data *d,
 
 static struct irq_chip msp_per_irq_controller = {
 	.name = "MSP_PER",
-	.irq_enable = unmask_per_irq.
+	.irq_enable = unmask_per_irq,
 	.irq_disable = mask_per_irq,
 	.irq_ack = msp_per_irq_ack,
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From 866d7f5622cf5830b085a4471e67d4ed9106cb2e Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 29 Mar 2011 16:09:25 +0200
Subject: MIPS: MSP: Fix build error

Reported and original patch by Yoichi Yuasa <yuasa@linux-mips.org>.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cevt-r4k.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/include/asm/cevt-r4k.h b/arch/mips/include/asm/cevt-r4k.h
index fa4328f9124f..65f9bdd02f1f 100644
--- a/arch/mips/include/asm/cevt-r4k.h
+++ b/arch/mips/include/asm/cevt-r4k.h
@@ -14,6 +14,9 @@
 #ifndef __ASM_CEVT_R4K_H
 #define __ASM_CEVT_R4K_H
 
+#include <linux/clockchips.h>
+#include <asm/time.h>
+
 DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device);
 
 void mips_event_handler(struct clock_event_device *dev);
-- 
cgit v1.2.3


From f8bec75acdadd3a6597fe0acb5c3161b71cc2ea0 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 29 Mar 2011 11:40:06 +0100
Subject: MIPS: Rename .data..mostly and properly handle it in linker script

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cache.h  | 2 +-
 arch/mips/kernel/vmlinux.lds.S | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h
index 650ac9ba734c..b4db69fbc40c 100644
--- a/arch/mips/include/asm/cache.h
+++ b/arch/mips/include/asm/cache.h
@@ -17,6 +17,6 @@
 #define SMP_CACHE_SHIFT		L1_CACHE_SHIFT
 #define SMP_CACHE_BYTES		L1_CACHE_BYTES
 
-#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
 #endif /* _ASM_CACHE_H */
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 832afbb87588..e4b0b0bec039 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -74,6 +74,7 @@ SECTIONS
 		INIT_TASK_DATA(PAGE_SIZE)
 		NOSAVE_DATA
 		CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
+		READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
 		DATA_DATA
 		CONSTRUCTORS
 	}
-- 
cgit v1.2.3


From e3fb3f27a7600982478e1ec415bf265c744d2ae4 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Thu, 17 Feb 2011 14:04:33 -0800
Subject: MIPS: Octeon: Cleanup Kconfig IRQ_CPU* symbols.

Octeon doesn't use IRQ_CPU, so don't select it.

IRQ_CPU_OCTEON is a completely unused symbol, remove it completely.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2086/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 8e256cc5dcd9..351c80fbba7e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -997,9 +997,6 @@ config IRQ_GT641XX
 config IRQ_GIC
 	bool
 
-config IRQ_CPU_OCTEON
-	bool
-
 config MIPS_BOARDS_GEN
 	bool
 
@@ -1359,8 +1356,6 @@ config CPU_SB1
 config CPU_CAVIUM_OCTEON
 	bool "Cavium Octeon processor"
 	depends on SYS_HAS_CPU_CAVIUM_OCTEON
-	select IRQ_CPU
-	select IRQ_CPU_OCTEON
 	select CPU_HAS_PREFETCH
 	select CPU_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_SMP
-- 
cgit v1.2.3


From 23a271ecdf463e5b0198f78b0a0d5763598972b1 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Thu, 17 Feb 2011 18:23:32 -0800
Subject: MIPS: Octeon: Guard the Kconfig body with CPU_CAVIUM_OCTEON

Instead of making each Octeon specific option depend on
CPU_CAVIUM_OCTEON, gate the body of the entire file with
CPU_CAVIUM_OCTEON.  With this change, CAVIUM_OCTEON_SPECIFIC_OPTIONS
becomes useless, so get rid of it as well.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2091/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/Kconfig | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index caae22858163..cad555ebeca3 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -1,11 +1,7 @@
-config CAVIUM_OCTEON_SPECIFIC_OPTIONS
-	bool "Enable Octeon specific options"
-	depends on CPU_CAVIUM_OCTEON
-	default "y"
+if CPU_CAVIUM_OCTEON
 
 config CAVIUM_CN63XXP1
 	bool "Enable CN63XXP1 errata worarounds"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	default "n"
 	help
 	  The CN63XXP1 chip requires build time workarounds to
@@ -16,7 +12,6 @@ config CAVIUM_CN63XXP1
 
 config CAVIUM_OCTEON_2ND_KERNEL
 	bool "Build the kernel to be used as a 2nd kernel on the same chip"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	default "n"
 	help
 	  This option configures this kernel to be linked at a different
@@ -26,7 +21,6 @@ config CAVIUM_OCTEON_2ND_KERNEL
 
 config CAVIUM_OCTEON_HW_FIX_UNALIGNED
 	bool "Enable hardware fixups of unaligned loads and stores"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	default "y"
 	help
 	  Configure the Octeon hardware to automatically fix unaligned loads
@@ -38,7 +32,6 @@ config CAVIUM_OCTEON_HW_FIX_UNALIGNED
 
 config CAVIUM_OCTEON_CVMSEG_SIZE
 	int "Number of L1 cache lines reserved for CVMSEG memory"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	range 0 54
 	default 1
 	help
@@ -50,7 +43,6 @@ config CAVIUM_OCTEON_CVMSEG_SIZE
 
 config CAVIUM_OCTEON_LOCK_L2
 	bool "Lock often used kernel code in the L2"
-	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
 	default "y"
 	help
 	  Enable locking parts of the kernel into the L2 cache.
@@ -93,7 +85,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	select SPARSEMEM_STATIC
-	depends on CPU_CAVIUM_OCTEON
 
 config CAVIUM_OCTEON_HELPER
 	def_bool y
@@ -107,6 +98,8 @@ config NEED_SG_DMA_LENGTH
 
 config SWIOTLB
 	def_bool y
-	depends on CPU_CAVIUM_OCTEON
 	select IOMMU_HELPER
 	select NEED_SG_DMA_LENGTH
+
+
+endif # CPU_CAVIUM_OCTEON
-- 
cgit v1.2.3


From 7da34c1dac0db934913d0e81d2fd548e4973a326 Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jonas.gorski@gmail.com>
Date: Fri, 8 Apr 2011 14:32:15 +0200
Subject: MIPS: bcm63xx: Fix header_crc comment in bcm963xx_tag.h

The CRC32 actually includes the tag_version.

Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2275/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
index 32978d32561a..ed72e6a26b73 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
@@ -88,7 +88,7 @@ struct bcm_tag {
 	char kernel_crc[CRC_LEN];
 	/* 228-235: Unused at present */
 	char reserved1[8];
-	/* 236-239: CRC32 of header excluding tagVersion */
+	/* 236-239: CRC32 of header excluding last 20 bytes */
 	char header_crc[CRC_LEN];
 	/* 240-255: Unused at present */
 	char reserved2[16];
-- 
cgit v1.2.3


From a6ab5ca39404e04d46b1bae133cd059d84926a2d Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Mon, 11 Apr 2011 11:37:15 +0200
Subject: MIPS: IP27: Fix GCC 4.6.0 build error.

  CC      arch/mips/sgi-ip27/ip27-hubio.o
arch/mips/sgi-ip27/ip27-hubio.c: In function 'hub_pio_map':
arch/mips/sgi-ip27/ip27-hubio.c:32:20: error: variable 'junk' set but not used [-Werror=unused-but-set-variable]
cc1: all warnings being treated as errors

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/sgi-ip27/ip27-klnuma.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/sgi-ip27/ip27-klnuma.c b/arch/mips/sgi-ip27/ip27-klnuma.c
index c3d30a88daf3..1d1919a44e88 100644
--- a/arch/mips/sgi-ip27/ip27-klnuma.c
+++ b/arch/mips/sgi-ip27/ip27-klnuma.c
@@ -54,11 +54,8 @@ void __init setup_replication_mask(void)
 
 static __init void set_ktext_source(nasid_t client_nasid, nasid_t server_nasid)
 {
-	cnodeid_t client_cnode;
 	kern_vars_t *kvp;
 
-	client_cnode = NASID_TO_COMPACT_NODEID(client_nasid);
-
 	kvp = &hub_data(client_nasid)->kern_vars;
 
 	KERN_VARS_ADDR(client_nasid) = (unsigned long)kvp;
-- 
cgit v1.2.3


From e12f47ef1680d8bd6449a8e4e98165d2590617eb Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Mon, 11 Apr 2011 11:48:31 +0200
Subject: MIPS: IP27: Fix GCC 4.6.0 build error.

  CC      arch/mips/sgi-ip27/ip27-hubio.o
arch/mips/sgi-ip27/ip27-hubio.c: In function 'hub_pio_map':
arch/mips/sgi-ip27/ip27-hubio.c:32:20: error: variable 'junk' set but not used [-Werror=unused-but-set-variable]
cc1: all warnings being treated as errors

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/sgi-ip27/ip27-hubio.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/sgi-ip27/ip27-hubio.c b/arch/mips/sgi-ip27/ip27-hubio.c
index a1fa4abb3f6a..cd0d5b06cd83 100644
--- a/arch/mips/sgi-ip27/ip27-hubio.c
+++ b/arch/mips/sgi-ip27/ip27-hubio.c
@@ -29,7 +29,6 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget,
 			  unsigned long xtalk_addr, size_t size)
 {
 	nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
-	volatile hubreg_t junk;
 	unsigned i;
 
 	/* use small-window mapping if possible */
@@ -64,7 +63,7 @@ unsigned long hub_pio_map(cnodeid_t cnode, xwidgetnum_t widget,
 		 * after we write it.
 		 */
 		IIO_ITTE_PUT(nasid, i, HUB_PIO_MAP_TO_MEM, widget, xtalk_addr);
-		junk = HUB_L(IIO_ITTE_GET(nasid, i));
+		(void) HUB_L(IIO_ITTE_GET(nasid, i));
 
 		return NODE_BWIN_BASE(nasid, widget) + (xtalk_addr % BWIN_SIZE);
 	}
-- 
cgit v1.2.3


From 8bdd51429da5aec173ab6f0e431b13ee6782a888 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 13 Apr 2011 20:50:46 +0200
Subject: MIPS: Document former use of timerfd(2) syscall number.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/scall32-o32.S | 2 +-
 arch/mips/kernel/scall64-64.S  | 2 +-
 arch/mips/kernel/scall64-n32.S | 2 +-
 arch/mips/kernel/scall64-o32.S | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 7f5468b38d4c..7f1377eb22d3 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -565,7 +565,7 @@ einval:	li	v0, -ENOSYS
 	sys	sys_ioprio_get		2	/* 4315 */
 	sys	sys_utimensat		4
 	sys	sys_signalfd		3
-	sys	sys_ni_syscall		0
+	sys	sys_ni_syscall		0	/* was timerfd */
 	sys	sys_eventfd		1
 	sys	sys_fallocate		6	/* 4320 */
 	sys	sys_timerfd_create	2
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index a2e1fcbc41dc..7c0ef7f128bf 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -404,7 +404,7 @@ sys_call_table:
 	PTR	sys_ioprio_get
 	PTR	sys_utimensat			/* 5275 */
 	PTR	sys_signalfd
-	PTR	sys_ni_syscall
+	PTR	sys_ni_syscall			/* was timerfd */
 	PTR	sys_eventfd
 	PTR	sys_fallocate
 	PTR	sys_timerfd_create		/* 5280 */
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index b2c7624995b8..de6c5563beab 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -403,7 +403,7 @@ EXPORT(sysn32_call_table)
 	PTR	sys_ioprio_get
 	PTR	compat_sys_utimensat
 	PTR	compat_sys_signalfd		/* 6280 */
-	PTR	sys_ni_syscall
+	PTR	sys_ni_syscall			/* was timerfd */
 	PTR	sys_eventfd
 	PTR	sys_fallocate
 	PTR	sys_timerfd_create
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 049a9c8c49a0..b0541dda8830 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -522,7 +522,7 @@ sys_call_table:
 	PTR	sys_ioprio_get			/* 4315 */
 	PTR	compat_sys_utimensat
 	PTR	compat_sys_signalfd
-	PTR	sys_ni_syscall
+	PTR	sys_ni_syscall			/* was timerfd */
 	PTR	sys_eventfd
 	PTR	sys32_fallocate			/* 4320 */
 	PTR	sys_timerfd_create
-- 
cgit v1.2.3


From 403fbdff96057ad312b672408ec676782a802b74 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 13 Apr 2011 21:15:09 +0200
Subject: MIPS: Alchemy: Fix GCC 4.6.0 build error.

  CC      arch/mips/alchemy/devboards/db1x00/board_setup.o
arch/mips/alchemy/devboards/db1x00/board_setup.c: In function 'board_setup':
arch/mips/alchemy/devboards/db1x00/board_setup.c:130:6: error: variable 'pin_func' set but not used [-Werror=unused-but-set-variable]

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/alchemy/devboards/db1x00/board_setup.c | 61 +++++++++++++-----------
 1 file changed, 33 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/devboards/db1x00/board_setup.c b/arch/mips/alchemy/devboards/db1x00/board_setup.c
index 05f120ff90f9..5c956fe8760f 100644
--- a/arch/mips/alchemy/devboards/db1x00/board_setup.c
+++ b/arch/mips/alchemy/devboards/db1x00/board_setup.c
@@ -127,13 +127,10 @@ const char *get_system_type(void)
 void __init board_setup(void)
 {
 	unsigned long bcsr1, bcsr2;
-	u32 pin_func;
 
 	bcsr1 = DB1000_BCSR_PHYS_ADDR;
 	bcsr2 = DB1000_BCSR_PHYS_ADDR + DB1000_BCSR_HEXLED_OFS;
 
-	pin_func = 0;
-
 #ifdef CONFIG_MIPS_DB1000
 	printk(KERN_INFO "AMD Alchemy Au1000/Db1000 Board\n");
 #endif
@@ -164,12 +161,16 @@ void __init board_setup(void)
 	/* Not valid for Au1550 */
 #if defined(CONFIG_IRDA) && \
    (defined(CONFIG_SOC_AU1000) || defined(CONFIG_SOC_AU1100))
-	/* Set IRFIRSEL instead of GPIO15 */
-	pin_func = au_readl(SYS_PINFUNC) | SYS_PF_IRF;
-	au_writel(pin_func, SYS_PINFUNC);
-	/* Power off until the driver is in use */
-	bcsr_mod(BCSR_RESETS, BCSR_RESETS_IRDA_MODE_MASK,
-				BCSR_RESETS_IRDA_MODE_OFF);
+	{
+		u32 pin_func;
+
+		/* Set IRFIRSEL instead of GPIO15 */
+		pin_func = au_readl(SYS_PINFUNC) | SYS_PF_IRF;
+		au_writel(pin_func, SYS_PINFUNC);
+		/* Power off until the driver is in use */
+		bcsr_mod(BCSR_RESETS, BCSR_RESETS_IRDA_MODE_MASK,
+			 BCSR_RESETS_IRDA_MODE_OFF);
+	}
 #endif
 	bcsr_write(BCSR_PCMCIA, 0);	/* turn off PCMCIA power */
 
@@ -177,31 +178,35 @@ void __init board_setup(void)
 	alchemy_gpio1_input_enable();
 
 #ifdef CONFIG_MIPS_MIRAGE
-	/* GPIO[20] is output */
-	alchemy_gpio_direction_output(20, 0);
+	{
+		u32 pin_func;
 
-	/* Set GPIO[210:208] instead of SSI_0 */
-	pin_func = au_readl(SYS_PINFUNC) | SYS_PF_S0;
+		/* GPIO[20] is output */
+		alchemy_gpio_direction_output(20, 0);
 
-	/* Set GPIO[215:211] for LEDs */
-	pin_func |= 5 << 2;
+		/* Set GPIO[210:208] instead of SSI_0 */
+		pin_func = au_readl(SYS_PINFUNC) | SYS_PF_S0;
 
-	/* Set GPIO[214:213] for more LEDs */
-	pin_func |= 5 << 12;
+		/* Set GPIO[215:211] for LEDs */
+		pin_func |= 5 << 2;
 
-	/* Set GPIO[207:200] instead of PCMCIA/LCD */
-	pin_func |= SYS_PF_LCD | SYS_PF_PC;
-	au_writel(pin_func, SYS_PINFUNC);
+		/* Set GPIO[214:213] for more LEDs */
+		pin_func |= 5 << 12;
 
-	/*
-	 * Enable speaker amplifier.  This should
-	 * be part of the audio driver.
-	 */
-	alchemy_gpio_direction_output(209, 1);
+		/* Set GPIO[207:200] instead of PCMCIA/LCD */
+		pin_func |= SYS_PF_LCD | SYS_PF_PC;
+		au_writel(pin_func, SYS_PINFUNC);
 
-	pm_power_off = mirage_power_off;
-	_machine_halt = mirage_power_off;
-	_machine_restart = (void(*)(char *))mips_softreset;
+		/*
+		 * Enable speaker amplifier.  This should
+		 * be part of the audio driver.
+		 */
+		alchemy_gpio_direction_output(209, 1);
+
+		pm_power_off = mirage_power_off;
+		_machine_halt = mirage_power_off;
+		_machine_restart = (void(*)(char *))mips_softreset;
+	}
 #endif
 
 #ifdef CONFIG_MIPS_BOSPORUS
-- 
cgit v1.2.3


From 893d20fbae483913250a5d8bd9b4ce861a3adf2a Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 13 Apr 2011 21:49:54 +0200
Subject: MIPS: Fix calc_vmlinuz_load_addr build warnings.

  HOSTCC  arch/mips/boot/compressed/calc_vmlinuz_load_addr
arch/mips/boot/compressed/calc_vmlinuz_load_addr.c: In function 'main':
arch/mips/boot/compressed/calc_vmlinuz_load_addr.c:35:2: warning: format '%llx' expects type 'long long unsigned int *', but argument 3 has type 'uint64_t *'
arch/mips/boot/compressed/calc_vmlinuz_load_addr.c:54:2: warning: format '%llx' expects type 'long long unsigned int', but argument 2 has type 'uint64_t'

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/boot/compressed/calc_vmlinuz_load_addr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
index 88c9d963be88..9a6243676e22 100644
--- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
+++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c
@@ -16,8 +16,8 @@
 
 int main(int argc, char *argv[])
 {
+	unsigned long long vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr;
 	struct stat sb;
-	uint64_t vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr;
 
 	if (argc != 3) {
 		fprintf(stderr, "Usage: %s <pathname> <vmlinux_load_addr>\n",
-- 
cgit v1.2.3


From b20bff02b21ac7b725fd09590d5724d306552529 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 13 Apr 2011 23:51:23 +0200
Subject: MIPS: Audit: Fix success success argument pass to audit_syscall_exit

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/ptrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index d21c388c0116..584e6b55c865 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -540,8 +540,8 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
 		secure_computing(regs->regs[2]);
 
 	if (unlikely(current->audit_context) && entryexit)
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]),
-		                   regs->regs[2]);
+		audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]),
+		                   -regs->regs[2]);
 
 	if (!(current->ptrace & PT_PTRACED))
 		goto out;
-- 
cgit v1.2.3


From f1b6a5054c5c5c1770863b781de9b721fc99c3e3 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Mon, 18 Apr 2011 11:16:42 +0100
Subject: MIPS: JZ4740: Fix GCC 4.6.0 build error.

  CC      arch/mips/jz4740/dma.o
arch/mips/jz4740/dma.c: In function 'jz4740_dma_chan_irq':
arch/mips/jz4740/dma.c:245:11: error: variable 'status' set but not used [-Werro
r=unused-but-set-variable]

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/jz4740/dma.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/jz4740/dma.c b/arch/mips/jz4740/dma.c
index 5ebe75a68350..d7feb898692c 100644
--- a/arch/mips/jz4740/dma.c
+++ b/arch/mips/jz4740/dma.c
@@ -242,9 +242,7 @@ EXPORT_SYMBOL_GPL(jz4740_dma_get_residue);
 
 static void jz4740_dma_chan_irq(struct jz4740_dma_chan *dma)
 {
-	uint32_t status;
-
-	status = jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id));
+	(void) jz4740_dma_read(JZ_REG_DMA_STATUS_CTRL(dma->id));
 
 	jz4740_dma_write_mask(JZ_REG_DMA_STATUS_CTRL(dma->id), 0,
 		JZ_DMA_STATUS_CTRL_ENABLE | JZ_DMA_STATUS_CTRL_TRANSFER_DONE);
-- 
cgit v1.2.3


From aa7ce1c3038814801c5d7712f7403b15fea5d77d Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Mon, 18 Apr 2011 11:19:32 +0100
Subject: MIPS: JZ4740: Export symbols to the watchdog driver module

  MODPOST 356 modules
ERROR: "jz4740_timer_disable_watchdog" [drivers/watchdog/jz4740_wdt.ko] undefine
d!
ERROR: "jz4740_timer_enable_watchdog" [drivers/watchdog/jz4740_wdt.ko] undefined
!
make[1]: *** [__modpost] Error 1

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/jz4740/timer.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/jz4740/timer.c b/arch/mips/jz4740/timer.c
index b2c015129055..654d5c3900b6 100644
--- a/arch/mips/jz4740/timer.c
+++ b/arch/mips/jz4740/timer.c
@@ -27,11 +27,13 @@ void jz4740_timer_enable_watchdog(void)
 {
 	writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_CLEAR);
 }
+EXPORT_SYMBOL_GPL(jz4740_timer_enable_watchdog);
 
 void jz4740_timer_disable_watchdog(void)
 {
 	writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_SET);
 }
+EXPORT_SYMBOL_GPL(jz4740_timer_disable_watchdog);
 
 void __init jz4740_timer_init(void)
 {
-- 
cgit v1.2.3


From 1e2bbde4afd97b8d6a3f1f6c7bf3b6a9d226ba2e Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Thu, 31 Mar 2011 20:52:20 +0200
Subject: MIPS: JZ4740: Set one-shot feature flag for the clockevent

The code for supporting one-shot mode for the clockevent is already there,
only the feature flag was not set.  Setting the one-shot flag allows the
kernel to run in tickless mode.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2261/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/jz4740/time.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c
index fe01678d94fd..eaa853a54af6 100644
--- a/arch/mips/jz4740/time.c
+++ b/arch/mips/jz4740/time.c
@@ -89,7 +89,7 @@ static int jz4740_clockevent_set_next(unsigned long evt,
 
 static struct clock_event_device jz4740_clockevent = {
 	.name = "jz4740-timer",
-	.features = CLOCK_EVT_FEAT_PERIODIC,
+	.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_next_event = jz4740_clockevent_set_next,
 	.set_mode = jz4740_clockevent_set_mode,
 	.rating = 200,
-- 
cgit v1.2.3


From f850548ef88e5ff9e40bae9e1a7140bef0653e6b Mon Sep 17 00:00:00 2001
From: Wu Zhangjin <wuzhangjin@gmail.com>
Date: Sun, 24 Apr 2011 05:56:59 +0800
Subject: MIPS: Hibernation: Fixes for PAGE_SIZE >= 64kb

PAGE_SIZE >= 64kb (1 << 16) is too big to be the immediate of the
addiu/daddiu instruction, so, use addu/daddu instruction instead.

The following compiling error is fixed:

AS      arch/mips/power/hibernate.o
arch/mips/power/hibernate.S: Assembler messages:
arch/mips/power/hibernate.S:38: Error: expression out of range
make[2]: *** [arch/mips/power/hibernate.o] Error 1
make[1]: *** [arch/mips/power] Error 2

Reported-by: Roman Mamedov <rm@romanrm.ru>
Signed-off-by: Wu Zhangjin <wuzhangjin@gmail.com>
To: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2313/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/power/hibernate.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S
index dbb5c7b4b70f..f8a751c03282 100644
--- a/arch/mips/power/hibernate.S
+++ b/arch/mips/power/hibernate.S
@@ -35,7 +35,7 @@ LEAF(swsusp_arch_resume)
 0:
 	PTR_L t1, PBE_ADDRESS(t0)   /* source */
 	PTR_L t2, PBE_ORIG_ADDRESS(t0) /* destination */
-	PTR_ADDIU t3, t1, PAGE_SIZE
+	PTR_ADDU t3, t1, PAGE_SIZE
 1:
 	REG_L t8, (t1)
 	REG_S t8, (t2)
-- 
cgit v1.2.3


From 310f1303390758ee7688e350e117a7b50ba5fa05 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Wed, 27 Apr 2011 16:39:28 -0700
Subject: MIPS: Invalidate old TLB mappings when updating huge page PTEs.

Without this, stale Icache or TLB entries may be used.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
https://patchwork.linux-mips.org/patch/2318/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/hugetlb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index f5e856015329..c565b7c3f0b5 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -70,6 +70,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
 					 unsigned long addr, pte_t *ptep)
 {
+	flush_tlb_mm(vma->vm_mm);
 }
 
 static inline int huge_pte_none(pte_t pte)
-- 
cgit v1.2.3


From 780914c3cf691a75a0b7fe89f4466eeff8058165 Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Sat, 7 May 2011 13:55:19 +0200
Subject: MIPS: Alchemy: fix xxs1500 build error

This fixes:
alchemy/xxs1500/init.c: In function 'prom_init':
alchemy/xxs1500/init.c:57:17: error: ignoring return value of 'kstrtoul', declared with attribute warn_unused_result

Signed-off-by: Manuel Lauss <manuel.lauss@googlemail.com>
Cc: Linux-MIPS <linux-mips@linux-mips.org>
Patchwork: https://patchwork.linux-mips.org/patch/2340/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/alchemy/xxs1500/init.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/xxs1500/init.c b/arch/mips/alchemy/xxs1500/init.c
index 15125c2fda7d..34a90a4bb6f4 100644
--- a/arch/mips/alchemy/xxs1500/init.c
+++ b/arch/mips/alchemy/xxs1500/init.c
@@ -51,10 +51,9 @@ void __init prom_init(void)
 	prom_init_cmdline();
 
 	memsize_str = prom_getenv("memsize");
-	if (!memsize_str)
+	if (!memsize_str || strict_strtoul(memsize_str, 0, &memsize))
 		memsize = 0x04000000;
-	else
-		strict_strtoul(memsize_str, 0, &memsize);
+
 	add_memory_region(0, memsize, BOOT_MEM_RAM);
 }
 
-- 
cgit v1.2.3


From 9bbeacf52f66d165739a4bbe9c018d17493a74b5 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Wed, 11 May 2011 13:06:13 +0200
Subject: kprobes, x86: Disable irqs during optimized callback

Disable irqs during optimized callback, so we dont miss any in-irq kprobes.

The following commands:

 # cd /debug/tracing/
 # echo "p mutex_unlock" >> kprobe_events
 # echo "p _raw_spin_lock" >> kprobe_events
 # echo "p smp_apic_timer_interrupt" >> ./kprobe_events
 # echo 1 > events/enable

Cause the optimized kprobes to be missed. None is missed
with the fix applied.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Link: http://lkml.kernel.org/r/20110511110613.GB2390@jolsa.brq.redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/kprobes.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index c969fd9d1566..f1a6244d7d93 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
 					 struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long flags;
 
 	/* This is possible if op is under delayed unoptimizing */
 	if (kprobe_disabled(&op->kp))
 		return;
 
-	preempt_disable();
+	local_irq_save(flags);
 	if (kprobe_running()) {
 		kprobes_inc_nmissed_count(&op->kp);
 	} else {
@@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
 		opt_pre_handler(&op->kp, regs);
 		__this_cpu_write(current_kprobe, NULL);
 	}
-	preempt_enable_no_resched();
+	local_irq_restore(flags);
 }
 
 static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
-- 
cgit v1.2.3


From c56b2ddd5ff4352cdb0df07eefba8068d043382e Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 10 May 2011 16:56:46 +0200
Subject: omap: iommu: Return IRQ_HANDLED in fault handler when no fault
 occured

Commit d594f1f31afe13edd8c02f3854a65cc58cfb3b74 (omap: IOMMU: add
support to callback during fault handling) broke interrupt line sharing
between the OMAP3 ISP and its IOMMU. Because of this, every interrupt
generated by the OMAP3 ISP is handled by the IOMMU driver instead of
being passed to the OMAP3 ISP driver.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/plat-omap/iommu.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/plat-omap/iommu.c b/arch/arm/plat-omap/iommu.c
index 8a51fd58f656..34fc31ee9081 100644
--- a/arch/arm/plat-omap/iommu.c
+++ b/arch/arm/plat-omap/iommu.c
@@ -793,6 +793,8 @@ static irqreturn_t iommu_fault_handler(int irq, void *data)
 	clk_enable(obj->clk);
 	errs = iommu_report_fault(obj, &da);
 	clk_disable(obj->clk);
+	if (errs == 0)
+		return IRQ_HANDLED;
 
 	/* Fault callback or TLB/PTE Dynamic loading */
 	if (obj->isr && !obj->isr(obj, da, errs, obj->isr_priv))
-- 
cgit v1.2.3


From f25f4f522a9d2e18595da9df0bf1b6f282040e08 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 26 Apr 2011 19:14:35 +0200
Subject: PM / AVR32: Use struct syscore_ops instead of sysdevs for PM

Convert some AVR32 architecture's code to using struct syscore_ops
objects for power management instead of sysdev classes and sysdevs.

This simplifies the code and reduces the kernel's memory footprint.
It also is necessary for removing sysdevs from the kernel entirely in
the future.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
---
 arch/avr32/mach-at32ap/intc.c | 38 ++++++++++++--------------------------
 1 file changed, 12 insertions(+), 26 deletions(-)

(limited to 'arch')

diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c
index 21ce35f33aa5..3e3646186c9f 100644
--- a/arch/avr32/mach-at32ap/intc.c
+++ b/arch/avr32/mach-at32ap/intc.c
@@ -12,7 +12,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/platform_device.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/io.h>
 
@@ -21,7 +21,6 @@
 struct intc {
 	void __iomem		*regs;
 	struct irq_chip		chip;
-	struct sys_device	sysdev;
 #ifdef CONFIG_PM
 	unsigned long		suspend_ipr;
 	unsigned long		saved_ipr[64];
@@ -146,9 +145,8 @@ void intc_set_suspend_handler(unsigned long offset)
 	intc0.suspend_ipr = offset;
 }
 
-static int intc_suspend(struct sys_device *sdev, pm_message_t state)
+static int intc_suspend(void)
 {
-	struct intc *intc = container_of(sdev, struct intc, sysdev);
 	int i;
 
 	if (unlikely(!irqs_disabled())) {
@@ -156,28 +154,25 @@ static int intc_suspend(struct sys_device *sdev, pm_message_t state)
 		return -EINVAL;
 	}
 
-	if (unlikely(!intc->suspend_ipr)) {
+	if (unlikely(!intc0.suspend_ipr)) {
 		pr_err("intc_suspend: suspend_ipr not initialized\n");
 		return -EINVAL;
 	}
 
 	for (i = 0; i < 64; i++) {
-		intc->saved_ipr[i] = intc_readl(intc, INTPR0 + 4 * i);
-		intc_writel(intc, INTPR0 + 4 * i, intc->suspend_ipr);
+		intc0.saved_ipr[i] = intc_readl(&intc0, INTPR0 + 4 * i);
+		intc_writel(&intc0, INTPR0 + 4 * i, intc0.suspend_ipr);
 	}
 
 	return 0;
 }
 
-static int intc_resume(struct sys_device *sdev)
+static int intc_resume(void)
 {
-	struct intc *intc = container_of(sdev, struct intc, sysdev);
 	int i;
 
-	WARN_ON(!irqs_disabled());
-
 	for (i = 0; i < 64; i++)
-		intc_writel(intc, INTPR0 + 4 * i, intc->saved_ipr[i]);
+		intc_writel(&intc0, INTPR0 + 4 * i, intc0.saved_ipr[i]);
 
 	return 0;
 }
@@ -186,27 +181,18 @@ static int intc_resume(struct sys_device *sdev)
 #define intc_resume	NULL
 #endif
 
-static struct sysdev_class intc_class = {
-	.name		= "intc",
+static struct syscore_ops intc_syscore_ops = {
 	.suspend	= intc_suspend,
 	.resume		= intc_resume,
 };
 
-static int __init intc_init_sysdev(void)
+static int __init intc_init_syscore(void)
 {
-	int ret;
-
-	ret = sysdev_class_register(&intc_class);
-	if (ret)
-		return ret;
+	register_syscore_ops(&intc_syscore_ops);
 
-	intc0.sysdev.id = 0;
-	intc0.sysdev.cls = &intc_class;
-	ret = sysdev_register(&intc0.sysdev);
-
-	return ret;
+	return 0;
 }
-device_initcall(intc_init_sysdev);
+device_initcall(intc_init_syscore);
 
 unsigned long intc_get_pending(unsigned int group)
 {
-- 
cgit v1.2.3


From f98bf4aa39ecce96020631cba910be094c87ac3c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 26 Apr 2011 19:14:47 +0200
Subject: PM / UNICORE32: Use struct syscore_ops instead of sysdevs for PM

Make some UNICORE32 architecture's code use struct syscore_ops
objects for power management instead of sysdev classes and sysdevs.

This simplifies the code and reduces the kernel's memory footprint.
It also is necessary for removing sysdevs from the kernel entirely in
the future.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Guan Xuetao <gxt@mprc.pku.edu.cn>
---
 arch/unicore32/kernel/irq.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/unicore32/kernel/irq.c b/arch/unicore32/kernel/irq.c
index 2aa30a364bbe..d4efa7d679ff 100644
--- a/arch/unicore32/kernel/irq.c
+++ b/arch/unicore32/kernel/irq.c
@@ -23,7 +23,7 @@
 #include <linux/list.h>
 #include <linux/kallsyms.h>
 #include <linux/proc_fs.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/gpio.h>
 
 #include <asm/system.h>
@@ -237,7 +237,7 @@ static struct puv3_irq_state {
 	unsigned int	iccr;
 } puv3_irq_state;
 
-static int puv3_irq_suspend(struct sys_device *dev, pm_message_t state)
+static int puv3_irq_suspend(void)
 {
 	struct puv3_irq_state *st = &puv3_irq_state;
 
@@ -265,7 +265,7 @@ static int puv3_irq_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int puv3_irq_resume(struct sys_device *dev)
+static void puv3_irq_resume(void)
 {
 	struct puv3_irq_state *st = &puv3_irq_state;
 
@@ -278,27 +278,20 @@ static int puv3_irq_resume(struct sys_device *dev)
 
 		writel(st->icmr, INTC_ICMR);
 	}
-	return 0;
 }
 
-static struct sysdev_class puv3_irq_sysclass = {
-	.name		= "pkunity-irq",
+static struct syscore_ops puv3_irq_syscore_ops = {
 	.suspend	= puv3_irq_suspend,
 	.resume		= puv3_irq_resume,
 };
 
-static struct sys_device puv3_irq_device = {
-	.id		= 0,
-	.cls		= &puv3_irq_sysclass,
-};
-
-static int __init puv3_irq_init_devicefs(void)
+static int __init puv3_irq_init_syscore(void)
 {
-	sysdev_class_register(&puv3_irq_sysclass);
-	return sysdev_register(&puv3_irq_device);
+	register_syscore_ops(&puv3_irq_syscore_ops);
+	return 0;
 }
 
-device_initcall(puv3_irq_init_devicefs);
+device_initcall(puv3_irq_init_syscore);
 
 void __init init_IRQ(void)
 {
-- 
cgit v1.2.3


From f5a592f7d74e38c5007876c731e6bf5580072e63 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 26 Apr 2011 19:14:57 +0200
Subject: PM / PowerPC: Use struct syscore_ops instead of sysdevs for PM

Make some PowerPC architecture's code use struct syscore_ops
objects for power management instead of sysdev classes and sysdevs.

This simplifies the code and reduces the kernel's memory footprint.
It also is necessary for removing sysdevs from the kernel entirely in
the future.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/powerpc/include/asm/mpic.h        |  3 ---
 arch/powerpc/platforms/cell/spu_base.c | 28 +++++++++++++-------
 arch/powerpc/platforms/powermac/pic.c  | 42 ++++++++---------------------
 arch/powerpc/sysdev/ipic.c             | 36 +++++++------------------
 arch/powerpc/sysdev/mpic.c             | 48 ++++++++++++++++++----------------
 5 files changed, 64 insertions(+), 93 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index 7005ee0b074d..49baddcdd14e 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -3,7 +3,6 @@
 #ifdef __KERNEL__
 
 #include <linux/irq.h>
-#include <linux/sysdev.h>
 #include <asm/dcr.h>
 #include <asm/msi_bitmap.h>
 
@@ -320,8 +319,6 @@ struct mpic
 	/* link */
 	struct mpic		*next;
 
-	struct sys_device	sysdev;
-
 #ifdef CONFIG_PM
 	struct mpic_irq_save	*save_data;
 #endif
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index acfaccea5f4f..3675da73623f 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -32,6 +32,7 @@
 #include <linux/io.h>
 #include <linux/mutex.h>
 #include <linux/linux_logo.h>
+#include <linux/syscore_ops.h>
 #include <asm/spu.h>
 #include <asm/spu_priv1.h>
 #include <asm/spu_csa.h>
@@ -521,18 +522,8 @@ void spu_init_channels(struct spu *spu)
 }
 EXPORT_SYMBOL_GPL(spu_init_channels);
 
-static int spu_shutdown(struct sys_device *sysdev)
-{
-	struct spu *spu = container_of(sysdev, struct spu, sysdev);
-
-	spu_free_irqs(spu);
-	spu_destroy_spu(spu);
-	return 0;
-}
-
 static struct sysdev_class spu_sysdev_class = {
 	.name = "spu",
-	.shutdown = spu_shutdown,
 };
 
 int spu_add_sysdev_attr(struct sysdev_attribute *attr)
@@ -797,6 +788,22 @@ static inline void crash_register_spus(struct list_head *list)
 }
 #endif
 
+static void spu_shutdown(void)
+{
+	struct spu *spu;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(spu, &spu_full_list, full_list) {
+		spu_free_irqs(spu);
+		spu_destroy_spu(spu);
+	}
+	mutex_unlock(&spu_full_list_mutex);
+}
+
+static struct syscore_ops spu_syscore_ops = {
+	.shutdown = spu_shutdown,
+};
+
 static int __init init_spu_base(void)
 {
 	int i, ret = 0;
@@ -830,6 +837,7 @@ static int __init init_spu_base(void)
 	crash_register_spus(&spu_full_list);
 	mutex_unlock(&spu_full_list_mutex);
 	spu_add_sysdev_attr(&attr_stat);
+	register_syscore_ops(&spu_syscore_ops);
 
 	spu_init_affinity();
 
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 023f24086a0a..7c18a1607d1c 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -21,7 +21,7 @@
 #include <linux/signal.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/adb.h>
 #include <linux/pmu.h>
 #include <linux/module.h>
@@ -677,7 +677,7 @@ not_found:
 	return viaint;
 }
 
-static int pmacpic_suspend(struct sys_device *sysdev, pm_message_t state)
+static int pmacpic_suspend(void)
 {
 	int viaint = pmacpic_find_viaint();
 
@@ -698,7 +698,7 @@ static int pmacpic_suspend(struct sys_device *sysdev, pm_message_t state)
         return 0;
 }
 
-static int pmacpic_resume(struct sys_device *sysdev)
+static void pmacpic_resume(void)
 {
 	int i;
 
@@ -709,39 +709,19 @@ static int pmacpic_resume(struct sys_device *sysdev)
 	for (i = 0; i < max_real_irqs; ++i)
 		if (test_bit(i, sleep_save_mask))
 			pmac_unmask_irq(irq_get_irq_data(i));
-
-	return 0;
 }
 
-#endif /* CONFIG_PM && CONFIG_PPC32 */
-
-static struct sysdev_class pmacpic_sysclass = {
-	.name = "pmac_pic",
+static struct syscore_ops pmacpic_syscore_ops = {
+	.suspend	= pmacpic_suspend,
+	.resume		= pmacpic_resume,
 };
 
-static struct sys_device device_pmacpic = {
-	.id		= 0,
-	.cls		= &pmacpic_sysclass,
-};
-
-static struct sysdev_driver driver_pmacpic = {
-#if defined(CONFIG_PM) && defined(CONFIG_PPC32)
-	.suspend	= &pmacpic_suspend,
-	.resume		= &pmacpic_resume,
-#endif /* CONFIG_PM && CONFIG_PPC32 */
-};
-
-static int __init init_pmacpic_sysfs(void)
+static int __init init_pmacpic_syscore(void)
 {
-#ifdef CONFIG_PPC32
-	if (max_irqs == 0)
-		return -ENODEV;
-#endif
-	printk(KERN_DEBUG "Registering pmac pic with sysfs...\n");
-	sysdev_class_register(&pmacpic_sysclass);
-	sysdev_register(&device_pmacpic);
-	sysdev_driver_register(&pmacpic_sysclass, &driver_pmacpic);
+	register_syscore_ops(&pmacpic_syscore_ops);
 	return 0;
 }
-machine_subsys_initcall(powermac, init_pmacpic_sysfs);
 
+machine_subsys_initcall(powermac, init_pmacpic_syscore);
+
+#endif /* CONFIG_PM && CONFIG_PPC32 */
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index fa438be962b7..596554a8725e 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -18,7 +18,7 @@
 #include <linux/stddef.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/device.h>
 #include <linux/bootmem.h>
 #include <linux/spinlock.h>
@@ -902,7 +902,7 @@ static struct {
 	u32 sercr;
 } ipic_saved_state;
 
-static int ipic_suspend(struct sys_device *sdev, pm_message_t state)
+static int ipic_suspend(void)
 {
 	struct ipic *ipic = primary_ipic;
 
@@ -933,7 +933,7 @@ static int ipic_suspend(struct sys_device *sdev, pm_message_t state)
 	return 0;
 }
 
-static int ipic_resume(struct sys_device *sdev)
+static void ipic_resume(void)
 {
 	struct ipic *ipic = primary_ipic;
 
@@ -949,44 +949,26 @@ static int ipic_resume(struct sys_device *sdev)
 	ipic_write(ipic->regs, IPIC_SECNR, ipic_saved_state.secnr);
 	ipic_write(ipic->regs, IPIC_SERMR, ipic_saved_state.sermr);
 	ipic_write(ipic->regs, IPIC_SERCR, ipic_saved_state.sercr);
-
-	return 0;
 }
 #else
 #define ipic_suspend NULL
 #define ipic_resume NULL
 #endif
 
-static struct sysdev_class ipic_sysclass = {
-	.name = "ipic",
+static struct syscore_ops ipic_syscore_ops = {
 	.suspend = ipic_suspend,
 	.resume = ipic_resume,
 };
 
-static struct sys_device device_ipic = {
-	.id		= 0,
-	.cls		= &ipic_sysclass,
-};
-
-static int __init init_ipic_sysfs(void)
+static int __init init_ipic_syscore(void)
 {
-	int rc;
-
 	if (!primary_ipic || !primary_ipic->regs)
 		return -ENODEV;
-	printk(KERN_DEBUG "Registering ipic with sysfs...\n");
 
-	rc = sysdev_class_register(&ipic_sysclass);
-	if (rc) {
-		printk(KERN_ERR "Failed registering ipic sys class\n");
-		return -ENODEV;
-	}
-	rc = sysdev_register(&device_ipic);
-	if (rc) {
-		printk(KERN_ERR "Failed registering ipic sys device\n");
-		return -ENODEV;
-	}
+	printk(KERN_DEBUG "Registering ipic system core operations\n");
+	register_syscore_ops(&ipic_syscore_ops);
+
 	return 0;
 }
 
-subsys_initcall(init_ipic_sysfs);
+subsys_initcall(init_ipic_syscore);
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index f91c065bed5a..7e5dc8f4984a 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -27,6 +27,7 @@
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/ptrace.h>
 #include <asm/signal.h>
@@ -1702,9 +1703,8 @@ void mpic_reset_core(int cpu)
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_PM
-static int mpic_suspend(struct sys_device *dev, pm_message_t state)
+static void mpic_suspend_one(struct mpic *mpic)
 {
-	struct mpic *mpic = container_of(dev, struct mpic, sysdev);
 	int i;
 
 	for (i = 0; i < mpic->num_sources; i++) {
@@ -1713,13 +1713,22 @@ static int mpic_suspend(struct sys_device *dev, pm_message_t state)
 		mpic->save_data[i].dest =
 			mpic_irq_read(i, MPIC_INFO(IRQ_DESTINATION));
 	}
+}
+
+static int mpic_suspend(void)
+{
+	struct mpic *mpic = mpics;
+
+	while (mpic) {
+		mpic_suspend_one(mpic);
+		mpic = mpic->next;
+	}
 
 	return 0;
 }
 
-static int mpic_resume(struct sys_device *dev)
+static void mpic_resume_one(struct mpic *mpic)
 {
-	struct mpic *mpic = container_of(dev, struct mpic, sysdev);
 	int i;
 
 	for (i = 0; i < mpic->num_sources; i++) {
@@ -1746,33 +1755,28 @@ static int mpic_resume(struct sys_device *dev)
 	}
 #endif
 	} /* end for loop */
+}
 
-	return 0;
+static void mpic_resume(void)
+{
+	struct mpic *mpic = mpics;
+
+	while (mpic) {
+		mpic_resume_one(mpic);
+		mpic = mpic->next;
+	}
 }
-#endif
 
-static struct sysdev_class mpic_sysclass = {
-#ifdef CONFIG_PM
+static struct syscore_ops mpic_syscore_ops = {
 	.resume = mpic_resume,
 	.suspend = mpic_suspend,
-#endif
-	.name = "mpic",
 };
 
 static int mpic_init_sys(void)
 {
-	struct mpic *mpic = mpics;
-	int error, id = 0;
-
-	error = sysdev_class_register(&mpic_sysclass);
-
-	while (mpic && !error) {
-		mpic->sysdev.cls = &mpic_sysclass;
-		mpic->sysdev.id = id++;
-		error = sysdev_register(&mpic->sysdev);
-		mpic = mpic->next;
-	}
-	return error;
+	register_syscore_ops(&mpic_syscore_ops);
+	return 0;
 }
 
 device_initcall(mpic_init_sys);
+#endif
-- 
cgit v1.2.3


From 2e711c04dbbf7a7732a3f7073b1fc285d12b369d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 26 Apr 2011 19:15:07 +0200
Subject: PM: Remove sysdev suspend, resume and shutdown operations

Since suspend, resume and shutdown operations in struct sysdev_class
and struct sysdev_driver are not used any more, remove them.  Also
drop sysdev_suspend(), sysdev_resume() and sysdev_shutdown() used
for executing those operations and modify all of their users
accordingly.  This reduces kernel code size quite a bit and reduces
its complexity.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/sh/Kconfig          |   1 -
 arch/x86/Kconfig         |   1 -
 arch/x86/kernel/apm_32.c |   4 -
 drivers/base/Kconfig     |   7 --
 drivers/base/base.h      |   2 -
 drivers/base/sys.c       | 202 +----------------------------------------------
 drivers/xen/manage.c     |   8 +-
 include/linux/device.h   |   7 --
 include/linux/pm.h       |   8 --
 include/linux/sysdev.h   |  11 ---
 kernel/kexec.c           |   9 +--
 kernel/power/hibernate.c |  18 +----
 kernel/power/suspend.c   |   8 +-
 kernel/sys.c             |   3 -
 14 files changed, 7 insertions(+), 282 deletions(-)

(limited to 'arch')

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 4b89da248d17..bc439de48cd1 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -24,7 +24,6 @@ config SUPERH
 	select RTC_LIB
 	select GENERIC_ATOMIC64
 	select GENERIC_IRQ_SHOW
-	select ARCH_NO_SYSDEV_OPS
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a95bfd..140e254fe546 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -71,7 +71,6 @@ config X86
 	select GENERIC_IRQ_SHOW
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
-	select ARCH_NO_SYSDEV_OPS
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index adee12e0da1f..3bfa02235965 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1238,7 +1238,6 @@ static int suspend(int vetoable)
 	dpm_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
-	sysdev_suspend(PMSG_SUSPEND);
 	syscore_suspend();
 
 	local_irq_enable();
@@ -1258,7 +1257,6 @@ static int suspend(int vetoable)
 	err = (err == APM_SUCCESS) ? 0 : -EIO;
 
 	syscore_resume();
-	sysdev_resume();
 	local_irq_enable();
 
 	dpm_resume_noirq(PMSG_RESUME);
@@ -1282,7 +1280,6 @@ static void standby(void)
 	dpm_suspend_noirq(PMSG_SUSPEND);
 
 	local_irq_disable();
-	sysdev_suspend(PMSG_SUSPEND);
 	syscore_suspend();
 	local_irq_enable();
 
@@ -1292,7 +1289,6 @@ static void standby(void)
 
 	local_irq_disable();
 	syscore_resume();
-	sysdev_resume();
 	local_irq_enable();
 
 	dpm_resume_noirq(PMSG_RESUME);
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index e9e5238f3106..d57e8d0fb823 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -168,11 +168,4 @@ config SYS_HYPERVISOR
 	bool
 	default n
 
-config ARCH_NO_SYSDEV_OPS
-	bool
-	---help---
-	  To be selected by architectures that don't use sysdev class or
-	  sysdev driver power management (suspend/resume) and shutdown
-	  operations.
-
 endmenu
diff --git a/drivers/base/base.h b/drivers/base/base.h
index 19f49e41ce5d..a34dca0ad041 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -111,8 +111,6 @@ static inline int driver_match_device(struct device_driver *drv,
 	return drv->bus->match ? drv->bus->match(dev, drv) : 1;
 }
 
-extern void sysdev_shutdown(void);
-
 extern char *make_class_name(const char *name, struct kobject *kobj);
 
 extern int devres_release_all(struct device *dev);
diff --git a/drivers/base/sys.c b/drivers/base/sys.c
index acde9b5ee131..9dff77bfe1e3 100644
--- a/drivers/base/sys.c
+++ b/drivers/base/sys.c
@@ -328,203 +328,8 @@ void sysdev_unregister(struct sys_device *sysdev)
 	kobject_put(&sysdev->kobj);
 }
 
-
-#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
-/**
- *	sysdev_shutdown - Shut down all system devices.
- *
- *	Loop over each class of system devices, and the devices in each
- *	of those classes. For each device, we call the shutdown method for
- *	each driver registered for the device - the auxiliaries,
- *	and the class driver.
- *
- *	Note: The list is iterated in reverse order, so that we shut down
- *	child devices before we shut down their parents. The list ordering
- *	is guaranteed by virtue of the fact that child devices are registered
- *	after their parents.
- */
-void sysdev_shutdown(void)
-{
-	struct sysdev_class *cls;
-
-	pr_debug("Shutting Down System Devices\n");
-
-	mutex_lock(&sysdev_drivers_lock);
-	list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) {
-		struct sys_device *sysdev;
-
-		pr_debug("Shutting down type '%s':\n",
-			 kobject_name(&cls->kset.kobj));
-
-		list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) {
-			struct sysdev_driver *drv;
-			pr_debug(" %s\n", kobject_name(&sysdev->kobj));
-
-			/* Call auxiliary drivers first */
-			list_for_each_entry(drv, &cls->drivers, entry) {
-				if (drv->shutdown)
-					drv->shutdown(sysdev);
-			}
-
-			/* Now call the generic one */
-			if (cls->shutdown)
-				cls->shutdown(sysdev);
-		}
-	}
-	mutex_unlock(&sysdev_drivers_lock);
-}
-
-static void __sysdev_resume(struct sys_device *dev)
-{
-	struct sysdev_class *cls = dev->cls;
-	struct sysdev_driver *drv;
-
-	/* First, call the class-specific one */
-	if (cls->resume)
-		cls->resume(dev);
-	WARN_ONCE(!irqs_disabled(),
-		"Interrupts enabled after %pF\n", cls->resume);
-
-	/* Call auxiliary drivers next. */
-	list_for_each_entry(drv, &cls->drivers, entry) {
-		if (drv->resume)
-			drv->resume(dev);
-		WARN_ONCE(!irqs_disabled(),
-			"Interrupts enabled after %pF\n", drv->resume);
-	}
-}
-
-/**
- *	sysdev_suspend - Suspend all system devices.
- *	@state:		Power state to enter.
- *
- *	We perform an almost identical operation as sysdev_shutdown()
- *	above, though calling ->suspend() instead. Interrupts are disabled
- *	when this called. Devices are responsible for both saving state and
- *	quiescing or powering down the device.
- *
- *	This is only called by the device PM core, so we let them handle
- *	all synchronization.
- */
-int sysdev_suspend(pm_message_t state)
-{
-	struct sysdev_class *cls;
-	struct sys_device *sysdev, *err_dev;
-	struct sysdev_driver *drv, *err_drv;
-	int ret;
-
-	pr_debug("Checking wake-up interrupts\n");
-
-	/* Return error code if there are any wake-up interrupts pending */
-	ret = check_wakeup_irqs();
-	if (ret)
-		return ret;
-
-	WARN_ONCE(!irqs_disabled(),
-		"Interrupts enabled while suspending system devices\n");
-
-	pr_debug("Suspending System Devices\n");
-
-	list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) {
-		pr_debug("Suspending type '%s':\n",
-			 kobject_name(&cls->kset.kobj));
-
-		list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) {
-			pr_debug(" %s\n", kobject_name(&sysdev->kobj));
-
-			/* Call auxiliary drivers first */
-			list_for_each_entry(drv, &cls->drivers, entry) {
-				if (drv->suspend) {
-					ret = drv->suspend(sysdev, state);
-					if (ret)
-						goto aux_driver;
-				}
-				WARN_ONCE(!irqs_disabled(),
-					"Interrupts enabled after %pF\n",
-					drv->suspend);
-			}
-
-			/* Now call the generic one */
-			if (cls->suspend) {
-				ret = cls->suspend(sysdev, state);
-				if (ret)
-					goto cls_driver;
-				WARN_ONCE(!irqs_disabled(),
-					"Interrupts enabled after %pF\n",
-					cls->suspend);
-			}
-		}
-	}
-	return 0;
-	/* resume current sysdev */
-cls_driver:
-	drv = NULL;
-	printk(KERN_ERR "Class suspend failed for %s: %d\n",
-		kobject_name(&sysdev->kobj), ret);
-
-aux_driver:
-	if (drv)
-		printk(KERN_ERR "Class driver suspend failed for %s: %d\n",
-				kobject_name(&sysdev->kobj), ret);
-	list_for_each_entry(err_drv, &cls->drivers, entry) {
-		if (err_drv == drv)
-			break;
-		if (err_drv->resume)
-			err_drv->resume(sysdev);
-	}
-
-	/* resume other sysdevs in current class */
-	list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) {
-		if (err_dev == sysdev)
-			break;
-		pr_debug(" %s\n", kobject_name(&err_dev->kobj));
-		__sysdev_resume(err_dev);
-	}
-
-	/* resume other classes */
-	list_for_each_entry_continue(cls, &system_kset->list, kset.kobj.entry) {
-		list_for_each_entry(err_dev, &cls->kset.list, kobj.entry) {
-			pr_debug(" %s\n", kobject_name(&err_dev->kobj));
-			__sysdev_resume(err_dev);
-		}
-	}
-	return ret;
-}
-EXPORT_SYMBOL_GPL(sysdev_suspend);
-
-/**
- *	sysdev_resume - Bring system devices back to life.
- *
- *	Similar to sysdev_suspend(), but we iterate the list forwards
- *	to guarantee that parent devices are resumed before their children.
- *
- *	Note: Interrupts are disabled when called.
- */
-int sysdev_resume(void)
-{
-	struct sysdev_class *cls;
-
-	WARN_ONCE(!irqs_disabled(),
-		"Interrupts enabled while resuming system devices\n");
-
-	pr_debug("Resuming System Devices\n");
-
-	list_for_each_entry(cls, &system_kset->list, kset.kobj.entry) {
-		struct sys_device *sysdev;
-
-		pr_debug("Resuming type '%s':\n",
-			 kobject_name(&cls->kset.kobj));
-
-		list_for_each_entry(sysdev, &cls->kset.list, kobj.entry) {
-			pr_debug(" %s\n", kobject_name(&sysdev->kobj));
-
-			__sysdev_resume(sysdev);
-		}
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(sysdev_resume);
-#endif /* CONFIG_ARCH_NO_SYSDEV_OPS */
+EXPORT_SYMBOL_GPL(sysdev_register);
+EXPORT_SYMBOL_GPL(sysdev_unregister);
 
 int __init system_bus_init(void)
 {
@@ -534,9 +339,6 @@ int __init system_bus_init(void)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(sysdev_register);
-EXPORT_SYMBOL_GPL(sysdev_unregister);
-
 #define to_ext_attr(x) container_of(x, struct sysdev_ext_attribute, attr)
 
 ssize_t sysdev_store_ulong(struct sys_device *sysdev,
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index a2eee574784e..0b5366b5be20 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -70,12 +70,7 @@ static int xen_suspend(void *data)
 
 	BUG_ON(!irqs_disabled());
 
-	err = sysdev_suspend(PMSG_FREEZE);
-	if (!err) {
-		err = syscore_suspend();
-		if (err)
-			sysdev_resume();
-	}
+	err = syscore_suspend();
 	if (err) {
 		printk(KERN_ERR "xen_suspend: system core suspend failed: %d\n",
 			err);
@@ -102,7 +97,6 @@ static int xen_suspend(void *data)
 	}
 
 	syscore_resume();
-	sysdev_resume();
 
 	return 0;
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index ab8dfc095709..ea9db9b0f248 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -633,13 +633,6 @@ static inline int devtmpfs_mount(const char *mountpoint) { return 0; }
 /* drivers/base/power/shutdown.c */
 extern void device_shutdown(void);
 
-#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
-/* drivers/base/sys.c */
-extern void sysdev_shutdown(void);
-#else
-static inline void sysdev_shutdown(void) { }
-#endif
-
 /* debugging and troubleshooting/diagnostic helpers. */
 extern const char *dev_driver_string(const struct device *dev);
 
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 512e09177e57..3c053e2beb84 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -529,14 +529,6 @@ struct dev_power_domain {
  */
 
 #ifdef CONFIG_PM_SLEEP
-#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
-extern int sysdev_suspend(pm_message_t state);
-extern int sysdev_resume(void);
-#else
-static inline int sysdev_suspend(pm_message_t state) { return 0; }
-static inline int sysdev_resume(void) { return 0; }
-#endif
-
 extern void device_pm_lock(void);
 extern void dpm_resume_noirq(pm_message_t state);
 extern void dpm_resume_end(pm_message_t state);
diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h
index dfb078db8ebb..d35e783a598c 100644
--- a/include/linux/sysdev.h
+++ b/include/linux/sysdev.h
@@ -34,12 +34,6 @@ struct sysdev_class {
 	struct list_head	drivers;
 	struct sysdev_class_attribute **attrs;
 	struct kset		kset;
-#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
-	/* Default operations for these types of devices */
-	int	(*shutdown)(struct sys_device *);
-	int	(*suspend)(struct sys_device *, pm_message_t state);
-	int	(*resume)(struct sys_device *);
-#endif
 };
 
 struct sysdev_class_attribute {
@@ -77,11 +71,6 @@ struct sysdev_driver {
 	struct list_head	entry;
 	int	(*add)(struct sys_device *);
 	int	(*remove)(struct sys_device *);
-#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
-	int	(*shutdown)(struct sys_device *);
-	int	(*suspend)(struct sys_device *, pm_message_t state);
-	int	(*resume)(struct sys_device *);
-#endif
 };
 
 
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 87b77de03dd3..8d814cbc8109 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1531,13 +1531,7 @@ int kernel_kexec(void)
 		if (error)
 			goto Enable_cpus;
 		local_irq_disable();
-		/* Suspend system devices */
-		error = sysdev_suspend(PMSG_FREEZE);
-		if (!error) {
-			error = syscore_suspend();
-			if (error)
-				sysdev_resume();
-		}
+		error = syscore_suspend();
 		if (error)
 			goto Enable_irqs;
 	} else
@@ -1553,7 +1547,6 @@ int kernel_kexec(void)
 #ifdef CONFIG_KEXEC_JUMP
 	if (kexec_image->preserve_context) {
 		syscore_resume();
-		sysdev_resume();
  Enable_irqs:
 		local_irq_enable();
  Enable_cpus:
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 50aae660174d..554d3b049f35 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -272,12 +272,7 @@ static int create_image(int platform_mode)
 
 	local_irq_disable();
 
-	error = sysdev_suspend(PMSG_FREEZE);
-	if (!error) {
-		error = syscore_suspend();
-		if (error)
-			sysdev_resume();
-	}
+	error = syscore_suspend();
 	if (error) {
 		printk(KERN_ERR "PM: Some system devices failed to power down, "
 			"aborting hibernation\n");
@@ -302,7 +297,6 @@ static int create_image(int platform_mode)
 
  Power_up:
 	syscore_resume();
-	sysdev_resume();
 	/* NOTE:  dpm_resume_noirq() is just a resume() for devices
 	 * that suspended with irqs off ... no overall powerup.
 	 */
@@ -409,12 +403,7 @@ static int resume_target_kernel(bool platform_mode)
 
 	local_irq_disable();
 
-	error = sysdev_suspend(PMSG_QUIESCE);
-	if (!error) {
-		error = syscore_suspend();
-		if (error)
-			sysdev_resume();
-	}
+	error = syscore_suspend();
 	if (error)
 		goto Enable_irqs;
 
@@ -442,7 +431,6 @@ static int resume_target_kernel(bool platform_mode)
 	touch_softlockup_watchdog();
 
 	syscore_resume();
-	sysdev_resume();
 
  Enable_irqs:
 	local_irq_enable();
@@ -528,7 +516,6 @@ int hibernation_platform_enter(void)
 		goto Platform_finish;
 
 	local_irq_disable();
-	sysdev_suspend(PMSG_HIBERNATE);
 	syscore_suspend();
 	if (pm_wakeup_pending()) {
 		error = -EAGAIN;
@@ -541,7 +528,6 @@ int hibernation_platform_enter(void)
 
  Power_up:
 	syscore_resume();
-	sysdev_resume();
 	local_irq_enable();
 	enable_nonboot_cpus();
 
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 8935369d503a..732d77a957e7 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -163,19 +163,13 @@ static int suspend_enter(suspend_state_t state)
 	arch_suspend_disable_irqs();
 	BUG_ON(!irqs_disabled());
 
-	error = sysdev_suspend(PMSG_SUSPEND);
-	if (!error) {
-		error = syscore_suspend();
-		if (error)
-			sysdev_resume();
-	}
+	error = syscore_suspend();
 	if (!error) {
 		if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) {
 			error = suspend_ops->enter(state);
 			events_check_enabled = false;
 		}
 		syscore_resume();
-		sysdev_resume();
 	}
 
 	arch_suspend_enable_irqs();
diff --git a/kernel/sys.c b/kernel/sys.c
index af468edf096a..f0c10385f30c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -315,7 +315,6 @@ void kernel_restart_prepare(char *cmd)
 	blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
 	system_state = SYSTEM_RESTART;
 	device_shutdown();
-	sysdev_shutdown();
 	syscore_shutdown();
 }
 
@@ -354,7 +353,6 @@ static void kernel_shutdown_prepare(enum system_states state)
 void kernel_halt(void)
 {
 	kernel_shutdown_prepare(SYSTEM_HALT);
-	sysdev_shutdown();
 	syscore_shutdown();
 	printk(KERN_EMERG "System halted.\n");
 	kmsg_dump(KMSG_DUMP_HALT);
@@ -374,7 +372,6 @@ void kernel_power_off(void)
 	if (pm_power_off_prepare)
 		pm_power_off_prepare();
 	disable_nonboot_cpus();
-	sysdev_shutdown();
 	syscore_shutdown();
 	printk(KERN_EMERG "Power down.\n");
 	kmsg_dump(KMSG_DUMP_POWEROFF);
-- 
cgit v1.2.3


From b1054282d752c5a026e2c0450616ebf37fc0413e Mon Sep 17 00:00:00 2001
From: Tkhai Kirill <tkhai@yandex.ru>
Date: Tue, 10 May 2011 02:31:41 +0000
Subject: sparc32: Fixed unaligned memory copying in function
 __csum_partial_copy_sparc_generic

When we are in the label cc_dword_align, registers %o0 and %o1 have the same last 2 bits,
but it's not guaranteed one of them is zero. So we can get unaligned memory access
in label ccte. Example of parameters which lead to this:
%o0=0x7ff183e9, %o1=0x8e709e7d, %g1=3

With the parameters I had a memory corruption, when the additional 5 bytes were rewritten.
This patch corrects the error.

One comment to the patch. We don't care about the third bit in %o1, because cc_end_cruft
stores word or less.

Signed-off-by: Tkhai Kirill <tkhai@yandex.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/lib/checksum_32.S | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S
index 3632cb34e914..0084c3361e15 100644
--- a/arch/sparc/lib/checksum_32.S
+++ b/arch/sparc/lib/checksum_32.S
@@ -289,10 +289,16 @@ cc_end_cruft:
 
 	/* Also, handle the alignment code out of band. */
 cc_dword_align:
-	cmp	%g1, 6
-	bl,a	ccte
+	cmp	%g1, 16
+	bge	1f
+	 srl	%g1, 1, %o3
+2:	cmp	%o3, 0
+	be,a	ccte
 	 andcc	%g1, 0xf, %o3
-	andcc	%o0, 0x1, %g0
+	andcc	%o3, %o0, %g0	! Check %o0 only (%o1 has the same last 2 bits)
+	be,a	2b
+	 srl	%o3, 1, %o3
+1:	andcc	%o0, 0x1, %g0
 	bne	ccslow
 	 andcc	%o0, 0x2, %g0
 	be	1f
-- 
cgit v1.2.3


From 9af386c8dc5a9dce56f36b484647ad6401758c85 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 28 Apr 2011 18:44:31 +0100
Subject: ARM: 6890/1: memmap: only free allocated memmap entries when using
 SPARSEMEM

The SPARSEMEM code allocates memmap entries only for sections which are
present (i.e. those which contain some valid memory). The membank checks
in free_unused_memmap do not take this into account and can incorrectly
attempt to free memory which is not allocated, resulting in a BUG() in
the bootmem code.

However, if memory is configured as follows:

    |<----section---->|<----hole---->|<----section---->|
    +--------+--------+--------------+--------+--------+
    | bank 0 | unused |              | bank 1 | unused |
    +--------+--------+--------------+--------+--------+

where a bank only occupies part of a section, the memmap allocated for
the remainder of the section *can* be freed.

This patch modifies the checks in free_unused_memmap so that only valid
memmap entries are considered for removal.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/mm/init.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index e5f6fc428348..e591513bb53e 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -392,7 +392,7 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 	 * Convert start_pfn/end_pfn to a struct page pointer.
 	 */
 	start_pg = pfn_to_page(start_pfn - 1) + 1;
-	end_pg = pfn_to_page(end_pfn);
+	end_pg = pfn_to_page(end_pfn - 1) + 1;
 
 	/*
 	 * Convert to physical addresses, and
@@ -426,6 +426,14 @@ static void __init free_unused_memmap(struct meminfo *mi)
 
 		bank_start = bank_pfn_start(bank);
 
+#ifdef CONFIG_SPARSEMEM
+		/*
+		 * Take care not to free memmap entries that don't exist
+		 * due to SPARSEMEM sections which aren't present.
+		 */
+		bank_start = min(bank_start,
+				 ALIGN(prev_bank_end, PAGES_PER_SECTION));
+#endif
 		/*
 		 * If we had a previous bank, and there is a space
 		 * between the current bank and the previous, free it.
@@ -440,6 +448,12 @@ static void __init free_unused_memmap(struct meminfo *mi)
 		 */
 		prev_bank_end = ALIGN(bank_pfn_end(bank), MAX_ORDER_NR_PAGES);
 	}
+
+#ifdef CONFIG_SPARSEMEM
+	if (!IS_ALIGNED(prev_bank_end, PAGES_PER_SECTION))
+		free_memmap(prev_bank_end,
+			    ALIGN(prev_bank_end, PAGES_PER_SECTION));
+#endif
 }
 
 static void __init free_highpages(void)
-- 
cgit v1.2.3


From 2af68df02fe5ccd644f4312ba2401996f52faab3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 3 May 2011 18:32:55 +0100
Subject: ARM: 6892/1: handle ptrace requests to change PC during interrupted
 system calls

GDB's interrupt.exp test cases currenly fail on ARM.  The problem is how do_signal
handled restarting interrupted system calls:

The entry.S assembler code determines that we come from a system call; and that
information is passed as "syscall" parameter to do_signal.  That routine then
calls get_signal_to_deliver [*] and if a signal is to be delivered, calls into
handle_signal.  If a system call is to be restarted either after the signal
handler returns, or if no handler is to be called in the first place, the PC
is updated after the get_signal_to_deliver call, either in handle_signal (if
we have a handler) or at the end of do_signal (otherwise).

Now the problem is that during [*], the call to get_signal_to_deliver, a ptrace
intercept may happen.  During this intercept, the debugger may change registers,
including the PC.  This is done by GDB if it wants to execute an "inferior call",
i.e. the execution of some code in the debugged program triggered by GDB.

To this purpose, GDB will save all registers, allocate a stack frame, set up
PC and arguments as appropriate for the call, and point the link register to
a dummy breakpoint instruction.  Once the process is restarted, it will execute
the call and then trap back to the debugger, at which point GDB will restore
all registers and continue original execution.

This generally works fine.  However, now consider what happens when GDB attempts
to do exactly that while the process was interrupted during execution of a to-be-
restarted system call:  do_signal is called with the syscall flag set; it calls
get_signal_to_deliver, at which point the debugger takes over and changes the PC
to point to a completely different place.  Now get_signal_to_deliver returns
without a signal to deliver; but now do_signal decides it should be restarting
a system call, and decrements the PC by 2 or 4 -- so it now points to 2 or 4
bytes before the function GDB wants to call -- which leads to a subsequent crash.

To fix this problem, two things need to be supported:
- do_signal must be able to recognize that get_signal_to_deliver changed the PC
  to a different location, and skip the restart-syscall sequence
- once the debugger has restored all registers at the end of the inferior call
  sequence, do_signal must recognize that *now* it needs to restart the pending
  system call, even though it was now entered from a breakpoint instead of an
  actual svc instruction

This set of issues is solved on other platforms, usually by one of two
mechanisms:

- The status information "do_signal is handling a system call that may need
  restarting" is itself carried in some register that can be accessed via
  ptrace.  This is e.g. on Intel the "orig_eax" register; on Sparc the kernel
  defines a magic extra bit in the flags register for this purpose.
  This allows GDB to manage that state: reset it when doing an inferior call,
  and restore it after the call is finished.

- On s390, do_signal transparently handles this problem without requiring
  GDB interaction, by performing system call restarting in the following
  way: first, adjust the PC as necessary for restarting the call.  Then,
  call get_signal_to_deliver; and finally just continue execution at the
  PC.  This way, if GDB does not change the PC, everything is as before.
  If GDB *does* change the PC, execution will simply continue there --
  and once GDB restores the PC it saved at that point, it will automatically
  point to the *restarted* system call.  (There is the minor twist how to
  handle system calls that do *not* need restarting -- do_signal will undo
  the PC change in this case, after get_signal_to_deliver has returned, and
  only if ptrace did not change the PC during that call.)

Because there does not appear to be any obvious register to carry the
syscall-restart information on ARM, we'd either have to introduce a new
artificial ptrace register just for that purpose, or else handle the issue
transparently like on s390.  The patch below implements the second option;
using this patch makes the interrupt.exp test cases pass on ARM, with no
regression in the GDB test suite otherwise.

Cc: patches@linaro.org
Signed-off-by: Ulrich Weigand <ulrich.weigand@linaro.org>
Signed-off-by: Arnd Bergmann <arnd.bergmann@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/kernel/signal.c | 90 ++++++++++++++++++++++++++++--------------------
 1 file changed, 53 insertions(+), 37 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index cb8398317644..0340224cf73c 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -597,45 +597,19 @@ setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info,
 	return err;
 }
 
-static inline void setup_syscall_restart(struct pt_regs *regs)
-{
-	regs->ARM_r0 = regs->ARM_ORIG_r0;
-	regs->ARM_pc -= thumb_mode(regs) ? 2 : 4;
-}
-
 /*
  * OK, we're invoking a handler
  */	
 static int
 handle_signal(unsigned long sig, struct k_sigaction *ka,
 	      siginfo_t *info, sigset_t *oldset,
-	      struct pt_regs * regs, int syscall)
+	      struct pt_regs * regs)
 {
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = current;
 	int usig = sig;
 	int ret;
 
-	/*
-	 * If we were from a system call, check for system call restarting...
-	 */
-	if (syscall) {
-		switch (regs->ARM_r0) {
-		case -ERESTART_RESTARTBLOCK:
-		case -ERESTARTNOHAND:
-			regs->ARM_r0 = -EINTR;
-			break;
-		case -ERESTARTSYS:
-			if (!(ka->sa.sa_flags & SA_RESTART)) {
-				regs->ARM_r0 = -EINTR;
-				break;
-			}
-			/* fallthrough */
-		case -ERESTARTNOINTR:
-			setup_syscall_restart(regs);
-		}
-	}
-
 	/*
 	 * translate the signal
 	 */
@@ -685,6 +659,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
  */
 static void do_signal(struct pt_regs *regs, int syscall)
 {
+	unsigned int retval = 0, continue_addr = 0, restart_addr = 0;
 	struct k_sigaction ka;
 	siginfo_t info;
 	int signr;
@@ -698,18 +673,61 @@ static void do_signal(struct pt_regs *regs, int syscall)
 	if (!user_mode(regs))
 		return;
 
+	/*
+	 * If we were from a system call, check for system call restarting...
+	 */
+	if (syscall) {
+		continue_addr = regs->ARM_pc;
+		restart_addr = continue_addr - (thumb_mode(regs) ? 2 : 4);
+		retval = regs->ARM_r0;
+
+		/*
+		 * Prepare for system call restart.  We do this here so that a
+		 * debugger will see the already changed PSW.
+		 */
+		switch (retval) {
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			regs->ARM_r0 = regs->ARM_ORIG_r0;
+			regs->ARM_pc = restart_addr;
+			break;
+		case -ERESTART_RESTARTBLOCK:
+			regs->ARM_r0 = -EINTR;
+			break;
+		}
+	}
+
 	if (try_to_freeze())
 		goto no_signal;
 
+	/*
+	 * Get the signal to deliver.  When running under ptrace, at this
+	 * point the debugger may change all our registers ...
+	 */
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		sigset_t *oldset;
 
+		/*
+		 * Depending on the signal settings we may need to revert the
+		 * decision to restart the system call.  But skip this if a
+		 * debugger has chosen to restart at a different PC.
+		 */
+		if (regs->ARM_pc == restart_addr) {
+			if (retval == -ERESTARTNOHAND
+			    || (retval == -ERESTARTSYS
+				&& !(ka.sa.sa_flags & SA_RESTART))) {
+				regs->ARM_r0 = -EINTR;
+				regs->ARM_pc = continue_addr;
+			}
+		}
+
 		if (test_thread_flag(TIF_RESTORE_SIGMASK))
 			oldset = &current->saved_sigmask;
 		else
 			oldset = &current->blocked;
-		if (handle_signal(signr, &ka, &info, oldset, regs, syscall) == 0) {
+		if (handle_signal(signr, &ka, &info, oldset, regs) == 0) {
 			/*
 			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
@@ -723,11 +741,14 @@ static void do_signal(struct pt_regs *regs, int syscall)
 	}
 
  no_signal:
-	/*
-	 * No signal to deliver to the process - restart the syscall.
-	 */
 	if (syscall) {
-		if (regs->ARM_r0 == -ERESTART_RESTARTBLOCK) {
+		/*
+		 * Handle restarting a different system call.  As above,
+		 * if a debugger has chosen to restart at a different PC,
+		 * ignore the restart.
+		 */
+		if (retval == -ERESTART_RESTARTBLOCK
+		    && regs->ARM_pc == continue_addr) {
 			if (thumb_mode(regs)) {
 				regs->ARM_r7 = __NR_restart_syscall - __NR_SYSCALL_BASE;
 				regs->ARM_pc -= 2;
@@ -750,11 +771,6 @@ static void do_signal(struct pt_regs *regs, int syscall)
 #endif
 			}
 		}
-		if (regs->ARM_r0 == -ERESTARTNOHAND ||
-		    regs->ARM_r0 == -ERESTARTSYS ||
-		    regs->ARM_r0 == -ERESTARTNOINTR) {
-			setup_syscall_restart(regs);
-		}
 
 		/* If there's no signal to deliver, we just put the saved sigmask
 		 * back.
-- 
cgit v1.2.3


From a904f5f9eb7a55baacb2f4c1423cac8a8eb78a3a Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 6 Apr 2011 16:18:47 +0100
Subject: ARM: 6870/1: The mandatory barrier rmb() must be a dsb() in for
 device accesses

Since mandatory barriers may be used (explicitly or implicitly via readl
etc.) to ensure the ordering between Device and Normal memory accesses,
a DMB is not enough. This patch converts it to a DSB.

Cc: Colin Cross <ccross@android.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/system.h                  | 2 +-
 arch/arm/mach-realview/include/mach/barriers.h | 2 +-
 arch/arm/mach-tegra/include/mach/barriers.h    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 885be097769d..832888d0c20c 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -159,7 +159,7 @@ extern unsigned int user_debug;
 #include <mach/barriers.h>
 #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
 #define mb()		do { dsb(); outer_sync(); } while (0)
-#define rmb()		dmb()
+#define rmb()		dsb()
 #define wmb()		mb()
 #else
 #include <asm/memory.h>
diff --git a/arch/arm/mach-realview/include/mach/barriers.h b/arch/arm/mach-realview/include/mach/barriers.h
index 0c5d749d7b5f..9a732195aa1c 100644
--- a/arch/arm/mach-realview/include/mach/barriers.h
+++ b/arch/arm/mach-realview/include/mach/barriers.h
@@ -4,5 +4,5 @@
  * operation to deadlock the system.
  */
 #define mb()		dsb()
-#define rmb()		dmb()
+#define rmb()		dsb()
 #define wmb()		mb()
diff --git a/arch/arm/mach-tegra/include/mach/barriers.h b/arch/arm/mach-tegra/include/mach/barriers.h
index cc115174899b..425b42e91ef6 100644
--- a/arch/arm/mach-tegra/include/mach/barriers.h
+++ b/arch/arm/mach-tegra/include/mach/barriers.h
@@ -23,7 +23,7 @@
 
 #include <asm/outercache.h>
 
-#define rmb()		dmb()
+#define rmb()		dsb()
 #define wmb()		do { dsb(); outer_sync(); } while (0)
 #define mb()		wmb()
 
-- 
cgit v1.2.3


From 449a66fd1fa75d36dca917704827c40c8f416bca Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Thu, 12 May 2011 15:11:12 +0200
Subject: x86: Remove warning and warning_symbol from struct stacktrace_ops

Both warning and warning_symbol are nowhere used.
Let's get rid of them.

Signed-off-by: Richard Weinberger <richard@nod.at>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Soeren Sandmann Pedersen <ssp@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: x86 <x86@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Link: http://lkml.kernel.org/r/1305205872-10321-2-git-send-email-richard@nod.at
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 arch/x86/include/asm/stacktrace.h |  3 ---
 arch/x86/kernel/cpu/perf_event.c  | 13 -------------
 arch/x86/kernel/dumpstack.c       | 16 ----------------
 arch/x86/kernel/stacktrace.c      | 13 -------------
 arch/x86/oprofile/backtrace.c     | 13 -------------
 5 files changed, 58 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index d7e89c83645d..70bbe39043a9 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -37,9 +37,6 @@ print_context_stack_bp(struct thread_info *tinfo,
 /* Generic stack tracer with callbacks */
 
 struct stacktrace_ops {
-	void (*warning)(void *data, char *msg);
-	/* msg must contain %s for the symbol */
-	void (*warning_symbol)(void *data, char *msg, unsigned long symbol);
 	void (*address)(void *data, unsigned long address, int reliable);
 	/* On negative return stop dumping */
 	int (*stack)(void *data, char *name);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0de6b2b31f61..3a0338b4b179 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1773,17 +1773,6 @@ static struct pmu pmu = {
  * callchain support
  */
 
-static void
-backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-	/* Ignore warnings */
-}
-
-static void backtrace_warning(void *data, char *msg)
-{
-	/* Ignore warnings */
-}
-
 static int backtrace_stack(void *data, char *name)
 {
 	return 0;
@@ -1797,8 +1786,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops backtrace_ops = {
-	.warning		= backtrace_warning,
-	.warning_symbol		= backtrace_warning_symbol,
 	.stack			= backtrace_stack,
 	.address		= backtrace_address,
 	.walk_stack		= print_context_stack_bp,
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index e2a3f0606da4..f478ff6877ef 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -135,20 +135,6 @@ print_context_stack_bp(struct thread_info *tinfo,
 }
 EXPORT_SYMBOL_GPL(print_context_stack_bp);
 
-
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-	printk(data);
-	print_symbol(msg, symbol);
-	printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
-	printk("%s%s\n", (char *)data, msg);
-}
-
 static int print_trace_stack(void *data, char *name)
 {
 	printk("%s <%s> ", (char *)data, name);
@@ -166,8 +152,6 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops print_trace_ops = {
-	.warning		= print_trace_warning,
-	.warning_symbol		= print_trace_warning_symbol,
 	.stack			= print_trace_stack,
 	.address		= print_trace_address,
 	.walk_stack		= print_context_stack,
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 6515733a289d..55d9bc03f696 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -9,15 +9,6 @@
 #include <linux/uaccess.h>
 #include <asm/stacktrace.h>
 
-static void save_stack_warning(void *data, char *msg)
-{
-}
-
-static void
-save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-}
-
 static int save_stack_stack(void *data, char *name)
 {
 	return 0;
@@ -53,16 +44,12 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable)
 }
 
 static const struct stacktrace_ops save_stack_ops = {
-	.warning	= save_stack_warning,
-	.warning_symbol	= save_stack_warning_symbol,
 	.stack		= save_stack_stack,
 	.address	= save_stack_address,
 	.walk_stack	= print_context_stack,
 };
 
 static const struct stacktrace_ops save_stack_ops_nosched = {
-	.warning	= save_stack_warning,
-	.warning_symbol	= save_stack_warning_symbol,
 	.stack		= save_stack_stack,
 	.address	= save_stack_address_nosched,
 	.walk_stack	= print_context_stack,
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 2d49d4e19a36..a5b64ab4cd6e 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -16,17 +16,6 @@
 #include <asm/stacktrace.h>
 #include <linux/compat.h>
 
-static void backtrace_warning_symbol(void *data, char *msg,
-				     unsigned long symbol)
-{
-	/* Ignore warnings */
-}
-
-static void backtrace_warning(void *data, char *msg)
-{
-	/* Ignore warnings */
-}
-
 static int backtrace_stack(void *data, char *name)
 {
 	/* Yes, we want all stacks */
@@ -42,8 +31,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 }
 
 static struct stacktrace_ops backtrace_ops = {
-	.warning	= backtrace_warning,
-	.warning_symbol	= backtrace_warning_symbol,
 	.stack		= backtrace_stack,
 	.address	= backtrace_address,
 	.walk_stack	= print_context_stack,
-- 
cgit v1.2.3


From 92bdaef7b2c5d3cb8abc902faa1f7670a183dcdc Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 5 May 2011 13:50:43 -0400
Subject: Revert "xen/mmu: Add workaround "x86-64, mm: Put early page table
 high""

This reverts commit a38647837a411f7df79623128421eef2118b5884.

It does not work with certain AMD machines.

last_pfn = 0x100000 max_arch_pfn = 0x400000000
initial memory mapped : 0 - 02c3a000
Base memory trampoline at [ffff88000009b000] 9b000 size 20480
init_memory_mapping: 0000000000000000-0000000100000000
 0000000000 - 0100000000 page 4k
kernel direct mapping tables up to 100000000 @ ff7fb000-100000000
init_memory_mapping: 0000000100000000-00000001e0800000
 0100000000 - 01e0800000 page 4k
kernel direct mapping tables up to 1e0800000 @ 1df0f3000-1e0000000
xen: setting RW the range fffdc000 - 100000000
RAMDISK: 0203b000 - 02c3a000
No NUMA configuration found
Faking a node at 0000000000000000-00000001e0800000
NUMA: Using 63 for the hash shift.
Initmem setup node 0 0000000000000000-00000001e0800000
  NODE_DATA [00000001dfffb000 - 00000001dfffffff]
BUG: unable to handle kernel NULL pointer dereference at           (null)
IP: [<ffffffff81cf6a75>] setup_node_bootmem+0x18a/0x1ea
PGD 0
Oops: 0003 [#1] SMP
last sysfs file:
CPU 0
Modules linked in:

Pid: 0, comm: swapper Not tainted 2.6.39-0-virtual #6~smb1
RIP: e030:[<ffffffff81cf6a75>]  [<ffffffff81cf6a75>] setup_node_bootmem+0x18a/0x1ea
RSP: e02b:ffffffff81c01e38  EFLAGS: 00010046
RAX: 0000000000000000 RBX: 00000001e0800000 RCX: 0000000000001040
RDX: 0000000000004100 RSI: 0000000000000000 RDI: ffff8801dfffb000
RBP: ffffffff81c01e58 R08: 0000000000000020 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000bfe400
FS:  0000000000000000(0000) GS:ffffffff81cca000(0000) knlGS:0000000000000000
CS:  e033 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 0000000001c03000 CR4: 0000000000000660
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process swapper (pid: 0, threadinfo ffffffff81c00000, task ffffffff81c0b020)
Stack:
 0000000000000040 0000000000000001 0000000000000000 ffffffffffffffff
 ffffffff81c01e88 ffffffff81cf6c25 0000000000000000 0000000000000000
 ffffffff81cf687f 0000000000000000 ffffffff81c01ea8 ffffffff81cf6e45
Call Trace:
 [<ffffffff81cf6c25>] numa_register_memblks.constprop.3+0x150/0x181
 [<ffffffff81cf687f>] ? numa_add_memblk+0x7c/0x7c
 [<ffffffff81cf6e45>] numa_init.part.2+0x1c/0x7c
 [<ffffffff81cf687f>] ? numa_add_memblk+0x7c/0x7c
 [<ffffffff81cf6f67>] numa_init+0x6c/0x70
 [<ffffffff81cf7057>] initmem_init+0x39/0x3b
 [<ffffffff81ce5865>] setup_arch+0x64e/0x769
 [<ffffffff815e43c1>] ? printk+0x51/0x53
 [<ffffffff81cdf92b>] start_kernel+0xd4/0x3f3
 [<ffffffff81cdf388>] x86_64_start_reservations+0x132/0x136
 [<ffffffff81ce2ed4>] xen_start_kernel+0x588/0x58f
Code: 41 00 00 48 8b 3c c5 a0 24 cc 81 31 c0 40 f6 c7 01 74 05 aa 66 ba ff 40 40 f6 c7 02 74 05 66 ab 83 ea 02 89 d1 c1 e9 02 f6 c2 02 <f3> ab 74 02 66 ab 80 e2 01 74 01 aa 49 63 c4 48 c1 eb 0c 44 89
RIP  [<ffffffff81cf6a75>] setup_node_bootmem+0x18a/0x1ea
 RSP <ffffffff81c01e38>
CR2: 0000000000000000
---[ end trace a7919e7f17c0a725 ]---
Kernel panic - not syncing: Attempted to kill the idle task!
Pid: 0, comm: swapper Tainted: G      D     2.6.39-0-virtual #6~smb1

Reported-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 123 -----------------------------------------------------
 1 file changed, 123 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 55c965b38c27..cf4ef61e425b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1463,119 +1463,6 @@ static int xen_pgd_alloc(struct mm_struct *mm)
 	return ret;
 }
 
-#ifdef CONFIG_X86_64
-static __initdata u64 __last_pgt_set_rw = 0;
-static __initdata u64 __pgt_buf_start = 0;
-static __initdata u64 __pgt_buf_end = 0;
-static __initdata u64 __pgt_buf_top = 0;
-/*
- * As a consequence of the commit:
- * 
- * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
- * Author: Yinghai Lu <yinghai@kernel.org>
- * Date:   Fri Dec 17 16:58:28 2010 -0800
- * 
- *     x86-64, mm: Put early page table high
- * 
- * at some point init_memory_mapping is going to reach the pagetable pages
- * area and map those pages too (mapping them as normal memory that falls
- * in the range of addresses passed to init_memory_mapping as argument).
- * Some of those pages are already pagetable pages (they are in the range
- * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
- * everything is fine.
- * Some of these pages are not pagetable pages yet (they fall in the range
- * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
- * are going to be mapped RW.  When these pages become pagetable pages and
- * are hooked into the pagetable, xen will find that the guest has already
- * a RW mapping of them somewhere and fail the operation.
- * The reason Xen requires pagetables to be RO is that the hypervisor needs
- * to verify that the pagetables are valid before using them. The validation
- * operations are called "pinning".
- * 
- * In order to fix the issue we mark all the pages in the entire range
- * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation
- * is completed only the range pgt_buf_start-pgt_buf_end is reserved by
- * init_memory_mapping. Hence the kernel is going to crash as soon as one
- * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those
- * ranges are RO).
- * 
- * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_
- * the init_memory_mapping has completed (in a perfect world we would
- * call this function from init_memory_mapping, but lets ignore that).
- * 
- * Because we are called _after_ init_memory_mapping the pgt_buf_[start,
- * end,top] have all changed to new values (b/c init_memory_mapping
- * is called and setting up another new page-table). Hence, the first time
- * we enter this function, we save away the pgt_buf_start value and update
- * the pgt_buf_[end,top].
- * 
- * When we detect that the "old" pgt_buf_start through pgt_buf_end
- * PFNs have been reserved (so memblock_x86_reserve_range has been called),
- * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top.
- * 
- * And then we update those "old" pgt_buf_[end|top] with the new ones
- * so that we can redo this on the next pagetable.
- */
-static __init void mark_rw_past_pgt(void) {
-
-	if (pgt_buf_end > pgt_buf_start) {
-		u64 addr, size;
-
-		/* Save it away. */
-		if (!__pgt_buf_start) {
-			__pgt_buf_start = pgt_buf_start;
-			__pgt_buf_end = pgt_buf_end;
-			__pgt_buf_top = pgt_buf_top;
-			return;
-		}
-		/* If we get the range that starts at __pgt_buf_end that means
-		 * the range is reserved, and that in 'init_memory_mapping'
-		 * the 'memblock_x86_reserve_range' has been called with the
-		 * outdated __pgt_buf_start, __pgt_buf_end (the "new"
-		 * pgt_buf_[start|end|top] refer now to a new pagetable.
-		 * Note: we are called _after_ the pgt_buf_[..] have been
-		 * updated.*/
-
-		addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start),
-						       &size, PAGE_SIZE);
-
-		/* Still not reserved, meaning 'memblock_x86_reserve_range'
-		 * hasn't been called yet. Update the _end and _top.*/
-		if (addr == PFN_PHYS(__pgt_buf_start)) {
-			__pgt_buf_end = pgt_buf_end;
-			__pgt_buf_top = pgt_buf_top;
-			return;
-		}
-
-		/* OK, the area is reserved, meaning it is time for us to
-		 * set RW for the old end->top PFNs. */
-
-		/* ..unless we had already done this. */
-		if (__pgt_buf_end == __last_pgt_set_rw)
-			return;
-
-		addr = PFN_PHYS(__pgt_buf_end);
-		
-		/* set as RW the rest */
-		printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n",
-			PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top));
-		
-		while (addr < PFN_PHYS(__pgt_buf_top)) {
-			make_lowmem_page_readwrite(__va(addr));
-			addr += PAGE_SIZE;
-		}
-		/* And update everything so that we are ready for the next
-		 * pagetable (the one created for regions past 4GB) */
-		__last_pgt_set_rw = __pgt_buf_end;
-		__pgt_buf_start = pgt_buf_start;
-		__pgt_buf_end = pgt_buf_end;
-		__pgt_buf_top = pgt_buf_top;
-	}
-	return;
-}
-#else
-static __init void mark_rw_past_pgt(void) { }
-#endif
 static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 #ifdef CONFIG_X86_64
@@ -1601,14 +1488,6 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 {
 	unsigned long pfn = pte_pfn(pte);
 
-	/*
-	 * A bit of optimization. We do not need to call the workaround
-	 * when xen_set_pte_init is called with a PTE with 0 as PFN.
-	 * That is b/c the pagetable at that point are just being populated
-	 * with empty values and we can save some cycles by not calling
-	 * the 'memblock' code.*/
-	if (pfn)
-		mark_rw_past_pgt();
 	/*
 	 * If the new pfn is within the range of the newly allocated
 	 * kernel pagetable, and it isn't being mapped into an
@@ -2118,8 +1997,6 @@ __init void xen_ident_map_ISA(void)
 
 static __init void xen_post_allocator_init(void)
 {
-	mark_rw_past_pgt();
-
 #ifdef CONFIG_XEN_DEBUG
 	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
 #endif
-- 
cgit v1.2.3


From 279b706bf800b5967037f492dbe4fc5081ad5d0f Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Thu, 14 Apr 2011 15:49:41 +0100
Subject: x86,xen: introduce x86_init.mapping.pagetable_reserve

Introduce a new x86_init hook called pagetable_reserve that at the end
of init_memory_mapping is used to reserve a range of memory addresses for
the kernel pagetable pages we used and free the other ones.

On native it just calls memblock_x86_reserve_range while on xen it also
takes care of setting the spare memory previously allocated
for kernel pagetable pages from RO to RW, so that it can be used for
other purposes.

A detailed explanation of the reason why this hook is needed follows.

As a consequence of the commit:

commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
Author: Yinghai Lu <yinghai@kernel.org>
Date:   Fri Dec 17 16:58:28 2010 -0800

    x86-64, mm: Put early page table high

at some point init_memory_mapping is going to reach the pagetable pages
area and map those pages too (mapping them as normal memory that falls
in the range of addresses passed to init_memory_mapping as argument).
Some of those pages are already pagetable pages (they are in the range
pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
everything is fine.
Some of these pages are not pagetable pages yet (they fall in the range
pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
are going to be mapped RW.  When these pages become pagetable pages and
are hooked into the pagetable, xen will find that the guest has already
a RW mapping of them somewhere and fail the operation.
The reason Xen requires pagetables to be RO is that the hypervisor needs
to verify that the pagetables are valid before using them. The validation
operations are called "pinning" (more details in arch/x86/xen/mmu.c).

In order to fix the issue we mark all the pages in the entire range
pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation
is completed only the range pgt_buf_start-pgt_buf_end is reserved by
init_memory_mapping. Hence the kernel is going to crash as soon as one
of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those
ranges are RO).

For this reason we need a hook to reserve the kernel pagetable pages we
used and free the other ones so that they can be reused for other
purposes.
On native it just means calling memblock_x86_reserve_range, on Xen it
also means marking RW the pagetable pages that we allocated before but
that haven't been used before.

Another way to fix this is without using the hook is by adding a 'if
(xen_pv_domain)' in the 'init_memory_mapping' code and calling the Xen
counterpart, but that is just nasty.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/include/asm/pgtable_types.h |  1 +
 arch/x86/include/asm/x86_init.h      | 12 ++++++++++++
 arch/x86/kernel/x86_init.c           |  4 ++++
 arch/x86/mm/init.c                   | 24 ++++++++++++++++++++++--
 arch/x86/xen/mmu.c                   | 15 +++++++++++++++
 5 files changed, 54 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7db7723d1f32..d56187c6b838 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 /* Install a pte for a particular vaddr in kernel space. */
 void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
+extern void native_pagetable_reserve(u64 start, u64 end);
 #ifdef CONFIG_X86_32
 extern void native_pagetable_setup_start(pgd_t *base);
 extern void native_pagetable_setup_done(pgd_t *base);
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 643ebf2e2ad8..d3d859035af9 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -67,6 +67,17 @@ struct x86_init_oem {
 	void (*banner)(void);
 };
 
+/**
+ * struct x86_init_mapping - platform specific initial kernel pagetable setup
+ * @pagetable_reserve:	reserve a range of addresses for kernel pagetable usage
+ *
+ * For more details on the purpose of this hook, look in
+ * init_memory_mapping and the commit that added it.
+ */
+struct x86_init_mapping {
+	void (*pagetable_reserve)(u64 start, u64 end);
+};
+
 /**
  * struct x86_init_paging - platform specific paging functions
  * @pagetable_setup_start:	platform specific pre paging_init() call
@@ -123,6 +134,7 @@ struct x86_init_ops {
 	struct x86_init_mpparse		mpparse;
 	struct x86_init_irqs		irqs;
 	struct x86_init_oem		oem;
+	struct x86_init_mapping		mapping;
 	struct x86_init_paging		paging;
 	struct x86_init_timers		timers;
 	struct x86_init_iommu		iommu;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c11514e9128b..75ef4b18e9b7 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = {
 		.banner			= default_banner,
 	},
 
+	.mapping = {
+		.pagetable_reserve		= native_pagetable_reserve,
+	},
+
 	.paging = {
 		.pagetable_setup_start	= native_pagetable_setup_start,
 		.pagetable_setup_done	= native_pagetable_setup_done,
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 286d289b039b..722a4c372ce3 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 		end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
 }
 
+void native_pagetable_reserve(u64 start, u64 end)
+{
+	memblock_x86_reserve_range(start, end, "PGTABLE");
+}
+
 struct map_range {
 	unsigned long start;
 	unsigned long end;
@@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 
 	__flush_tlb_all();
 
+	/*
+	 * Reserve the kernel pagetable pages we used (pgt_buf_start -
+	 * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
+	 * so that they can be reused for other purposes.
+	 *
+	 * On native it just means calling memblock_x86_reserve_range, on Xen it
+	 * also means marking RW the pagetable pages that we allocated before
+	 * but that haven't been used.
+	 *
+	 * In fact on xen we mark RO the whole range pgt_buf_start -
+	 * pgt_buf_top, because we have to make sure that when
+	 * init_memory_mapping reaches the pagetable pages area, it maps
+	 * RO all the pagetable pages, including the ones that are beyond
+	 * pgt_buf_end at that time.
+	 */
 	if (!after_bootmem && pgt_buf_end > pgt_buf_start)
-		memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
-				 pgt_buf_end << PAGE_SHIFT, "PGTABLE");
+		x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+				PFN_PHYS(pgt_buf_end));
 
 	if (!after_bootmem)
 		early_memtest(start, end);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index cf4ef61e425b..0684f3c74d53 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
 {
 }
 
+static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
+{
+	/* reserve the range used */
+	native_pagetable_reserve(start, end);
+
+	/* set as RW the rest */
+	printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
+			PFN_PHYS(pgt_buf_top));
+	while (end < PFN_PHYS(pgt_buf_top)) {
+		make_lowmem_page_readwrite(__va(end));
+		end += PAGE_SIZE;
+	}
+}
+
 static void xen_post_allocator_init(void);
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -2105,6 +2119,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 void __init xen_init_mmu_ops(void)
 {
+	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
 	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
 	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
 	pv_mmu_ops = xen_mmu_ops;
-- 
cgit v1.2.3


From 53f8023febf9b3e18d8fb0d99c55010e473ce53d Mon Sep 17 00:00:00 2001
From: Sedat Dilek <sedat.dilek@gmail.com>
Date: Sun, 17 Apr 2011 16:17:34 +0200
Subject: x86/mm: Fix section mismatch derived from native_pagetable_reserve()

With CONFIG_DEBUG_SECTION_MISMATCH=y I see these warnings in next-20110415:

  LD      vmlinux.o
  MODPOST vmlinux.o
WARNING: vmlinux.o(.text+0x1ba48): Section mismatch in reference from the function native_pagetable_reserve() to the function .init.text:memblock_x86_reserve_range()
The function native_pagetable_reserve() references
the function __init memblock_x86_reserve_range().
This is often because native_pagetable_reserve lacks a __init
annotation or the annotation of memblock_x86_reserve_range is wrong.

This patch fixes the issue.
Thanks to pipacs from PaX project for help on IRC.

Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 722a4c372ce3..37b8b0fe8320 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,7 +81,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 		end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
 }
 
-void native_pagetable_reserve(u64 start, u64 end)
+void __init native_pagetable_reserve(u64 start, u64 end)
 {
 	memblock_x86_reserve_range(start, end, "PGTABLE");
 }
-- 
cgit v1.2.3


From 7899891c7d161752f29abcc9bc0a9c6c3a3af26c Mon Sep 17 00:00:00 2001
From: "Tian, Kevin" <kevin.tian@intel.com>
Date: Thu, 12 May 2011 10:56:08 +0800
Subject: xen mmu: fix a race window causing leave_mm BUG()

There's a race window in xen_drop_mm_ref, where remote cpu may exit
dirty bitmap between the check on this cpu and the point where remote
cpu handles drop request. So in drop_other_mm_ref we need check
whether TLB state is still lazy before calling into leave_mm. This
bug is rarely observed in earlier kernel, but exaggerated by the
commit 831d52bc153971b70e64eccfbed2b232394f22f8
("x86, mm: avoid possible bogus tlb entries by clearing prev mm_cpumask after switching mm")
which clears bitmap after changing the TLB state. the call trace is as below:

---------------------------------
kernel BUG at arch/x86/mm/tlb.c:61!
invalid opcode: 0000 [#1] SMP
last sysfs file: /sys/devices/system/xen_memory/xen_memory0/info/current_kb
CPU 1
Modules linked in: 8021q garp xen_netback xen_blkback blktap blkback_pagemap nbd bridge stp llc autofs4 ipmi_devintf ipmi_si ipmi_msghandler lockd sunrpc bonding ipv6 xenfs dm_multipath video output sbs sbshc parport_pc lp parport ses enclosure snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device serio_raw bnx2 snd_pcm_oss snd_mixer_oss snd_pcm snd_timer iTCO_wdt snd soundcore snd_page_alloc i2c_i801 iTCO_vendor_support i2c_core pcs pkr pata_acpi ata_generic ata_piix shpchp mptsas mptscsih mptbase [last unloaded: freq_table]
Pid: 25581, comm: khelper Not tainted 2.6.32.36fixxen #1 Tecal RH2285
RIP: e030:[<ffffffff8103a3cb>]  [<ffffffff8103a3cb>] leave_mm+0x15/0x46
RSP: e02b:ffff88002805be48  EFLAGS: 00010046
RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88015f8e2da0
RDX: ffff88002805be78 RSI: 0000000000000000 RDI: 0000000000000001
RBP: ffff88002805be48 R08: ffff88009d662000 R09: dead000000200200
R10: dead000000100100 R11: ffffffff814472b2 R12: ffff88009bfc1880
R13: ffff880028063020 R14: 00000000000004f6 R15: 0000000000000000
FS:  00007f62362d66e0(0000) GS:ffff880028058000(0000) knlGS:0000000000000000
CS:  e033 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000003aabc11909 CR3: 000000009b8ca000 CR4: 0000000000002660
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 00000000000000 00
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process khelper (pid: 25581, threadinfo ffff88007691e000, task ffff88009b92db40)
Stack:
 ffff88002805be68 ffffffff8100e4ae 0000000000000001 ffff88009d733b88
<0> ffff88002805be98 ffffffff81087224 ffff88002805be78 ffff88002805be78
<0> ffff88015f808360 00000000000004f6 ffff88002805bea8 ffffffff81010108
Call Trace:
 <IRQ>
 [<ffffffff8100e4ae>] drop_other_mm_ref+0x2a/0x53
 [<ffffffff81087224>] generic_smp_call_function_single_interrupt+0xd8/0xfc
 [<ffffffff81010108>] xen_call_function_single_interrupt+0x13/0x28
 [<ffffffff810a936a>] handle_IRQ_event+0x66/0x120
 [<ffffffff810aac5b>] handle_percpu_irq+0x41/0x6e
 [<ffffffff8128c1c0>] __xen_evtchn_do_upcall+0x1ab/0x27d
 [<ffffffff8128dd11>] xen_evtchn_do_upcall+0x33/0x46
 [<ffffffff81013efe>] xen_do_hyper visor_callback+0x1e/0x30
 <EOI>
 [<ffffffff814472b2>] ? _spin_unlock_irqrestore+0x15/0x17
 [<ffffffff8100f8cf>] ? xen_restore_fl_direct_end+0x0/0x1
 [<ffffffff81113f71>] ? flush_old_exec+0x3ac/0x500
 [<ffffffff81150dc5>] ? load_elf_binary+0x0/0x17ef
 [<ffffffff81150dc5>] ? load_elf_binary+0x0/0x17ef
 [<ffffffff8115115d>] ? load_elf_binary+0x398/0x17ef
 [<ffffffff81042fcf>] ? need_resched+0x23/0x2d
 [<ffffffff811f4648>] ? process_measurement+0xc0/0xd7
 [<ffffffff81150dc5>] ? load_elf_binary+0x0/0x17ef
 [<ffffffff81113094>] ? search_binary_handler+0xc8/0x255
 [<ffffffff81114362>] ? do_execve+0x1c3/0x29e
 [<ffffffff8101155d>] ? sys_execve+0x43/0x5d
 [<ffffffff8106fc45>] ? __call_usermodehelper+0x0/0x6f
 [<ffffffff81013e28>] ? kernel_execve+0x68/0xd0
 [<ffffffff 8106fc45>] ? __call_usermodehelper+0x0/0x6f
 [<ffffffff8100f8cf>] ? xen_restore_fl_direct_end+0x0/0x1
 [<ffffffff8106fb64>] ? ____call_usermodehelper+0x113/0x11e
 [<ffffffff81013daa>] ? child_rip+0xa/0x20
 [<ffffffff8106fc45>] ? __call_usermodehelper+0x0/0x6f
 [<ffffffff81012f91>] ? int_ret_from_sys_call+0x7/0x1b
 [<ffffffff8101371d>] ? retint_restore_args+0x5/0x6
 [<ffffffff81013da0>] ? child_rip+0x0/0x20
Code: 41 5e 41 5f c9 c3 55 48 89 e5 0f 1f 44 00 00 e8 17 ff ff ff c9 c3 55 48 89 e5 0f 1f 44 00 00 65 8b 04 25 c8 55 01 00 ff c8 75 04 <0f> 0b eb fe 65 48 8b 34 25 c0 55 01 00 48 81 c6 b8 02 00 00 e8
RIP  [<ffffffff8103a3cb>] leave_mm+0x15/0x46
 RSP <ffff88002805be48>
---[ end trace ce9cee6832a9c503 ]---

Tested-by: Maoxiaoyun<tinnycloud@hotmail.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
[v1: Fleshed out the git description a bit]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61ad574..4fd7387222bf 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1140,7 +1140,7 @@ static void drop_other_mm_ref(void *info)
 
 	active_mm = percpu_read(cpu_tlbstate.active_mm);
 
-	if (active_mm == mm)
+	if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
 		leave_mm(smp_processor_id());
 
 	/* If this cpu still has a stale cr3 reference, then make sure
-- 
cgit v1.2.3


From 15bfc094517db2ddf38ca7ed47f3a1c0ad24f7c4 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 12 Apr 2011 07:57:15 -0400
Subject: xen/setup: Ignore E820_UNUSABLE when setting 1-1 mappings.

When we parse the raw E820, the Xen hypervisor can set "E820_RAM"
to "E820_UNUSABLE" if the mem=X argument is used. As such we
should _not_ consider the E820_UNUSABLE as an 1-1 identity
mapping, but instead use the same case as for E820_RAM.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 90bac0aac3a5..fba4a6cc3a2d 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -166,7 +166,7 @@ static unsigned long __init xen_set_identity(const struct e820entry *list,
 		if (last > end)
 			continue;
 
-		if (entry->type == E820_RAM) {
+		if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
 			if (start > start_pci)
 				identity += set_phys_range_identity(
 						PFN_UP(start_pci), PFN_DOWN(start));
-- 
cgit v1.2.3


From 0f16d0dfcdb5aab97d9e368f008b070b5b3ec6d3 Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Wed, 11 May 2011 22:34:38 +0200
Subject: xen/setup: Fix for incorrect xen_extra_mem_start initialization under
 32-bit

git commit 24bdb0b62cc82120924762ae6bc85afc8c3f2b26 (xen: do not create
the extra e820 region at an addr lower than 4G) does not take into
account that ifdef CONFIG_X86_32 instead of e820_end_of_low_ram_pfn()
find_low_pfn_range() is called (both calls are from arch/x86/kernel/setup.c).
find_low_pfn_range() behaves correctly and does not require change in
xen_extra_mem_start initialization. Additionally, if xen_extra_mem_start
is initialized in the same way as ifdef CONFIG_X86_64 then memory hotplug
support for Xen balloon driver (under development) is broken.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/setup.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index fba4a6cc3a2d..ca6297bd4e3c 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -227,7 +227,11 @@ char * __init xen_memory_setup(void)
 
 	memcpy(map_raw, map, sizeof(map));
 	e820.nr_map = 0;
+#ifdef CONFIG_X86_32
+	xen_extra_mem_start = mem_end;
+#else
 	xen_extra_mem_start = max((1ULL << 32), mem_end);
+#endif
 	for (i = 0; i < memmap.nr_entries; i++) {
 		unsigned long long end;
 
-- 
cgit v1.2.3


From 8c5950881c3b5e6e350e4b0438a8ccc513d90df9 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 1 Apr 2011 15:18:48 -0400
Subject: xen/p2m: Create entries in the P2M_MFN trees's to track 1-1 mappings

.. when applicable. We need to track in the p2m_mfn and
p2m_mfn_p the MFNs and pointers, respectivly, for the P2M entries
that are allocated for the identity mappings. Without this,
a PV domain with an E820 that triggers the 1-1 mapping to kick in,
won't be able to be restored as the P2M won't have the identity
mappings.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/p2m.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 141eb0de8b06..a01e6532b46a 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -522,11 +522,20 @@ static bool __init __early_alloc_p2m(unsigned long pfn)
 	/* Boundary cross-over for the edges: */
 	if (idx) {
 		unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		unsigned long *mid_mfn_p;
 
 		p2m_init(p2m);
 
 		p2m_top[topidx][mididx] = p2m;
 
+		/* For save/restore we need to MFN of the P2M saved */
+		
+		mid_mfn_p = p2m_top_mfn_p[topidx];
+		WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing),
+			"P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n",
+			topidx, mididx);
+		mid_mfn_p[mididx] = virt_to_mfn(p2m);
+
 	}
 	return idx != 0;
 }
@@ -549,12 +558,29 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s,
 		pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
 	{
 		unsigned topidx = p2m_top_index(pfn);
-		if (p2m_top[topidx] == p2m_mid_missing) {
-			unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+		unsigned long *mid_mfn_p;
+		unsigned long **mid;
+
+		mid = p2m_top[topidx];
+		mid_mfn_p = p2m_top_mfn_p[topidx];
+		if (mid == p2m_mid_missing) {
+			mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
 
 			p2m_mid_init(mid);
 
 			p2m_top[topidx] = mid;
+
+			BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
+		}
+		/* And the save/restore P2M tables.. */
+		if (mid_mfn_p == p2m_mid_missing_mfn) {
+			mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE);
+			p2m_mid_mfn_init(mid_mfn_p);
+
+			p2m_top_mfn_p[topidx] = mid_mfn_p;
+			p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
+			/* Note: we don't set mid_mfn_p[midix] here,
+		 	 * look in __early_alloc_p2m */
 		}
 	}
 
-- 
cgit v1.2.3


From 251511a18dcfe1868e3edfdc490777dcfaa1f851 Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Wed, 4 May 2011 20:16:07 +0200
Subject: arch/x86/xen/irq: Cleanup code/data sections definitions

Cleanup code/data sections definitions
accordingly to include/linux/init.h.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 6a6fe8939645..8bbb465b6f0a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -113,7 +113,7 @@ static void xen_halt(void)
 		xen_safe_halt();
 }
 
-static const struct pv_irq_ops xen_irq_ops __initdata = {
+static const struct pv_irq_ops xen_irq_ops __initconst = {
 	.save_fl = PV_CALLEE_SAVE(xen_save_fl),
 	.restore_fl = PV_CALLEE_SAVE(xen_restore_fl),
 	.irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
-- 
cgit v1.2.3


From ad3062a0f438a5f436dae267f795c0a9686f11d2 Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Wed, 4 May 2011 20:15:18 +0200
Subject: arch/x86/xen/enlighten: Cleanup code/data sections definitions

Cleanup code/data sections definitions
accordingly to include/linux/init.h.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e3c6a06cf725..dd7b88f2ec7a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -235,7 +235,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 	*dx &= maskedx;
 }
 
-static __init void xen_init_cpuid_mask(void)
+static void __init xen_init_cpuid_mask(void)
 {
 	unsigned int ax, bx, cx, dx;
 	unsigned int xsave_mask;
@@ -400,7 +400,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
 /*
  * load_gdt for early boot, when the gdt is only mapped once
  */
-static __init void xen_load_gdt_boot(const struct desc_ptr *dtr)
+static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
 {
 	unsigned long va = dtr->address;
 	unsigned int size = dtr->size + 1;
@@ -662,7 +662,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
  * Version of write_gdt_entry for use at early boot-time needed to
  * update an entry as simply as possible.
  */
-static __init void xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
+static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
 					    const void *desc, int type)
 {
 	switch (type) {
@@ -933,18 +933,18 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 	return ret;
 }
 
-static const struct pv_info xen_info __initdata = {
+static const struct pv_info xen_info __initconst = {
 	.paravirt_enabled = 1,
 	.shared_kernel_pmd = 0,
 
 	.name = "Xen",
 };
 
-static const struct pv_init_ops xen_init_ops __initdata = {
+static const struct pv_init_ops xen_init_ops __initconst = {
 	.patch = xen_patch,
 };
 
-static const struct pv_cpu_ops xen_cpu_ops __initdata = {
+static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.cpuid = xen_cpuid,
 
 	.set_debugreg = xen_set_debugreg,
@@ -1004,7 +1004,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.end_context_switch = xen_end_context_switch,
 };
 
-static const struct pv_apic_ops xen_apic_ops __initdata = {
+static const struct pv_apic_ops xen_apic_ops __initconst = {
 #ifdef CONFIG_X86_LOCAL_APIC
 	.startup_ipi_hook = paravirt_nop,
 #endif
@@ -1055,7 +1055,7 @@ int xen_panic_handler_init(void)
 	return 0;
 }
 
-static const struct machine_ops __initdata xen_machine_ops = {
+static const struct machine_ops xen_machine_ops __initconst = {
 	.restart = xen_restart,
 	.halt = xen_machine_halt,
 	.power_off = xen_machine_halt,
@@ -1332,7 +1332,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata xen_hvm_cpu_notifier = {
+static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = {
 	.notifier_call	= xen_hvm_cpu_notify,
 };
 
@@ -1381,7 +1381,7 @@ bool xen_hvm_need_lapic(void)
 }
 EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
 
-const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = {
+const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
 	.name			= "Xen HVM",
 	.detect			= xen_hvm_platform,
 	.init_platform		= xen_hvm_guest_init,
-- 
cgit v1.2.3


From ae15a3b4d1374b733016ce4b4148b2ba42bbeb0f Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Wed, 4 May 2011 20:17:21 +0200
Subject: arch/x86/xen/setup: Cleanup code/data sections definitions

Cleanup code/data sections definitions
accordingly to include/linux/init.h.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/setup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 90bac0aac3a5..d3663df2f967 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -50,7 +50,7 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
  */
 #define EXTRA_MEM_RATIO		(10)
 
-static __init void xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(unsigned long pages)
 {
 	unsigned long pfn;
 
@@ -336,7 +336,7 @@ static void __init fiddle_vdso(void)
 #endif
 }
 
-static __cpuinit int register_callback(unsigned type, const void *func)
+static int __cpuinit register_callback(unsigned type, const void *func)
 {
 	struct callback_register callback = {
 		.type = type,
-- 
cgit v1.2.3


From 77ed23f8d995a01cd8101d84351b567bf5177a30 Mon Sep 17 00:00:00 2001
From: Cliff Wickman <cpw@sgi.com>
Date: Tue, 10 May 2011 08:26:43 -0500
Subject: x86: Fix UV BAU for non-consecutive nasids

This is a fix for the SGI Altix-UV Broadcast Assist Unit code,
which is used for TLB flushing.

Certain hardware configurations (that customers are ordering)
cause nasids (numa address space id's) to be non-consecutive.
Specifically, once you have more than 4 blades in a IRU
(Individual Rack Unit - or 1/2 rack) but less than the maximum
of 16, the nasid numbering becomes non-consecutive.  This
currently results in a 'catastrophic error' (CATERR) detected by
the firmware during OS boot.  The BAU is generating an 'INTD'
request that is targeting a non-existent nasid value. Such
configurations may also occur when a blade is configured off
because of hardware errors. (There is one UV hub per blade.)

This patch is required to support such configurations.

The problem with the tlb_uv.c code is that is using the
consecutive hub numbers as indices to the BAU distribution bit
map. These are simply the ordinal position of the hub or blade
within its partition.  It should be using physical node numbers
(pnodes), which correspond to the physical nasid values. Use of
the hub number only works as long as the nasids in the partition
are consecutive and increase with a stride of 1.

This patch changes the index to be the pnode number, thus
allowing nasids to be non-consecutive.
It also provides a table in local memory for each cpu to
translate target cpu number to target pnode and nasid.
And it improves naming to properly reflect 'node' and 'uvhub'
versus 'nasid'.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: <stable@kernel.org>
Link: http://lkml.kernel.org/r/E1QJmxX-0002Mz-Fk@eag09.americas.sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uv/uv_bau.h | 17 ++++++--
 arch/x86/platform/uv/tlb_uv.c    | 92 +++++++++++++++++++++++++++-------------
 2 files changed, 76 insertions(+), 33 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 3e094af443c3..130f1eeee5fe 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -94,6 +94,8 @@
 /* after this # consecutive successes, bump up the throttle if it was lowered */
 #define COMPLETE_THRESHOLD 5
 
+#define UV_LB_SUBNODEID 0x10
+
 /*
  * number of entries in the destination side payload queue
  */
@@ -124,7 +126,7 @@
  * The distribution specification (32 bytes) is interpreted as a 256-bit
  * distribution vector. Adjacent bits correspond to consecutive even numbered
  * nodeIDs. The result of adding the index of a given bit to the 15-bit
- * 'base_dest_nodeid' field of the header corresponds to the
+ * 'base_dest_nasid' field of the header corresponds to the
  * destination nodeID associated with that specified bit.
  */
 struct bau_target_uvhubmask {
@@ -176,7 +178,7 @@ struct bau_msg_payload {
 struct bau_msg_header {
 	unsigned int dest_subnodeid:6;	/* must be 0x10, for the LB */
 	/* bits 5:0 */
-	unsigned int base_dest_nodeid:15; /* nasid of the */
+	unsigned int base_dest_nasid:15; /* nasid of the */
 	/* bits 20:6 */			  /* first bit in uvhub map */
 	unsigned int command:8;	/* message type */
 	/* bits 28:21 */
@@ -378,6 +380,10 @@ struct ptc_stats {
 	unsigned long d_rcanceled; /* number of messages canceled by resets */
 };
 
+struct hub_and_pnode {
+	short uvhub;
+	short pnode;
+};
 /*
  * one per-cpu; to locate the software tables
  */
@@ -399,10 +405,12 @@ struct bau_control {
 	int baudisabled;
 	int set_bau_off;
 	short cpu;
+	short osnode;
 	short uvhub_cpu;
 	short uvhub;
 	short cpus_in_socket;
 	short cpus_in_uvhub;
+	short partition_base_pnode;
 	unsigned short message_number;
 	unsigned short uvhub_quiesce;
 	short socket_acknowledge_count[DEST_Q_SIZE];
@@ -422,15 +430,16 @@ struct bau_control {
 	int congested_period;
 	cycles_t period_time;
 	long period_requests;
+	struct hub_and_pnode *target_hub_and_pnode;
 };
 
 static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
 {
 	return constant_test_bit(uvhub, &dstp->bits[0]);
 }
-static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp)
+static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
 {
-	__set_bit(uvhub, &dstp->bits[0]);
+	__set_bit(pnode, &dstp->bits[0]);
 }
 static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
 				    int nbits)
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 7cb6424317f6..c58e0ea39ef5 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 					  struct mm_struct *mm,
 					  unsigned long va, unsigned int cpu)
 {
-	int tcpu;
-	int uvhub;
 	int locals = 0;
 	int remotes = 0;
 	int hubs = 0;
+	int tcpu;
+	int tpnode;
 	struct bau_desc *bau_desc;
 	struct cpumask *flush_mask;
 	struct ptc_stats *stat;
 	struct bau_control *bcp;
 	struct bau_control *tbcp;
+	struct hub_and_pnode *hpp;
 
 	/* kernel was booted 'nobau' */
 	if (nobau)
@@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 
-	/* cpu statistics */
 	for_each_cpu(tcpu, flush_mask) {
-		uvhub = uv_cpu_to_blade_id(tcpu);
-		bau_uvhub_set(uvhub, &bau_desc->distribution);
-		if (uvhub == bcp->uvhub)
+		/*
+		 * The distribution vector is a bit map of pnodes, relative
+		 * to the partition base pnode (and the partition base nasid
+		 * in the header).
+		 * Translate cpu to pnode and hub using an array stored
+		 * in local memory.
+		 */
+		hpp = &bcp->socket_master->target_hub_and_pnode[tcpu];
+		tpnode = hpp->pnode - bcp->partition_base_pnode;
+		bau_uvhub_set(tpnode, &bau_desc->distribution);
+		if (hpp->uvhub == bcp->uvhub)
 			locals++;
 		else
 			remotes++;
@@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
  * an interrupt, but causes an error message to be returned to
  * the sender.
  */
-static void uv_enable_timeouts(void)
+static void __init uv_enable_timeouts(void)
 {
 	int uvhub;
 	int nuvhubs;
@@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void)
 }
 
 /*
- * initialize the sending side's sending buffers
+ * Initialize the sending side's sending buffers.
  */
 static void
-uv_activation_descriptor_init(int node, int pnode)
+uv_activation_descriptor_init(int node, int pnode, int base_pnode)
 {
 	int i;
 	int cpu;
@@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode)
 	n = pa >> uv_nshift;
 	m = pa & uv_mmask;
 
+	/* the 14-bit pnode */
 	uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
 			      (n << UV_DESC_BASE_PNODE_SHIFT | m));
-
 	/*
-	 * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
+	 * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
 	 * cpu even though we only use the first one; one descriptor can
 	 * describe a broadcast to 256 uv hubs.
 	 */
@@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode)
 		memset(bd2, 0, sizeof(struct bau_desc));
 		bd2->header.sw_ack_flag = 1;
 		/*
-		 * base_dest_nodeid is the nasid of the first uvhub
-		 * in the partition. The bit map will indicate uvhub numbers,
-		 * which are 0-N in a partition. Pnodes are unique system-wide.
+		 * The base_dest_nasid set in the message header is the nasid
+		 * of the first uvhub in the partition. The bit map will
+		 * indicate destination pnode numbers relative to that base.
+		 * They may not be consecutive if nasid striding is being used.
 		 */
-		bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
-		bd2->header.dest_subnodeid = 0x10; /* the LB */
+		bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
+		bd2->header.dest_subnodeid = UV_LB_SUBNODEID;
 		bd2->header.command = UV_NET_ENDPOINT_INTD;
 		bd2->header.int_both = 1;
 		/*
@@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode)
 /*
  * Initialization of each UV hub's structures
  */
-static void __init uv_init_uvhub(int uvhub, int vector)
+static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode)
 {
 	int node;
 	int pnode;
@@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector)
 
 	node = uvhub_to_first_node(uvhub);
 	pnode = uv_blade_to_pnode(uvhub);
-	uv_activation_descriptor_init(node, pnode);
+	uv_activation_descriptor_init(node, pnode, base_pnode);
 	uv_payload_queue_init(node, pnode);
 	/*
-	 * the below initialization can't be in firmware because the
-	 * messaging IRQ will be determined by the OS
+	 * The below initialization can't be in firmware because the
+	 * messaging IRQ will be determined by the OS.
 	 */
 	apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
 	uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -1491,10 +1500,11 @@ calculate_destination_timeout(void)
 /*
  * initialize the bau_control structure for each cpu
  */
-static int __init uv_init_per_cpu(int nuvhubs)
+static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode)
 {
 	int i;
 	int cpu;
+	int tcpu;
 	int pnode;
 	int uvhub;
 	int have_hmaster;
@@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs)
 		bcp = &per_cpu(bau_control, cpu);
 		memset(bcp, 0, sizeof(struct bau_control));
 		pnode = uv_cpu_hub_info(cpu)->pnode;
+		if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) {
+			printk(KERN_EMERG
+				"cpu %d pnode %d-%d beyond %d; BAU disabled\n",
+				cpu, pnode, base_part_pnode,
+				UV_DISTRIBUTION_SIZE);
+			return 1;
+		}
+		bcp->osnode = cpu_to_node(cpu);
+		bcp->partition_base_pnode = uv_partition_base_pnode;
 		uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
 		*(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
 		bdp = &uvhub_descs[uvhub];
@@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs)
 		bdp->pnode = pnode;
 		/* kludge: 'assuming' one node per socket, and assuming that
 		   disabling a socket just leaves a gap in node numbers */
-		socket = (cpu_to_node(cpu) & 1);
+		socket = bcp->osnode & 1;
 		bdp->socket_mask |= (1 << socket);
 		sdp = &bdp->socket[socket];
 		sdp->cpu_number[sdp->num_cpus] = cpu;
@@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs)
 nextsocket:
 			socket++;
 			socket_mask = (socket_mask >> 1);
+			/* each socket gets a local array of pnodes/hubs */
+			bcp = smaster;
+			bcp->target_hub_and_pnode = kmalloc_node(
+				sizeof(struct hub_and_pnode) *
+				num_possible_cpus(), GFP_KERNEL, bcp->osnode);
+			memset(bcp->target_hub_and_pnode, 0,
+				sizeof(struct hub_and_pnode) *
+				num_possible_cpus());
+			for_each_present_cpu(tcpu) {
+				bcp->target_hub_and_pnode[tcpu].pnode =
+					uv_cpu_hub_info(tcpu)->pnode;
+				bcp->target_hub_and_pnode[tcpu].uvhub =
+					uv_cpu_hub_info(tcpu)->numa_blade_id;
+			}
 		}
 	}
 	kfree(uvhub_descs);
@@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void)
 	spin_lock_init(&disable_lock);
 	congested_cycles = microsec_2_cycles(congested_response_us);
 
-	if (uv_init_per_cpu(nuvhubs)) {
-		nobau = 1;
-		return 0;
-	}
-
 	uv_partition_base_pnode = 0x7fffffff;
-	for (uvhub = 0; uvhub < nuvhubs; uvhub++)
+	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
 		if (uv_blade_nr_possible_cpus(uvhub) &&
 			(uv_blade_to_pnode(uvhub) < uv_partition_base_pnode))
 			uv_partition_base_pnode = uv_blade_to_pnode(uvhub);
+	}
+
+	if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) {
+		nobau = 1;
+		return 0;
+	}
 
 	vector = UV_BAU_MESSAGE;
 	for_each_possible_blade(uvhub)
 		if (uv_blade_nr_possible_cpus(uvhub))
-			uv_init_uvhub(uvhub, vector);
+			uv_init_uvhub(uvhub, vector, uv_partition_base_pnode);
 
 	uv_enable_timeouts();
 	alloc_intr_gate(vector, uv_bau_message_intr1);
-- 
cgit v1.2.3


From 5fd2a84ab3c8b87176e25db1d98c5cc34043a669 Mon Sep 17 00:00:00 2001
From: "Avinash H.M" <avinashhm@ti.com>
Date: Mon, 9 May 2011 12:29:40 +0000
Subject: OMAP3: set the core dpll clk rate in its set_rate function

The debug l3_ick/rate is not displaying the actual rate of the clock in
hardware. This is because, the core dpll set_rate function doesn't update the
clk.rate. After fixing, the l3_ick/rate is displaying proper values.

Signed-off-by: Shweta Gulati <shweta.gulati@ti.com>
Signed-off-by: Avinash.H.M <avinashhm@ti.com>
Cc: Rajendra Nayak <rnayak@ti.com>
Cc: Paul Wamsley <paul@pwsan.com>
Acked-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/clkt34xx_dpll3m2.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
index b2b1e37bb6bb..d6e34dd9e7e7 100644
--- a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
+++ b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
@@ -115,6 +115,7 @@ int omap3_core_dpll_m2_set_rate(struct clk *clk, unsigned long rate)
 				  sdrc_cs0->rfr_ctrl, sdrc_cs0->actim_ctrla,
 				  sdrc_cs0->actim_ctrlb, sdrc_cs0->mr,
 				  0, 0, 0, 0);
+	clk->rate = rate;
 
 	return 0;
 }
-- 
cgit v1.2.3


From d9a5ac9ef306eb5cc874f285185a15c303c50009 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Fri, 13 May 2011 15:52:09 +0200
Subject: x86, mce, AMD: Fix leaving freed data in a list

b may be added to a list, but is not removed before being freed
in the case of an error.  This is done in the corresponding
deallocation function, so the code here has been changed to
follow that.

The sematic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@
expression E,E1,E2;
identifier l;
@@

*list_add(&E->l,E1);
... when != E1
    when != list_del(&E->l)
    when != list_del_init(&E->l)
    when != E = E2
*kfree(E);// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: <stable@kernel.org>
Link: http://lkml.kernel.org/r/1305294731-12127-1-git-send-email-julia@diku.dk
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 167f97b5596e..bb0adad35143 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -509,6 +509,7 @@ recurse:
 out_free:
 	if (b) {
 		kobject_put(&b->kobj);
+		list_del(&b->miscj);
 		kfree(b);
 	}
 	return err;
-- 
cgit v1.2.3


From 82a3242e11d9e63c8195be46c954efaefee35e22 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 12 May 2011 16:01:02 -0700
Subject: sysfs: remove "last sysfs file:" line from the oops messages

On some arches (x86, sh, arm, unicore, powerpc) the oops message would
print out the last sysfs file accessed.

This was very useful in finding a number of sysfs and driver core bugs
in the 2.5 and early 2.6 development days, but it has been a number of
years since this file has actually helped in debugging anything that
couldn't also be trivially determined from the stack traceback.

So it's time to delete the line.  This is good as we need all the space
we can get for oops messages at times on consoles.

Acked-by: Phil Carmody <ext-phil.2.carmody@nokia.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/kernel/traps.c       |  1 -
 arch/powerpc/kernel/traps.c   |  1 -
 arch/sh/kernel/traps_32.c     |  1 -
 arch/unicore32/kernel/traps.c |  1 -
 arch/x86/kernel/dumpstack.c   |  1 -
 fs/sysfs/file.c               | 12 ------------
 include/linux/sysfs.h         |  5 -----
 7 files changed, 22 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 3b54ad19d489..d52eec268b47 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -234,7 +234,6 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt
 
 	printk(KERN_EMERG "Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n",
 	       str, err, ++die_counter);
-	sysfs_printk_last_file();
 
 	/* trap and error numbers are mostly meaningless on ARM */
 	ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 5ddb801bc154..d782cd71c07c 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -143,7 +143,6 @@ int die(const char *str, struct pt_regs *regs, long err)
 #endif
 		printk("%s\n", ppc_md.name ? ppc_md.name : "");
 
-		sysfs_printk_last_file();
 		if (notify_die(DIE_OOPS, str, regs, err, 255,
 			       SIGSEGV) == NOTIFY_STOP)
 			return 1;
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 3484c2f65aba..b51a17104b5f 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -87,7 +87,6 @@ void die(const char * str, struct pt_regs * regs, long err)
 	bust_spinlocks(1);
 
 	printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
-	sysfs_printk_last_file();
 	print_modules();
 	show_regs(regs);
 
diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c
index 254e36fa9513..b9a26465e728 100644
--- a/arch/unicore32/kernel/traps.c
+++ b/arch/unicore32/kernel/traps.c
@@ -192,7 +192,6 @@ static int __die(const char *str, int err, struct thread_info *thread,
 
 	printk(KERN_EMERG "Internal error: %s: %x [#%d]\n",
 	       str, err, ++die_counter);
-	sysfs_printk_last_file();
 
 	/* trap and error numbers are mostly meaningless on UniCore */
 	ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, \
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index e2a3f0606da4..f72e7193acc5 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -279,7 +279,6 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 	printk("DEBUG_PAGEALLOC");
 #endif
 	printk("\n");
-	sysfs_printk_last_file();
 	if (notify_die(DIE_OOPS, str, regs, err,
 			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
 		return 1;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index da3fefe91a8f..1ad8c93c1b85 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -24,13 +24,6 @@
 
 #include "sysfs.h"
 
-/* used in crash dumps to help with debugging */
-static char last_sysfs_file[PATH_MAX];
-void sysfs_printk_last_file(void)
-{
-	printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file);
-}
-
 /*
  * There's one sysfs_buffer for each open file and one
  * sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -337,11 +330,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 	struct sysfs_buffer *buffer;
 	const struct sysfs_ops *ops;
 	int error = -EACCES;
-	char *p;
-
-	p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
-	if (!IS_ERR(p))
-		memmove(last_sysfs_file, p, strlen(p) + 1);
 
 	/* need attr_sd for attr and ops, its parent for kobj */
 	if (!sysfs_get_active(attr_sd))
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 30b881555fa5..c3acda60eee0 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -176,7 +176,6 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
 				      const unsigned char *name);
 struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd);
 void sysfs_put(struct sysfs_dirent *sd);
-void sysfs_printk_last_file(void);
 
 /* Called to clear a ns tag when it is no longer valid */
 void sysfs_exit_ns(enum kobj_ns_type type, const void *tag);
@@ -348,10 +347,6 @@ static inline int __must_check sysfs_init(void)
 	return 0;
 }
 
-static inline void sysfs_printk_last_file(void)
-{
-}
-
 #endif /* CONFIG_SYSFS */
 
 #endif /* _SYSFS_H_ */
-- 
cgit v1.2.3


From f550806a7fbca06b487238442546aceb7ecbb0c9 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Tue, 15 Feb 2011 22:34:49 -0800
Subject: alpha: convert to clocksource_register_hz

Converts alpha to use clocksource_register_hz.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
CC: Richard Henderson <rth@twiddle.net>
CC: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
CC: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/kernel/time.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 918e8e0b72ff..818e74ed45dc 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -375,8 +375,7 @@ static struct clocksource clocksource_rpcc = {
 
 static inline void register_rpcc_clocksource(long cycle_freq)
 {
-	clocksource_calc_mult_shift(&clocksource_rpcc, cycle_freq, 4);
-	clocksource_register(&clocksource_rpcc);
+	clocksource_register_hz(&clocksource_rpcc, cycle_freq);
 }
 #else /* !CONFIG_SMP */
 static inline void register_rpcc_clocksource(long cycle_freq)
-- 
cgit v1.2.3


From 90b57f35164aa715dcc7d939a88780a23231f84e Mon Sep 17 00:00:00 2001
From: Michael Cree <mcree@orcon.net.nz>
Date: Wed, 4 May 2011 08:14:50 +0000
Subject: alpha: Wire up syscalls new to 2.6.39

Wire up the syscalls:
   name_to_handle_at
   open_by_handle_at
   clock_adjtime
   syncfs
and adjust some whitespace in the neighbourhood to align commments.

Signed-off-by: Michael Cree <mcree@orcon.net.nz>
Signed-off-by: Matt Turner <mattst88@gmail.com>
---
 arch/alpha/include/asm/unistd.h |  6 +++++-
 arch/alpha/kernel/systbls.S     | 12 ++++++++----
 2 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 058937bf5a77..b1834166922d 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -452,10 +452,14 @@
 #define __NR_fanotify_init		494
 #define __NR_fanotify_mark		495
 #define __NR_prlimit64			496
+#define __NR_name_to_handle_at		497
+#define __NR_open_by_handle_at		498
+#define __NR_clock_adjtime		499
+#define __NR_syncfs			500
 
 #ifdef __KERNEL__
 
-#define NR_SYSCALLS			497
+#define NR_SYSCALLS			501
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index a6a1de9db16f..15f999d41c75 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -498,23 +498,27 @@ sys_call_table:
 	.quad sys_ni_syscall			/* sys_timerfd */
 	.quad sys_eventfd
 	.quad sys_recvmmsg
-	.quad sys_fallocate				/* 480 */
+	.quad sys_fallocate			/* 480 */
 	.quad sys_timerfd_create
 	.quad sys_timerfd_settime
 	.quad sys_timerfd_gettime
 	.quad sys_signalfd4
-	.quad sys_eventfd2				/* 485 */
+	.quad sys_eventfd2			/* 485 */
 	.quad sys_epoll_create1
 	.quad sys_dup3
 	.quad sys_pipe2
 	.quad sys_inotify_init1
-	.quad sys_preadv				/* 490 */
+	.quad sys_preadv			/* 490 */
 	.quad sys_pwritev
 	.quad sys_rt_tgsigqueueinfo
 	.quad sys_perf_event_open
 	.quad sys_fanotify_init
-	.quad sys_fanotify_mark				/* 495 */
+	.quad sys_fanotify_mark			/* 495 */
 	.quad sys_prlimit64
+	.quad sys_name_to_handle_at
+	.quad sys_open_by_handle_at
+	.quad sys_clock_adjtime
+	.quad sys_syncfs			/* 500 */
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object
-- 
cgit v1.2.3


From 8c414ff3f4dcdde228c6a668385218290d73a265 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sun, 8 May 2011 18:50:20 +0100
Subject: clocksource: convert footbridge to generic i8253 clocksource

Convert the footbridge isa-timer code to use generic i8253 clocksource.

Acked-by: John Stultz <john.stultz@linaro.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/i8253.h         | 15 ++++++++++++
 arch/arm/mach-footbridge/Kconfig     |  2 ++
 arch/arm/mach-footbridge/isa-timer.c | 45 ++++--------------------------------
 include/linux/clocksource.h          |  2 ++
 4 files changed, 23 insertions(+), 41 deletions(-)
 create mode 100644 arch/arm/include/asm/i8253.h

(limited to 'arch')

diff --git a/arch/arm/include/asm/i8253.h b/arch/arm/include/asm/i8253.h
new file mode 100644
index 000000000000..70656b69d5ce
--- /dev/null
+++ b/arch/arm/include/asm/i8253.h
@@ -0,0 +1,15 @@
+#ifndef __ASMARM_I8253_H
+#define __ASMARM_I8253_H
+
+/* i8253A PIT registers */
+#define PIT_MODE	0x43
+#define PIT_CH0		0x40
+
+#define PIT_LATCH	((PIT_TICK_RATE + HZ / 2) / HZ)
+
+extern raw_spinlock_t i8253_lock;
+
+#define outb_pit	outb_p
+#define inb_pit		inb_p
+
+#endif
diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig
index bdd257921cfb..46adca068f2c 100644
--- a/arch/arm/mach-footbridge/Kconfig
+++ b/arch/arm/mach-footbridge/Kconfig
@@ -4,6 +4,7 @@ menu "Footbridge Implementations"
 
 config ARCH_CATS
 	bool "CATS"
+	select CLKSRC_I8253
 	select FOOTBRIDGE_HOST
 	select ISA
 	select ISA_DMA
@@ -59,6 +60,7 @@ config ARCH_EBSA285_HOST
 
 config ARCH_NETWINDER
 	bool "NetWinder"
+	select CLKSRC_I8253
 	select FOOTBRIDGE_HOST
 	select ISA
 	select ISA_DMA
diff --git a/arch/arm/mach-footbridge/isa-timer.c b/arch/arm/mach-footbridge/isa-timer.c
index 441c6ce0d555..7020f1a3feca 100644
--- a/arch/arm/mach-footbridge/isa-timer.c
+++ b/arch/arm/mach-footbridge/isa-timer.c
@@ -10,53 +10,16 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/spinlock.h>
 #include <linux/timex.h>
 
 #include <asm/irq.h>
-
+#include <asm/i8253.h>
 #include <asm/mach/time.h>
 
 #include "common.h"
 
-#define PIT_MODE	0x43
-#define PIT_CH0		0x40
-
-#define PIT_LATCH	((PIT_TICK_RATE + HZ / 2) / HZ)
-
-static cycle_t pit_read(struct clocksource *cs)
-{
-	unsigned long flags;
-	static int old_count;
-	static u32 old_jifs;
-	int count;
-	u32 jifs;
-
-	raw_local_irq_save(flags);
-
-	jifs = jiffies;
-	outb_p(0x00, PIT_MODE);		/* latch the count */
-	count = inb_p(PIT_CH0);		/* read the latched count */
-	count |= inb_p(PIT_CH0) << 8;
-
-	if (count > old_count && jifs == old_jifs)
-		count = old_count;
-
-	old_count = count;
-	old_jifs = jifs;
-
-	raw_local_irq_restore(flags);
-
-	count = (PIT_LATCH - 1) - count;
-
-	return (cycle_t)(jifs * PIT_LATCH) + count;
-}
-
-static struct clocksource pit_cs = {
-	.name		= "pit",
-	.rating		= 110,
-	.read		= pit_read,
-	.mask		= CLOCKSOURCE_MASK(32),
-};
+DEFINE_RAW_SPINLOCK(i8253_lock);
 
 static void pit_set_mode(enum clock_event_mode mode,
 	struct clock_event_device *evt)
@@ -121,7 +84,7 @@ static void __init isa_timer_init(void)
 	pit_ce.max_delta_ns = clockevent_delta2ns(0x7fff, &pit_ce);
 	pit_ce.min_delta_ns = clockevent_delta2ns(0x000f, &pit_ce);
 
-	clocksource_register_hz(&pit_cs, PIT_TICK_RATE);
+	clocksource_i8253_init();
 
 	setup_irq(pit_ce.irq, &pit_timer_irq);
 	clockevents_register_device(&pit_ce);
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index c37b21ad5a3b..f13469b3df86 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -341,4 +341,6 @@ static inline void update_vsyscall_tz(void)
 
 extern void timekeeping_notify(struct clocksource *clock);
 
+extern int clocksource_i8253_init(void);
+
 #endif /* _LINUX_CLOCKSOURCE_H */
-- 
cgit v1.2.3


From 82491451dd25a3abe8496ddbd04ddb3f77d285c2 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sun, 8 May 2011 18:55:19 +0100
Subject: clocksource: convert x86 to generic i8253 clocksource

Convert x86 i8253 clocksource code to use generic i8253 clocksource.

Acked-by: John Stultz <john.stultz@linaro.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/x86/Kconfig             |  1 +
 arch/x86/include/asm/i8253.h |  2 ++
 arch/x86/kernel/i8253.c      | 79 +-------------------------------------------
 3 files changed, 4 insertions(+), 78 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a95bfd..01115d54c5be 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -8,6 +8,7 @@ config 64BIT
 
 config X86_32
 	def_bool !64BIT
+	select CLKSRC_I8253
 
 config X86_64
 	def_bool 64BIT
diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h
index fc1f579fb965..65aaa91d5850 100644
--- a/arch/x86/include/asm/i8253.h
+++ b/arch/x86/include/asm/i8253.h
@@ -6,6 +6,8 @@
 #define PIT_CH0			0x40
 #define PIT_CH2			0x42
 
+#define PIT_LATCH	LATCH
+
 extern raw_spinlock_t i8253_lock;
 
 extern struct clock_event_device *global_clock_event;
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 2dfd31597443..b904dfbf6dbc 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -117,81 +117,6 @@ void __init setup_pit_timer(void)
 }
 
 #ifndef CONFIG_X86_64
-/*
- * Since the PIT overflows every tick, its not very useful
- * to just read by itself. So use jiffies to emulate a free
- * running counter:
- */
-static cycle_t pit_read(struct clocksource *cs)
-{
-	static int old_count;
-	static u32 old_jifs;
-	unsigned long flags;
-	int count;
-	u32 jifs;
-
-	raw_spin_lock_irqsave(&i8253_lock, flags);
-	/*
-	 * Although our caller may have the read side of xtime_lock,
-	 * this is now a seqlock, and we are cheating in this routine
-	 * by having side effects on state that we cannot undo if
-	 * there is a collision on the seqlock and our caller has to
-	 * retry.  (Namely, old_jifs and old_count.)  So we must treat
-	 * jiffies as volatile despite the lock.  We read jiffies
-	 * before latching the timer count to guarantee that although
-	 * the jiffies value might be older than the count (that is,
-	 * the counter may underflow between the last point where
-	 * jiffies was incremented and the point where we latch the
-	 * count), it cannot be newer.
-	 */
-	jifs = jiffies;
-	outb_pit(0x00, PIT_MODE);	/* latch the count ASAP */
-	count = inb_pit(PIT_CH0);	/* read the latched count */
-	count |= inb_pit(PIT_CH0) << 8;
-
-	/* VIA686a test code... reset the latch if count > max + 1 */
-	if (count > LATCH) {
-		outb_pit(0x34, PIT_MODE);
-		outb_pit(LATCH & 0xff, PIT_CH0);
-		outb_pit(LATCH >> 8, PIT_CH0);
-		count = LATCH - 1;
-	}
-
-	/*
-	 * It's possible for count to appear to go the wrong way for a
-	 * couple of reasons:
-	 *
-	 *  1. The timer counter underflows, but we haven't handled the
-	 *     resulting interrupt and incremented jiffies yet.
-	 *  2. Hardware problem with the timer, not giving us continuous time,
-	 *     the counter does small "jumps" upwards on some Pentium systems,
-	 *     (see c't 95/10 page 335 for Neptun bug.)
-	 *
-	 * Previous attempts to handle these cases intelligently were
-	 * buggy, so we just do the simple thing now.
-	 */
-	if (count > old_count && jifs == old_jifs)
-		count = old_count;
-
-	old_count = count;
-	old_jifs = jifs;
-
-	raw_spin_unlock_irqrestore(&i8253_lock, flags);
-
-	count = (LATCH - 1) - count;
-
-	return (cycle_t)(jifs * LATCH) + count;
-}
-
-static struct clocksource pit_cs = {
-	.name		= "pit",
-	.rating		= 110,
-	.read		= pit_read,
-	.mask		= CLOCKSOURCE_MASK(32),
-	.mult		= 0,
-	.shift		= 20,
-};
-
 static int __init init_pit_clocksource(void)
 {
 	 /*
@@ -205,9 +130,7 @@ static int __init init_pit_clocksource(void)
 	    pit_ce.mode != CLOCK_EVT_MODE_PERIODIC)
 		return 0;
 
-	pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift);
-
-	return clocksource_register(&pit_cs);
+	return clocksource_i8253_init();
 }
 arch_initcall(init_pit_clocksource);
 
-- 
cgit v1.2.3


From 798778b8653f64b7b2162ac70eca10367cff6ce8 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sun, 8 May 2011 19:03:03 +0100
Subject: clocksource: convert mips to generic i8253 clocksource

Convert MIPS i8253 clocksource code to use generic i8253 clocksource.

Acked-by: John Stultz <john.stultz@linaro.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/mips/Kconfig             |  1 +
 arch/mips/include/asm/i8253.h |  5 +++
 arch/mips/kernel/i8253.c      | 78 +------------------------------------------
 3 files changed, 7 insertions(+), 77 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 8e256cc5dcd9..f7f6419e4c84 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2339,6 +2339,7 @@ config MMU
 
 config I8253
 	bool
+	select CLKSRC_I8253
 	select MIPS_EXTERNAL_TIMER
 
 config ZONE_DMA32
diff --git a/arch/mips/include/asm/i8253.h b/arch/mips/include/asm/i8253.h
index 48bb82372994..9ad011366f73 100644
--- a/arch/mips/include/asm/i8253.h
+++ b/arch/mips/include/asm/i8253.h
@@ -12,8 +12,13 @@
 #define PIT_CH0			0x40
 #define PIT_CH2			0x42
 
+#define PIT_LATCH		LATCH
+
 extern raw_spinlock_t i8253_lock;
 
 extern void setup_pit_timer(void);
 
+#define inb_pit inb_p
+#define outb_pit outb_p
+
 #endif /* __ASM_I8253_H */
diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c
index 2392a7a296d4..391221b6a6aa 100644
--- a/arch/mips/kernel/i8253.c
+++ b/arch/mips/kernel/i8253.c
@@ -125,87 +125,11 @@ void __init setup_pit_timer(void)
 	setup_irq(0, &irq0);
 }
 
-/*
- * Since the PIT overflows every tick, its not very useful
- * to just read by itself. So use jiffies to emulate a free
- * running counter:
- */
-static cycle_t pit_read(struct clocksource *cs)
-{
-	unsigned long flags;
-	int count;
-	u32 jifs;
-	static int old_count;
-	static u32 old_jifs;
-
-	raw_spin_lock_irqsave(&i8253_lock, flags);
-	/*
-	 * Although our caller may have the read side of xtime_lock,
-	 * this is now a seqlock, and we are cheating in this routine
-	 * by having side effects on state that we cannot undo if
-	 * there is a collision on the seqlock and our caller has to
-	 * retry.  (Namely, old_jifs and old_count.)  So we must treat
-	 * jiffies as volatile despite the lock.  We read jiffies
-	 * before latching the timer count to guarantee that although
-	 * the jiffies value might be older than the count (that is,
-	 * the counter may underflow between the last point where
-	 * jiffies was incremented and the point where we latch the
-	 * count), it cannot be newer.
-	 */
-	jifs = jiffies;
-	outb_p(0x00, PIT_MODE);	/* latch the count ASAP */
-	count = inb_p(PIT_CH0);	/* read the latched count */
-	count |= inb_p(PIT_CH0) << 8;
-
-	/* VIA686a test code... reset the latch if count > max + 1 */
-	if (count > LATCH) {
-		outb_p(0x34, PIT_MODE);
-		outb_p(LATCH & 0xff, PIT_CH0);
-		outb(LATCH >> 8, PIT_CH0);
-		count = LATCH - 1;
-	}
-
-	/*
-	 * It's possible for count to appear to go the wrong way for a
-	 * couple of reasons:
-	 *
-	 *  1. The timer counter underflows, but we haven't handled the
-	 *     resulting interrupt and incremented jiffies yet.
-	 *  2. Hardware problem with the timer, not giving us continuous time,
-	 *     the counter does small "jumps" upwards on some Pentium systems,
-	 *     (see c't 95/10 page 335 for Neptun bug.)
-	 *
-	 * Previous attempts to handle these cases intelligently were
-	 * buggy, so we just do the simple thing now.
-	 */
-	if (count > old_count && jifs == old_jifs) {
-		count = old_count;
-	}
-	old_count = count;
-	old_jifs = jifs;
-
-	raw_spin_unlock_irqrestore(&i8253_lock, flags);
-
-	count = (LATCH - 1) - count;
-
-	return (cycle_t)(jifs * LATCH) + count;
-}
-
-static struct clocksource clocksource_pit = {
-	.name	= "pit",
-	.rating = 110,
-	.read	= pit_read,
-	.mask	= CLOCKSOURCE_MASK(32),
-	.mult	= 0,
-	.shift	= 20,
-};
-
 static int __init init_pit_clocksource(void)
 {
 	if (num_possible_cpus() > 1) /* PIT does not scale! */
 		return 0;
 
-	clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
-	return clocksource_register(&clocksource_pit);
+	return clocksource_i8253_init();
 }
 arch_initcall(init_pit_clocksource);
-- 
cgit v1.2.3


From b23b64516500df6b70fcafb820970f18538252cf Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@mit.edu>
Date: Mon, 16 May 2011 15:12:47 +1000
Subject: crypto: aesni-intel - Merge with fpu.ko

Loading fpu without aesni-intel does nothing.  Loading aesni-intel
without fpu causes modes like xts to fail.  (Unloading
aesni-intel will restore those modes.)

One solution would be to make aesni-intel depend on fpu, but it
seems cleaner to just combine the modules.

This is probably responsible for bugs like:
https://bugzilla.redhat.com/show_bug.cgi?id=589390

Signed-off-by: Andy Lutomirski <luto@mit.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/Makefile           |  4 +---
 arch/x86/crypto/aesni-intel_glue.c |  8 ++++++++
 arch/x86/crypto/fpu.c              | 10 ++--------
 crypto/Kconfig                     |  6 ------
 4 files changed, 11 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 1a58ad89fdf7..c04f1b7a9139 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,8 +2,6 @@
 # Arch-specific CryptoAPI modules.
 #
 
-obj-$(CONFIG_CRYPTO_FPU) += fpu.o
-
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@@ -24,6 +22,6 @@ aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 
-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
 
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 2577613fb32b..cbc60ef359bc 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -140,6 +140,9 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
+int crypto_fpu_init(void);
+void crypto_fpu_exit(void);
+
 static inline struct
 aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
 {
@@ -1257,6 +1260,8 @@ static int __init aesni_init(void)
 		return -ENODEV;
 	}
 
+	if ((err = crypto_fpu_init()))
+		goto fpu_err;
 	if ((err = crypto_register_alg(&aesni_alg)))
 		goto aes_err;
 	if ((err = crypto_register_alg(&__aesni_alg)))
@@ -1334,6 +1339,7 @@ blk_ecb_err:
 __aes_err:
 	crypto_unregister_alg(&aesni_alg);
 aes_err:
+fpu_err:
 	return err;
 }
 
@@ -1363,6 +1369,8 @@ static void __exit aesni_exit(void)
 	crypto_unregister_alg(&blk_ecb_alg);
 	crypto_unregister_alg(&__aesni_alg);
 	crypto_unregister_alg(&aesni_alg);
+
+	crypto_fpu_exit();
 }
 
 module_init(aesni_init);
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index 1a8f8649c035..98d7a188f46b 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -150,18 +150,12 @@ static struct crypto_template crypto_fpu_tmpl = {
 	.module = THIS_MODULE,
 };
 
-static int __init crypto_fpu_module_init(void)
+int __init crypto_fpu_init(void)
 {
 	return crypto_register_template(&crypto_fpu_tmpl);
 }
 
-static void __exit crypto_fpu_module_exit(void)
+void __exit crypto_fpu_exit(void)
 {
 	crypto_unregister_template(&crypto_fpu_tmpl);
 }
-
-module_init(crypto_fpu_module_init);
-module_exit(crypto_fpu_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("FPU block cipher wrapper");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 4b7cb0e691cd..87b22ca9c223 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -264,11 +264,6 @@ config CRYPTO_XTS
 	  key size 256, 384 or 512 bits. This implementation currently
 	  can't handle a sectorsize which is not a multiple of 16 bytes.
 
-config CRYPTO_FPU
-	tristate
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_MANAGER
-
 comment "Hash modes"
 
 config CRYPTO_HMAC
@@ -543,7 +538,6 @@ config CRYPTO_AES_NI_INTEL
 	select CRYPTO_AES_586 if !64BIT
 	select CRYPTO_CRYPTD
 	select CRYPTO_ALGAPI
-	select CRYPTO_FPU
 	help
 	  Use Intel AES-NI instructions for AES algorithm.
 
-- 
cgit v1.2.3


From e503f9e4b092e2349a9477a333543de8f3c7f5d9 Mon Sep 17 00:00:00 2001
From: Youquan Song <youquan.song@intel.com>
Date: Fri, 22 Apr 2011 00:22:43 +0800
Subject: x86, apic: Fix spurious error interrupts triggering on all non-boot
 APs

This patch fixes a bug reported by a customer, who found
that many unreasonable error interrupts reported on all
non-boot CPUs (APs) during the system boot stage.

According to Chapter 10 of Intel Software Developer Manual
Volume 3A, Local APIC may signal an illegal vector error when
an LVT entry is set as an illegal vector value (0~15) under
FIXED delivery mode (bits 8-11 is 0), regardless of whether
the mask bit is set or an interrupt actually happen. These
errors are seen as error interrupts.

The initial value of thermal LVT entries on all APs always reads
0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
sequence to them and LVT registers are reset to 0s except for
the mask bits which are set to 1s when APs receive INIT IPI.

When the BIOS takes over the thermal throttling interrupt,
the LVT thermal deliver mode should be SMI and it is required
from the kernel to keep AP's LVT thermal monitoring register
programmed as such as well.

This issue happens when BIOS does not take over thermal throttling
interrupt, AP's LVT thermal monitor register will be restored to
0x10000 which means vector 0 and fixed deliver mode, so all APs will
signal illegal vector error interrupts.

This patch check if interrupt delivery mode is not fixed mode before
restoring AP's LVT thermal monitor register.

Signed-off-by: Youquan Song <youquan.song@intel.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Yong Wang <yong.y.wang@intel.com>
Cc: hpa@linux.intel.com
Cc: joe@perches.com
Cc: jbaron@redhat.com
Cc: trenn@suse.de
Cc: kent.liu@intel.com
Cc: chaohong.guo@intel.com
Cc: <stable@kernel.org> # As far back as possible
Link: http://lkml.kernel.org/r/1303402963-17738-1-git-send-email-youquan.song@intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/apicdef.h           |  1 +
 arch/x86/kernel/cpu/mcheck/therm_throt.c | 12 +++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index d87988bacf3e..34595d5e1038 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -78,6 +78,7 @@
 #define		APIC_DEST_LOGICAL	0x00800
 #define		APIC_DEST_PHYSICAL	0x00000
 #define		APIC_DM_FIXED		0x00000
+#define		APIC_DM_FIXED_MASK	0x00700
 #define		APIC_DM_LOWEST		0x00100
 #define		APIC_DM_SMI		0x00200
 #define		APIC_DM_REMRD		0x00300
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97f..0f034460260d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 
+	h = lvtthmr_init;
 	/*
 	 * The initial value of thermal LVT entries on all APs always reads
 	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
 	 * sequence to them and LVT registers are reset to 0s except for
 	 * the mask bits which are set to 1s when APs receive INIT IPI.
-	 * Always restore the value that BIOS has programmed on AP based on
-	 * BSP's info we saved since BIOS is always setting the same value
-	 * for all threads/cores
+	 * If BIOS takes over the thermal interrupt and sets its interrupt
+	 * delivery mode to SMI (not fixed), it restores the value that the
+	 * BIOS has programmed on AP based on BSP's info we saved since BIOS
+	 * is always setting the same value for all threads/cores.
 	 */
-	apic_write(APIC_LVTTHMR, lvtthmr_init);
+	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
+		apic_write(APIC_LVTTHMR, lvtthmr_init);
 
-	h = lvtthmr_init;
 
 	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		printk(KERN_DEBUG
-- 
cgit v1.2.3


From 7c1bfd685bcdc822ab1d7411ea05c82bd2a7b260 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 16 May 2011 13:47:30 -0400
Subject: xen/pci: Fix compiler error when CONFIG_XEN_PRIVILEGED_GUEST is not
 set.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we have CONFIG_XEN and the other parameters to build an
Linux kernel that is non-privileged, the xen_[find|register|unregister]_
device_domain_owner functions should not be compiled. They should
use the nops defined in arch/x86/include/asm/xen/pci.h instead.

This fixes:

arch/x86/pci/xen.c:496: error: redefinition of ‘xen_find_device_domain_owner’
arch/x86/include/asm/xen/pci.h:25: note: previous definition of ‘xen_find_device_domain_owner’ was here
arch/x86/pci/xen.c:510: error: redefinition of ‘xen_register_device_domain_owner’
arch/x86/include/asm/xen/pci.h:29: note: previous definition of ‘xen_register_device_domain_owner’ was here
arch/x86/pci/xen.c:532: error: redefinition of ‘xen_unregister_device_domain_owner’
arch/x86/include/asm/xen/pci.h:34: note: previous definition of ‘xen_unregister_device_domain_owner’ was here

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
---
 arch/x86/pci/xen.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 393981feb12f..8214724ce54d 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -473,6 +473,7 @@ void __init xen_setup_pirqs(void)
 }
 #endif
 
+#ifdef CONFIG_XEN_DOM0
 struct xen_device_domain_owner {
 	domid_t domain;
 	struct pci_dev *dev;
@@ -545,3 +546,4 @@ int xen_unregister_device_domain_owner(struct pci_dev *dev)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
+#endif
-- 
cgit v1.2.3


From 600b776eb39a13a28b090ba9efceb0c69d4508aa Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 16 May 2011 20:15:36 +0200
Subject: OMAP1 / PM: Use generic clock manipulation routines for runtime PM

Convert OMAP1 to using the new generic clock manipulation routines
and a device power domain for runtime PM instead of overriding the
platform bus type's runtime PM callbacks.  This allows us to simplify
OMAP1-specific code and to share some code with other platforms
(shmobile in particular).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Kevin Hilman <khilman@ti.com>
---
 arch/arm/mach-omap1/pm_bus.c | 69 ++++++++++++++------------------------------
 1 file changed, 22 insertions(+), 47 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c
index 6588c22b8a64..fe31d933f0ed 100644
--- a/arch/arm/mach-omap1/pm_bus.c
+++ b/arch/arm/mach-omap1/pm_bus.c
@@ -24,75 +24,50 @@
 #ifdef CONFIG_PM_RUNTIME
 static int omap1_pm_runtime_suspend(struct device *dev)
 {
-	struct clk *iclk, *fclk;
-	int ret = 0;
+	int ret;
 
 	dev_dbg(dev, "%s\n", __func__);
 
 	ret = pm_generic_runtime_suspend(dev);
+	if (ret)
+		return ret;
 
-	fclk = clk_get(dev, "fck");
-	if (!IS_ERR(fclk)) {
-		clk_disable(fclk);
-		clk_put(fclk);
-	}
-
-	iclk = clk_get(dev, "ick");
-	if (!IS_ERR(iclk)) {
-		clk_disable(iclk);
-		clk_put(iclk);
+	ret = pm_runtime_clk_suspend(dev);
+	if (ret) {
+		pm_generic_runtime_resume(dev);
+		return ret;
 	}
 
 	return 0;
-};
+}
 
 static int omap1_pm_runtime_resume(struct device *dev)
 {
-	struct clk *iclk, *fclk;
-
 	dev_dbg(dev, "%s\n", __func__);
 
-	iclk = clk_get(dev, "ick");
-	if (!IS_ERR(iclk)) {
-		clk_enable(iclk);
-		clk_put(iclk);
-	}
+	pm_runtime_clk_resume(dev);
+	return pm_generic_runtime_resume(dev);
+}
 
-	fclk = clk_get(dev, "fck");
-	if (!IS_ERR(fclk)) {
-		clk_enable(fclk);
-		clk_put(fclk);
-	}
+static struct dev_power_domain default_power_domain = {
+	.ops = {
+		.runtime_suspend = omap1_pm_runtime_suspend,
+		.runtime_resume = omap1_pm_runtime_resume,
+		USE_PLATFORM_PM_SLEEP_OPS
+	},
+};
 
-	return pm_generic_runtime_resume(dev);
+static struct pm_clk_notifier_block platform_bus_notifier = {
+	.pwr_domain = &default_power_domain,
+	.con_ids = { "ick", "fck", NULL, },
 };
 
 static int __init omap1_pm_runtime_init(void)
 {
-	const struct dev_pm_ops *pm;
-	struct dev_pm_ops *omap_pm;
-
 	if (!cpu_class_is_omap1())
 		return -ENODEV;
 
-	pm = platform_bus_get_pm_ops();
-	if (!pm) {
-		pr_err("%s: unable to get dev_pm_ops from platform_bus\n",
-			__func__);
-		return -ENODEV;
-	}
-
-	omap_pm = kmemdup(pm, sizeof(struct dev_pm_ops), GFP_KERNEL);
-	if (!omap_pm) {
-		pr_err("%s: unable to alloc memory for new dev_pm_ops\n",
-			__func__);
-		return -ENOMEM;
-	}
-
-	omap_pm->runtime_suspend = omap1_pm_runtime_suspend;
-	omap_pm->runtime_resume = omap1_pm_runtime_resume;
-
-	platform_bus_set_pm_ops(omap_pm);
+	pm_runtime_clk_add_notifier(&platform_bus_type, &platform_bus_notifier);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 50e7534427283afd997d58481778c07bea79eb63 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 16 May 2011 15:39:46 +0200
Subject: x86, AMD, cacheinfo: Fix fallout caused by max3 conversion

732eacc0542d0aa48797f675888b85d6065af837 converted code around the
kernel using nested max() macros to use the new max3 macro but forgot to
remove the old line in intel_cacheinfo.c. Fix it.

Cc: Hagen Paul Pfeifer <hagen@jauu.net>
Cc: Frank Arnold <farnold@amd64.org>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1305553188-21061-2-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 1ce1af2899df..31590a001e2a 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -327,7 +327,6 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 
-	l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }
 
-- 
cgit v1.2.3


From 42be450565b0fc4607fae3e3a7da038d367a23ed Mon Sep 17 00:00:00 2001
From: Frank Arnold <frank.arnold@amd.com>
Date: Mon, 16 May 2011 15:39:47 +0200
Subject: x86, AMD, cacheinfo: Fix L3 cache index disable checks

We provide two slots to disable cache indices, and have a check to
prevent both slots to be used for the same index.

If the user disables the same index on different subcaches, both slots
will hold the same index, e.g.

  $ echo 2047 > /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
  $ cat /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
  2047
  $ echo 1050623 > /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_1
  $ cat /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_1
  2047

due to the fact that the check was looking only at index bits [11:0]
and was ignoring writes to bits outside that range. The more correct
fix is to simply check whether the index is within the bounds of
[0..l3->indices].

While at it, cleanup comments and drop now-unused local macros.

Signed-off-by: Frank Arnold <frank.arnold@amd.com>
Link: http://lkml.kernel.org/r/1305553188-21061-3-git-send-email-bp@amd64.org
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 31590a001e2a..c105c533ed94 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -453,27 +453,16 @@ int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
 {
 	int ret = 0;
 
-#define SUBCACHE_MASK	(3UL << 20)
-#define SUBCACHE_INDEX	0xfff
-
-	/*
-	 * check whether this slot is already used or
-	 * the index is already disabled
-	 */
+	/*  check if @slot is already used or the index is already disabled */
 	ret = amd_get_l3_disable_slot(l3, slot);
 	if (ret >= 0)
 		return -EINVAL;
 
-	/*
-	 * check whether the other slot has disabled the
-	 * same index already
-	 */
-	if (index == amd_get_l3_disable_slot(l3, !slot))
+	if (index > l3->indices)
 		return -EINVAL;
 
-	/* do not allow writes outside of allowed bits */
-	if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
-	    ((index & SUBCACHE_INDEX) > l3->indices))
+	/* check whether the other slot has disabled the same index already */
+	if (index == amd_get_l3_disable_slot(l3, !slot))
 		return -EINVAL;
 
 	amd_l3_disable_index(l3, cpu, slot, index);
-- 
cgit v1.2.3


From 2895cd2ab81dfb7bc22637bc110857db44a30b4a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 13 Apr 2011 16:43:29 -0400
Subject: ftrace/x86: Do not trace .discard.text section

The section called .discard.text has tracing attached to it and is
currently ignored by ftrace. But it does include a call to the mcount
stub. Adding a notrace to the code keeps gcc from adding the useless
mcount caller to it.

Link: http://lkml.kernel.org/r/20110421023739.243651696@goodmis.org
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/setup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index db8aa19a08a2..647d8a06ce4f 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -88,7 +88,7 @@ void *extend_brk(size_t size, size_t align);
  * executable.)
  */
 #define RESERVE_BRK(name,sz)						\
-	static void __section(.discard.text) __used			\
+	static void __section(.discard.text) __used notrace		\
 	__brk_reservation_fn_##name##__(void) {				\
 		asm volatile (						\
 			".pushsection .brk_reservation,\"aw\",@nobits;" \
-- 
cgit v1.2.3


From 521ccb5c4aece609311bfa7157910a8f0c942af5 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 10 May 2011 10:10:41 +0200
Subject: ftrace/x86: mcount offset calculation

Do the mcount offset adjustment in the recordmcount.pl/recordmcount.[ch]
at compile time and not in ftrace_call_adjust at run time.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/ftrace.h | 7 +++----
 scripts/recordmcount.c        | 2 ++
 scripts/recordmcount.pl       | 2 ++
 3 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index db24c2278be0..268c783ab1c0 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -38,11 +38,10 @@ extern void mcount(void);
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	/*
-	 * call mcount is "e8 <4 byte offset>"
-	 * The addr points to the 4 byte offset and the caller of this
-	 * function wants the pointer to e8. Simply subtract one.
+	 * addr is the address of the mcount call instruction.
+	 * recordmcount does the necessary offset calculation.
 	 */
-	return addr - 1;
+	return addr;
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 0e18975824f7..7648a5d11154 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -335,6 +335,7 @@ do_file(char const *const fname)
 		reltype = R_386_32;
 		make_nop = make_nop_x86;
 		ideal_nop = ideal_nop5_x86_32;
+		mcount_adjust_32 = -1;
 		break;
 	case EM_ARM:	 reltype = R_ARM_ABS32;
 			 altmcount = "__gnu_mcount_nc";
@@ -350,6 +351,7 @@ do_file(char const *const fname)
 		make_nop = make_nop_x86;
 		ideal_nop = ideal_nop5_x86_64;
 		reltype = R_X86_64_64;
+		mcount_adjust_64 = -1;
 		break;
 	}  /* end switch */
 
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index a871cd414055..414e7f5e42ec 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -223,6 +223,7 @@ if ($arch eq "x86_64") {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
     $type = ".quad";
     $alignment = 8;
+    $mcount_adjust = -1;
 
     # force flags for this arch
     $ld .= " -m elf_x86_64";
@@ -232,6 +233,7 @@ if ($arch eq "x86_64") {
 
 } elsif ($arch eq "i386") {
     $alignment = 4;
+    $mcount_adjust = -1;
 
     # force flags for this arch
     $ld .= " -m elf_i386";
-- 
cgit v1.2.3


From f29638868280534ed7e2fdd93b31557232597940 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 10 May 2011 10:10:43 +0200
Subject: ftrace/s390: mcount offset calculation

Do the mcount offset adjustment in the recordmcount.pl/recordmcount.[ch]
at compile time and not in ftrace_call_adjust at run time.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/s390/include/asm/ftrace.h | 4 +---
 scripts/recordmcount.c         | 8 ++++++--
 scripts/recordmcount.pl        | 2 ++
 3 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 3c29be4836ed..b7931faaef6d 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -11,15 +11,13 @@ struct dyn_arch_ftrace { };
 
 #ifdef CONFIG_64BIT
 #define MCOUNT_INSN_SIZE  12
-#define MCOUNT_OFFSET	   8
 #else
 #define MCOUNT_INSN_SIZE  20
-#define MCOUNT_OFFSET	   4
 #endif
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
-	return addr - MCOUNT_OFFSET;
+	return addr;
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 7648a5d11154..ee52cb8e17ad 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -368,8 +368,10 @@ do_file(char const *const fname)
 				"unrecognized ET_REL file: %s\n", fname);
 			fail_file();
 		}
-		if (w2(ehdr->e_machine) == EM_S390)
+		if (w2(ehdr->e_machine) == EM_S390) {
 			reltype = R_390_32;
+			mcount_adjust_32 = -4;
+		}
 		if (w2(ehdr->e_machine) == EM_MIPS) {
 			reltype = R_MIPS_32;
 			is_fake_mcount32 = MIPS32_is_fake_mcount;
@@ -384,8 +386,10 @@ do_file(char const *const fname)
 				"unrecognized ET_REL file: %s\n", fname);
 			fail_file();
 		}
-		if (w2(ghdr->e_machine) == EM_S390)
+		if (w2(ghdr->e_machine) == EM_S390) {
 			reltype = R_390_64;
+			mcount_adjust_64 = -8;
+		}
 		if (w2(ghdr->e_machine) == EM_MIPS) {
 			reltype = R_MIPS_64;
 			Elf64_r_sym = MIPS64_r_sym;
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 414e7f5e42ec..858966ab019c 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -243,12 +243,14 @@ if ($arch eq "x86_64") {
 
 } elsif ($arch eq "s390" && $bits == 32) {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_32\\s+_mcount\$";
+    $mcount_adjust = -4;
     $alignment = 4;
     $ld .= " -m elf_s390";
     $cc .= " -m31";
 
 } elsif ($arch eq "s390" && $bits == 64) {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$";
+    $mcount_adjust = -8;
     $alignment = 8;
     $type = ".quad";
     $ld .= " -m elf64_s390";
-- 
cgit v1.2.3


From 865be7a81071a77014c83cd01536c989eed362b4 Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Mon, 16 May 2011 21:38:08 +0200
Subject: x86, cpu: Fix detection of Celeron Covington stepping A1 and B0

Steppings A1 and B0 of Celeron Covington are currently misdetected as
Pentium II (Dixon). Fix it by removing the stepping check.

[ hpa: this fixes this specific bug... the CPUID documentation
  specifies that the L2 cache size can disambiguate additional CPUs;
  this patch does not fix that. ]

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Link: http://lkml.kernel.org/r/201105162138.15416.linux@rainbow-software.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/cpu/intel.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index df86bc8c859d..32e86aa52743 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -400,12 +400,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
 		switch (c->x86_model) {
 		case 5:
-			if (c->x86_mask == 0) {
-				if (l2 == 0)
-					p = "Celeron (Covington)";
-				else if (l2 == 256)
-					p = "Mobile Pentium II (Dixon)";
-			}
+			if (l2 == 0)
+				p = "Celeron (Covington)";
+			else if (l2 == 256)
+				p = "Mobile Pentium II (Dixon)";
 			break;
 
 		case 6:
-- 
cgit v1.2.3


From dc382fd5bcca7098a984705ed6ac880f539d068e Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 16 May 2011 13:54:10 -0700
Subject: x86, mm: Allow ZONE_DMA to be configurable

ZONE_DMA is unnecessary for a large number of machines that do not
require less than 32-bit DMA addressing, e.g. ISA legacy DMA or PCI
cards with a restricted DMA address mask.

This patch allows users to disable ZONE_DMA for x86 if they know they
will not be using such devices with their kernel.

This prevents the VM from unnecessarily reserving a ratio of memory
(defaulting to 1/256th of system capacity) with lowmem_reserve_ratio
for such allocations when it will never be used.

Signed-off-by: David Rientjes <rientjes@google.com>
Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1105161353560.4353@chino.kir.corp.google.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/Kconfig      | 9 ++++++++-
 arch/x86/mm/init_32.c | 2 ++
 arch/x86/mm/init_64.c | 2 ++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 648fca42ae6a..0eb801a75dee 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -112,7 +112,14 @@ config MMU
 	def_bool y
 
 config ZONE_DMA
-	def_bool y
+	bool "DMA memory allocation support" if EXPERT
+	default y
+	help
+	  DMA memory allocation support allows devices with less than 32-bit
+	  addressing to allocate within the first 16MB of address space.
+	  Disable if no such devices will be used.
+
+	  If unsure, say Y.
 
 config SBUS
 	bool
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2cde0a34bed6..29f7c6d98179 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -678,8 +678,10 @@ static void __init zone_sizes_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA] =
 		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+#endif
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
 	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0404bb3a077e..d865c4aeec55 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -616,7 +616,9 @@ void __init paging_init(void)
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+#ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
+#endif
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
 	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
-- 
cgit v1.2.3


From 328935e6348c6a7cb34798a68c326f4b8372e68a Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Tue, 17 May 2011 14:55:18 +0200
Subject: Revert "x86, AMD: Fix APIC timer erratum 400 affecting K8 Rev.A-E
 processors"

This reverts commit e20a2d205c05cef6b5783df339a7d54adeb50962, as it crashes
certain boxes with specific AMD CPU models.

Moving the lower endpoint of the Erratum 400 check to accomodate
earlier K8 revisions (A-E) opens a can of worms which is simply
not worth to fix properly by tweaking the errata checking
framework:

* missing IntPenging MSR on revisions < CG cause #GP:

http://marc.info/?l=linux-kernel&m=130541471818831

* makes earlier revisions use the LAPIC timer instead of the C1E
idle routine which switches to HPET, thus not waking up in
deeper C-states:

http://lkml.org/lkml/2011/4/24/20

Therefore, leave the original boundary starting with K8-revF.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bb9eb29a52dd..3532d3bf8105 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
  */
 
 const int amd_erratum_400[] =
-	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf),
+	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
 			    AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
 EXPORT_SYMBOL_GPL(amd_erratum_400);
 
-- 
cgit v1.2.3


From 14fb57dccb6e1defe9f89a66f548fcb24c374c1d Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Tue, 17 May 2011 14:55:19 +0200
Subject: x86, AMD: Fix ARAT feature setting again

Trying to enable the local APIC timer on early K8 revisions
uncovers a number of other issues with it, in conjunction with
the C1E enter path on AMD. Fixing those causes much more churn
and troubles than the benefit of using that timer brings so
don't enable it on K8 at all, falling back to the original
functionality the kernel had wrt to that.

Reported-and-bisected-by: Nick Bowler <nbowler@elliptictech.com>
Cc: Boris Ostrovsky <Boris.Ostrovsky@amd.com>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Hans Rosenfeld <hans.rosenfeld@amd.com>
Cc: Nick Bowler <nbowler@elliptictech.com>
Cc: Joerg-Volker-Peetz <jvpeetz@web.de>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1305636919-31165-3-git-send-email-bp@amd64.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3532d3bf8105..6f9d1f6063e9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 #endif
 
 	/* As a rule processors have APIC timer running in deep C states */
-	if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
+	if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400))
 		set_cpu_cap(c, X86_FEATURE_ARAT);
 
 	/*
-- 
cgit v1.2.3


From 2494b030ba9334c7dd7df9b9f7abe4eacc950ec5 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 17 May 2011 12:33:26 -0700
Subject: x86, cpufeature: Fix cpuid leaf 7 feature detection

CPUID leaf 7, subleaf 0 returns the maximum subleaf in EAX, not the
number of subleaves.  Since so far only subleaf 0 is defined (and only
the EBX bitfield) we do not need to qualify the test.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305660806-17519-1-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: <stable@kernel.org> 2.6.36..39
---
 arch/x86/kernel/cpu/common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e2ced0074a45..173f3a3fa1a6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -565,8 +565,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
 
 		cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
 
-		if (eax > 0)
-			c->x86_capability[9] = ebx;
+		c->x86_capability[9] = ebx;
 	}
 
 	/* AMD-defined flags: level 0x80000001 */
-- 
cgit v1.2.3


From c3b0795c98c08351567464150db66d11e05d7611 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Tue, 10 May 2011 21:09:38 +0200
Subject: PM / ACPI: Remove acpi_sleep=s4_nonvs

acpi_sleep=s4_nonvs is superseded by acpi_sleep=nonvs, so remove it.

Signed-off-by: WANG Cong <amwang@redhat.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Len Brown <lenb@kernel.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 Documentation/feature-removal-schedule.txt | 8 --------
 Documentation/kernel-parameters.txt        | 2 +-
 arch/x86/kernel/acpi/sleep.c               | 5 -----
 3 files changed, 1 insertion(+), 14 deletions(-)

(limited to 'arch')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 492e81df2968..f6a24e8aa11e 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -460,14 +460,6 @@ Who:	Thomas Gleixner <tglx@linutronix.de>
 
 ----------------------------
 
-What:	The acpi_sleep=s4_nonvs command line option
-When:	2.6.37
-Files:	arch/x86/kernel/acpi/sleep.c
-Why:	superseded by acpi_sleep=nonvs
-Who:	Rafael J. Wysocki <rjw@sisk.pl>
-
-----------------------------
-
 What: 	PCI DMA unmap state API
 When:	August 2012
 Why:	PCI DMA unmap state API (include/linux/pci-dma.h) was replaced
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index cc85a9278190..259037b873b7 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -245,7 +245,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	acpi_sleep=	[HW,ACPI] Sleep options
 			Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
-				  old_ordering, s4_nonvs, sci_force_enable }
+				  old_ordering, nonvs, sci_force_enable }
 			See Documentation/power/video.txt for information on
 			s3_bios and s3_mode.
 			s3_beep is for debugging; it makes the PC's speaker beep
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index ff93bc1b09c3..18a857ba7a25 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -112,11 +112,6 @@ static int __init acpi_sleep_setup(char *str)
 #ifdef CONFIG_HIBERNATION
 		if (strncmp(str, "s4_nohwsig", 10) == 0)
 			acpi_no_s4_hw_signature();
-		if (strncmp(str, "s4_nonvs", 8) == 0) {
-			pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, "
-					"please use acpi_sleep=nonvs instead");
-			acpi_nvs_nosave();
-		}
 #endif
 		if (strncmp(str, "nonvs", 5) == 0)
 			acpi_nvs_nosave();
-- 
cgit v1.2.3


From c650da23d59d2c82307380414606774c6d49b8bd Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 17 May 2011 23:25:10 +0200
Subject: PM: Remove CONFIG_PM_VERBOSE

Now that we have CONFIG_DYNAMIC_DEBUG there is no need for yet
another flag causing dev_dbg() and pr_debug() statements in the
core PM code to produce output.  Moreover, CONFIG_PM_VERBOSE
causes so much output to be generated that it's not really useful
and almost no one sets it.

References: https://bugzilla.kernel.org/show_bug.cgi?id=23182
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/sh/configs/apsh4ad0a_defconfig | 1 -
 arch/sh/configs/sdk7786_defconfig   | 1 -
 drivers/base/power/Makefile         | 3 +--
 kernel/power/Kconfig                | 6 ------
 4 files changed, 1 insertion(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig
index e71a531f1e31..77ec0e7b8ddf 100644
--- a/arch/sh/configs/apsh4ad0a_defconfig
+++ b/arch/sh/configs/apsh4ad0a_defconfig
@@ -48,7 +48,6 @@ CONFIG_PREEMPT=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-CONFIG_PM_VERBOSE=y
 CONFIG_PM_RUNTIME=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index dc4a2eb6a616..c41650572d79 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -83,7 +83,6 @@ CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-CONFIG_PM_VERBOSE=y
 CONFIG_PM_RUNTIME=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index 06a7073f9027..3647e114d0e7 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -5,5 +5,4 @@ obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
 obj-$(CONFIG_PM_OPP)	+= opp.o
 obj-$(CONFIG_HAVE_CLK)	+= clock_ops.o
 
-ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
-ccflags-$(CONFIG_PM_VERBOSE)   += -DDEBUG
+ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
\ No newline at end of file
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index d74ad4a90695..87f4d24b55b0 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -125,12 +125,6 @@ config PM_DEBUG
 	code. This is helpful when debugging and reporting PM bugs, like
 	suspend support.
 
-config PM_VERBOSE
-	bool "Verbose Power Management debugging"
-	depends on PM_DEBUG
-	---help---
-	This option enables verbose messages from the Power Management code.
-
 config PM_ADVANCED_DEBUG
 	bool "Extra PM attributes in sysfs for low-level debugging/testing"
 	depends on PM_DEBUG
-- 
cgit v1.2.3


From 724a92ee45c04cb9d82884a856b03b1e594d9de1 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 17 May 2011 15:29:10 -0700
Subject: x86, cpufeature: Add CPU feature bit for enhanced REP MOVSB/STOSB

Intel processors are adding enhancements to REP MOVSB/STOSB and the use of
REP MOVSB/STOSB for optimal memcpy/memset or similar functions is recommended.
Enhancement availability is indicated by CPUID.7.0.EBX[9] (Enhanced REP MOVSB/
STOSB).

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-2-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/cpufeature.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 91f3e087cf21..7f2f7b123293 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -195,6 +195,7 @@
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
 #define X86_FEATURE_FSGSBASE	(9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_ERMS	(9*32+ 9) /* Enhanced REP MOVSB/STOSB */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
-- 
cgit v1.2.3


From 161ec53c702ce9df2f439804dfb9331807066daa Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 17 May 2011 15:29:11 -0700
Subject: x86, mem, intel: Initialize Enhanced REP MOVSB/STOSB

If kernel intends to use enhanced REP MOVSB/STOSB, it must ensure
IA32_MISC_ENABLE.Fast_String_Enable (bit 0) is set and CPUID.(EAX=07H, ECX=0H):
EBX[bit 9] also reports 1.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-3-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/cpu/intel.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index df86bc8c859d..fc73a34ba8c9 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -29,10 +29,10 @@
 
 static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 {
+	u64 misc_enable;
+
 	/* Unmask CPUID levels if masked: */
 	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
-		u64 misc_enable;
-
 		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
 
 		if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
@@ -118,8 +118,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	 * (model 2) with the same problem.
 	 */
 	if (c->x86 == 15) {
-		u64 misc_enable;
-
 		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
 
 		if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) {
@@ -130,6 +128,19 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 		}
 	}
 #endif
+
+	/*
+	 * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
+	 * clear the fast string and enhanced fast string CPU capabilities.
+	 */
+	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
+		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+		if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
+			printk(KERN_INFO "Disabled fast string operations\n");
+			setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
+			setup_clear_cpu_cap(X86_FEATURE_ERMS);
+		}
+	}
 }
 
 #ifdef CONFIG_X86_32
-- 
cgit v1.2.3


From 509731336313b3799cf03071d72c64fa6383895e Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 17 May 2011 15:29:12 -0700
Subject: x86, alternative, doc: Add comment for applying alternatives order

Some string operation functions may be patched twice, e.g. on enhanced REP MOVSB
/STOSB processors, memcpy is patched first by fast string alternative function,
then it is patched by enhanced REP MOVSB/STOSB alternative function.

Add comment for applying alternatives order to warn people who may change the
applying alternatives order for any reason.

[ Documentation-only patch ]

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-4-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/alternative.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4a234677e213..f4fe15ddcf94 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -210,6 +210,15 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
 	u8 insnbuf[MAX_PATCH_LEN];
 
 	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
+	/*
+	 * The scan order should be from start to end. A later scanned
+	 * alternative code can overwrite a previous scanned alternative code.
+	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
+	 * patch code.
+	 *
+	 * So be careful if you want to change the scan order to any other
+	 * order.
+	 */
 	for (a = start; a < end; a++) {
 		u8 *instr = a->instr;
 		BUG_ON(a->replacementlen > a->instrlen);
-- 
cgit v1.2.3


From 9072d11da15a71e086eab3b5085184f2c1d06913 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 17 May 2011 15:29:13 -0700
Subject: x86, alternative: Add altinstruction_entry macro

Add altinstruction_entry macro to generate .altinstructions section
entries from assembly code.  This should be less failure-prone than
open-coding.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-5-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/alternative-asm.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index a63a68be1cce..94d420b360d1 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -15,4 +15,13 @@
 	.endm
 #endif
 
+.macro altinstruction_entry orig alt feature orig_len alt_len
+	.align 8
+	.quad \orig
+	.quad \alt
+	.word \feature
+	.byte \orig_len
+	.byte \alt_len
+.endm
+
 #endif  /*  __ASSEMBLY__  */
-- 
cgit v1.2.3


From e365c9df2f2f001450decf9512412d2d5bd1cdef Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 17 May 2011 15:29:14 -0700
Subject: x86, mem: clear_page_64.S: Support clear_page() with enhanced REP
 MOVSB/STOSB

Intel processors are adding enhancements to REP MOVSB/STOSB and the use of
REP MOVSB/STOSB for optimal memcpy/memset or similar functions is recommended.
Enhancement availability is indicated by CPUID.7.0.EBX[9] (Enhanced REP MOVSB/
STOSB).

Support clear_page() with rep stosb for processor supporting enhanced REP MOVSB
/STOSB. On processors supporting enhanced REP MOVSB/STOSB, the alternative
clear_page_c_e function using enhanced REP STOSB overrides the original function
and the fast string function.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-6-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/lib/clear_page_64.S | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index aa4326bfb24a..f2145cfa12a6 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,6 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 /*
  * Zero a page. 	
@@ -14,6 +15,15 @@ ENTRY(clear_page_c)
 	CFI_ENDPROC
 ENDPROC(clear_page_c)
 
+ENTRY(clear_page_c_e)
+	CFI_STARTPROC
+	movl $4096,%ecx
+	xorl %eax,%eax
+	rep stosb
+	ret
+	CFI_ENDPROC
+ENDPROC(clear_page_c_e)
+
 ENTRY(clear_page)
 	CFI_STARTPROC
 	xorl   %eax,%eax
@@ -38,21 +48,26 @@ ENTRY(clear_page)
 .Lclear_page_end:
 ENDPROC(clear_page)
 
-	/* Some CPUs run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
+	/*
+	 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
+	 * It is recommended to use this when possible.
+	 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+	 * Otherwise, use original function.
+	 *
+	 */
 
 #include <asm/cpufeature.h>
 
 	.section .altinstr_replacement,"ax"
 1:	.byte 0xeb					/* jmp <disp8> */
 	.byte (clear_page_c - clear_page) - (2f - 1b)	/* offset */
-2:
+2:	.byte 0xeb					/* jmp <disp8> */
+	.byte (clear_page_c_e - clear_page) - (3f - 2b)	/* offset */
+3:
 	.previous
 	.section .altinstructions,"a"
-	.align 8
-	.quad clear_page
-	.quad 1b
-	.word X86_FEATURE_REP_GOOD
-	.byte .Lclear_page_end - clear_page
-	.byte 2b - 1b
+	altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
+			     .Lclear_page_end-clear_page, 2b-1b
+	altinstruction_entry clear_page,2b,X86_FEATURE_ERMS,   \
+			     .Lclear_page_end-clear_page,3b-2b
 	.previous
-- 
cgit v1.2.3


From 4307bec9344aed83f8107c3eb4285bd9d218fc10 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 17 May 2011 15:29:15 -0700
Subject: x86, mem: copy_user_64.S: Support copy_to/from_user by enhanced REP
 MOVSB/STOSB

Support copy_to_user/copy_from_user() by enhanced REP MOVSB/STOSB.
On processors supporting enhanced REP MOVSB/STOSB, the alternative
copy_user_enhanced_fast_string function using enhanced rep movsb overrides the
original function and the fast string function.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-7-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/lib/copy_user_64.S | 65 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 55 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 99e482615195..d17a1170bbf5 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,23 +15,30 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
-	.macro ALTERNATIVE_JUMP feature,orig,alt
+/*
+ * By placing feature2 after feature1 in altinstructions section, we logically
+ * implement:
+ * If CPU has feature2, jmp to alt2 is used
+ * else if CPU has feature1, jmp to alt1 is used
+ * else jmp to orig is used.
+ */
+	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
 0:
 	.byte 0xe9	/* 32bit jump */
 	.long \orig-1f	/* by default jump to orig */
 1:
 	.section .altinstr_replacement,"ax"
 2:	.byte 0xe9			/* near jump with 32bit immediate */
-	.long \alt-1b /* offset */   /* or alternatively to alt */
+	.long \alt1-1b /* offset */   /* or alternatively to alt1 */
+3:	.byte 0xe9			/* near jump with 32bit immediate */
+	.long \alt2-1b /* offset */   /* or alternatively to alt2 */
 	.previous
+
 	.section .altinstructions,"a"
-	.align 8
-	.quad  0b
-	.quad  2b
-	.word  \feature			/* when feature is set */
-	.byte  5
-	.byte  5
+	altinstruction_entry 0b,2b,\feature1,5,5
+	altinstruction_entry 0b,3b,\feature2,5,5
 	.previous
 	.endm
 
@@ -73,7 +80,9 @@ ENTRY(_copy_to_user)
 	jc bad_to_user
 	cmpq TI_addr_limit(%rax),%rcx
 	jae bad_to_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
+		copy_user_generic_unrolled,copy_user_generic_string,	\
+		copy_user_enhanced_fast_string
 	CFI_ENDPROC
 ENDPROC(_copy_to_user)
 
@@ -86,7 +95,9 @@ ENTRY(_copy_from_user)
 	jc bad_from_user
 	cmpq TI_addr_limit(%rax),%rcx
 	jae bad_from_user
-	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
+		copy_user_generic_unrolled,copy_user_generic_string,	\
+		copy_user_enhanced_fast_string
 	CFI_ENDPROC
 ENDPROC(_copy_from_user)
 
@@ -255,3 +266,37 @@ ENTRY(copy_user_generic_string)
 	.previous
 	CFI_ENDPROC
 ENDPROC(copy_user_generic_string)
+
+/*
+ * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
+ * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+ENTRY(copy_user_enhanced_fast_string)
+	CFI_STARTPROC
+	andl %edx,%edx
+	jz 2f
+	movl %edx,%ecx
+1:	rep
+	movsb
+2:	xorl %eax,%eax
+	ret
+
+	.section .fixup,"ax"
+12:	movl %ecx,%edx		/* ecx is zerorest also */
+	jmp copy_user_handle_tail
+	.previous
+
+	.section __ex_table,"a"
+	.align 8
+	.quad 1b,12b
+	.previous
+	CFI_ENDPROC
+ENDPROC(copy_user_enhanced_fast_string)
-- 
cgit v1.2.3


From 101068c1f4a947ffa08f2782c78e40097300754d Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 17 May 2011 15:29:16 -0700
Subject: x86, mem: memcpy_64.S: Optimize memcpy by enhanced REP MOVSB/STOSB

Support memcpy() with enhanced rep movsb. On processors supporting enhanced
rep movsb, the alternative memcpy() function using enhanced rep movsb overrides the original function and the fast string
function.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-8-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/lib/memcpy_64.S | 45 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 75ef61e35e38..daab21dae2d1 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,6 +4,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
+#include <asm/alternative-asm.h>
 
 /*
  * memcpy - Copy a memory block.
@@ -37,6 +38,23 @@
 .Lmemcpy_e:
 	.previous
 
+/*
+ * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
+ * memcpy_c. Use memcpy_c_e when possible.
+ *
+ * This gets patched over the unrolled variant (below) via the
+ * alternative instructions framework:
+ */
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemcpy_c_e:
+	movq %rdi, %rax
+
+	movl %edx, %ecx
+	rep movsb
+	ret
+.Lmemcpy_e_e:
+	.previous
+
 ENTRY(__memcpy)
 ENTRY(memcpy)
 	CFI_STARTPROC
@@ -171,21 +189,22 @@ ENDPROC(memcpy)
 ENDPROC(__memcpy)
 
 	/*
-	 * Some CPUs run faster using the string copy instructions.
-	 * It is also a lot simpler. Use this when possible:
-	 */
-
-	.section .altinstructions, "a"
-	.align 8
-	.quad memcpy
-	.quad .Lmemcpy_c
-	.word X86_FEATURE_REP_GOOD
-
-	/*
+	 * Some CPUs are adding enhanced REP MOVSB/STOSB feature
+	 * If the feature is supported, memcpy_c_e() is the first choice.
+	 * If enhanced rep movsb copy is not available, use fast string copy
+	 * memcpy_c() when possible. This is faster and code is simpler than
+	 * original memcpy().
+	 * Otherwise, original memcpy() is used.
+	 * In .altinstructions section, ERMS feature is placed after REG_GOOD
+         * feature to implement the right patch order.
+	 *
 	 * Replace only beginning, memcpy is used to apply alternatives,
 	 * so it is silly to overwrite itself with nops - reboot is the
 	 * only outcome...
 	 */
-	.byte .Lmemcpy_e - .Lmemcpy_c
-	.byte .Lmemcpy_e - .Lmemcpy_c
+	.section .altinstructions, "a"
+	altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
+			     .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
+	altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
+			     .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
 	.previous
-- 
cgit v1.2.3


From 057e05c1d6440117875f455e59da8691e08f65d5 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 17 May 2011 15:29:17 -0700
Subject: x86, mem: memmove_64.S: Optimize memmove by enhanced REP MOVSB/STOSB

Support memmove() by enhanced rep movsb. On processors supporting enhanced
REP MOVSB/STOSB, the alternative memmove() function using enhanced rep movsb
overrides the original function.

The patch doesn't change the backward memmove case to use enhanced rep
movsb.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-9-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/lib/memmove_64.S | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 0ecb8433e5a8..d0ec9c2936d7 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -8,6 +8,7 @@
 #define _STRING_C
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 
 #undef memmove
 
@@ -24,6 +25,7 @@
  */
 ENTRY(memmove)
 	CFI_STARTPROC
+
 	/* Handle more 32bytes in loop */
 	mov %rdi, %rax
 	cmp $0x20, %rdx
@@ -31,8 +33,13 @@ ENTRY(memmove)
 
 	/* Decide forward/backward copy mode */
 	cmp %rdi, %rsi
-	jb	2f
+	jge .Lmemmove_begin_forward
+	mov %rsi, %r8
+	add %rdx, %r8
+	cmp %rdi, %r8
+	jg 2f
 
+.Lmemmove_begin_forward:
 	/*
 	 * movsq instruction have many startup latency
 	 * so we handle small size by general register.
@@ -78,6 +85,8 @@ ENTRY(memmove)
 	rep movsq
 	movq %r11, (%r10)
 	jmp 13f
+.Lmemmove_end_forward:
+
 	/*
 	 * Handle data backward by movsq.
 	 */
@@ -194,4 +203,22 @@ ENTRY(memmove)
 13:
 	retq
 	CFI_ENDPROC
+
+	.section .altinstr_replacement,"ax"
+.Lmemmove_begin_forward_efs:
+	/* Forward moving data. */
+	movq %rdx, %rcx
+	rep movsb
+	retq
+.Lmemmove_end_forward_efs:
+	.previous
+
+	.section .altinstructions,"a"
+	.align 8
+	.quad .Lmemmove_begin_forward
+	.quad .Lmemmove_begin_forward_efs
+	.word X86_FEATURE_ERMS
+	.byte .Lmemmove_end_forward-.Lmemmove_begin_forward
+	.byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
+	.previous
 ENDPROC(memmove)
-- 
cgit v1.2.3


From 2f19e06ac30771c7cb96fd61d8aeacfa74dac21c Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 17 May 2011 15:29:18 -0700
Subject: x86, mem: memset_64.S: Optimize memset by enhanced REP MOVSB/STOSB

Support memset() with enhanced rep stosb. On processors supporting enhanced
REP MOVSB/STOSB, the alternative memset_c_e function using enhanced rep stosb
overrides the fast string alternative memset_c and the original function.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/1305671358-14478-10-git-send-email-fenghua.yu@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/lib/memset_64.S | 54 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 42 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 09d344269652..79bd454b78a3 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -2,9 +2,13 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 /*
- * ISO C memset - set a memory block to a byte value.
+ * ISO C memset - set a memory block to a byte value. This function uses fast
+ * string to get better performance than the original function. The code is
+ * simpler and shorter than the orignal function as well.
  *	
  * rdi   destination
  * rsi   value (char) 
@@ -31,6 +35,28 @@
 .Lmemset_e:
 	.previous
 
+/*
+ * ISO C memset - set a memory block to a byte value. This function uses
+ * enhanced rep stosb to override the fast string function.
+ * The code is simpler and shorter than the fast string function as well.
+ *
+ * rdi   destination
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
+ * rax   original destination
+ */
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemset_c_e:
+	movq %rdi,%r9
+	movb %sil,%al
+	movl %edx,%ecx
+	rep stosb
+	movq %r9,%rax
+	ret
+.Lmemset_e_e:
+	.previous
+
 ENTRY(memset)
 ENTRY(__memset)
 	CFI_STARTPROC
@@ -112,16 +138,20 @@ ENTRY(__memset)
 ENDPROC(memset)
 ENDPROC(__memset)
 
-	/* Some CPUs run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
+	/* Some CPUs support enhanced REP MOVSB/STOSB feature.
+	 * It is recommended to use this when possible.
+	 *
+	 * If enhanced REP MOVSB/STOSB feature is not available, use fast string
+	 * instructions.
+	 *
+	 * Otherwise, use original memset function.
+	 *
+	 * In .altinstructions section, ERMS feature is placed after REG_GOOD
+         * feature to implement the right patch order.
+	 */
 	.section .altinstructions,"a"
-	.align 8
-	.quad memset
-	.quad .Lmemset_c
-	.word X86_FEATURE_REP_GOOD
-	.byte .Lfinal - memset
-	.byte .Lmemset_e - .Lmemset_c
+	altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
+			     .Lfinal-memset,.Lmemset_e-.Lmemset_c
+	altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
+			     .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
 	.previous
-- 
cgit v1.2.3


From 9bed4aca296fdf9c1b85a8f093e92018dc9864f3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 18 May 2011 09:03:34 +1000
Subject: crypto: aesni-intel - fix aesni build on i386

Fix build error on i386 by moving function prototypes:

arch/x86/crypto/aesni-intel_glue.c: In function 'aesni_init':
arch/x86/crypto/aesni-intel_glue.c:1263: error: implicit declaration of function 'crypto_fpu_init'
arch/x86/crypto/aesni-intel_glue.c: In function 'aesni_exit':
arch/x86/crypto/aesni-intel_glue.c:1373: error: implicit declaration of function 'crypto_fpu_exit'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aesni-intel_glue.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index cbc60ef359bc..feee8ff1d05e 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -94,6 +94,10 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
 asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
+
+int crypto_fpu_init(void);
+void crypto_fpu_exit(void);
+
 #ifdef CONFIG_X86_64
 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
@@ -140,9 +144,6 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
-int crypto_fpu_init(void);
-void crypto_fpu_exit(void);
-
 static inline struct
 aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
 {
-- 
cgit v1.2.3


From d0281a257f370b09c410e466571858b4e12869c9 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 17 May 2011 18:44:26 -0700
Subject: x86, cpufeature: Add cpufeature flag for SMEP

Add support for newly documented SMEP (Supervisor Mode Execution Protection) CPU
feature flag.

SMEP prevents the CPU in kernel-mode to jump to an executable page
that has the user flag set in the PTE.  This prevents the kernel from
executing user-space code accidentally or maliciously, so it for
example prevents kernel exploits from jumping to specially prepared
user-mode shell code.

[ hpa: added better description by Ingo Molnar ]

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
LKML-Reference: <1305683069-25394-2-git-send-email-fenghua.yu@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/cpufeature.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7f2f7b123293..8808cdb96c3f 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -195,6 +195,7 @@
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
 #define X86_FEATURE_FSGSBASE	(9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_SMEP	(9*32+ 7) /* Supervisor Mode Execution Protection */
 #define X86_FEATURE_ERMS	(9*32+ 9) /* Enhanced REP MOVSB/STOSB */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
-- 
cgit v1.2.3


From dc23c0bccf5eea171c87b3db285d032b9a5f06c4 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 17 May 2011 18:44:27 -0700
Subject: x86, cpu: Add SMEP CPU feature in CR4

Add support for newly documented SMEP (Supervisor Mode Execution Protection)
CPU feature in CR4.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
LKML-Reference: <1305683069-25394-3-git-send-email-fenghua.yu@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/processor-flags.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index a898a2b6e10c..59ab4dffa377 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -60,6 +60,7 @@
 #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
 #define X86_CR4_VMXE	0x00002000 /* enable VMX virtualization */
 #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
+#define X86_CR4_SMEP	0x00100000 /* enable SMEP support */
 
 /*
  * x86-64 Task Priority Register, CR8
-- 
cgit v1.2.3


From de5397ad5b9ad22e2401c4dacdf1bb3b19c05679 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Wed, 11 May 2011 16:51:05 -0700
Subject: x86, cpu: Enable/disable Supervisor Mode Execution Protection

Enable/disable newly documented SMEP (Supervisor Mode Execution Protection) CPU
feature in kernel. CR4.SMEP (bit 20) is 0 at power-on. If the feature is
supported by CPU (X86_FEATURE_SMEP), enable SMEP by setting CR4.SMEP. New kernel
option nosmep disables the feature even if the feature is supported by CPU.

[ hpa: moved the call to setup_smep() until after the vendor-specific
  initialization; that ensures that CPUID features are unmasked.  We
  will still run it before we have userspace (never mind uncontrolled
  userspace). ]

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
LKML-Reference: <1305157865-31727-1-git-send-email-fenghua.yu@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 Documentation/kernel-parameters.txt |  4 ++++
 arch/x86/kernel/cpu/common.c        | 23 +++++++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'arch')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index cc85a9278190..56fb8c16e20b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1664,6 +1664,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			noexec=on: enable non-executable mappings (default)
 			noexec=off: disable non-executable mappings
 
+	nosmep		[X86]
+			Disable SMEP (Supervisor Mode Execution Protection)
+			even if it is supported by processor.
+
 	noexec32	[X86-64]
 			This affects only 32-bit executables.
 			noexec32=on: enable non-executable mappings (default)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 173f3a3fa1a6..cbc70a27430c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -254,6 +254,25 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 }
 #endif
 
+static int disable_smep __initdata;
+static __init int setup_disable_smep(char *arg)
+{
+	disable_smep = 1;
+	return 1;
+}
+__setup("nosmep", setup_disable_smep);
+
+static __init void setup_smep(struct cpuinfo_x86 *c)
+{
+	if (cpu_has(c, X86_FEATURE_SMEP)) {
+		if (unlikely(disable_smep)) {
+			setup_clear_cpu_cap(X86_FEATURE_SMEP);
+			clear_in_cr4(X86_CR4_SMEP);
+		} else
+			set_in_cr4(X86_CR4_SMEP);
+	}
+}
+
 /*
  * Some CPU features depend on higher CPUID levels, which may not always
  * be available due to CPUID level capping or broken virtualization
@@ -667,6 +686,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	c->cpu_index = 0;
 #endif
 	filter_cpuid_features(c, false);
+
+	setup_smep(c);
 }
 
 void __init early_cpu_init(void)
@@ -752,6 +773,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 #endif
 	}
 
+	setup_smep(c);
+
 	get_model_name(c); /* Default name */
 
 	detect_nopl(c);
-- 
cgit v1.2.3


From b2db21997f43907f54500edaf063253ca2a186f9 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Tue, 17 May 2011 15:44:11 -0700
Subject: um: fix abort

os_dump_core() uses abort() to terminate UML in case of an fatal error.

glibc's abort() calls raise(SIGABRT) which makes use of tgkill().
tgkill() has no effect within UML's kernel threads because they are not
pthreads.  As fallback abort() executes an invalid instruction to
terminate the process.  Therefore UML gets killed by SIGSEGV and leaves a
ugly log entry in the host's kernel ring buffer.

To get rid of this we use our own abort routine.

Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/os-Linux/util.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 6ea77979531c..42827cafa6af 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -5,6 +5,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <errno.h>
 #include <signal.h>
 #include <string.h>
@@ -75,6 +76,26 @@ void setup_hostinfo(char *buf, int len)
 		 host.release, host.version, host.machine);
 }
 
+/*
+ * We cannot use glibc's abort(). It makes use of tgkill() which
+ * has no effect within UML's kernel threads.
+ * After that glibc would execute an invalid instruction to kill
+ * the calling process and UML crashes with SIGSEGV.
+ */
+static inline void __attribute__ ((noreturn)) uml_abort(void)
+{
+	sigset_t sig;
+
+	fflush(NULL);
+
+	if (!sigemptyset(&sig) && !sigaddset(&sig, SIGABRT))
+		sigprocmask(SIG_UNBLOCK, &sig, 0);
+
+	for (;;)
+		if (kill(getpid(), SIGABRT) < 0)
+			exit(127);
+}
+
 void os_dump_core(void)
 {
 	int pid;
@@ -116,5 +137,5 @@ void os_dump_core(void)
 	while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0)
 		os_kill_ptraced_process(pid, 0);
 
-	abort();
+	uml_abort();
 }
-- 
cgit v1.2.3


From 26afb7c661080ae3f1f13ddf7f0c58c4f931c22b Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 12 May 2011 16:30:30 +0200
Subject: x86, 64-bit: Fix copy_[to/from]_user() checks for the userspace
 address limit

As reported in BZ #30352:

  https://bugzilla.kernel.org/show_bug.cgi?id=30352

there's a kernel bug related to reading the last allowed page on x86_64.

The _copy_to_user() and _copy_from_user() functions use the following
check for address limit:

  if (buf + size >= limit)
	fail();

while it should be more permissive:

  if (buf + size > limit)
	fail();

That's because the size represents the number of bytes being
read/write from/to buf address AND including the buf address.
So the copy function will actually never touch the limit
address even if "buf + size == limit".

Following program fails to use the last page as buffer
due to the wrong limit check:

 #include <sys/mman.h>
 #include <sys/socket.h>
 #include <assert.h>

 #define PAGE_SIZE       (4096)
 #define LAST_PAGE       ((void*)(0x7fffffffe000))

 int main()
 {
        int fds[2], err;
        void * ptr = mmap(LAST_PAGE, PAGE_SIZE, PROT_READ | PROT_WRITE,
                          MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
        assert(ptr == LAST_PAGE);
        err = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds);
        assert(err == 0);
        err = send(fds[0], ptr, PAGE_SIZE, 0);
        perror("send");
        assert(err == PAGE_SIZE);
        err = recv(fds[1], ptr, PAGE_SIZE, MSG_WAITALL);
        perror("recv");
        assert(err == PAGE_SIZE);
        return 0;
 }

The other place checking the addr limit is the access_ok() function,
which is working properly. There's just a misleading comment
for the __range_not_ok() macro - which this patch fixes as well.

The last page of the user-space address range is a guard page and
Brian Gerst observed that the guard page itself due to an erratum on K8 cpus
(#121 Sequential Execution Across Non-Canonical Boundary Causes Processor
Hang).

However, the test code is using the last valid page before the guard page.
The bug is that the last byte before the guard page can't be read
because of the off-by-one error. The guard page is left in place.

This bug would normally not show up because the last page is
part of the process stack and never accessed via syscalls.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Brian Gerst <brgerst@gmail.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: <stable@kernel.org>
Link: http://lkml.kernel.org/r/1305210630-7136-1-git-send-email-jolsa@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uaccess.h | 2 +-
 arch/x86/lib/copy_user_64.S    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index abd3e0ea762a..99f0ad753f32 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -42,7 +42,7 @@
  * Returns 0 if the range is valid, nonzero otherwise.
  *
  * This is equivalent to the following test:
- * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64)
+ * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64)
  *
  * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
  */
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index d17a1170bbf5..024840266ba0 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -79,7 +79,7 @@ ENTRY(_copy_to_user)
 	addq %rdx,%rcx
 	jc bad_to_user
 	cmpq TI_addr_limit(%rax),%rcx
-	jae bad_to_user
+	ja bad_to_user
 	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
 		copy_user_generic_unrolled,copy_user_generic_string,	\
 		copy_user_enhanced_fast_string
@@ -94,7 +94,7 @@ ENTRY(_copy_from_user)
 	addq %rdx,%rcx
 	jc bad_from_user
 	cmpq TI_addr_limit(%rax),%rcx
-	jae bad_from_user
+	ja bad_from_user
 	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
 		copy_user_generic_unrolled,copy_user_generic_string,	\
 		copy_user_enhanced_fast_string
-- 
cgit v1.2.3


From 3436830af53c38b7674097c00b02b7a4064476f2 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Thu, 12 May 2011 13:55:48 +0100
Subject: MIPS: RB532: Fix iomap resource size miscalculation.

This is the MIPS portion of Joe Perches <joe@perches.com>'s
https://patchwork.linux-mips.org/patch/2172/ which seems to have been
lost in time and space.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/rb532/gpio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c
index 37de05d595e7..6c47dfeb7be3 100644
--- a/arch/mips/rb532/gpio.c
+++ b/arch/mips/rb532/gpio.c
@@ -185,7 +185,7 @@ int __init rb532_gpio_init(void)
 	struct resource *r;
 
 	r = rb532_gpio_reg0_res;
-	rb532_gpio_chip->regbase = ioremap_nocache(r->start, r->end - r->start);
+	rb532_gpio_chip->regbase = ioremap_nocache(r->start, resource_size(r));
 
 	if (!rb532_gpio_chip->regbase) {
 		printk(KERN_ERR "rb532: cannot remap GPIO register 0\n");
-- 
cgit v1.2.3


From 10423c91ffc8e59d4f99d401f7beb3115cdc117a Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 13 May 2011 10:33:28 +0100
Subject: MIPS: Fix duplicate invocation of notify_die.

Initial patch by Yury Polyanskiy <ypolyans@princeton.edu>.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Patchwork: https://patchwork.linux-mips.org/patch/2373/
---
 arch/mips/kernel/traps.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 71350f7f2d88..e9b3af27d844 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -374,7 +374,8 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 	unsigned long dvpret = dvpe();
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-	notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV);
+	if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
+		sig = 0;
 
 	console_verbose();
 	spin_lock_irq(&die_lock);
@@ -383,9 +384,6 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 	mips_mt_regdump(dvpret);
 #endif /* CONFIG_MIPS_MT_SMTC */
 
-	if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
-		sig = 0;
-
 	printk("%s[#%d]:\n", str, ++die_counter);
 	show_registers(regs);
 	add_taint(TAINT_DIE);
-- 
cgit v1.2.3


From 3e9957b4866f3767f19bf0e543b322ad7906c564 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian@openwrt.org>
Date: Fri, 13 May 2011 17:41:21 +0200
Subject: MIPS: AR7: Fix GPIO register size for Titan variant.

The 'size' variable contains the correct register size for both AR7
and Titan, but we never used it to ioremap the correct register size.
This problem only shows up on Titan.

[ralf@linux-mips.org: Fixed the fix.  The original patch as in patchwork
recognizes the problem correctly then fails to fix it ...]

Reported-by: Alexander Clouter <alex@digriz.org.uk>
Signed-off-by: Florian Fainelli <florian@openwrt.org>
Patchwork: https://patchwork.linux-mips.org/patch/2380/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/ar7/gpio.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c
index 425dfa5d6e12..bb571bcdb8f2 100644
--- a/arch/mips/ar7/gpio.c
+++ b/arch/mips/ar7/gpio.c
@@ -325,9 +325,7 @@ int __init ar7_gpio_init(void)
 		size = 0x1f;
 	}
 
-	gpch->regs = ioremap_nocache(AR7_REGS_GPIO,
-					AR7_REGS_GPIO + 0x10);
-
+	gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size);
 	if (!gpch->regs) {
 		printk(KERN_ERR "%s: failed to ioremap regs\n",
 					gpch->chip.label);
-- 
cgit v1.2.3


From a5602a3273774c720aaf165ff670e5b85e5910a5 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 18 May 2011 13:14:36 +0100
Subject: MIPS: Kludge IP27 build for 2.6.39.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/dma-mapping.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index 655f849bd08d..7aa37ddfca4b 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -5,7 +5,9 @@
 #include <asm/cache.h>
 #include <asm-generic/dma-coherent.h>
 
+#ifndef CONFIG_SGI_IP27	/* Kludge to fix 2.6.39 build for IP27 */
 #include <dma-coherence.h>
+#endif
 
 extern struct dma_map_ops *mips_dma_map_ops;
 
-- 
cgit v1.2.3


From b1608d69cb804e414d0887140ba08a9398e4e638 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Wed, 18 May 2011 11:19:24 -0600
Subject: drivercore: revert addition of of_match to struct device

Commit b826291c, "drivercore/dt: add a match table pointer to struct
device" added an of_match pointer to struct device to cache the
of_match_table entry discovered at driver match time.  This was unsafe
because matching is not an atomic operation with probing a driver.  If
two or more drivers are attempted to be matched to a driver at the
same time, then the cached matching entry pointer could get
overwritten.

This patch reverts the of_match cache pointer and reworks all users to
call of_match_device() directly instead.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/powerpc/platforms/83xx/suspend.c      |  7 +++++--
 arch/powerpc/sysdev/fsl_msi.c              |  7 +++++--
 arch/sparc/kernel/pci_sabre.c              |  5 ++++-
 arch/sparc/kernel/pci_schizo.c             |  8 ++++++--
 drivers/atm/fore200e.c                     |  7 +++++--
 drivers/char/hw_random/n2-drv.c            |  7 +++++--
 drivers/char/ipmi/ipmi_si_intf.c           |  7 +++++--
 drivers/char/xilinx_hwicap/xilinx_hwicap.c | 14 +++++++++-----
 drivers/edac/ppc4xx_edac.c                 |  2 +-
 drivers/i2c/busses/i2c-mpc.c               |  9 ++++++---
 drivers/mmc/host/sdhci-of-core.c           |  7 +++++--
 drivers/mtd/maps/physmap_of.c              |  7 +++++--
 drivers/net/can/mscan/mpc5xxx_can.c        |  7 +++++--
 drivers/net/fs_enet/fs_enet-main.c         |  9 ++++++---
 drivers/net/fs_enet/mii-fec.c              |  7 +++++--
 drivers/net/sunhme.c                       |  7 +++++--
 drivers/scsi/qlogicpti.c                   |  7 +++++--
 drivers/tty/serial/of_serial.c             |  7 +++++--
 drivers/usb/gadget/fsl_qe_udc.c            |  7 +++++--
 drivers/watchdog/mpc8xxx_wdt.c             |  7 +++++--
 include/linux/device.h                     |  1 -
 include/linux/of_device.h                  | 12 ++++++------
 22 files changed, 108 insertions(+), 50 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 188272934cfb..104faa8aa23c 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = {
 	.end = mpc83xx_suspend_end,
 };
 
+static struct of_device_id pmc_match[];
 static int pmc_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct device_node *np = ofdev->dev.of_node;
 	struct resource res;
 	struct pmc_type *type;
 	int ret = 0;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(pmc_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
 
-	type = ofdev->dev.of_match->data;
+	type = match->data;
 
 	if (!of_device_is_available(np))
 		return -ENODEV;
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index d5679dc1e20f..01cd2f089512 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi,
 	return 0;
 }
 
+static const struct of_device_id fsl_of_msi_ids[];
 static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 {
+	const struct of_device_id *match;
 	struct fsl_msi *msi;
 	struct resource res;
 	int err, i, j, irq_index, count;
@@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 	u32 offset;
 	static const u32 all_avail[] = { 0, NR_MSI_IRQS };
 
-	if (!dev->dev.of_match)
+	match = of_match_device(fsl_of_msi_ids, &dev->dev);
+	if (!match)
 		return -EINVAL;
-	features = dev->dev.of_match->data;
+	features = match->data;
 
 	printk(KERN_DEBUG "Setting up Freescale MSI support\n");
 
diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c
index 948068a083fc..d1840dbdaa2f 100644
--- a/arch/sparc/kernel/pci_sabre.c
+++ b/arch/sparc/kernel/pci_sabre.c
@@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm,
 	sabre_scan_bus(pbm, &op->dev);
 }
 
+static const struct of_device_id sabre_match[];
 static int __devinit sabre_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	const struct linux_prom64_registers *pr_regs;
 	struct device_node *dp = op->dev.of_node;
 	struct pci_pbm_info *pbm;
@@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op)
 	const u32 *vdma;
 	u64 clear_irq;
 
-	hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL);
+	match = of_match_device(sabre_match, &op->dev);
+	hummingbird_p = match && (match->data != NULL);
 	if (!hummingbird_p) {
 		struct device_node *cpu_dp;
 
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
index fecfcb2063c8..283fbc329a43 100644
--- a/arch/sparc/kernel/pci_schizo.c
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -1458,11 +1458,15 @@ out_err:
 	return err;
 }
 
+static const struct of_device_id schizo_match[];
 static int __devinit schizo_probe(struct platform_device *op)
 {
-	if (!op->dev.of_match)
+	const struct of_device_id *match;
+
+	match = of_match_device(schizo_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	return __schizo_init(op, (unsigned long) op->dev.of_match->data);
+	return __schizo_init(op, (unsigned long)match->data);
 }
 
 /* The ordering of this table is very important.  Some Tomatillo
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index bdd2719f3f68..bc9e702186dd 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -2643,16 +2643,19 @@ fore200e_init(struct fore200e* fore200e, struct device *parent)
 }
 
 #ifdef CONFIG_SBUS
+static const struct of_device_id fore200e_sba_match[];
 static int __devinit fore200e_sba_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	const struct fore200e_bus *bus;
 	struct fore200e *fore200e;
 	static int index = 0;
 	int err;
 
-	if (!op->dev.of_match)
+	match = of_match_device(fore200e_sba_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	bus = op->dev.of_match->data;
+	bus = match->data;
 
 	fore200e = kzalloc(sizeof(struct fore200e), GFP_KERNEL);
 	if (!fore200e)
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c
index 43ac61978d8b..ac6739e085e3 100644
--- a/drivers/char/hw_random/n2-drv.c
+++ b/drivers/char/hw_random/n2-drv.c
@@ -619,15 +619,18 @@ static void __devinit n2rng_driver_version(void)
 		pr_info("%s", version);
 }
 
+static const struct of_device_id n2rng_match[];
 static int __devinit n2rng_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	int victoria_falls;
 	int err = -ENOMEM;
 	struct n2rng *np;
 
-	if (!op->dev.of_match)
+	match = of_match_device(n2rng_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	victoria_falls = (op->dev.of_match->data != NULL);
+	victoria_falls = (match->data != NULL);
 
 	n2rng_driver_version();
 	np = kzalloc(sizeof(*np), GFP_KERNEL);
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index cc6c9b2546a3..64c6b8530615 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2554,9 +2554,11 @@ static struct pci_driver ipmi_pci_driver = {
 };
 #endif /* CONFIG_PCI */
 
+static struct of_device_id ipmi_match[];
 static int __devinit ipmi_probe(struct platform_device *dev)
 {
 #ifdef CONFIG_OF
+	const struct of_device_id *match;
 	struct smi_info *info;
 	struct resource resource;
 	const __be32 *regsize, *regspacing, *regshift;
@@ -2566,7 +2568,8 @@ static int __devinit ipmi_probe(struct platform_device *dev)
 
 	dev_info(&dev->dev, "probing via device tree\n");
 
-	if (!dev->dev.of_match)
+	match = of_match_device(ipmi_match, &dev->dev);
+	if (!match)
 		return -EINVAL;
 
 	ret = of_address_to_resource(np, 0, &resource);
@@ -2601,7 +2604,7 @@ static int __devinit ipmi_probe(struct platform_device *dev)
 		return -ENOMEM;
 	}
 
-	info->si_type		= (enum si_type) dev->dev.of_match->data;
+	info->si_type		= (enum si_type) match->data;
 	info->addr_source	= SI_DEVICETREE;
 	info->irq_setup		= std_irq_setup;
 
diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
index d6412c16385f..39ccdeada791 100644
--- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c
+++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
@@ -715,13 +715,13 @@ static int __devexit hwicap_remove(struct device *dev)
 }
 
 #ifdef CONFIG_OF
-static int __devinit hwicap_of_probe(struct platform_device *op)
+static int __devinit hwicap_of_probe(struct platform_device *op,
+				     const struct hwicap_driver_config *config)
 {
 	struct resource res;
 	const unsigned int *id;
 	const char *family;
 	int rc;
-	const struct hwicap_driver_config *config = op->dev.of_match->data;
 	const struct config_registers *regs;
 
 
@@ -751,20 +751,24 @@ static int __devinit hwicap_of_probe(struct platform_device *op)
 			regs);
 }
 #else
-static inline int hwicap_of_probe(struct platform_device *op)
+static inline int hwicap_of_probe(struct platform_device *op,
+				  const struct hwicap_driver_config *config)
 {
 	return -EINVAL;
 }
 #endif /* CONFIG_OF */
 
+static const struct of_device_id __devinitconst hwicap_of_match[];
 static int __devinit hwicap_drv_probe(struct platform_device *pdev)
 {
+	const struct of_device_id *match;
 	struct resource *res;
 	const struct config_registers *regs;
 	const char *family;
 
-	if (pdev->dev.of_match)
-		return hwicap_of_probe(pdev);
+	match = of_match_device(hwicap_of_match, &pdev->dev);
+	if (match)
+		return hwicap_of_probe(pdev, match->data);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res)
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c
index c1f0045ceb8e..af8e7b1aa290 100644
--- a/drivers/edac/ppc4xx_edac.c
+++ b/drivers/edac/ppc4xx_edac.c
@@ -1019,7 +1019,7 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci,
 	struct ppc4xx_edac_pdata *pdata = NULL;
 	const struct device_node *np = op->dev.of_node;
 
-	if (op->dev.of_match == NULL)
+	if (of_match_device(ppc4xx_edac_match, &op->dev) == NULL)
 		return -EINVAL;
 
 	/* Initial driver pointers and private data */
diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c
index 75b984c519ac..107397a606b4 100644
--- a/drivers/i2c/busses/i2c-mpc.c
+++ b/drivers/i2c/busses/i2c-mpc.c
@@ -560,15 +560,18 @@ static struct i2c_adapter mpc_ops = {
 	.timeout = HZ,
 };
 
+static const struct of_device_id mpc_i2c_of_match[];
 static int __devinit fsl_i2c_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	struct mpc_i2c *i2c;
 	const u32 *prop;
 	u32 clock = MPC_I2C_CLOCK_LEGACY;
 	int result = 0;
 	int plen;
 
-	if (!op->dev.of_match)
+	match = of_match_device(mpc_i2c_of_match, &op->dev);
+	if (!match)
 		return -EINVAL;
 
 	i2c = kzalloc(sizeof(*i2c), GFP_KERNEL);
@@ -605,8 +608,8 @@ static int __devinit fsl_i2c_probe(struct platform_device *op)
 			clock = *prop;
 	}
 
-	if (op->dev.of_match->data) {
-		struct mpc_i2c_data *data = op->dev.of_match->data;
+	if (match->data) {
+		struct mpc_i2c_data *data = match->data;
 		data->setup(op->dev.of_node, i2c, clock, data->prescaler);
 	} else {
 		/* Backwards compatibility */
diff --git a/drivers/mmc/host/sdhci-of-core.c b/drivers/mmc/host/sdhci-of-core.c
index f9b611fc773e..60e4186a4345 100644
--- a/drivers/mmc/host/sdhci-of-core.c
+++ b/drivers/mmc/host/sdhci-of-core.c
@@ -124,8 +124,10 @@ static bool __devinit sdhci_of_wp_inverted(struct device_node *np)
 #endif
 }
 
+static const struct of_device_id sdhci_of_match[];
 static int __devinit sdhci_of_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct device_node *np = ofdev->dev.of_node;
 	struct sdhci_of_data *sdhci_of_data;
 	struct sdhci_host *host;
@@ -134,9 +136,10 @@ static int __devinit sdhci_of_probe(struct platform_device *ofdev)
 	int size;
 	int ret;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(sdhci_of_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
-	sdhci_of_data = ofdev->dev.of_match->data;
+	sdhci_of_data = match->data;
 
 	if (!of_device_is_available(np))
 		return -ENODEV;
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index bd483f0c57e1..c1d33464aee8 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -214,11 +214,13 @@ static void __devinit of_free_probes(const char **probes)
 }
 #endif
 
+static struct of_device_id of_flash_match[];
 static int __devinit of_flash_probe(struct platform_device *dev)
 {
 #ifdef CONFIG_MTD_PARTITIONS
 	const char **part_probe_types;
 #endif
+	const struct of_device_id *match;
 	struct device_node *dp = dev->dev.of_node;
 	struct resource res;
 	struct of_flash *info;
@@ -232,9 +234,10 @@ static int __devinit of_flash_probe(struct platform_device *dev)
 	struct mtd_info **mtd_list = NULL;
 	resource_size_t res_size;
 
-	if (!dev->dev.of_match)
+	match = of_match_device(of_flash_match, &dev->dev);
+	if (!match)
 		return -EINVAL;
-	probe_type = dev->dev.of_match->data;
+	probe_type = match->data;
 
 	reg_tuple_size = (of_n_addr_cells(dp) + of_n_size_cells(dp)) * sizeof(u32);
 
diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c
index bd1d811c204f..5fedc3375562 100644
--- a/drivers/net/can/mscan/mpc5xxx_can.c
+++ b/drivers/net/can/mscan/mpc5xxx_can.c
@@ -247,8 +247,10 @@ static u32 __devinit mpc512x_can_get_clock(struct platform_device *ofdev,
 }
 #endif /* CONFIG_PPC_MPC512x */
 
+static struct of_device_id mpc5xxx_can_table[];
 static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct mpc5xxx_can_data *data;
 	struct device_node *np = ofdev->dev.of_node;
 	struct net_device *dev;
@@ -258,9 +260,10 @@ static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev)
 	int irq, mscan_clksrc = 0;
 	int err = -ENOMEM;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(mpc5xxx_can_table, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
-	data = (struct mpc5xxx_can_data *)ofdev->dev.of_match->data;
+	data = match->data;
 
 	base = of_iomap(np, 0);
 	if (!base) {
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index 24cb953900dd..5131e61c358c 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -998,8 +998,10 @@ static const struct net_device_ops fs_enet_netdev_ops = {
 #endif
 };
 
+static struct of_device_id fs_enet_match[];
 static int __devinit fs_enet_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct net_device *ndev;
 	struct fs_enet_private *fep;
 	struct fs_platform_info *fpi;
@@ -1007,14 +1009,15 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev)
 	const u8 *mac_addr;
 	int privsize, len, ret = -ENODEV;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(fs_enet_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
 
 	fpi = kzalloc(sizeof(*fpi), GFP_KERNEL);
 	if (!fpi)
 		return -ENOMEM;
 
-	if (!IS_FEC(ofdev->dev.of_match)) {
+	if (!IS_FEC(match)) {
 		data = of_get_property(ofdev->dev.of_node, "fsl,cpm-command", &len);
 		if (!data || len != 4)
 			goto out_free_fpi;
@@ -1049,7 +1052,7 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev)
 	fep->dev = &ofdev->dev;
 	fep->ndev = ndev;
 	fep->fpi = fpi;
-	fep->ops = ofdev->dev.of_match->data;
+	fep->ops = match->data;
 
 	ret = fep->ops->setup_data(ndev);
 	if (ret)
diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.c
index 7e840d373ab3..6a2e150e75bb 100644
--- a/drivers/net/fs_enet/mii-fec.c
+++ b/drivers/net/fs_enet/mii-fec.c
@@ -101,17 +101,20 @@ static int fs_enet_fec_mii_reset(struct mii_bus *bus)
 	return 0;
 }
 
+static struct of_device_id fs_enet_mdio_fec_match[];
 static int __devinit fs_enet_mdio_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct resource res;
 	struct mii_bus *new_bus;
 	struct fec_info *fec;
 	int (*get_bus_freq)(struct device_node *);
 	int ret = -ENOMEM, clock, speed;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(fs_enet_mdio_fec_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
-	get_bus_freq = ofdev->dev.of_match->data;
+	get_bus_freq = match->data;
 
 	new_bus = mdiobus_alloc();
 	if (!new_bus)
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index eb4f59fb01e9..bff2f7999ff0 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -3237,15 +3237,18 @@ static void happy_meal_pci_exit(void)
 #endif
 
 #ifdef CONFIG_SBUS
+static const struct of_device_id hme_sbus_match[];
 static int __devinit hme_sbus_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	struct device_node *dp = op->dev.of_node;
 	const char *model = of_get_property(dp, "model", NULL);
 	int is_qfe;
 
-	if (!op->dev.of_match)
+	match = of_match_device(hme_sbus_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	is_qfe = (op->dev.of_match->data != NULL);
+	is_qfe = (match->data != NULL);
 
 	if (!is_qfe && model && !strcmp(model, "SUNW,sbus-qfe"))
 		is_qfe = 1;
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index e2d45c91b8e8..9689d41c7888 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -1292,8 +1292,10 @@ static struct scsi_host_template qpti_template = {
 	.use_clustering		= ENABLE_CLUSTERING,
 };
 
+static const struct of_device_id qpti_match[];
 static int __devinit qpti_sbus_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	struct scsi_host_template *tpnt;
 	struct device_node *dp = op->dev.of_node;
 	struct Scsi_Host *host;
@@ -1301,9 +1303,10 @@ static int __devinit qpti_sbus_probe(struct platform_device *op)
 	static int nqptis;
 	const char *fcode;
 
-	if (!op->dev.of_match)
+	match = of_match_device(qpti_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	tpnt = op->dev.of_match->data;
+	tpnt = match->data;
 
 	/* Sometimes Antares cards come up not completely
 	 * setup, and we get a report of a zero IRQ.
diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c
index 0e8eec516df4..c911b2419abb 100644
--- a/drivers/tty/serial/of_serial.c
+++ b/drivers/tty/serial/of_serial.c
@@ -80,14 +80,17 @@ static int __devinit of_platform_serial_setup(struct platform_device *ofdev,
 /*
  * Try to register a serial port
  */
+static struct of_device_id of_platform_serial_table[];
 static int __devinit of_platform_serial_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct of_serial_info *info;
 	struct uart_port port;
 	int port_type;
 	int ret;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(of_platform_serial_table, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
 
 	if (of_find_property(ofdev->dev.of_node, "used-by-rtas", NULL))
@@ -97,7 +100,7 @@ static int __devinit of_platform_serial_probe(struct platform_device *ofdev)
 	if (info == NULL)
 		return -ENOMEM;
 
-	port_type = (unsigned long)ofdev->dev.of_match->data;
+	port_type = (unsigned long)match->data;
 	ret = of_platform_serial_setup(ofdev, port_type, &port);
 	if (ret)
 		goto out;
diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c
index 36613b37c504..3a68e09309f7 100644
--- a/drivers/usb/gadget/fsl_qe_udc.c
+++ b/drivers/usb/gadget/fsl_qe_udc.c
@@ -2539,15 +2539,18 @@ static void qe_udc_release(struct device *dev)
 }
 
 /* Driver probe functions */
+static const struct of_device_id qe_udc_match[];
 static int __devinit qe_udc_probe(struct platform_device *ofdev)
 {
+	const struct of_device_id *match;
 	struct device_node *np = ofdev->dev.of_node;
 	struct qe_ep *ep;
 	unsigned int ret = 0;
 	unsigned int i;
 	const void *prop;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(qe_udc_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
 
 	prop = of_get_property(np, "mode", NULL);
@@ -2561,7 +2564,7 @@ static int __devinit qe_udc_probe(struct platform_device *ofdev)
 		return -ENOMEM;
 	}
 
-	udc_controller->soc_type = (unsigned long)ofdev->dev.of_match->data;
+	udc_controller->soc_type = (unsigned long)match->data;
 	udc_controller->usb_regs = of_iomap(np, 0);
 	if (!udc_controller->usb_regs) {
 		ret = -ENOMEM;
diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c
index 528bceb220fd..eed5436ffb51 100644
--- a/drivers/watchdog/mpc8xxx_wdt.c
+++ b/drivers/watchdog/mpc8xxx_wdt.c
@@ -185,17 +185,20 @@ static struct miscdevice mpc8xxx_wdt_miscdev = {
 	.fops	= &mpc8xxx_wdt_fops,
 };
 
+static const struct of_device_id mpc8xxx_wdt_match[];
 static int __devinit mpc8xxx_wdt_probe(struct platform_device *ofdev)
 {
 	int ret;
+	const struct of_device_id *match;
 	struct device_node *np = ofdev->dev.of_node;
 	struct mpc8xxx_wdt_type *wdt_type;
 	u32 freq = fsl_get_sys_freq();
 	bool enabled;
 
-	if (!ofdev->dev.of_match)
+	match = of_match_device(mpc8xxx_wdt_match, &ofdev->dev);
+	if (!match)
 		return -EINVAL;
-	wdt_type = ofdev->dev.of_match->data;
+	wdt_type = match->data;
 
 	if (!freq || freq == -1)
 		return -EINVAL;
diff --git a/include/linux/device.h b/include/linux/device.h
index ab8dfc095709..d08399db6e2c 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -442,7 +442,6 @@ struct device {
 	struct dev_archdata	archdata;
 
 	struct device_node	*of_node; /* associated device tree node */
-	const struct of_device_id *of_match; /* matching of_device_id from driver */
 
 	dev_t			devt;	/* dev_t, creates the sysfs "dev" */
 
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index b33d68814a73..ae5638480ef2 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -21,12 +21,7 @@ extern void of_device_make_bus_id(struct device *dev);
 static inline int of_driver_match_device(struct device *dev,
 					 const struct device_driver *drv)
 {
-	const struct of_device_id *match;
-
-	match = of_match_device(drv->of_match_table, dev);
-	if (match)
-		dev->of_match = match;
-	return match != NULL;
+	return of_match_device(drv->of_match_table, dev) != NULL;
 }
 
 extern struct platform_device *of_dev_get(struct platform_device *dev);
@@ -62,6 +57,11 @@ static inline int of_device_uevent(struct device *dev,
 
 static inline void of_device_node_put(struct device *dev) { }
 
+static inline const struct of_device_id *of_match_device(
+		const struct of_device_id *matches, const struct device *dev)
+{
+	return NULL;
+}
 #endif /* CONFIG_OF_DEVICE */
 
 #endif /* _LINUX_OF_DEVICE_H */
-- 
cgit v1.2.3


From 69e3cea8d5fd52677f2b6219542d0f8b53fe4c80 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 19 May 2011 13:07:12 +1000
Subject: powerpc/smp: Make start_secondary_resume available to all CPU
 variants

This should fix SMP & Hotplug builds on FSL BookE and 476

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/head_32.S |  9 ---------
 arch/powerpc/kernel/misc_32.S | 11 +++++++++++
 2 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index c5c24beb8387..98c4b29a56f4 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -890,15 +890,6 @@ __secondary_start:
 	mtspr	SPRN_SRR1,r4
 	SYNC
 	RFI
-
-_GLOBAL(start_secondary_resume)
-	/* Reset stack */
-	rlwinm	r1,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
-	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
-	li	r3,0
-	std	r3,0(r1)		/* Zero the stack frame pointer	*/
-	bl	start_secondary
-	b	.
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_KVM_BOOK3S_HANDLER
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 094bd9821ad4..402560e957bd 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -694,6 +694,17 @@ _GLOBAL(kernel_thread)
 	addi	r1,r1,16
 	blr
 
+#ifdef CONFIG_SMP
+_GLOBAL(start_secondary_resume)
+	/* Reset stack */
+	rlwinm	r1,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+	li	r3,0
+	std	r3,0(r1)		/* Zero the stack frame pointer	*/
+	bl	start_secondary
+	b	.
+#endif /* CONFIG_SMP */
+	
 /*
  * This routine is just here to keep GCC happy - sigh...
  */
-- 
cgit v1.2.3


From 35d215fbe4f4d3569f2adabd8d53510a26ecb9c5 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sun, 24 Apr 2011 15:04:31 +0000
Subject: powerpc/kexec: Fix build failure on 32-bit SMP

Commit b987812b3fcaf70fdf0037589e5d2f5f2453e6ce left
crash_kexec_wait_realmode() undefined for UP.

Commit 7c7a81b53e581d727d069cc45df5510516faac31 defined it for UP but
left it undefined for 32-bit SMP.

Seems like people are getting confused by nested #ifdef's, so move the
definitions of crash_kexec_wait_realmode() after the #ifdef CONFIG_SMP
section.

Compile-tested with 32-bit UP, 32-bit SMP and 64-bit SMP configurations.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Tested-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/crash.c | 59 +++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 29 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 5b5e1f002a8e..5eb0ee37f6cd 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -162,34 +162,6 @@ static void crash_kexec_prepare_cpus(int cpu)
 	/* Leave the IPI callback set */
 }
 
-/* wait for all the CPUs to hit real mode but timeout if they don't come in */
-#ifdef CONFIG_PPC_STD_MMU_64
-static void crash_kexec_wait_realmode(int cpu)
-{
-	unsigned int msecs;
-	int i;
-
-	msecs = 10000;
-	for (i=0; i < NR_CPUS && msecs > 0; i++) {
-		if (i == cpu)
-			continue;
-
-		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
-			barrier();
-			if (!cpu_possible(i)) {
-				break;
-			}
-			if (!cpu_online(i)) {
-				break;
-			}
-			msecs--;
-			mdelay(1);
-		}
-	}
-	mb();
-}
-#endif	/* CONFIG_PPC_STD_MMU_64 */
-
 /*
  * This function will be called by secondary cpus or by kexec cpu
  * if soft-reset is activated to stop some CPUs.
@@ -234,7 +206,6 @@ void crash_kexec_secondary(struct pt_regs *regs)
 }
 
 #else	/* ! CONFIG_SMP */
-static inline void crash_kexec_wait_realmode(int cpu) {}
 
 static void crash_kexec_prepare_cpus(int cpu)
 {
@@ -257,6 +228,36 @@ void crash_kexec_secondary(struct pt_regs *regs)
 }
 #endif	/* CONFIG_SMP */
 
+/* wait for all the CPUs to hit real mode but timeout if they don't come in */
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64)
+static void crash_kexec_wait_realmode(int cpu)
+{
+	unsigned int msecs;
+	int i;
+
+	msecs = 10000;
+	for (i=0; i < NR_CPUS && msecs > 0; i++) {
+		if (i == cpu)
+			continue;
+
+		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+			barrier();
+			if (!cpu_possible(i)) {
+				break;
+			}
+			if (!cpu_online(i)) {
+				break;
+			}
+			msecs--;
+			mdelay(1);
+		}
+	}
+	mb();
+}
+#else
+static inline void crash_kexec_wait_realmode(int cpu) {}
+#endif	/* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */
+
 /*
  * Register a function to be called on shutdown.  Only use this if you
  * can't reset your device in the second kernel.
-- 
cgit v1.2.3


From c560bbceaf6b06e52f1ef20131b76a3fdc0a2c19 Mon Sep 17 00:00:00 2001
From: kerstin jonsson <kerstin.jonsson@ericsson.com>
Date: Tue, 17 May 2011 23:57:11 +0000
Subject: powerpc/4xx: Fix regression in SMP on 476

commit c56e58537d504706954a06570b4034c04e5b7500 breaks SMP support in PPC_47x chip.
 secondary_ti must be set to current thread info before callin kick_cpu or else
 start_secondary_47x will jump into void when trying to return to c-code.
 In the current setup secondary_ti is initialized before the CPU idle task is started
 and only the boot core will start. I am not sure this is the correct solution, but it
 makes SMP possible in my chip.
 Note! The HOTPLUG support probably need some fixing to, There is no trampoline code
 available in head_44x.S - start_secondary_resume?

Signed-off-by: Kerstin Jonsson <kerstin.jonsson@ericsson.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/smp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index cbdbb14be4b0..f2dcab7aadc8 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -410,8 +410,6 @@ int __cpuinit __cpu_up(unsigned int cpu)
 {
 	int rc, c;
 
-	secondary_ti = current_set[cpu];
-
 	if (smp_ops == NULL ||
 	    (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
 		return -EINVAL;
@@ -421,6 +419,8 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	if (rc)
 		return rc;
 
+	secondary_ti = current_set[cpu];
+
 	/* Make sure callin-map entry is 0 (can be leftover a CPU
 	 * hotplug
 	 */
-- 
cgit v1.2.3


From c4f56af0f64ea894363b428b6590f5073047b455 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Tue, 5 Apr 2011 04:58:50 +0000
Subject: powerpc: Call gzip with -n

The timestamps recorded in the .gz files add no value.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/boot/wrapper | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index dfa29cb0f475..c74531af72c0 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -258,7 +258,7 @@ if [ -z "$cacheit" -o ! -f "$vmz$gzip" -o "$vmz$gzip" -ot "$kernel" ]; then
     ${CROSS}objcopy $objflags "$kernel" "$vmz.$$"
 
     if [ -n "$gzip" ]; then
-        gzip -f -9 "$vmz.$$"
+        gzip -n -f -9 "$vmz.$$"
     fi
 
     if [ -n "$cacheit" ]; then
@@ -343,7 +343,7 @@ coff)
     $objbin/hack-coff "$ofile"
     ;;
 cuboot*)
-    gzip -f -9 "$ofile"
+    gzip -n -f -9 "$ofile"
     ${MKIMAGE} -A ppc -O linux -T kernel -C gzip -a "$base" -e "$entry" \
             $uboot_version -d "$ofile".gz "$ofile"
     ;;
@@ -390,6 +390,6 @@ ps3)
 
     odir="$(dirname "$ofile.bin")"
     rm -f "$odir/otheros.bld"
-    gzip --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
+    gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
     ;;
 esac
-- 
cgit v1.2.3


From 93395febdb51ed812ac1003852f537177a172aad Mon Sep 17 00:00:00 2001
From: Justin Mattock <justinmattock@gmail.com>
Date: Tue, 5 Apr 2011 06:58:22 +0000
Subject: powerpc: Remove unused config in the Makefile

The patch below removes an unused config variable found by using a kernel
cleanup script.
Note: I did try to cross compile these but hit erros while doing so..
(gcc is not setup to cross compile) and am unsure if anymore needs to be done.
Please have a look if/when anybody has free time.

Signed-off-by: Justin P. Mattock <justinmattock@gmail.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/Makefile | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 82e0bed0650d..9aab36312572 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -78,7 +78,6 @@ obj-$(CONFIG_PPC_FSL_BOOK3E)	+= cpu_setup_fsl_booke.o dbell.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= dbell.o
 
 extra-y				:= head_$(CONFIG_WORD_SIZE).o
-extra-$(CONFIG_PPC_BOOK3E_32)	:= head_new_booke.o
 extra-$(CONFIG_40x)		:= head_40x.o
 extra-$(CONFIG_44x)		:= head_44x.o
 extra-$(CONFIG_FSL_BOOKE)	:= head_fsl_booke.o
-- 
cgit v1.2.3


From 31355403dba5aa7a700c69f91cd6266092932701 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Thu, 5 May 2011 05:22:55 +0000
Subject: powerpc: Use the deterministic mode of ar

Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/boot/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 0e2a152c3aa5..c26200b40a47 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -128,7 +128,7 @@ quiet_cmd_bootas = BOOTAS  $@
       cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
 
 quiet_cmd_bootar = BOOTAR  $@
-      cmd_bootar = $(CROSS32AR) -cr $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
+      cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
 
 $(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE
 	$(call if_changed_dep,bootcc)
-- 
cgit v1.2.3


From 767303349e052ae0cb9e6495a70870da3459eeb6 Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Fri, 6 May 2011 13:27:30 +0000
Subject: powerpc: Fix kexec with dynamic dma windows

When we kexec we look for a particular property added by the first
kernel, "linux,direct64-ddr-window-info", per-device where we already
have set up dynamic dma windows. The current code, though, wasn't
initializing the size of this property and thus when we kexec'd, we
would find the property but read uninitialized memory resulting in
garbage ddw values for the kexec'd kernel and panics. Fix this by
setting the size at enable_ddw() time and ensuring that the size of the
found property is valid at dupe_ddw_if_kexec() time.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/iommu.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 6d5412a18b26..019009b10e62 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -730,16 +730,20 @@ static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn)
 	pcidn = PCI_DN(dn);
 	direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
 	if (direct64) {
-		window = kzalloc(sizeof(*window), GFP_KERNEL);
-		if (!window) {
+		if (len < sizeof(struct dynamic_dma_window_prop)) {
 			remove_ddw(pdn);
 		} else {
-			window->device = pdn;
-			window->prop = direct64;
-			spin_lock(&direct_window_list_lock);
-			list_add(&window->list, &direct_window_list);
-			spin_unlock(&direct_window_list_lock);
-			dma_addr = direct64->dma_base;
+			window = kzalloc(sizeof(*window), GFP_KERNEL);
+			if (!window) {
+				remove_ddw(pdn);
+			} else {
+				window->device = pdn;
+				window->prop = direct64;
+				spin_lock(&direct_window_list_lock);
+				list_add(&window->list, &direct_window_list);
+				spin_unlock(&direct_window_list_lock);
+				dma_addr = direct64->dma_base;
+			}
 		}
 	}
 
@@ -833,7 +837,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	struct device_node *dn;
 	const u32 *uninitialized_var(ddr_avail);
 	struct direct_window *window;
-	struct property *uninitialized_var(win64);
+	struct property *win64;
 	struct dynamic_dma_window_prop *ddwprop;
 
 	mutex_lock(&direct_window_init_mutex);
@@ -907,6 +911,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	}
 	win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
 	win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
+	win64->length = sizeof(*ddwprop);
 	if (!win64->name || !win64->value) {
 		dev_info(&dev->dev,
 			"couldn't allocate property name and value\n");
-- 
cgit v1.2.3


From af442a1baa6d00117cc7e7377ce7e6a545268684 Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Wed, 4 May 2011 12:54:16 +0000
Subject: powerpc: Ensure dtl buffers do not cross 4k boundary

Future releases of fimrware will enforce a requirement that DTL buffers
do not cross a 4k boundary. Commit
127493d5dc73589cbe00ea5ec8357cc2a4c0d82a satisfies this requirement for
CONFIG_VIRT_CPU_ACCOUNTING=y kernels, but if !CONFIG_VIRT_CPU_ACCOUNTING
&& CONFIG_DTL=y, the current code will fail at dtl registration time.
Fix this by making the kmem cache from
127493d5dc73589cbe00ea5ec8357cc2a4c0d82a visible outside of setup.c and
using the same cache in both dtl.c and setup.c. This requires a bit of
reorganization to ensure ordering of the kmem cache and buffer
allocations.

Note: Since firmware now limits the size of the buffer, I made
dtl_buf_entries read-only in debugfs.

Tested with upcoming firmware with the 4 combinations of
CONFIG_VIRT_CPU_ACCOUNTING and CONFIG_DTL.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/lppaca.h      |  2 ++
 arch/powerpc/platforms/pseries/dtl.c   | 20 +++++++++++---------
 arch/powerpc/platforms/pseries/setup.c | 31 ++++++++++++++++++++++---------
 3 files changed, 35 insertions(+), 18 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index a077adc0b35e..e0298d26ce5d 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -210,6 +210,8 @@ struct dtl_entry {
 #define DISPATCH_LOG_BYTES	4096	/* bytes per cpu */
 #define N_DISPATCH_LOG		(DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
 
+extern struct kmem_cache *dtl_cache;
+
 /*
  * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls
  * reading from the dispatch trace log.  If other code wants to consume
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index c371bc06434b..e9190073bb97 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -52,10 +52,10 @@ static u8 dtl_event_mask = 0x7;
 
 
 /*
- * Size of per-cpu log buffers. Default is just under 16 pages worth.
+ * Size of per-cpu log buffers. Firmware requires that the buffer does
+ * not cross a 4k boundary.
  */
-static int dtl_buf_entries = (16 * 85);
-
+static int dtl_buf_entries = N_DISPATCH_LOG;
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 struct dtl_ring {
@@ -151,7 +151,7 @@ static int dtl_start(struct dtl *dtl)
 
 	/* Register our dtl buffer with the hypervisor. The HV expects the
 	 * buffer size to be passed in the second word of the buffer */
-	((u32 *)dtl->buf)[1] = dtl->buf_entries * sizeof(struct dtl_entry);
+	((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES;
 
 	hwcpu = get_hard_smp_processor_id(dtl->cpu);
 	addr = __pa(dtl->buf);
@@ -196,13 +196,15 @@ static int dtl_enable(struct dtl *dtl)
 	long int rc;
 	struct dtl_entry *buf = NULL;
 
+	if (!dtl_cache)
+		return -ENOMEM;
+
 	/* only allow one reader */
 	if (dtl->buf)
 		return -EBUSY;
 
 	n_entries = dtl_buf_entries;
-	buf = kmalloc_node(n_entries * sizeof(struct dtl_entry),
-			GFP_KERNEL, cpu_to_node(dtl->cpu));
+	buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu));
 	if (!buf) {
 		printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n",
 				__func__, dtl->cpu);
@@ -223,7 +225,7 @@ static int dtl_enable(struct dtl *dtl)
 	spin_unlock(&dtl->lock);
 
 	if (rc)
-		kfree(buf);
+		kmem_cache_free(dtl_cache, buf);
 	return rc;
 }
 
@@ -231,7 +233,7 @@ static void dtl_disable(struct dtl *dtl)
 {
 	spin_lock(&dtl->lock);
 	dtl_stop(dtl);
-	kfree(dtl->buf);
+	kmem_cache_free(dtl_cache, dtl->buf);
 	dtl->buf = NULL;
 	dtl->buf_entries = 0;
 	spin_unlock(&dtl->lock);
@@ -365,7 +367,7 @@ static int dtl_init(void)
 
 	event_mask_file = debugfs_create_x8("dtl_event_mask", 0600,
 				dtl_dir, &dtl_event_mask);
-	buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0600,
+	buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400,
 				dtl_dir, &dtl_buf_entries);
 
 	if (!event_mask_file || !buf_entries_file) {
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 1689adccc6d7..593acceeff96 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -278,6 +278,8 @@ static struct notifier_block pci_dn_reconfig_nb = {
 	.notifier_call = pci_dn_reconfig_notifier,
 };
 
+struct kmem_cache *dtl_cache;
+
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 /*
  * Allocate space for the dispatch trace log for all possible cpus
@@ -289,18 +291,12 @@ static int alloc_dispatch_logs(void)
 	int cpu, ret;
 	struct paca_struct *pp;
 	struct dtl_entry *dtl;
-	struct kmem_cache *dtl_cache;
 
 	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
 		return 0;
 
-	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
-						DISPATCH_LOG_BYTES, 0, NULL);
-	if (!dtl_cache) {
-		pr_warn("Failed to create dispatch trace log buffer cache\n");
-		pr_warn("Stolen time statistics will be unreliable\n");
+	if (!dtl_cache)
 		return 0;
-	}
 
 	for_each_possible_cpu(cpu) {
 		pp = &paca[cpu];
@@ -334,10 +330,27 @@ static int alloc_dispatch_logs(void)
 
 	return 0;
 }
-
-early_initcall(alloc_dispatch_logs);
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+static inline int alloc_dispatch_logs(void)
+{
+	return 0;
+}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
+static int alloc_dispatch_log_kmem_cache(void)
+{
+	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
+						DISPATCH_LOG_BYTES, 0, NULL);
+	if (!dtl_cache) {
+		pr_warn("Failed to create dispatch trace log buffer cache\n");
+		pr_warn("Stolen time statistics will be unreliable\n");
+		return 0;
+	}
+
+	return alloc_dispatch_logs();
+}
+early_initcall(alloc_dispatch_log_kmem_cache);
+
 static void __init pSeries_setup_arch(void)
 {
 	/* Discover PIC type and setup ppc_md accordingly */
-- 
cgit v1.2.3


From 2a2c29c1a581319f4485af55e8d628d89e8f2583 Mon Sep 17 00:00:00 2001
From: Stratos Psomadakis <psomas@cslab.ece.ntua.gr>
Date: Sat, 7 May 2011 04:11:31 +0000
Subject: powerpc/mm: Fix compiler warning in pgtable-ppc64.h
 [-Wunused-but-set-variable]

The variable 'old' is set but not used in the wrprotect functions in
arch/powerpc/include/asm/pgtable-ppc64.h, which can trigger a compiler warning.

Remove the variable, since it's not used anyway.

Signed-off-by: Stratos Psomadakis <psomas@ece.ntua.gr>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/pgtable-ppc64.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 2b09cd522d33..81576ee0cfb1 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -257,21 +257,20 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
 {
-	unsigned long old;
 
-       	if ((pte_val(*ptep) & _PAGE_RW) == 0)
-       		return;
-	old = pte_update(mm, addr, ptep, _PAGE_RW, 0);
+	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_RW, 0);
 }
 
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
-	unsigned long old;
-
 	if ((pte_val(*ptep) & _PAGE_RW) == 0)
 		return;
-	old = pte_update(mm, addr, ptep, _PAGE_RW, 1);
+
+	pte_update(mm, addr, ptep, _PAGE_RW, 1);
 }
 
 /*
-- 
cgit v1.2.3


From 32218bdd31fc9367c0babd6f7d309c6856a5c7da Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 8 May 2011 13:18:27 +0000
Subject: powerpc/pseries: Enable Emulex and Qlogic 10Gbit cards

Enable the Qlogic and Emulex 10Gbit adapters.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/configs/pseries_defconfig | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 249ddd0a27cd..e4abe276c124 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -197,6 +197,8 @@ CONFIG_S2IO=m
 CONFIG_MYRI10GE=m
 CONFIG_NETXEN_NIC=m
 CONFIG_MLX4_EN=m
+CONFIG_QLGE=m
+CONFIG_BE2NET=m
 CONFIG_PPP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
-- 
cgit v1.2.3


From 37e0c21e9b5b6d6fd38a444762076c84c6170598 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 8 May 2011 13:19:30 +0000
Subject: powerpc/pseries: Enable iSCSI support for a number of cards

Enable iSCSI support for a number of cards. We had the base
networking devices enabled but forgot to enable iSCSI.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/configs/pseries_defconfig | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index e4abe276c124..7de13865508c 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -146,12 +146,18 @@ CONFIG_SCSI_MULTI_LUN=y
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_FC_ATTRS=y
 CONFIG_SCSI_SAS_ATTRS=m
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_BE2ISCSI=m
 CONFIG_SCSI_IBMVSCSI=y
 CONFIG_SCSI_IBMVFC=m
 CONFIG_SCSI_SYM53C8XX_2=y
 CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
 CONFIG_SCSI_IPR=y
 CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
 CONFIG_SCSI_LPFC=m
 CONFIG_ATA=y
 # CONFIG_ATA_SFF is not set
-- 
cgit v1.2.3


From d988f0e3f84cb8a4f85ccdbca6f6fefcc37bedcb Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 8 May 2011 21:18:38 +0000
Subject: powerpc: Simplify 4k/64k copy_page logic

To make it easier to add optimised versions of copy_page, remove
the 4kB loop for 64kB pages and just do all the work in copy_page.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/page_64.h | 19 +------------------
 arch/powerpc/kernel/ppc_ksyms.c    |  5 +----
 arch/powerpc/lib/copypage_64.S     |  7 ++++---
 3 files changed, 6 insertions(+), 25 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 488c52eb64cb..9356262fd3cc 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -59,24 +59,7 @@ static __inline__ void clear_page(void *addr)
 	: "ctr", "memory");
 }
 
-extern void copy_4K_page(void *to, void *from);
-
-#ifdef CONFIG_PPC_64K_PAGES
-static inline void copy_page(void *to, void *from)
-{
-	unsigned int i;
-	for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) {
-		copy_4K_page(to, from);
-		to += 4096;
-		from += 4096;
-	}
-}
-#else /* CONFIG_PPC_64K_PAGES */
-static inline void copy_page(void *to, void *from)
-{
-	copy_4K_page(to, from);
-}
-#endif /* CONFIG_PPC_64K_PAGES */
+extern void copy_page(void *to, void *from);
 
 /* Log 2 of page table size */
 extern u64 ppc64_pft_size;
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index ef3ef566235e..7d28f540200c 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -54,7 +54,6 @@ extern void single_step_exception(struct pt_regs *regs);
 extern int sys_sigreturn(struct pt_regs *regs);
 
 EXPORT_SYMBOL(clear_pages);
-EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
 EXPORT_SYMBOL(DMA_MODE_READ);
 EXPORT_SYMBOL(DMA_MODE_WRITE);
@@ -88,9 +87,7 @@ EXPORT_SYMBOL(__copy_tofrom_user);
 EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__strnlen_user);
-#ifdef CONFIG_PPC64
-EXPORT_SYMBOL(copy_4K_page);
-#endif
+EXPORT_SYMBOL(copy_page);
 
 #if defined(CONFIG_PCI) && defined(CONFIG_PPC32)
 EXPORT_SYMBOL(isa_io_base);
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 4d4eeb900486..53dcb6b1b708 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -6,6 +6,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#include <asm/page.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
@@ -15,9 +16,9 @@ PPC64_CACHES:
         .tc             ppc64_caches[TC],ppc64_caches
         .section        ".text"
 
-
-_GLOBAL(copy_4K_page)
-	li	r5,4096		/* 4K page size */
+_GLOBAL(copy_page)
+	lis	r5,PAGE_SIZE@h
+	ori	r5,r5,PAGE_SIZE@l
 BEGIN_FTR_SECTION
 	ld      r10,PPC64_CACHES@toc(r2)
 	lwz	r11,DCACHEL1LOGLINESIZE(r10)	/* log2 of cache line size */
-- 
cgit v1.2.3


From ba00ce1d6e08ad06f19f2ac53fd5c60bbe3fbeeb Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 8 May 2011 21:20:19 +0000
Subject: powerpc: Remove static branch hint in giveup_altivec

A static branch hint will override dynamic branch prediction on
recent POWER CPUs. Since we are about to use more altivec in the
kernel remove the static hint in giveup_altivec that assumes
a userspace task is using altivec.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/vector.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 9de6f396cf85..4d5a3edff49e 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -102,7 +102,7 @@ _GLOBAL(giveup_altivec)
 	MTMSRD(r5)			/* enable use of VMX now */
 	isync
 	PPC_LCMPI	0,r3,0
-	beqlr-				/* if no previous owner, done */
+	beqlr				/* if no previous owner, done */
 	addi	r3,r3,THREAD		/* want THREAD of task */
 	PPC_LL	r5,PT_REGS(r3)
 	PPC_LCMPI	0,r5,0
-- 
cgit v1.2.3


From f5f0307f42d39a51a925ca4841f76a2f2ea330ff Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 8 May 2011 21:36:44 +0000
Subject: powerpc: Improve scheduling of system call entry instructions

After looking at our system call path, Mary Brown suggested that we
should put all mfspr SRR* instructions before any mtspr SRR*.

To test this I used a very simple null syscall (actually getppid)
testcase at http://ozlabs.org/~anton/junkcode/null_syscall.c

I tested with the following changes against the pseries_defconfig:

CONFIG_VIRT_CPU_ACCOUNTING=n
CONFIG_AUDIT=n

to remove the overhead of virtual CPU accounting and syscall
auditing.

POWER6:
baseline:       mean = 757.2 cycles       sd = 2.108
modified:       mean = 759.1 cycles       sd = 2.020

POWER7:
baseline:       mean = 411.4 cycles       sd = 0.138
modified:       mean = 404.1 cycles       sd = 0.109

So we have 1.77% improvement on POWER7 which looks significant. The
POWER6 suggest a 0.25% slowdown, but the results are within 1
standard deviation and may be in the noise.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/exceptions-64s.S | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 0ec3b42717d7..a85f4874cba7 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -211,11 +211,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 	mr	r9,r13
 	GET_PACA(r13)
 	mfspr	r11,SPRN_SRR0
-	ld	r12,PACAKBASE(r13)
-	ld	r10,PACAKMSR(r13)
-	LOAD_HANDLER(r12, system_call_entry)
-	mtspr	SPRN_SRR0,r12
 	mfspr	r12,SPRN_SRR1
+	ld	r10,PACAKBASE(r13)
+	LOAD_HANDLER(r10, system_call_entry)
+	mtspr	SPRN_SRR0,r10
+	ld	r10,PACAKMSR(r13)
 	mtspr	SPRN_SRR1,r10
 	rfid
 	b	.	/* prevent speculative execution */
-- 
cgit v1.2.3


From be135f40899cb3334faa7d2e27025055da311ec4 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 8 May 2011 21:41:59 +0000
Subject: powerpc: Add ioremap_wc

Add ioremap_wc so drivers can request write combining on kernel
mappings.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/io.h |  5 +++++
 arch/powerpc/mm/pgtable_32.c  |  8 ++++++++
 arch/powerpc/mm/pgtable_64.c  | 11 +++++++++++
 3 files changed, 24 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 2f365f5007a0..662d2edae768 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -2,6 +2,8 @@
 #define _ASM_POWERPC_IO_H
 #ifdef __KERNEL__
 
+#define ARCH_HAS_IOREMAP_WC
+
 /*
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -628,6 +630,8 @@ static inline void iosync(void)
  *
  * * ioremap_nocache is identical to ioremap
  *
+ * * ioremap_wc enables write combining
+ *
  * * iounmap undoes such a mapping and can be hooked
  *
  * * __ioremap_at (and the pending __iounmap_at) are low level functions to
@@ -648,6 +652,7 @@ static inline void iosync(void)
 extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
 extern void __iomem *ioremap_flags(phys_addr_t address, unsigned long size,
 				   unsigned long flags);
+extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
 #define ioremap_nocache(addr, size)	ioremap((addr), (size))
 #define ioremap_prot(addr, size, prot)	ioremap_flags((addr), (size), (prot))
 
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 8dc41c0157fe..fca98f8fb600 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -132,6 +132,14 @@ ioremap(phys_addr_t addr, unsigned long size)
 }
 EXPORT_SYMBOL(ioremap);
 
+void __iomem *
+ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+	return __ioremap_caller(addr, size, _PAGE_NO_CACHE,
+				__builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wc);
+
 void __iomem *
 ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
 {
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 88927a05cdc2..1146fc6e8921 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -255,6 +255,16 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size)
 	return __ioremap_caller(addr, size, flags, caller);
 }
 
+void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+	unsigned long flags = _PAGE_NO_CACHE;
+	void *caller = __builtin_return_address(0);
+
+	if (ppc_md.ioremap)
+		return ppc_md.ioremap(addr, size, flags, caller);
+	return __ioremap_caller(addr, size, flags, caller);
+}
+
 void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
 			     unsigned long flags)
 {
@@ -311,6 +321,7 @@ void iounmap(volatile void __iomem *token)
 }
 
 EXPORT_SYMBOL(ioremap);
+EXPORT_SYMBOL(ioremap_wc);
 EXPORT_SYMBOL(ioremap_flags);
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(__ioremap_at);
-- 
cgit v1.2.3


From 40f1ce7fb7e8b5d4d0821c0f3dc866cb1d47d99c Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 8 May 2011 21:43:47 +0000
Subject: powerpc: Remove ioremap_flags

We have a confusing number of ioremap functions. Make things just a
bit simpler by merging ioremap_flags and ioremap_prot.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/io.h             | 12 +++++-------
 arch/powerpc/lib/devres.c                 |  6 +++---
 arch/powerpc/mm/pgtable_32.c              |  4 ++--
 arch/powerpc/mm/pgtable_64.c              |  4 ++--
 arch/powerpc/platforms/ps3/spu.c          |  4 ++--
 arch/powerpc/sysdev/axonram.c             |  2 +-
 arch/powerpc/sysdev/fsl_85xx_cache_sram.c |  4 ++--
 7 files changed, 17 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 662d2edae768..45698d55cd6a 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -624,9 +624,8 @@ static inline void iosync(void)
  * * ioremap is the standard one and provides non-cacheable guarded mappings
  *   and can be hooked by the platform via ppc_md
  *
- * * ioremap_flags allows to specify the page flags as an argument and can
- *   also be hooked by the platform via ppc_md. ioremap_prot is the exact
- *   same thing as ioremap_flags.
+ * * ioremap_prot allows to specify the page flags as an argument and can
+ *   also be hooked by the platform via ppc_md.
  *
  * * ioremap_nocache is identical to ioremap
  *
@@ -639,7 +638,7 @@ static inline void iosync(void)
  *   currently be hooked. Must be page aligned.
  *
  * * __ioremap is the low level implementation used by ioremap and
- *   ioremap_flags and cannot be hooked (but can be used by a hook on one
+ *   ioremap_prot and cannot be hooked (but can be used by a hook on one
  *   of the previous ones)
  *
  * * __ioremap_caller is the same as above but takes an explicit caller
@@ -650,11 +649,10 @@ static inline void iosync(void)
  *
  */
 extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
-extern void __iomem *ioremap_flags(phys_addr_t address, unsigned long size,
-				   unsigned long flags);
+extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
+				  unsigned long flags);
 extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
 #define ioremap_nocache(addr, size)	ioremap((addr), (size))
-#define ioremap_prot(addr, size, prot)	ioremap_flags((addr), (size), (prot))
 
 extern void iounmap(volatile void __iomem *addr);
 
diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c
index deac4d30daf4..e91615abae66 100644
--- a/arch/powerpc/lib/devres.c
+++ b/arch/powerpc/lib/devres.c
@@ -9,11 +9,11 @@
 
 #include <linux/device.h>	/* devres_*(), devm_ioremap_release() */
 #include <linux/gfp.h>
-#include <linux/io.h>		/* ioremap_flags() */
+#include <linux/io.h>		/* ioremap_prot() */
 #include <linux/module.h>	/* EXPORT_SYMBOL() */
 
 /**
- * devm_ioremap_prot - Managed ioremap_flags()
+ * devm_ioremap_prot - Managed ioremap_prot()
  * @dev: Generic device to remap IO address for
  * @offset: BUS offset to map
  * @size: Size of map
@@ -31,7 +31,7 @@ void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
 	if (!ptr)
 		return NULL;
 
-	addr = ioremap_flags(offset, size, flags);
+	addr = ioremap_prot(offset, size, flags);
 	if (addr) {
 		*ptr = addr;
 		devres_add(dev, ptr);
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index fca98f8fb600..51f87956f8f8 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -141,7 +141,7 @@ ioremap_wc(phys_addr_t addr, unsigned long size)
 EXPORT_SYMBOL(ioremap_wc);
 
 void __iomem *
-ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
+ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
 {
 	/* writeable implies dirty for kernel addresses */
 	if (flags & _PAGE_RW)
@@ -160,7 +160,7 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
 
 	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
 }
-EXPORT_SYMBOL(ioremap_flags);
+EXPORT_SYMBOL(ioremap_prot);
 
 void __iomem *
 __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 1146fc6e8921..6e595f6496d4 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -265,7 +265,7 @@ void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
 	return __ioremap_caller(addr, size, flags, caller);
 }
 
-void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
+void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
 			     unsigned long flags)
 {
 	void *caller = __builtin_return_address(0);
@@ -322,7 +322,7 @@ void iounmap(volatile void __iomem *token)
 
 EXPORT_SYMBOL(ioremap);
 EXPORT_SYMBOL(ioremap_wc);
-EXPORT_SYMBOL(ioremap_flags);
+EXPORT_SYMBOL(ioremap_prot);
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(__ioremap_at);
 EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
index 39a472e9e80f..375a9f92158d 100644
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -197,7 +197,7 @@ static void spu_unmap(struct spu *spu)
  * The current HV requires the spu shadow regs to be mapped with the
  * PTE page protection bits set as read-only (PP=3).  This implementation
  * uses the low level __ioremap() to bypass the page protection settings
- * inforced by ioremap_flags() to get the needed PTE bits set for the
+ * inforced by ioremap_prot() to get the needed PTE bits set for the
  * shadow regs.
  */
 
@@ -214,7 +214,7 @@ static int __init setup_areas(struct spu *spu)
 		goto fail_ioremap;
 	}
 
-	spu->local_store = (__force void *)ioremap_flags(spu->local_store_phys,
+	spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys,
 		LS_SIZE, _PAGE_NO_CACHE);
 
 	if (!spu->local_store) {
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 1636dd896707..bd0d54060b94 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -216,7 +216,7 @@ static int axon_ram_probe(struct platform_device *device)
 			AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
 
 	bank->ph_addr = resource.start;
-	bank->io_addr = (unsigned long) ioremap_flags(
+	bank->io_addr = (unsigned long) ioremap_prot(
 			bank->ph_addr, bank->size, _PAGE_NO_CACHE);
 	if (bank->io_addr == 0) {
 		dev_err(&device->dev, "ioremap() failed\n");
diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
index 54fb1922fe30..116415899176 100644
--- a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
+++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
@@ -106,10 +106,10 @@ int __init instantiate_cache_sram(struct platform_device *dev,
 		goto out_free;
 	}
 
-	cache_sram->base_virt = ioremap_flags(cache_sram->base_phys,
+	cache_sram->base_virt = ioremap_prot(cache_sram->base_phys,
 				cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL);
 	if (!cache_sram->base_virt) {
-		dev_err(&dev->dev, "%s: ioremap_flags failed\n",
+		dev_err(&dev->dev, "%s: ioremap_prot failed\n",
 				dev->dev.of_node->full_name);
 		ret = -ENOMEM;
 		goto out_release;
-- 
cgit v1.2.3


From eb0dd411bd90dd5ad3f1936930d3e83d9ef95561 Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Mon, 9 May 2011 12:58:03 +0000
Subject: pseries/iommu: Restore iommu table pointer when restoring iommu ops

When we swtich to direct dma ops, we set the dma data union to have the
dma offset.  When we switch back to iommu table ops because of a later
dma_set_mask, we need to restore the iommu table pointer. Without this
change, crashes have been observed on kexec where (for reasons still
being investigated) we fall back to a 32-bit dma mask on a particular
device and then panic because the table pointer is not valid.

The easiset way to find this value is to call
pci_dma_dev_setup_pSeriesLP which will search up the pci tree until it
finds the node with the table.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Milton Miller <miltonm@bga.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/iommu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 019009b10e62..48eec3b87026 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -1029,10 +1029,10 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
 	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
 		return -EIO;
 
+	pdev = to_pci_dev(dev);
+
 	/* only attempt to use a new window if 64-bit DMA is requested */
 	if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
-		pdev = to_pci_dev(dev);
-
 		dn = pci_device_to_OF_node(pdev);
 		dev_dbg(dev, "node is %s\n", dn->full_name);
 
@@ -1063,6 +1063,7 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
 	if (!ddw_enabled) {
 		dev_info(dev, "Using 32-bit DMA via iommu\n");
 		set_dma_ops(dev, &dma_iommu_ops);
+		pci_dma_dev_setup_pSeriesLP(pdev);
 	}
 
 	*dev->dma_mask = dma_mask;
-- 
cgit v1.2.3


From f0e939ae373836400b38e090d78fba6a183976f0 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 10 May 2011 13:34:03 +0000
Subject: powerpc/pseries: Print corrupt r3 in FWNMI code

I have a report of an FWNMI with an r3 value that we think is
corrupt, but since we don't print r3 we have no idea what was
wrong with it.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/ras.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 164a8eb45923..086d2ae4e06a 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -227,7 +227,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 	struct rtas_error_log *h, *errhdr = NULL;
 
 	if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
-		printk(KERN_ERR "FWNMI: corrupt r3\n");
+		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
 		return NULL;
 	}
 
-- 
cgit v1.2.3


From 3d2cea732d68aa270c360f55d8669820ebce188a Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:28:33 +0000
Subject: powerpc/kexec: Fix memory corruption from unallocated slaves

Commit 1fc711f7ffb01089efc58042cfdbac8573d1b59a (powerpc/kexec: Fix race
in kexec shutdown) moved the write to signal the cpu had exited the kernel
from before the transition to real mode in kexec_smp_wait to kexec_wait.

Unfornately it missed that kexec_wait is used both by cpus leaving the
kernel and by secondary slave cpus that were not allocated a paca for
what ever reason -- they could be beyond nr_cpus or not described in
the current device tree for whatever reason (for example, kexec-load
was not refreshed after a cpu hotplug operation).  Cpus coming through
that path they will write to paca[NR_CPUS] which is beyond the space
allocated for the paca data and overwrite memory not allocated to pacas
but very likely still real mode accessable).

Move the write back to kexec_smp_wait, which is used only by cpus that
found their paca, but after the transition to real mode.

Signed-off-by: Milton Miller <miltonm@bga.com>
Cc: <stable@kernel.org> # (1fc711f was backported to 2.6.32)
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/misc_64.S | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 206a321a71d3..e89df59cdc5a 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -462,7 +462,8 @@ _GLOBAL(disable_kernel_fp)
  * wait for the flag to change, indicating this kernel is going away but
  * the slave code for the next one is at addresses 0 to 100.
  *
- * This is used by all slaves.
+ * This is used by all slaves, even those that did not find a matching
+ * paca in the secondary startup code.
  *
  * Physical (hardware) cpu id should be in r3.
  */
@@ -471,10 +472,6 @@ _GLOBAL(kexec_wait)
 1:	mflr	r5
 	addi	r5,r5,kexec_flag-1b
 
-	li	r4,KEXEC_STATE_REAL_MODE
-	stb	r4,PACAKEXECSTATE(r13)
-	SYNC
-
 99:	HMT_LOW
 #ifdef CONFIG_KEXEC		/* use no memory without kexec */
 	lwz	r4,0(r5)
@@ -499,11 +496,17 @@ kexec_flag:
  *
  * get phys id from paca
  * switch to real mode
+ * mark the paca as no longer used
  * join other cpus in kexec_wait(phys_id)
  */
 _GLOBAL(kexec_smp_wait)
 	lhz	r3,PACAHWCPUID(r13)
 	bl	real_mode
+
+	li	r4,KEXEC_STATE_REAL_MODE
+	stb	r4,PACAKEXECSTATE(r13)
+	SYNC
+
 	b	.kexec_wait
 
 /*
-- 
cgit v1.2.3


From 768d18ad6d5e600d911f9499ca287d6986d8d81b Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:28:37 +0000
Subject: powerpc: Don't search for paca in freed memory

Starting with 1426d5a3bd07589534286375998c0c8c6fdc5260 (powerpc:
Dynamically allocate pacas) we free the memory for pacas beyond
cpu_possible, but we failed to update the loop the secondary cpus use
to find their paca.  If the system has running cpu threads for which
the kernel did not allocate a paca for they will search the memory that
was freed.  For instance this could happen when the device tree for
a kdump kernel was not updated after a cpu hotplug, or the kernel is
running with more cpus than the kernel was configured.

Since c1854e00727f50f7ac99e98d26ece04c087ef785 (powerpc: Set nr_cpu_ids
early and use it to free PACAs) we set nr_cpu_ids before telling the
cpus to advance, so use that to limit the search.

We can't reference nr_cpu_ids without CONFIG_SMP because it is defined
as 1 instead of a memory location, but any extra threads should be sent
to kexec_wait in that case anyways, so make that explicit and remove
the search loop for UP.

Note to stable: The fix also requires
c1854e00727f50f7ac99e98d26ece04c087ef785 (powerpc: Set
nr_cpu_ids early and use it to free PACAs) to function.  Also
9d07bc841c9779b4d7902e417f4e509996ce805d (Properly handshake CPUs going
out of boot spin loop) affects the second chunk, specifically the branch
target was 3b before and is 4b after that patch, and there was a blank
line before the #ifdef CONFIG_SMP that was removed

Cc: <stable@kernel.org> # .34.x: c1854e0072 powerpc: Set nr_cpu_ids early
Cc: <stable@kernel.org> # .34.x
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/head_64.S | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 73d6e9afcdf1..ba504099844a 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -218,13 +218,19 @@ generic_secondary_common_init:
 	 */
 	LOAD_REG_ADDR(r13, paca)	/* Load paca pointer		 */
 	ld	r13,0(r13)		/* Get base vaddr of paca array	 */
+#ifndef CONFIG_SMP
+	addi	r13,r13,PACA_SIZE	/* know r13 if used accidentally */
+	b	.kexec_wait		/* wait for next kernel if !SMP	 */
+#else
+	LOAD_REG_ADDR(r7, nr_cpu_ids)	/* Load nr_cpu_ids address       */
+	lwz	r7,0(r7)		/* also the max paca allocated 	 */
 	li	r5,0			/* logical cpu id                */
 1:	lhz	r6,PACAHWCPUID(r13)	/* Load HW procid from paca      */
 	cmpw	r6,r24			/* Compare to our id             */
 	beq	2f
 	addi	r13,r13,PACA_SIZE	/* Loop to next PACA on miss     */
 	addi	r5,r5,1
-	cmpwi	r5,NR_CPUS
+	cmpw	r5,r7			/* Check if more pacas exist     */
 	blt	1b
 
 	mr	r3,r24			/* not found, copy phys to r3	 */
@@ -259,9 +265,6 @@ generic_secondary_common_init:
 4:	HMT_LOW
 	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
 					/* start.			 */
-#ifndef CONFIG_SMP
-	b	4b			/* Never go on non-SMP		 */
-#else
 	cmpwi	0,r23,0
 	beq	4b			/* Loop until told to go	 */
 
@@ -273,7 +276,7 @@ generic_secondary_common_init:
 	subi	r1,r1,STACK_FRAME_OVERHEAD
 
 	b	__secondary_start
-#endif
+#endif /* SMP */
 
 /*
  * Turn the MMU off.
-- 
cgit v1.2.3


From bd9e5eefecb3d69018bb95796298019d309cbec8 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:28:41 +0000
Subject: powerpc/kdump64: Don't reference freed memory as pacas

Starting with 1426d5a3bd07589534286375998c0c8c6fdc5260 (powerpc:
Dynamically allocate pacas) the space for pacas beyond cpu_possible
is freed, but we failed to update the loop in crash.c.

Since c1854e00727f50f7ac99e98d26ece04c087ef785 (powerpc: Set nr_cpu_ids
early and use it to free PACAs) the number of pacas allocated is
always nr_cpu_ids.

Signed-off-by: Milton Miller <miltonm@bga.com>
Cc: <stable@kernel.org> # .34.x
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/crash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 21f2c781ded1..4e6ee944495a 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -236,7 +236,7 @@ static void crash_kexec_wait_realmode(int cpu)
 	int i;
 
 	msecs = 10000;
-	for (i=0; i < NR_CPUS && msecs > 0; i++) {
+	for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
 		if (i == cpu)
 			continue;
 
-- 
cgit v1.2.3


From 7c82733744a74f45e86125f369e876b896765038 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:28:44 +0000
Subject: powerpc/iseries: Cleanup and fix secondary startup

9cb82f2f4692293a27c578c3038518ce4477de72 (Make iSeries spin on
__secondary_hold_spinloop, like pSeries) added a load of current_set
but this load was repeated later and we don't even have the paca yet.
It also checked __secondary_hold_spinloop with a 32 bit compare instead
of a 64 bit compare.

b6f6b98a4e91fcf31db7de54c3aa86252fc6fb5f (Don't spin on sync instruction
at boot time) missed the copy of the startup code in iseries.

1426d5a3bd07589534286375998c0c8c6fdc5260 (Dynamically allocate pacas)
doesn't allow for pacas to be less than lppacas and recalculated the paca
location from the cpu id in r0 every time through the secondary loop.

Various revisions over time made the comments on conditional branches
confusing with respect to being a hold loop or forward progress

Mostly in-order description of the changes:

Replicate the few lines of code saved by the ugly scoped ifdef CONFIG_SMP
in the secondary loop between yielding on UP and marking time with the
hypervisor on SMP.  Always compile the iseries_secondary_yield loop and
use it if the cpu id is above nr_cpu_ids.  Change all forward progress
paths to be forward branches to the next numerical label.  Assign a
label to all loops.  Move all sync instructions from the loops to the
forward progress path.  Wait to load current_set until paca is set to go.
Move the iseries_secondary_smp_loop label to cover the whole spin loop.
Add HMT_MEDIUM when we make forward progress.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/iseries/exception.S | 59 ++++++++++++++++++------------
 1 file changed, 35 insertions(+), 24 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S
index a67984c04954..29c02f36b32f 100644
--- a/arch/powerpc/platforms/iseries/exception.S
+++ b/arch/powerpc/platforms/iseries/exception.S
@@ -61,29 +61,31 @@ system_reset_iSeries:
 /* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */
 /* In the UP case we'll yield() later, and we will not access the paca anyway */
 #ifdef CONFIG_SMP
-1:
+iSeries_secondary_wait_paca:
 	HMT_LOW
 	LOAD_REG_ADDR(r23, __secondary_hold_spinloop)
 	ld	r23,0(r23)
-	sync
-	LOAD_REG_ADDR(r3,current_set)
-	sldi	r28,r24,3		/* get current_set[cpu#] */
-	ldx	r3,r3,r28
-	addi	r1,r3,THREAD_SIZE
-	subi	r1,r1,STACK_FRAME_OVERHEAD
 
-	cmpwi	0,r23,0			/* Keep poking the Hypervisor until */
-	bne	2f			/* we're released */
-	/* Let the Hypervisor know we are alive */
+	cmpdi	0,r23,0
+	bne	2f			/* go on when the master is ready */
+
+	/* Keep poking the Hypervisor until we're released */
 	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
 	lis	r3,0x8002
 	rldicr	r3,r3,32,15		/* r0 = (r3 << 32) & 0xffff000000000000 */
 	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
 	sc				/* Invoke the hypervisor via a system call */
-	b	1b
-#endif
+	b	iSeries_secondary_wait_paca
 
 2:
+	HMT_MEDIUM
+	sync
+
+	LOAD_REG_ADDR(r3, nr_cpu_ids)	/* get number of pacas allocated */
+	lwz	r3,0(r3)		/* nr_cpus= or NR_CPUS can limit */
+	cmpld	0,r24,r3		/* is our cpu number allocated? */
+	bge	iSeries_secondary_yield	/* no, yield forever */
+
 	/* Load our paca now that it's been allocated */
 	LOAD_REG_ADDR(r13, paca)
 	ld	r13,0(r13)
@@ -94,10 +96,24 @@ system_reset_iSeries:
 	ori	r23,r23,MSR_RI
 	mtmsrd	r23			/* RI on */
 
-	HMT_LOW
-#ifdef CONFIG_SMP
+iSeries_secondary_smp_loop:
 	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor
 					 * should start */
+	cmpwi	0,r23,0
+	bne	3f			/* go on when we are told */
+
+	HMT_LOW
+	/* Let the Hypervisor know we are alive */
+	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
+	lis	r3,0x8002
+	rldicr	r3,r3,32,15		/* r0 = (r3 << 32) & 0xffff000000000000 */
+	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
+	sc				/* Invoke the hypervisor via a system call */
+	mfspr	r13,SPRN_SPRG_PACA	/* Put r13 back ???? */
+	b	iSeries_secondary_smp_loop /* wait for signal to start */
+
+3:
+	HMT_MEDIUM
 	sync
 	LOAD_REG_ADDR(r3,current_set)
 	sldi	r28,r24,3		/* get current_set[cpu#] */
@@ -105,27 +121,22 @@ system_reset_iSeries:
 	addi	r1,r3,THREAD_SIZE
 	subi	r1,r1,STACK_FRAME_OVERHEAD
 
-	cmpwi	0,r23,0
-	beq	iSeries_secondary_smp_loop	/* Loop until told to go */
 	b	__secondary_start		/* Loop until told to go */
-iSeries_secondary_smp_loop:
-	/* Let the Hypervisor know we are alive */
-	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
-	lis	r3,0x8002
-	rldicr	r3,r3,32,15		/* r0 = (r3 << 32) & 0xffff000000000000 */
-#else /* CONFIG_SMP */
+#endif /* CONFIG_SMP */
+
+iSeries_secondary_yield:
 	/* Yield the processor.  This is required for non-SMP kernels
 		which are running on multi-threaded machines. */
+	HMT_LOW
 	lis	r3,0x8000
 	rldicr	r3,r3,32,15		/* r3 = (r3 << 32) & 0xffff000000000000 */
 	addi	r3,r3,18		/* r3 = 0x8000000000000012 which is "yield" */
 	li	r4,0			/* "yield timed" */
 	li	r5,-1			/* "yield forever" */
-#endif /* CONFIG_SMP */
 	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
 	sc				/* Invoke the hypervisor via a system call */
 	mfspr	r13,SPRN_SPRG_PACA	/* Put r13 back ???? */
-	b	2b			/* If SMP not configured, secondaries
+	b	iSeries_secondary_yield	/* If SMP not configured, secondaries
 					 * loop forever */
 
 /***  ISeries-LPAR interrupt handlers ***/
-- 
cgit v1.2.3


From 8657ae28ddd34db0f52b0730a6a25992c0173264 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:28:48 +0000
Subject: powerpc: Respect nr_cpu_ids when calling set_cpu_possible and
 set_cpu_present

We should not set cpus above nr_cpu_ids to possible.  While we
will trigger a warning with CONFIG_CPUMASK_DEBUG, even then the mask
initializers will set the bits beyond what the iterators check and cause
nr_cpu_ids to increase.

Respecting nr_cpu_ids during setup will allow us to use it in our initial
paca allocation.  It can be reduced from NR_CPUS by the existing early param
nr_cpus=, which was added in 2b633e3fac5efada088b57d31e65401f22bcc18f (smp:
Use nr_cpus= to set nr_cpu_ids early).  We already call parse_early_parms
between finding the command line and allocating the pacas.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/setup-common.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 1475df6e403f..fce759ba31f3 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -404,7 +404,7 @@ static void __init cpu_init_thread_core_maps(int tpc)
  *                  cpu_present_mask
  *
  * Having the possible map set up early allows us to restrict allocations
- * of things like irqstacks to num_possible_cpus() rather than NR_CPUS.
+ * of things like irqstacks to nr_cpu_ids rather than NR_CPUS.
  *
  * We do not initialize the online map here; cpus set their own bits in
  * cpu_online_mask as they come up.
@@ -424,7 +424,7 @@ void __init smp_setup_cpu_maps(void)
 
 	DBG("smp_setup_cpu_maps()\n");
 
-	while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) {
+	while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) {
 		const int *intserv;
 		int j, len;
 
@@ -443,7 +443,7 @@ void __init smp_setup_cpu_maps(void)
 				intserv = &cpu;	/* assume logical == phys */
 		}
 
-		for (j = 0; j < nthreads && cpu < NR_CPUS; j++) {
+		for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
 			DBG("    thread %d -> cpu %d (hard id %d)\n",
 			    j, cpu, intserv[j]);
 			set_cpu_present(cpu, true);
@@ -483,12 +483,12 @@ void __init smp_setup_cpu_maps(void)
 		if (cpu_has_feature(CPU_FTR_SMT))
 			maxcpus *= nthreads;
 
-		if (maxcpus > NR_CPUS) {
+		if (maxcpus > nr_cpu_ids) {
 			printk(KERN_WARNING
 			       "Partition configured for %d cpus, "
 			       "operating system maximum is %d.\n",
-			       maxcpus, NR_CPUS);
-			maxcpus = NR_CPUS;
+			       maxcpus, nr_cpu_ids);
+			maxcpus = nr_cpu_ids;
 		} else
 			printk(KERN_INFO "Partition configured for %d cpus.\n",
 			       maxcpus);
-- 
cgit v1.2.3


From 2cd947f1757fb937806535be13caf2ddd813d60b Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:28:52 +0000
Subject: powerpc: Use nr_cpu_ids in initial paca allocation

Now that we never set a cpu above nr_cpu_ids possible we can
limit our initial paca allocation to nr_cpu_ids.  We can then
clamp the number of cpus in platforms/iseries/setup.c.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/paca.c             | 17 ++++++-----------
 arch/powerpc/platforms/iseries/setup.c |  5 +++++
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 102244edecf0..efeb88184182 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -7,7 +7,7 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#include <linux/threads.h>
+#include <linux/smp.h>
 #include <linux/module.h>
 #include <linux/memblock.h>
 
@@ -178,7 +178,7 @@ static int __initdata paca_size;
 
 void __init allocate_pacas(void)
 {
-	int nr_cpus, cpu, limit;
+	int cpu, limit;
 
 	/*
 	 * We can't take SLB misses on the paca, and we want to access them
@@ -190,23 +190,18 @@ void __init allocate_pacas(void)
 	if (firmware_has_feature(FW_FEATURE_ISERIES))
 		limit = min(limit, HvPagesToMap * HVPAGESIZE);
 
-	nr_cpus = NR_CPUS;
-	/* On iSeries we know we can never have more than 64 cpus */
-	if (firmware_has_feature(FW_FEATURE_ISERIES))
-		nr_cpus = min(64, nr_cpus);
-
-	paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus);
+	paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
 
 	paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
 	memset(paca, 0, paca_size);
 
 	printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n",
-		paca_size, nr_cpus, paca);
+		paca_size, nr_cpu_ids, paca);
 
-	allocate_lppacas(nr_cpus, limit);
+	allocate_lppacas(nr_cpu_ids, limit);
 
 	/* Can't use for_each_*_cpu, as they aren't functional yet */
-	for (cpu = 0; cpu < nr_cpus; cpu++)
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		initialise_paca(&paca[cpu], cpu);
 }
 
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 81cb8d2c4132..c25a0815c26b 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -685,6 +685,11 @@ void * __init iSeries_early_setup(void)
 	powerpc_firmware_features |= FW_FEATURE_ISERIES;
 	powerpc_firmware_features |= FW_FEATURE_LPAR;
 
+#ifdef CONFIG_SMP
+	/* On iSeries we know we can never have more than 64 cpus */
+	nr_cpu_ids = max(nr_cpu_ids, 64);
+#endif
+
 	iSeries_fixup_klimit();
 
 	/*
-- 
cgit v1.2.3


From aa79bc2167104581cc1d77762394f2c01d3bf3f3 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:28:55 +0000
Subject: powerpc: Call no-longer static setup_nr_cpu_ids instead of
 replicating it

c1854e00727f50f7ac99e98d26ece04c087ef785 (powerpc: Set nr_cpu_ids early
and use it to free PACAs) copied the formerly static setup_nr_cpu_ids
from init/main.c but 34db18a054c600b6f81787165669dc572fe4de25 (smp:
move smp setup functions to kernel/smp.c) moved it to kernel/smp.c
with a declaration in include/linux/smp.h, so we can call it instead of
replicating it.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/setup-common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index fce759ba31f3..ef33a084fcf4 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -510,7 +510,7 @@ void __init smp_setup_cpu_maps(void)
 	cpu_init_thread_core_maps(nthreads);
 
 	/* Now that possible cpus are set, set nr_cpu_ids for later use */
-	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
+	setup_nr_cpu_ids();
 
 	free_unused_pacas();
 }
-- 
cgit v1.2.3


From ebc04215108c124cb4f519d687a8e27a0d16a4aa Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:28:59 +0000
Subject: powerpc/mpic: Limit NR_CPUS loop to 32 bit

mpic_physmask was looping NR_CPUS times over a mask that was passed as
a u32. Since mpic is architecturaly limited to 32 physical cpus, clamp
the logical cpus to 32 when compiling (we could also clamp at runtime
to nr_cpu_ids).

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/sysdev/mpic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 824a94fc413b..a93da805435f 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -631,7 +631,7 @@ static inline u32 mpic_physmask(u32 cpumask)
 	int i;
 	u32 mask = 0;
 
-	for (i = 0; i < NR_CPUS; ++i, cpumask >>= 1)
+	for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1)
 		mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
 	return mask;
 }
-- 
cgit v1.2.3


From 2a116f3dd07cbb55b440d3841fc24a0b3fd99ccd Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:02 +0000
Subject: powerpc/mpic: Break cpumask abstraction earlier

mpic_set_affinity is allocating and freeing a cpumask var even though
it was breaking the cpumask abstraction when passing the mask to
mpic_physmask.  It also didn't have any check for allocatin failure.

Break the cpumask abstraction earlier and use simple bitwise and of the
bits from the mask with the bits of cpu_online_mask.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/sysdev/mpic.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index a93da805435f..116695b7a5cb 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -821,16 +821,12 @@ int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
 
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), 1 << cpuid);
 	} else {
-		cpumask_var_t tmp;
+		u32 mask = cpumask_bits(cpumask)[0];
 
-		alloc_cpumask_var(&tmp, GFP_KERNEL);
-
-		cpumask_and(tmp, cpumask, cpu_online_mask);
+		mask &= cpumask_bits(cpu_online_mask)[0];
 
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
-			       mpic_physmask(cpumask_bits(tmp)[0]));
-
-		free_cpumask_var(tmp);
+			       mpic_physmask(mask));
 	}
 
 	return 0;
-- 
cgit v1.2.3


From e04763713286b1e00e1c2a33fe2741caf9470f2b Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:06 +0000
Subject: powerpc: Remove call sites of MSG_ALL_BUT_SELF

The only user of MSG_ALL_BUT_SELF in the whole kernel tree is powerpc,
and it only uses it to start the debugger. Both debuggers always call
smp_send_debugger_break with MSG_ALL_BUT_SELF, and only mpic can do
anything more optimal than a loop over all online cpus, but all message
passing implementations have to code for this special delivery target.

Convert smp_send_debugger_break to take void and loop calling the smp_ops
message_pass function for each of the other cpus in the online cpumask.

Use raw_smp_processor_id() because we are either entering the debugger
or trying to start kdump and the additional warning it not useful were
it to trigger.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/smp.h |  2 +-
 arch/powerpc/kernel/kgdb.c     |  2 +-
 arch/powerpc/kernel/smp.c      | 19 +++++++++++++------
 arch/powerpc/xmon/xmon.c       |  2 +-
 4 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 50873493a97c..91472c56800f 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -35,7 +35,7 @@ extern void cpu_die(void);
 
 #ifdef CONFIG_SMP
 
-extern void smp_send_debugger_break(int cpu);
+extern void smp_send_debugger_break(void);
 extern void smp_message_recv(int);
 extern void start_secondary_resume(void);
 
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 42850ee00ada..bd9d35f59cf4 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -109,7 +109,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs)
 #ifdef CONFIG_SMP
 void kgdb_roundup_cpus(unsigned long flags)
 {
-	smp_send_debugger_break(MSG_ALL_BUT_SELF);
+	smp_send_debugger_break();
 }
 #endif
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 87517ab6d365..b74411446922 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -218,11 +218,18 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 		smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
 }
 
-#ifdef CONFIG_DEBUGGER
-void smp_send_debugger_break(int cpu)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+void smp_send_debugger_break(void)
 {
-	if (likely(smp_ops))
-		smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+	int cpu;
+	int me = raw_smp_processor_id();
+
+	if (unlikely(!smp_ops))
+		return;
+
+	for_each_online_cpu(cpu)
+		if (cpu != me)
+			smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
 }
 #endif
 
@@ -230,9 +237,9 @@ void smp_send_debugger_break(int cpu)
 void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 {
 	crash_ipi_function_ptr = crash_ipi_callback;
-	if (crash_ipi_callback && smp_ops) {
+	if (crash_ipi_callback) {
 		mb();
-		smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK);
+		smp_send_debugger_break();
 	}
 }
 #endif
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 91309c5c00d7..42541bbcc7fa 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -437,7 +437,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		xmon_owner = cpu;
 		mb();
 		if (ncpus > 1) {
-			smp_send_debugger_break(MSG_ALL_BUT_SELF);
+			smp_send_debugger_break();
 			/* wait for other cpus to come in */
 			for (timeout = 100000000; timeout != 0; --timeout) {
 				if (cpumask_weight(&cpus_in_xmon) >= ncpus)
-- 
cgit v1.2.3


From f1072939b6dd01d038d47db0bdc01b33e5f90f28 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:10 +0000
Subject: powerpc: Remove checks for MSG_ALL and MSG_ALL_BUT_SELF

Now that smp_ops->smp_message_pass is always called with an (online) cpu
number for the target remove the checks for MSG_ALL and MSG_ALL_BUT_SELF.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/dbell.h       |  2 +-
 arch/powerpc/include/asm/machdep.h     |  2 +-
 arch/powerpc/include/asm/xics.h        |  2 +-
 arch/powerpc/kernel/dbell.c            | 29 +++++------------------------
 arch/powerpc/platforms/cell/beat_smp.c | 18 +-----------------
 arch/powerpc/platforms/cell/smp.c      | 18 +-----------------
 arch/powerpc/platforms/iseries/smp.c   | 18 +-----------------
 arch/powerpc/platforms/powermac/smp.c  | 17 +++--------------
 arch/powerpc/platforms/ps3/smp.c       | 22 +++-------------------
 arch/powerpc/sysdev/mpic.c             | 20 ++------------------
 arch/powerpc/sysdev/xics/icp-hv.c      | 18 +-----------------
 arch/powerpc/sysdev/xics/icp-native.c  | 18 +-----------------
 12 files changed, 21 insertions(+), 163 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 0893ab9343a6..3269eb49640a 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -27,7 +27,7 @@ enum ppc_dbell {
 	PPC_G_DBELL_MC = 4,	/* guest mcheck doorbell */
 };
 
-extern void doorbell_message_pass(int target, int msg);
+extern void doorbell_message_pass(int cpu, int msg);
 extern void doorbell_exception(struct pt_regs *regs);
 extern void doorbell_check_self(void);
 extern void doorbell_setup_this_cpu(void);
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index c6345acf166f..b0802a5bd744 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -31,7 +31,7 @@ struct kimage;
 
 #ifdef CONFIG_SMP
 struct smp_ops_t {
-	void  (*message_pass)(int target, int msg);
+	void  (*message_pass)(int cpu, int msg);
 	int   (*probe)(void);
 	int   (*kick_cpu)(int nr);
 	void  (*setup_cpu)(int nr);
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 6c06306c4100..1750c8dae1fa 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -40,7 +40,7 @@ struct icp_ops {
 	void (*teardown_cpu)(void);
 	void (*flush_ipi)(void);
 #ifdef CONFIG_SMP
-	void (*message_pass)(int target, int msg);
+	void (*message_pass)(int cpu, int msg);
 	irq_handler_t ipi_action;
 #endif
 };
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 3307a52d797f..e49b24c84133 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -34,32 +34,13 @@ void doorbell_setup_this_cpu(void)
 	info->tag = mfspr(SPRN_PIR) & 0x3fff;
 }
 
-void doorbell_message_pass(int target, int msg)
+void doorbell_message_pass(int cpu, int msg)
 {
 	struct doorbell_cpu_info *info;
-	int i;
-
-	if (target < NR_CPUS) {
-		info = &per_cpu(doorbell_cpu_info, target);
-		set_bit(msg, &info->messages);
-		ppc_msgsnd(PPC_DBELL, 0, info->tag);
-	}
-	else if (target == MSG_ALL_BUT_SELF) {
-		for_each_online_cpu(i) {
-			if (i == smp_processor_id())
-				continue;
-			info = &per_cpu(doorbell_cpu_info, i);
-			set_bit(msg, &info->messages);
-			ppc_msgsnd(PPC_DBELL, 0, info->tag);
-		}
-	}
-	else { /* target == MSG_ALL */
-		for_each_online_cpu(i) {
-			info = &per_cpu(doorbell_cpu_info, i);
-			set_bit(msg, &info->messages);
-		}
-		ppc_msgsnd(PPC_DBELL, PPC_DBELL_MSG_BRDCAST, 0);
-	}
+
+	info = &per_cpu(doorbell_cpu_info, cpu);
+	set_bit(msg, &info->messages);
+	ppc_msgsnd(PPC_DBELL, 0, info->tag);
 }
 
 void doorbell_exception(struct pt_regs *regs)
diff --git a/arch/powerpc/platforms/cell/beat_smp.c b/arch/powerpc/platforms/cell/beat_smp.c
index 3e86acbb0fb4..23bbe6e08c87 100644
--- a/arch/powerpc/platforms/cell/beat_smp.c
+++ b/arch/powerpc/platforms/cell/beat_smp.c
@@ -67,22 +67,6 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
 	return 0;
 }
 
-static void smp_beatic_message_pass(int target, int msg)
-{
-	unsigned int i;
-
-	if (target < NR_CPUS) {
-		beatic_cause_IPI(target, msg);
-	} else {
-		for_each_online_cpu(i) {
-			if (target == MSG_ALL_BUT_SELF
-			    && i == smp_processor_id())
-				continue;
-			beatic_cause_IPI(i, msg);
-		}
-	}
-}
-
 static int __init smp_beatic_probe(void)
 {
 	return cpumask_weight(cpu_possible_mask);
@@ -105,7 +89,7 @@ static int smp_celleb_cpu_bootable(unsigned int nr)
 	return 1;
 }
 static struct smp_ops_t bpa_beatic_smp_ops = {
-	.message_pass	= smp_beatic_message_pass,
+	.message_pass	= beatic_cause_IPI,
 	.probe		= smp_beatic_probe,
 	.kick_cpu	= smp_celleb_kick_cpu,
 	.setup_cpu	= smp_beatic_setup_cpu,
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index a2161b91b0bf..d176e6148e3f 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -103,22 +103,6 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
 	return 1;
 }
 
-static void smp_iic_message_pass(int target, int msg)
-{
-	unsigned int i;
-
-	if (target < NR_CPUS) {
-		iic_cause_IPI(target, msg);
-	} else {
-		for_each_online_cpu(i) {
-			if (target == MSG_ALL_BUT_SELF
-			    && i == smp_processor_id())
-				continue;
-			iic_cause_IPI(i, msg);
-		}
-	}
-}
-
 static int __init smp_iic_probe(void)
 {
 	iic_request_IPIs();
@@ -168,7 +152,7 @@ static int smp_cell_cpu_bootable(unsigned int nr)
 	return 1;
 }
 static struct smp_ops_t bpa_iic_smp_ops = {
-	.message_pass	= smp_iic_message_pass,
+	.message_pass	= iic_cause_IPI,
 	.probe		= smp_iic_probe,
 	.kick_cpu	= smp_cell_kick_cpu,
 	.setup_cpu	= smp_cell_setup_cpu,
diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c
index 02a677a1f912..dcdbc5dc5aad 100644
--- a/arch/powerpc/platforms/iseries/smp.c
+++ b/arch/powerpc/platforms/iseries/smp.c
@@ -59,28 +59,12 @@ void iSeries_smp_message_recv(void)
 			smp_message_recv(msg);
 }
 
-static inline void smp_iSeries_do_message(int cpu, int msg)
+static void smp_iSeries_message_pass(int cpu, int msg)
 {
 	set_bit(msg, &iSeries_smp_message[cpu]);
 	HvCall_sendIPI(&(paca[cpu]));
 }
 
-static void smp_iSeries_message_pass(int target, int msg)
-{
-	int i;
-
-	if (target < NR_CPUS)
-		smp_iSeries_do_message(target, msg);
-	else {
-		for_each_online_cpu(i) {
-			if ((target == MSG_ALL_BUT_SELF) &&
-					(i == smp_processor_id()))
-				continue;
-			smp_iSeries_do_message(i, msg);
-		}
-	}
-}
-
 static int smp_iSeries_probe(void)
 {
 	return cpumask_weight(cpu_possible_mask);
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 621d4b7755f2..c49e71926a54 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -186,21 +186,10 @@ irqreturn_t psurge_primary_intr(int irq, void *d)
 	return IRQ_HANDLED;
 }
 
-static void smp_psurge_message_pass(int target, int msg)
+static void smp_psurge_message_pass(int cpu, int msg)
 {
-	int i;
-
-	if (num_online_cpus() < 2)
-		return;
-
-	for_each_online_cpu(i) {
-		if (target == MSG_ALL
-		    || (target == MSG_ALL_BUT_SELF && i != smp_processor_id())
-		    || target == i) {
-			set_bit(msg, &psurge_smp_message[i]);
-			psurge_set_ipi(i);
-		}
-	}
+	set_bit(msg, &psurge_smp_message[cpu]);
+	psurge_set_ipi(cpu);
 }
 
 /*
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
index 51ffde40af2b..4c44794faac0 100644
--- a/arch/powerpc/platforms/ps3/smp.c
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -39,7 +39,7 @@
 #define MSG_COUNT 4
 static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs);
 
-static void do_message_pass(int target, int msg)
+static void ps3_smp_message_pass(int cpu, int msg)
 {
 	int result;
 	unsigned int virq;
@@ -49,28 +49,12 @@ static void do_message_pass(int target, int msg)
 		return;
 	}
 
-	virq = per_cpu(ps3_ipi_virqs, target)[msg];
+	virq = per_cpu(ps3_ipi_virqs, cpu)[msg];
 	result = ps3_send_event_locally(virq);
 
 	if (result)
 		DBG("%s:%d: ps3_send_event_locally(%d, %d) failed"
-			" (%d)\n", __func__, __LINE__, target, msg, result);
-}
-
-static void ps3_smp_message_pass(int target, int msg)
-{
-	int cpu;
-
-	if (target < NR_CPUS)
-		do_message_pass(target, msg);
-	else if (target == MSG_ALL_BUT_SELF) {
-		for_each_online_cpu(cpu)
-			if (cpu != smp_processor_id())
-				do_message_pass(cpu, msg);
-	} else {
-		for_each_online_cpu(cpu)
-			do_message_pass(cpu, msg);
-	}
+			" (%d)\n", __func__, __LINE__, cpu, msg, result);
 }
 
 static int ps3_smp_probe(void)
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 116695b7a5cb..68ea50c41902 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1628,31 +1628,15 @@ static void mpic_send_ipi(unsigned int ipi_no, const struct cpumask *cpu_mask)
 		       mpic_physmask(cpumask_bits(cpu_mask)[0]));
 }
 
-void smp_mpic_message_pass(int target, int msg)
+void smp_mpic_message_pass(int cpu, int msg)
 {
-	cpumask_var_t tmp;
-
 	/* make sure we're sending something that translates to an IPI */
 	if ((unsigned int)msg > 3) {
 		printk("SMP %d: smp_message_pass: unknown msg %d\n",
 		       smp_processor_id(), msg);
 		return;
 	}
-	switch (target) {
-	case MSG_ALL:
-		mpic_send_ipi(msg, cpu_online_mask);
-		break;
-	case MSG_ALL_BUT_SELF:
-		alloc_cpumask_var(&tmp, GFP_NOWAIT);
-		cpumask_andnot(tmp, cpu_online_mask,
-			       cpumask_of(smp_processor_id()));
-		mpic_send_ipi(msg, tmp);
-		free_cpumask_var(tmp);
-		break;
-	default:
-		mpic_send_ipi(msg, cpumask_of(target));
-		break;
-	}
+	mpic_send_ipi(msg, cpumask_of(cpu));
 }
 
 int __init smp_mpic_probe(void)
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index 76e87245bbfe..234764c189a4 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -118,7 +118,7 @@ static void icp_hv_set_cpu_priority(unsigned char cppr)
 
 #ifdef CONFIG_SMP
 
-static inline void icp_hv_do_message(int cpu, int msg)
+static void icp_hv_message_pass(int cpu, int msg)
 {
 	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
 
@@ -127,22 +127,6 @@ static inline void icp_hv_do_message(int cpu, int msg)
 	icp_hv_set_qirr(cpu, IPI_PRIORITY);
 }
 
-static void icp_hv_message_pass(int target, int msg)
-{
-	unsigned int i;
-
-	if (target < NR_CPUS) {
-		icp_hv_do_message(target, msg);
-	} else {
-		for_each_online_cpu(i) {
-			if (target == MSG_ALL_BUT_SELF
-			    && i == smp_processor_id())
-				continue;
-			icp_hv_do_message(i, msg);
-		}
-	}
-}
-
 static irqreturn_t icp_hv_ipi_action(int irq, void *dev_id)
 {
 	int cpu = smp_processor_id();
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 3508321c4501..246500eefbfd 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -134,7 +134,7 @@ static unsigned int icp_native_get_irq(void)
 
 #ifdef CONFIG_SMP
 
-static inline void icp_native_do_message(int cpu, int msg)
+static void icp_native_message_pass(int cpu, int msg)
 {
 	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
 
@@ -143,22 +143,6 @@ static inline void icp_native_do_message(int cpu, int msg)
 	icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
-static void icp_native_message_pass(int target, int msg)
-{
-	unsigned int i;
-
-	if (target < NR_CPUS) {
-		icp_native_do_message(target, msg);
-	} else {
-		for_each_online_cpu(i) {
-			if (target == MSG_ALL_BUT_SELF
-			    && i == smp_processor_id())
-				continue;
-			icp_native_do_message(i, msg);
-		}
-	}
-}
-
 static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
 {
 	int cpu = smp_processor_id();
-- 
cgit v1.2.3


From 3caba98fddd551ca7dcdb1eb701b36ed70b04fde Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:17 +0000
Subject: powerpc/mpic: Simplify ipi cpu mask handling

Now that MSG_ALL and MSG_ALL_BUT_SELF have been eliminated,
smp_mpic_mesage_pass no longer needs to lookup the cpumask just to
have mpic_send_ipi extract part of it and recode it in a NR_CPUS loop
by mpic_physmask.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/sysdev/mpic.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 68ea50c41902..53121f625068 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -1613,30 +1613,28 @@ void mpic_request_ipis(void)
 	}
 }
 
-static void mpic_send_ipi(unsigned int ipi_no, const struct cpumask *cpu_mask)
+void smp_mpic_message_pass(int cpu, int msg)
 {
 	struct mpic *mpic = mpic_primary;
+	u32 physmask;
 
 	BUG_ON(mpic == NULL);
 
-#ifdef DEBUG_IPI
-	DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no);
-#endif
-
-	mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) +
-		       ipi_no * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE),
-		       mpic_physmask(cpumask_bits(cpu_mask)[0]));
-}
-
-void smp_mpic_message_pass(int cpu, int msg)
-{
 	/* make sure we're sending something that translates to an IPI */
 	if ((unsigned int)msg > 3) {
 		printk("SMP %d: smp_message_pass: unknown msg %d\n",
 		       smp_processor_id(), msg);
 		return;
 	}
-	mpic_send_ipi(msg, cpumask_of(cpu));
+
+#ifdef DEBUG_IPI
+	DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, msg);
+#endif
+
+	physmask = 1 << get_hard_smp_processor_id(cpu);
+
+	mpic_cpu_write(MPIC_INFO(CPU_IPI_DISPATCH_0) +
+		       msg * MPIC_INFO(CPU_IPI_DISPATCH_STRIDE), physmask);
 }
 
 int __init smp_mpic_probe(void)
-- 
cgit v1.2.3


From 7ca8aa0924712de81485c70e00bbea8c092a08c1 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:20 +0000
Subject: powerpc: Remove powermac/pic.h

Its unused, and of the three declarations, one is duplicated in pmac.h,
the second is static and the third is renamed and static.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/powermac/pic.h | 11 -----------
 1 file changed, 11 deletions(-)
 delete mode 100644 arch/powerpc/platforms/powermac/pic.h

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powermac/pic.h b/arch/powerpc/platforms/powermac/pic.h
deleted file mode 100644
index d622a8345aaa..000000000000
--- a/arch/powerpc/platforms/powermac/pic.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __PPC_PLATFORMS_PMAC_PIC_H
-#define __PPC_PLATFORMS_PMAC_PIC_H
-
-#include <linux/irq.h>
-
-extern struct irq_chip pmac_pic;
-
-extern void pmac_pic_init(void);
-extern int pmac_get_irq(void);
-
-#endif /* __PPC_PLATFORMS_PMAC_PIC_H */
-- 
cgit v1.2.3


From a56555e573d3740d588d912aada506d57759cf5d Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:24 +0000
Subject: powerpc: Remove alloc_maybe_bootmem for zalloc version

Replace all remaining callers of alloc_maybe_bootmem with
zalloc_maybe_bootmem.   The callsite in pci_dn is followed with a
memset to clear the memory, and not zeroing at the other callsites
in the celleb fake pci code could lead to following uninitialized
memory as pointers or even freeing said pointers on error paths.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/system.h        | 2 --
 arch/powerpc/kernel/pci_dn.c             | 3 +--
 arch/powerpc/lib/alloc.c                 | 8 --------
 arch/powerpc/platforms/cell/celleb_pci.c | 6 +++---
 4 files changed, 4 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h
index 5e474ddd2273..2dc595dda03b 100644
--- a/arch/powerpc/include/asm/system.h
+++ b/arch/powerpc/include/asm/system.h
@@ -219,8 +219,6 @@ extern int mem_init_done;	/* set on boot once kmalloc can be called */
 extern int init_bootmem_done;	/* set once bootmem is available */
 extern phys_addr_t memory_limit;
 extern unsigned long klimit;
-
-extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index d225d99fe39d..6baabc13306a 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -43,10 +43,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
 	const u32 *regs;
 	struct pci_dn *pdn;
 
-	pdn = alloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
+	pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
 	if (pdn == NULL)
 		return NULL;
-	memset(pdn, 0, sizeof(*pdn));
 	dn->data = pdn;
 	pdn->node = dn;
 	pdn->phb = phb;
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index f53e09c7dac7..13b676c20d12 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -6,14 +6,6 @@
 
 #include <asm/system.h>
 
-void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask)
-{
-	if (mem_init_done)
-		return kmalloc(size, mask);
-	else
-		return alloc_bootmem(size);
-}
-
 void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
 {
 	void *p;
diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
index 2904b0a6b2c5..5822141aa63f 100644
--- a/arch/powerpc/platforms/cell/celleb_pci.c
+++ b/arch/powerpc/platforms/cell/celleb_pci.c
@@ -319,7 +319,7 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node,
 
 	size = 256;
 	config = &private->fake_config[devno][fn];
-	*config = alloc_maybe_bootmem(size, GFP_KERNEL);
+	*config = zalloc_maybe_bootmem(size, GFP_KERNEL);
 	if (*config == NULL) {
 		printk(KERN_ERR "PCI: "
 		       "not enough memory for fake configuration space\n");
@@ -330,7 +330,7 @@ static int __init celleb_setup_fake_pci_device(struct device_node *node,
 
 	size = sizeof(struct celleb_pci_resource);
 	res = &private->res[devno][fn];
-	*res = alloc_maybe_bootmem(size, GFP_KERNEL);
+	*res = zalloc_maybe_bootmem(size, GFP_KERNEL);
 	if (*res == NULL) {
 		printk(KERN_ERR
 		       "PCI: not enough memory for resource data space\n");
@@ -431,7 +431,7 @@ static int __init phb_set_bus_ranges(struct device_node *dev,
 static void __init celleb_alloc_private_mem(struct pci_controller *hose)
 {
 	hose->private_data =
-		alloc_maybe_bootmem(sizeof(struct celleb_pci_private),
+		zalloc_maybe_bootmem(sizeof(struct celleb_pci_private),
 			GFP_KERNEL);
 }
 
-- 
cgit v1.2.3


From d4fc8fe1f66f46493d3c56436685eef3b5b32b07 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:28 +0000
Subject: powerpc: Remove stubbed beat smp support

I have no idea if the beat hypervisor supports multiple cpus in
a partition, but the code has not been touched since these stubs
were added in February of 2007 except to move them in April of 2008.
These are stubs: start_cpu always returns fail (which is dropped),
the message passing and reciving are empty functions, and the top
of file comment says "Incomplete".

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/cell/Makefile         |   1 -
 arch/powerpc/platforms/cell/beat_interrupt.c |  16 ----
 arch/powerpc/platforms/cell/beat_interrupt.h |   3 -
 arch/powerpc/platforms/cell/beat_smp.c       | 107 ---------------------------
 arch/powerpc/platforms/cell/celleb_setup.c   |   4 -
 5 files changed, 131 deletions(-)
 delete mode 100644 arch/powerpc/platforms/cell/beat_smp.c

(limited to 'arch')

diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index 8839ef6c7188..a4a89350bcfc 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -43,7 +43,6 @@ obj-y					+= celleb_setup.o \
 					   beat_hvCall.o beat_interrupt.o \
 					   beat_iommu.o
 
-obj-$(CONFIG_SMP)			+= beat_smp.o
 obj-$(CONFIG_PPC_UDBG_BEAT)		+= beat_udbg.o
 obj-$(CONFIG_SERIAL_TXX9)		+= celleb_scc_sio.o
 obj-$(CONFIG_SPU_BASE)			+= beat_spu_priv1.o
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
index 4cb9e147c307..d46f7e47a1dc 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.c
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -257,22 +257,6 @@ void __init beatic_init_IRQ(void)
 	irq_set_default_host(beatic_host);
 }
 
-#ifdef CONFIG_SMP
-
-/* Nullified to compile with SMP mode */
-void beatic_setup_cpu(int cpu)
-{
-}
-
-void beatic_cause_IPI(int cpu, int mesg)
-{
-}
-
-void beatic_request_IPIs(void)
-{
-}
-#endif /* CONFIG_SMP */
-
 void beatic_deinit_IRQ(void)
 {
 	int	i;
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h
index b470fd0051f1..a7e52f91a078 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.h
+++ b/arch/powerpc/platforms/cell/beat_interrupt.h
@@ -24,9 +24,6 @@
 
 extern void beatic_init_IRQ(void);
 extern unsigned int beatic_get_irq(void);
-extern void beatic_cause_IPI(int cpu, int mesg);
-extern void beatic_request_IPIs(void);
-extern void beatic_setup_cpu(int);
 extern void beatic_deinit_IRQ(void);
 
 #endif
diff --git a/arch/powerpc/platforms/cell/beat_smp.c b/arch/powerpc/platforms/cell/beat_smp.c
deleted file mode 100644
index 23bbe6e08c87..000000000000
--- a/arch/powerpc/platforms/cell/beat_smp.c
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * SMP support for Celleb platform. (Incomplete)
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/platforms/cell/smp.c:
- * Dave Engebretsen, Peter Bergner, and
- * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
- * Plus various changes from other IBM teams...
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/threads.h>
-#include <linux/cpu.h>
-
-#include <asm/irq.h>
-#include <asm/smp.h>
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-
-#include "beat_interrupt.h"
-
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-/*
- * The primary thread of each non-boot processor is recorded here before
- * smp init.
- */
-/* static cpumask_t of_spin_map; */
-
-/**
- * smp_startup_cpu() - start the given cpu
- *
- * At boot time, there is nothing to do for primary threads which were
- * started from Open Firmware.  For anything else, call RTAS with the
- * appropriate start location.
- *
- * Returns:
- *	0	- failure
- *	1	- success
- */
-static inline int __devinit smp_startup_cpu(unsigned int lcpu)
-{
-	return 0;
-}
-
-static int __init smp_beatic_probe(void)
-{
-	return cpumask_weight(cpu_possible_mask);
-}
-
-static void __devinit smp_beatic_setup_cpu(int cpu)
-{
-	beatic_setup_cpu(cpu);
-}
-
-static int __devinit smp_celleb_kick_cpu(int nr)
-{
-	BUG_ON(nr < 0 || nr >= NR_CPUS);
-
-	return smp_startup_cpu(nr);
-}
-
-static int smp_celleb_cpu_bootable(unsigned int nr)
-{
-	return 1;
-}
-static struct smp_ops_t bpa_beatic_smp_ops = {
-	.message_pass	= beatic_cause_IPI,
-	.probe		= smp_beatic_probe,
-	.kick_cpu	= smp_celleb_kick_cpu,
-	.setup_cpu	= smp_beatic_setup_cpu,
-	.cpu_bootable	= smp_celleb_cpu_bootable,
-};
-
-/* This is called very early */
-void __init smp_init_celleb(void)
-{
-	DBG(" -> smp_init_celleb()\n");
-
-	smp_ops = &bpa_beatic_smp_ops;
-
-	DBG(" <- smp_init_celleb()\n");
-}
diff --git a/arch/powerpc/platforms/cell/celleb_setup.c b/arch/powerpc/platforms/cell/celleb_setup.c
index e53845579770..d58d9bae4b9b 100644
--- a/arch/powerpc/platforms/cell/celleb_setup.c
+++ b/arch/powerpc/platforms/cell/celleb_setup.c
@@ -128,10 +128,6 @@ static void __init celleb_setup_arch_beat(void)
 	spu_management_ops	= &spu_management_of_ops;
 #endif
 
-#ifdef CONFIG_SMP
-	smp_init_celleb();
-#endif
-
 	celleb_setup_arch_common();
 }
 
-- 
cgit v1.2.3


From 17f9c8a73bac2c7dfe28a520516ea6b8bbbe977e Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:35 +0000
Subject: powerpc: Move smp_ops_t from machdep.h to smp.h

I can't see any reason these functions are needed by machdep.h
and they are all hidden by CONFIG_SMP with no UP alternative.

Also move the declarations for the fallback timebase ops, which
are used to fill in the smp ops.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/machdep.h | 21 ---------------------
 arch/powerpc/include/asm/smp.h     | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 21 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index b0802a5bd744..47cacddb14cf 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -29,21 +29,6 @@ struct file;
 struct pci_controller;
 struct kimage;
 
-#ifdef CONFIG_SMP
-struct smp_ops_t {
-	void  (*message_pass)(int cpu, int msg);
-	int   (*probe)(void);
-	int   (*kick_cpu)(int nr);
-	void  (*setup_cpu)(int nr);
-	void  (*bringup_done)(void);
-	void  (*take_timebase)(void);
-	void  (*give_timebase)(void);
-	int   (*cpu_disable)(void);
-	void  (*cpu_die)(unsigned int nr);
-	int   (*cpu_bootable)(unsigned int nr);
-};
-#endif
-
 struct machdep_calls {
 	char		*name;
 #ifdef CONFIG_PPC64
@@ -312,12 +297,6 @@ extern sys_ctrler_t sys_ctrler;
 
 #endif /* CONFIG_PPC_PMAC */
 
-#ifdef CONFIG_SMP
-/* Poor default implementations */
-extern void __devinit smp_generic_give_timebase(void);
-extern void __devinit smp_generic_take_timebase(void);
-#endif /* CONFIG_SMP */
-
 
 /* Functions to produce codes on the leds.
  * The SRC code should be unique for the message category and should
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 91472c56800f..6f7c95c0027a 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -35,9 +35,24 @@ extern void cpu_die(void);
 
 #ifdef CONFIG_SMP
 
+struct smp_ops_t {
+	void  (*message_pass)(int cpu, int msg);
+	int   (*probe)(void);
+	int   (*kick_cpu)(int nr);
+	void  (*setup_cpu)(int nr);
+	void  (*bringup_done)(void);
+	void  (*take_timebase)(void);
+	void  (*give_timebase)(void);
+	int   (*cpu_disable)(void);
+	void  (*cpu_die)(unsigned int nr);
+	int   (*cpu_bootable)(unsigned int nr);
+};
+
 extern void smp_send_debugger_break(void);
 extern void smp_message_recv(int);
 extern void start_secondary_resume(void);
+extern void __devinit smp_generic_give_timebase(void);
+extern void __devinit smp_generic_take_timebase(void);
 
 DECLARE_PER_CPU(unsigned int, cpu_pvr);
 
-- 
cgit v1.2.3


From 23d72bfd8f9f24aa9efafed3586a99f5669c23d7 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:39 +0000
Subject: powerpc: Consolidate ipi message mux and demux

Consolidate the mux and demux of ipi messages into smp.c and call
a new smp_ops callback to actually trigger the ipi.

The powerpc architecture code is optimised for having 4 distinct
ipi triggers, which are mapped to 4 distinct messages (ipi many, ipi
single, scheduler ipi, and enter debugger).  However, several interrupt
controllers only provide a single software triggered interrupt that
can be delivered to each cpu.  To resolve this limitation, each smp_ops
implementation created a per-cpu variable that is manipulated with atomic
bitops.  Since these lines will be contended they are optimialy marked as
shared_aligned and take a full cache line for each cpu.  Distro kernels
may have 2 or 3 of these in their config, each taking per-cpu space
even though at most one will be in use.

This consolidation removes smp_message_recv and replaces the single call
actions cases with direct calls from the common message recognition loop.
The complicated debugger ipi case with its muxed crash handling code is
moved to debug_ipi_action which is now called from the demux code (instead
of the multi-message action calling smp_message_recv).

I put a call to reschedule_action to increase the likelyhood of correctly
merging the anticipated scheduler_ipi() hook coming from the scheduler
tree; that single required call can be inlined later.

The actual message decode is a copy of the old pseries xics code with its
memory barriers and cache line spacing, augmented with a per-cpu unsigned
long based on the book-e doorbell code.  The optional data is set via a
callback from the implementation and is passed to the new cause-ipi hook
along with the logical cpu number.  While currently only the doorbell
implemntation uses this data it should be almost zero cost to retrieve and
pass it -- it adds a single register load for the argument from the same
cache line to which we just completed a store and the register is dead
on return from the call.  I extended the data element from unsigned int
to unsigned long in case some other code wanted to associate a pointer.

The doorbell check_self is replaced by a call to smp_muxed_ipi_resend,
conditioned on the CPU_DBELL feature.  The ifdef guard could be relaxed
to CONFIG_SMP but I left it with BOOKE for now.

Also, the doorbell interrupt vector for book-e was not calling irq_enter
and irq_exit, which throws off cpu accounting and causes code to not
realize it is running in interrupt context.  Add the missing calls.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/dbell.h        |  3 +-
 arch/powerpc/include/asm/smp.h          | 16 ++++--
 arch/powerpc/include/asm/xics.h         |  2 +-
 arch/powerpc/kernel/dbell.c             | 46 +++-------------
 arch/powerpc/kernel/irq.c               |  4 +-
 arch/powerpc/kernel/smp.c               | 94 ++++++++++++++++++++++-----------
 arch/powerpc/platforms/85xx/smp.c       |  6 ++-
 arch/powerpc/platforms/cell/interrupt.c | 16 +++++-
 arch/powerpc/platforms/iseries/irq.c    |  3 +-
 arch/powerpc/platforms/iseries/smp.c    | 23 ++------
 arch/powerpc/platforms/iseries/smp.h    |  6 ---
 arch/powerpc/platforms/powermac/smp.c   | 27 +++-------
 arch/powerpc/platforms/pseries/smp.c    |  3 +-
 arch/powerpc/platforms/wsp/smp.c        |  3 +-
 arch/powerpc/sysdev/xics/icp-hv.c       | 10 ++--
 arch/powerpc/sysdev/xics/icp-native.c   | 10 ++--
 arch/powerpc/sysdev/xics/xics-common.c  | 30 +----------
 17 files changed, 126 insertions(+), 176 deletions(-)
 delete mode 100644 arch/powerpc/platforms/iseries/smp.h

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 3269eb49640a..9c70d0ca96d4 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -27,9 +27,8 @@ enum ppc_dbell {
 	PPC_G_DBELL_MC = 4,	/* guest mcheck doorbell */
 };
 
-extern void doorbell_message_pass(int cpu, int msg);
+extern void doorbell_cause_ipi(int cpu, unsigned long data);
 extern void doorbell_exception(struct pt_regs *regs);
-extern void doorbell_check_self(void);
 extern void doorbell_setup_this_cpu(void);
 
 static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag)
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 6f7c95c0027a..26f861560c51 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -20,6 +20,7 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/kernel.h>
+#include <linux/irqreturn.h>
 
 #ifndef __ASSEMBLY__
 
@@ -37,6 +38,7 @@ extern void cpu_die(void);
 
 struct smp_ops_t {
 	void  (*message_pass)(int cpu, int msg);
+	void  (*cause_ipi)(int cpu, unsigned long data);
 	int   (*probe)(void);
 	int   (*kick_cpu)(int nr);
 	void  (*setup_cpu)(int nr);
@@ -49,7 +51,6 @@ struct smp_ops_t {
 };
 
 extern void smp_send_debugger_break(void);
-extern void smp_message_recv(int);
 extern void start_secondary_resume(void);
 extern void __devinit smp_generic_give_timebase(void);
 extern void __devinit smp_generic_take_timebase(void);
@@ -109,13 +110,16 @@ extern int cpu_to_core_id(int cpu);
 #define PPC_MSG_CALL_FUNC_SINGLE	2
 #define PPC_MSG_DEBUGGER_BREAK  3
 
-/*
- * irq controllers that have dedicated ipis per message and don't
- * need additional code in the action handler may use this
- */
+/* for irq controllers that have dedicated ipis per message (4) */
 extern int smp_request_message_ipi(int virq, int message);
 extern const char *smp_ipi_name[];
 
+/* for irq controllers with only a single ipi */
+extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
+extern void smp_muxed_ipi_message_pass(int cpu, int msg);
+extern void smp_muxed_ipi_resend(void);
+extern irqreturn_t smp_ipi_demux(void);
+
 void smp_init_iSeries(void);
 void smp_init_pSeries(void);
 void smp_init_cell(void);
@@ -185,6 +189,8 @@ extern unsigned long __secondary_hold_spinloop;
 extern unsigned long __secondary_hold_acknowledge;
 extern char __secondary_hold;
 
+extern irqreturn_t debug_ipi_action(int irq, void *data);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 1750c8dae1fa..b183a4062011 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -40,7 +40,7 @@ struct icp_ops {
 	void (*teardown_cpu)(void);
 	void (*flush_ipi)(void);
 #ifdef CONFIG_SMP
-	void (*message_pass)(int cpu, int msg);
+	void (*cause_ipi)(int cpu, unsigned long data);
 	irq_handler_t ipi_action;
 #endif
 };
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index e49b24c84133..2cc451aaaca7 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -13,65 +13,35 @@
 #include <linux/kernel.h>
 #include <linux/smp.h>
 #include <linux/threads.h>
-#include <linux/percpu.h>
+#include <linux/hardirq.h>
 
 #include <asm/dbell.h>
 #include <asm/irq_regs.h>
 
 #ifdef CONFIG_SMP
-struct doorbell_cpu_info {
-	unsigned long	messages;	/* current messages bits */
-	unsigned int	tag;		/* tag value */
-};
-
-static DEFINE_PER_CPU(struct doorbell_cpu_info, doorbell_cpu_info);
-
 void doorbell_setup_this_cpu(void)
 {
-	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
+	unsigned long tag = mfspr(SPRN_PIR) & 0x3fff;
 
-	info->messages = 0;
-	info->tag = mfspr(SPRN_PIR) & 0x3fff;
+	smp_muxed_ipi_set_data(smp_processor_id(), tag);
 }
 
-void doorbell_message_pass(int cpu, int msg)
+void doorbell_cause_ipi(int cpu, unsigned long data)
 {
-	struct doorbell_cpu_info *info;
-
-	info = &per_cpu(doorbell_cpu_info, cpu);
-	set_bit(msg, &info->messages);
-	ppc_msgsnd(PPC_DBELL, 0, info->tag);
+	ppc_msgsnd(PPC_DBELL, 0, data);
 }
 
 void doorbell_exception(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
-	int msg;
 
-	/* Warning: regs can be NULL when called from irq enable */
+	irq_enter();
 
-	if (!info->messages || (num_online_cpus() < 2))
-		goto out;
+	smp_ipi_demux();
 
-	for (msg = 0; msg < 4; msg++)
-		if (test_and_clear_bit(msg, &info->messages))
-			smp_message_recv(msg);
-
-out:
+	irq_exit();
 	set_irq_regs(old_regs);
 }
-
-void doorbell_check_self(void)
-{
-	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info);
-
-	if (!info->messages)
-		return;
-
-	ppc_msgsnd(PPC_DBELL, 0, info->tag);
-}
-
 #else /* CONFIG_SMP */
 void doorbell_exception(struct pt_regs *regs)
 {
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index a81dd74414bf..826552cecebd 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -66,7 +66,6 @@
 #include <asm/ptrace.h>
 #include <asm/machdep.h>
 #include <asm/udbg.h>
-#include <asm/dbell.h>
 #include <asm/smp.h>
 
 #ifdef CONFIG_PPC64
@@ -160,7 +159,8 @@ notrace void arch_local_irq_restore(unsigned long en)
 
 #if defined(CONFIG_BOOKE) && defined(CONFIG_SMP)
 	/* Check for pending doorbell interrupts and resend to ourself */
-	doorbell_check_self();
+	if (cpu_has_feature(CPU_FTR_DBELL))
+		smp_muxed_ipi_resend();
 #endif
 
 	/*
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index b74411446922..fa8e8700064b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -111,35 +111,6 @@ int __devinit smp_generic_kick_cpu(int nr)
 }
 #endif
 
-void smp_message_recv(int msg)
-{
-	switch(msg) {
-	case PPC_MSG_CALL_FUNCTION:
-		generic_smp_call_function_interrupt();
-		break;
-	case PPC_MSG_RESCHEDULE:
-		/* we notice need_resched on exit */
-		break;
-	case PPC_MSG_CALL_FUNC_SINGLE:
-		generic_smp_call_function_single_interrupt();
-		break;
-	case PPC_MSG_DEBUGGER_BREAK:
-		if (crash_ipi_function_ptr) {
-			crash_ipi_function_ptr(get_irq_regs());
-			break;
-		}
-#ifdef CONFIG_DEBUGGER
-		debugger_ipi(get_irq_regs());
-		break;
-#endif /* CONFIG_DEBUGGER */
-		/* FALLTHROUGH */
-	default:
-		printk("SMP %d: smp_message_recv(): unknown msg %d\n",
-		       smp_processor_id(), msg);
-		break;
-	}
-}
-
 static irqreturn_t call_function_action(int irq, void *data)
 {
 	generic_smp_call_function_interrupt();
@@ -158,9 +129,17 @@ static irqreturn_t call_function_single_action(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t debug_ipi_action(int irq, void *data)
+irqreturn_t debug_ipi_action(int irq, void *data)
 {
-	smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
+	if (crash_ipi_function_ptr) {
+		crash_ipi_function_ptr(get_irq_regs());
+		return IRQ_HANDLED;
+	}
+
+#ifdef CONFIG_DEBUGGER
+	debugger_ipi(get_irq_regs());
+#endif /* CONFIG_DEBUGGER */
+
 	return IRQ_HANDLED;
 }
 
@@ -199,6 +178,59 @@ int smp_request_message_ipi(int virq, int msg)
 	return err;
 }
 
+struct cpu_messages {
+	unsigned long messages;		/* current messages bits */
+	unsigned long data;		/* data for cause ipi */
+};
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
+
+void smp_muxed_ipi_set_data(int cpu, unsigned long data)
+{
+	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+
+	info->data = data;
+}
+
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{
+	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
+	unsigned long *tgt = &info->messages;
+
+	set_bit(msg, tgt);
+	mb();
+	smp_ops->cause_ipi(cpu, info->data);
+}
+
+void smp_muxed_ipi_resend(void)
+{
+	struct cpu_messages *info = &__get_cpu_var(ipi_message);
+	unsigned long *tgt = &info->messages;
+
+	if (*tgt)
+		smp_ops->cause_ipi(smp_processor_id(), info->data);
+}
+
+irqreturn_t smp_ipi_demux(void)
+{
+	struct cpu_messages *info = &__get_cpu_var(ipi_message);
+	unsigned long *tgt = &info->messages;
+
+	mb();	/* order any irq clear */
+	while (*tgt) {
+		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt))
+			generic_smp_call_function_interrupt();
+		if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt))
+			reschedule_action(0, NULL); /* upcoming sched hook */
+		if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt))
+			generic_smp_call_function_single_interrupt();
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+		if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt))
+			debug_ipi_action(0, NULL);
+#endif
+	}
+	return IRQ_HANDLED;
+}
+
 void smp_send_reschedule(int cpu)
 {
 	if (likely(smp_ops))
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index fe3f6a3a5307..d6a93a10c0f5 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -235,8 +235,10 @@ void __init mpc85xx_smp_init(void)
 		smp_85xx_ops.message_pass = smp_mpic_message_pass;
 	}
 
-	if (cpu_has_feature(CPU_FTR_DBELL))
-		smp_85xx_ops.message_pass = doorbell_message_pass;
+	if (cpu_has_feature(CPU_FTR_DBELL)) {
+		smp_85xx_ops.message_pass = smp_muxed_ipi_message_pass;
+		smp_85xx_ops.cause_ipi = doorbell_cause_ipi;
+	}
 
 	BUG_ON(!smp_85xx_ops.message_pass);
 
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 44cfd1bef89b..6a58744d66c3 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -196,8 +196,20 @@ static irqreturn_t iic_ipi_action(int irq, void *dev_id)
 {
 	int ipi = (int)(long)dev_id;
 
-	smp_message_recv(ipi);
-
+	switch(ipi) {
+	case PPC_MSG_CALL_FUNCTION:
+		generic_smp_call_function_interrupt();
+		break;
+	case PPC_MSG_RESCHEDULE:
+		/* Upcoming sched hook */
+		break;
+	case PPC_MSG_CALL_FUNC_SINGLE:
+		generic_smp_call_function_single_interrupt();
+		break;
+	case PPC_MSG_DEBUGGER_BREAK:
+		debug_ipi_action(0, NULL);
+		break;
+	}
 	return IRQ_HANDLED;
 }
 static void iic_request_ipi(int ipi, const char *name)
diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c
index 375c21ca6602..b2103453eb01 100644
--- a/arch/powerpc/platforms/iseries/irq.c
+++ b/arch/powerpc/platforms/iseries/irq.c
@@ -42,7 +42,6 @@
 #include "irq.h"
 #include "pci.h"
 #include "call_pci.h"
-#include "smp.h"
 
 #ifdef CONFIG_PCI
 
@@ -316,7 +315,7 @@ unsigned int iSeries_get_irq(void)
 #ifdef CONFIG_SMP
 	if (get_lppaca()->int_dword.fields.ipi_cnt) {
 		get_lppaca()->int_dword.fields.ipi_cnt = 0;
-		iSeries_smp_message_recv();
+		smp_ipi_demux();
 	}
 #endif /* CONFIG_SMP */
 	if (hvlpevent_is_pending())
diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c
index dcdbc5dc5aad..e3265adde5d3 100644
--- a/arch/powerpc/platforms/iseries/smp.c
+++ b/arch/powerpc/platforms/iseries/smp.c
@@ -42,26 +42,8 @@
 #include <asm/cputable.h>
 #include <asm/system.h>
 
-#include "smp.h"
-
-static unsigned long iSeries_smp_message[NR_CPUS];
-
-void iSeries_smp_message_recv(void)
-{
-	int cpu = smp_processor_id();
-	int msg;
-
-	if (num_online_cpus() < 2)
-		return;
-
-	for (msg = 0; msg < 4; msg++)
-		if (test_and_clear_bit(msg, &iSeries_smp_message[cpu]))
-			smp_message_recv(msg);
-}
-
-static void smp_iSeries_message_pass(int cpu, int msg)
+static void smp_iSeries_cause_ipi(int cpu, unsigned long data)
 {
-	set_bit(msg, &iSeries_smp_message[cpu]);
 	HvCall_sendIPI(&(paca[cpu]));
 }
 
@@ -93,7 +75,8 @@ static void __devinit smp_iSeries_setup_cpu(int nr)
 }
 
 static struct smp_ops_t iSeries_smp_ops = {
-	.message_pass = smp_iSeries_message_pass,
+	.message_pass = smp_muxed_ipi_message_pass,
+	.cause_ipi    = smp_iSeries_cause_ipi,
 	.probe        = smp_iSeries_probe,
 	.kick_cpu     = smp_iSeries_kick_cpu,
 	.setup_cpu    = smp_iSeries_setup_cpu,
diff --git a/arch/powerpc/platforms/iseries/smp.h b/arch/powerpc/platforms/iseries/smp.h
deleted file mode 100644
index d501f7de01e7..000000000000
--- a/arch/powerpc/platforms/iseries/smp.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _PLATFORMS_ISERIES_SMP_H
-#define _PLATFORMS_ISERIES_SMP_H
-
-extern void iSeries_smp_message_recv(void);
-
-#endif	/* _PLATFORMS_ISERIES_SMP_H */
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index c49e71926a54..a3401071abfb 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -156,28 +156,13 @@ static inline void psurge_clr_ipi(int cpu)
 /*
  * On powersurge (old SMP powermac architecture) we don't have
  * separate IPIs for separate messages like openpic does.  Instead
- * we have a bitmap for each processor, where a 1 bit means that
- * the corresponding message is pending for that processor.
- * Ideally each cpu's entry would be in a different cache line.
+ * use the generic demux helpers
  *  -- paulus.
  */
-static unsigned long psurge_smp_message[NR_CPUS];
-
 void psurge_smp_message_recv(void)
 {
-	int cpu = smp_processor_id();
-	int msg;
-
-	/* clear interrupt */
-	psurge_clr_ipi(cpu);
-
-	if (num_online_cpus() < 2)
-		return;
-
-	/* make sure there is a message there */
-	for (msg = 0; msg < 4; msg++)
-		if (test_and_clear_bit(msg, &psurge_smp_message[cpu]))
-			smp_message_recv(msg);
+	psurge_clr_ipi(smp_processor_id());
+	smp_ipi_demux();
 }
 
 irqreturn_t psurge_primary_intr(int irq, void *d)
@@ -186,9 +171,8 @@ irqreturn_t psurge_primary_intr(int irq, void *d)
 	return IRQ_HANDLED;
 }
 
-static void smp_psurge_message_pass(int cpu, int msg)
+static void smp_psurge_cause_ipi(int cpu, unsigned long data)
 {
-	set_bit(msg, &psurge_smp_message[cpu]);
 	psurge_set_ipi(cpu);
 }
 
@@ -428,7 +412,8 @@ void __init smp_psurge_give_timebase(void)
 
 /* PowerSurge-style Macs */
 struct smp_ops_t psurge_smp_ops = {
-	.message_pass	= smp_psurge_message_pass,
+	.message_pass	= smp_muxed_ipi_message_pass,
+	.cause_ipi	= smp_psurge_cause_ipi,
 	.probe		= smp_psurge_probe,
 	.kick_cpu	= smp_psurge_kick_cpu,
 	.setup_cpu	= smp_psurge_setup_cpu,
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 95f578158ff0..fbffd7e47ab8 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -207,7 +207,8 @@ static struct smp_ops_t pSeries_mpic_smp_ops = {
 };
 
 static struct smp_ops_t pSeries_xics_smp_ops = {
-	.message_pass	= NULL,	/* Filled at runtime by xics_smp_probe() */
+	.message_pass	= smp_muxed_ipi_message_pass,
+	.cause_ipi	= NULL,	/* Filled at runtime by xics_smp_probe() */
 	.probe		= xics_smp_probe,
 	.kick_cpu	= smp_pSeries_kick_cpu,
 	.setup_cpu	= smp_xics_setup_cpu,
diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c
index c7b8db9ed9b3..9d20fa9d3710 100644
--- a/arch/powerpc/platforms/wsp/smp.c
+++ b/arch/powerpc/platforms/wsp/smp.c
@@ -75,7 +75,8 @@ static int __init smp_a2_probe(void)
 }
 
 static struct smp_ops_t a2_smp_ops = {
-	.message_pass	= doorbell_message_pass,
+	.message_pass	= smp_muxed_ipi_message_pass,
+	.cause_ipi	= doorbell_cause_ipi,
 	.probe		= smp_a2_probe,
 	.kick_cpu	= smp_a2_kick_cpu,
 	.setup_cpu	= smp_a2_setup_cpu,
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index 234764c189a4..9518d367a64f 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -118,12 +118,8 @@ static void icp_hv_set_cpu_priority(unsigned char cppr)
 
 #ifdef CONFIG_SMP
 
-static void icp_hv_message_pass(int cpu, int msg)
+static void icp_hv_cause_ipi(int cpu, unsigned long data)
 {
-	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
-	set_bit(msg, tgt);
-	mb();
 	icp_hv_set_qirr(cpu, IPI_PRIORITY);
 }
 
@@ -133,7 +129,7 @@ static irqreturn_t icp_hv_ipi_action(int irq, void *dev_id)
 
 	icp_hv_set_qirr(cpu, 0xff);
 
-	return xics_ipi_dispatch(cpu);
+	return smp_ipi_demux();
 }
 
 #endif /* CONFIG_SMP */
@@ -146,7 +142,7 @@ static const struct icp_ops icp_hv_ops = {
 	.flush_ipi	= icp_hv_flush_ipi,
 #ifdef CONFIG_SMP
 	.ipi_action	= icp_hv_ipi_action,
-	.message_pass	= icp_hv_message_pass,
+	.cause_ipi	= icp_hv_cause_ipi,
 #endif
 };
 
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 246500eefbfd..1f15ad436140 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -134,12 +134,8 @@ static unsigned int icp_native_get_irq(void)
 
 #ifdef CONFIG_SMP
 
-static void icp_native_message_pass(int cpu, int msg)
+static void icp_native_cause_ipi(int cpu, unsigned long data)
 {
-	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
-	set_bit(msg, tgt);
-	mb();
 	icp_native_set_qirr(cpu, IPI_PRIORITY);
 }
 
@@ -149,7 +145,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
 
 	icp_native_set_qirr(cpu, 0xff);
 
-	return xics_ipi_dispatch(cpu);
+	return smp_ipi_demux();
 }
 
 #endif /* CONFIG_SMP */
@@ -267,7 +263,7 @@ static const struct icp_ops icp_native_ops = {
 	.flush_ipi	= icp_native_flush_ipi,
 #ifdef CONFIG_SMP
 	.ipi_action	= icp_native_ipi_action,
-	.message_pass	= icp_native_message_pass,
+	.cause_ipi	= icp_native_cause_ipi,
 #endif
 };
 
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index a0576b705ddd..a31a7103218f 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -126,32 +126,6 @@ void xics_mask_unknown_vec(unsigned int vec)
 
 #ifdef CONFIG_SMP
 
-DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message);
-
-irqreturn_t xics_ipi_dispatch(int cpu)
-{
-	unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
-	mb();	/* order mmio clearing qirr */
-	while (*tgt) {
-		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) {
-			smp_message_recv(PPC_MSG_CALL_FUNCTION);
-		}
-		if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) {
-			smp_message_recv(PPC_MSG_RESCHEDULE);
-		}
-		if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) {
-			smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE);
-		}
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
-		if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) {
-			smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
-		}
-#endif
-	}
-	return IRQ_HANDLED;
-}
-
 static void xics_request_ipi(void)
 {
 	unsigned int ipi;
@@ -170,8 +144,8 @@ static void xics_request_ipi(void)
 
 int __init xics_smp_probe(void)
 {
-	/* Setup message_pass callback  based on which ICP is used */
-	smp_ops->message_pass = icp_ops->message_pass;
+	/* Setup cause_ipi callback  based on which ICP is used */
+	smp_ops->cause_ipi = icp_ops->cause_ipi;
 
 	/* Register all the IPIs */
 	xics_request_ipi();
-- 
cgit v1.2.3


From 1ece355b6825b7c61d1dc39a5c6cf49dc746e193 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:42 +0000
Subject: powerpc: Add kconfig for muxed smp ipi support

Compile the new smp ipi mux and demux code only if a platform
will make use of it.  The new config is selected as required.

The new cause_ipi smp op is only available conditionally to point out
configs where the select is required; this makes setting the op an
immediate fail instead of a deferred unresolved symbol at link.

This also creates a new config for power surge powermac upgrade support
that can be disabled in expert mode but is default on.

I also removed the depends / default y on CONFIG_XICS since it is selected
by PSERIES.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/smp.h          |  2 ++
 arch/powerpc/kernel/smp.c               |  2 ++
 arch/powerpc/platforms/Kconfig          |  8 ++++++++
 arch/powerpc/platforms/Kconfig.cputype  |  2 ++
 arch/powerpc/platforms/iseries/Kconfig  |  1 +
 arch/powerpc/platforms/powermac/Kconfig | 11 ++++++++++-
 arch/powerpc/platforms/powermac/pic.c   |  4 ++--
 arch/powerpc/platforms/powermac/smp.c   |  8 ++++----
 arch/powerpc/sysdev/xics/Kconfig        |  1 +
 9 files changed, 32 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 26f861560c51..880b8c1e6e53 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -38,7 +38,9 @@ extern void cpu_die(void);
 
 struct smp_ops_t {
 	void  (*message_pass)(int cpu, int msg);
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
 	void  (*cause_ipi)(int cpu, unsigned long data);
+#endif
 	int   (*probe)(void);
 	int   (*kick_cpu)(int nr);
 	void  (*setup_cpu)(int nr);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index fa8e8700064b..d76f7d7929be 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -178,6 +178,7 @@ int smp_request_message_ipi(int virq, int msg)
 	return err;
 }
 
+#ifdef CONFIG_PPC_SMP_MUXED_IPI
 struct cpu_messages {
 	unsigned long messages;		/* current messages bits */
 	unsigned long data;		/* data for cause ipi */
@@ -230,6 +231,7 @@ irqreturn_t smp_ipi_demux(void)
 	}
 	return IRQ_HANDLED;
 }
+#endif /* CONFIG_PPC_SMP_MUXED_IPI */
 
 void smp_send_reschedule(int cpu)
 {
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 6059053e7158..83c704a637bb 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -57,6 +57,14 @@ config UDBG_RTAS_CONSOLE
 	depends on PPC_RTAS
 	default n
 
+config PPC_SMP_MUXED_IPI
+	bool
+	help
+	  Select this opton if your platform supports SMP and your
+	  interrupt controller provides less than 4 interrupts to each
+	  cpu.	This will enable the generic code to multiplex the 4
+	  messages on to one ipi.
+
 config PPC_UDBG_BEAT
 	bool "BEAT based debug console"
 	depends on PPC_CELLEB
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index a1e623822a30..2165b65876f9 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -73,6 +73,7 @@ config PPC_BOOK3S_64
 config PPC_BOOK3E_64
 	bool "Embedded processors"
 	select PPC_FPU # Make it a choice ?
+	select PPC_SMP_MUXED_IPI
 
 endchoice
 
@@ -178,6 +179,7 @@ config FSL_BOOKE
 config PPC_FSL_BOOK3E
 	bool
 	select FSL_EMB_PERFMON
+	select PPC_SMP_MUXED_IPI
 	default y if FSL_BOOKE
 
 config PTE_64BIT
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
index ea1d3622b41c..b57cda3a0817 100644
--- a/arch/powerpc/platforms/iseries/Kconfig
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -1,6 +1,7 @@
 config PPC_ISERIES
 	bool "IBM Legacy iSeries"
 	depends on PPC64 && PPC_BOOK3S
+	select PPC_SMP_MUXED_IPI
 	select PPC_INDIRECT_PIO
 	select PPC_INDIRECT_MMIO
 	select PPC_PCI_CHOICE if EXPERT
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index 1e1a0873e1dd..1afd10f67858 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -18,4 +18,13 @@ config PPC_PMAC64
 	select PPC_970_NAP
 	default y
 
-
+config PPC_PMAC32_PSURGE
+	bool "Support for powersurge upgrade cards" if EXPERT
+	depends on SMP && PPC32 && PPC_PMAC
+	select PPC_SMP_MUXED_IPI
+	default y
+	help
+	  The powersurge cpu boards can be used in the generation
+	  of powermacs that have a socket for an upgradeable cpu card,
+	  including the 7500, 8500, 9500, 9600.  Support exists for
+	  both dual and quad socket upgrade cards.
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 2f34ad04029f..b706cb3ad99c 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -239,7 +239,7 @@ static unsigned int pmac_pic_get_irq(void)
 	unsigned long bits = 0;
 	unsigned long flags;
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_PPC_PMAC32_PSURGE
 	void psurge_smp_message_recv(void);
 
        	/* IPI's are a hack on the powersurge -- Cort */
@@ -247,7 +247,7 @@ static unsigned int pmac_pic_get_irq(void)
 		psurge_smp_message_recv();
 		return NO_IRQ_IGNORE;	/* ignore, already handled */
         }
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
 	for (irq = max_real_irqs; (irq -= 32) >= 0; ) {
 		int i = irq >> 5;
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index a3401071abfb..67b6e1432be2 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -70,7 +70,7 @@ static void (*pmac_tb_freeze)(int freeze);
 static u64 timebase;
 static int tb_req;
 
-#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_PMAC32_PSURGE
 
 /*
  * Powersurge (old powermac SMP) support.
@@ -420,7 +420,7 @@ struct smp_ops_t psurge_smp_ops = {
 	.give_timebase	= smp_psurge_give_timebase,
 	.take_timebase	= smp_psurge_take_timebase,
 };
-#endif /* CONFIG_PPC32 - actually powersurge support */
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
 
 /*
  * Core 99 and later support
@@ -980,7 +980,7 @@ void __init pmac_setup_smp(void)
 		of_node_put(np);
 		smp_ops = &core99_smp_ops;
 	}
-#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_PMAC32_PSURGE
 	else {
 		/* We have to set bits in cpu_possible_mask here since the
 		 * secondary CPU(s) aren't in the device tree. Various
@@ -993,7 +993,7 @@ void __init pmac_setup_smp(void)
 			set_cpu_possible(cpu, true);
 		smp_ops = &psurge_smp_ops;
 	}
-#endif /* CONFIG_PPC32 */
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
 
 #ifdef CONFIG_HOTPLUG_CPU
 	ppc_md.cpu_die = pmac_cpu_die;
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
index 123b8ddf2816..0031eda320c3 100644
--- a/arch/powerpc/sysdev/xics/Kconfig
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -1,5 +1,6 @@
 config PPC_XICS
        def_bool n
+       select PPC_SMP_MUXED_IPI
 
 config PPC_ICP_NATIVE
        def_bool n
-- 
cgit v1.2.3


From 714542721b4a53a3ebbdd5f0619ac0f66e7df610 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:46 +0000
Subject: powerpc: Use bytes instead of bitops in smp ipi multiplexing

Since there are only 4 messages, we can replace the atomic bit set
(which uses atomic load reserve and store conditional sequence) with
a byte stores to seperate bytes.  We still have to perform a load
reserve and store conditional sequence to avoid loosing messages on
reception but we can do that with a single call to xchg.

The do {} while and __BIG_ENDIAN specific mask testing was chosen by
looking at the generated asm code.  On gcc-4.4, the bit masking becomes
a simple bit mask and test of the register returned from xchg without
storing and loading the value to the stack like attempts with a union
of bytes and an int (or worse, loading single bit constants from the
constant pool into non-voliatle registers that had to be preseved on
the stack).  The do {} while avoids an unconditional branch to the
end of the loop to test the entry / repeat condition of a while loop
and instead optimises for the expected single iteration of the loop.

We have a full mb() at the beginning to cover ordering between send,
ipi, and receive so we can use xchg_local and forgo the further
acquire and release barriers of xchg.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/smp.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index d76f7d7929be..a8909aa50642 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -180,7 +180,7 @@ int smp_request_message_ipi(int virq, int msg)
 
 #ifdef CONFIG_PPC_SMP_MUXED_IPI
 struct cpu_messages {
-	unsigned long messages;		/* current messages bits */
+	int messages;			/* current messages */
 	unsigned long data;		/* data for cause ipi */
 };
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
@@ -195,9 +195,9 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data)
 void smp_muxed_ipi_message_pass(int cpu, int msg)
 {
 	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
-	unsigned long *tgt = &info->messages;
+	char *message = (char *)&info->messages;
 
-	set_bit(msg, tgt);
+	message[msg] = 1;
 	mb();
 	smp_ops->cause_ipi(cpu, info->data);
 }
@@ -205,30 +205,35 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
 void smp_muxed_ipi_resend(void)
 {
 	struct cpu_messages *info = &__get_cpu_var(ipi_message);
-	unsigned long *tgt = &info->messages;
 
-	if (*tgt)
+	if (info->messages)
 		smp_ops->cause_ipi(smp_processor_id(), info->data);
 }
 
 irqreturn_t smp_ipi_demux(void)
 {
 	struct cpu_messages *info = &__get_cpu_var(ipi_message);
-	unsigned long *tgt = &info->messages;
+	unsigned int all;
 
 	mb();	/* order any irq clear */
-	while (*tgt) {
-		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt))
+
+	do {
+		all = xchg_local(&info->messages, 0);
+
+#ifdef __BIG_ENDIAN
+		if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION)))
 			generic_smp_call_function_interrupt();
-		if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt))
+		if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE)))
 			reschedule_action(0, NULL); /* upcoming sched hook */
-		if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt))
+		if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE)))
 			generic_smp_call_function_single_interrupt();
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
-		if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt))
+		if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK)))
 			debug_ipi_action(0, NULL);
+#else
+#error Unsupported ENDIAN
 #endif
-	}
+	} while (info->messages);
+
 	return IRQ_HANDLED;
 }
 #endif /* CONFIG_PPC_SMP_MUXED_IPI */
-- 
cgit v1.2.3


From e085255ebce87c0b85d4752638d8a7d4f35f5b64 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:49 +0000
Subject: powerpc/xics: Cleanup xics_host_map and ipi

Since we already have a special case in map to set the ipi handler, use
the desired flow.

If we don't find an ics to handle the interrupt complain instead of
returning 0 without having set a chip or handler.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/sysdev/xics/xics-common.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index a31a7103218f..43b2a791e204 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -135,9 +135,8 @@ static void xics_request_ipi(void)
 
 	/*
 	 * IPIs are marked IRQF_DISABLED as they must run with irqs
-	 * disabled
+	 * disabled, and PERCPU.  The handler was set in map.
 	 */
-	irq_set_handler(ipi, handle_percpu_irq);
 	BUG_ON(request_irq(ipi, icp_ops->ipi_action,
 			   IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL));
 }
@@ -341,15 +340,16 @@ static int xics_host_map(struct irq_host *h, unsigned int virq,
 	/* Don't call into ICS for IPIs */
 	if (hw == XICS_IPI) {
 		irq_set_chip_and_handler(virq, &xics_ipi_chip,
-					 handle_fasteoi_irq);
+					 handle_percpu_irq);
 		return 0;
 	}
 
 	/* Let the ICS setup the chip data */
 	list_for_each_entry(ics, &ics_list, link)
 		if (ics->map(ics, virq) == 0)
-			break;
-	return 0;
+			return 0;
+
+	return -EINVAL;
 }
 
 static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
-- 
cgit v1.2.3


From 3af259d1555a93b3b6f6545af13e0eb99b0d5d32 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:53 +0000
Subject: powerpc: Radix trees are available before init_IRQ

Since the generic irq code uses a radix tree for sparse interrupts,
the initcall ordering has been changed to initialize radix trees before
irqs.   We no longer need to defer creating revmap radix trees to the
arch_initcall irq_late_init.

Also, the kmem caches are allocated so we don't need to use
zalloc_maybe_bootmem.

Signed-off-by: Milton Miller <miltonm@bga.com>
Reviewed-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/irq.c | 78 +++--------------------------------------------
 1 file changed, 4 insertions(+), 74 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 826552cecebd..f42e869ee3cc 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -493,7 +493,6 @@ struct irq_map_entry {
 
 static LIST_HEAD(irq_hosts);
 static DEFINE_RAW_SPINLOCK(irq_big_lock);
-static unsigned int revmap_trees_allocated;
 static DEFINE_MUTEX(revmap_trees_mutex);
 static struct irq_map_entry irq_map[NR_IRQS];
 static unsigned int irq_virq_count = NR_IRQS;
@@ -537,7 +536,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
 	/* Allocate structure and revmap table if using linear mapping */
 	if (revmap_type == IRQ_HOST_MAP_LINEAR)
 		size += revmap_arg * sizeof(unsigned int);
-	host = zalloc_maybe_bootmem(size, GFP_KERNEL);
+	host = kzalloc(size, GFP_KERNEL);
 	if (host == NULL)
 		return NULL;
 
@@ -605,6 +604,9 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
 		smp_wmb();
 		host->revmap_data.linear.revmap = rmap;
 		break;
+	case IRQ_HOST_MAP_TREE:
+		INIT_RADIX_TREE(&host->revmap_data.tree, GFP_KERNEL);
+		break;
 	default:
 		break;
 	}
@@ -839,13 +841,6 @@ void irq_dispose_mapping(unsigned int virq)
 			host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
 		break;
 	case IRQ_HOST_MAP_TREE:
-		/*
-		 * Check if radix tree allocated yet, if not then nothing to
-		 * remove.
-		 */
-		smp_rmb();
-		if (revmap_trees_allocated < 1)
-			break;
 		mutex_lock(&revmap_trees_mutex);
 		radix_tree_delete(&host->revmap_data.tree, hwirq);
 		mutex_unlock(&revmap_trees_mutex);
@@ -905,14 +900,6 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host,
 
 	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
 
-	/*
-	 * Check if the radix tree exists and has bee initialized.
-	 * If not, we fallback to slow mode
-	 */
-	if (revmap_trees_allocated < 2)
-		return irq_find_mapping(host, hwirq);
-
-	/* Now try to resolve */
 	/*
 	 * No rcu_read_lock(ing) needed, the ptr returned can't go under us
 	 * as it's referencing an entry in the static irq_map table.
@@ -935,18 +922,8 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host,
 void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
 			     irq_hw_number_t hwirq)
 {
-
 	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
 
-	/*
-	 * Check if the radix tree exists yet.
-	 * If not, then the irq will be inserted into the tree when it gets
-	 * initialized.
-	 */
-	smp_rmb();
-	if (revmap_trees_allocated < 1)
-		return;
-
 	if (virq != NO_IRQ) {
 		mutex_lock(&revmap_trees_mutex);
 		radix_tree_insert(&host->revmap_data.tree, hwirq,
@@ -1054,53 +1031,6 @@ int arch_early_irq_init(void)
 	return 0;
 }
 
-/* We need to create the radix trees late */
-static int irq_late_init(void)
-{
-	struct irq_host *h;
-	unsigned int i;
-
-	/*
-	 * No mutual exclusion with respect to accessors of the tree is needed
-	 * here as the synchronization is done via the state variable
-	 * revmap_trees_allocated.
-	 */
-	list_for_each_entry(h, &irq_hosts, link) {
-		if (h->revmap_type == IRQ_HOST_MAP_TREE)
-			INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL);
-	}
-
-	/*
-	 * Make sure the radix trees inits are visible before setting
-	 * the flag
-	 */
-	smp_wmb();
-	revmap_trees_allocated = 1;
-
-	/*
-	 * Insert the reverse mapping for those interrupts already present
-	 * in irq_map[].
-	 */
-	mutex_lock(&revmap_trees_mutex);
-	for (i = 0; i < irq_virq_count; i++) {
-		if (irq_map[i].host &&
-		    (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE))
-			radix_tree_insert(&irq_map[i].host->revmap_data.tree,
-					  irq_map[i].hwirq, &irq_map[i]);
-	}
-	mutex_unlock(&revmap_trees_mutex);
-
-	/*
-	 * Make sure the radix trees insertions are visible before setting
-	 * the flag
-	 */
-	smp_wmb();
-	revmap_trees_allocated = 2;
-
-	return 0;
-}
-arch_initcall(irq_late_init);
-
 #ifdef CONFIG_VIRQ_DEBUG
 static int virq_debug_show(struct seq_file *m, void *private)
 {
-- 
cgit v1.2.3


From 2d441681a4df7822e6ef6fcc0320bb14d2a06dbb Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:29:57 +0000
Subject: powerpc: Return early if irq_host lookup type is wrong

If for some reason the code incrorectly calls the wrong function to
manage the revmap, not only should we warn, we should take action.
However, in the paths we expect to be taken every delivered interrupt
change to WARN_ON_ONCE.  Use the if (WARN_ON(x)) format to get the
unlikely for free.

Signed-off-by: Milton Miller <miltonm@bga.com>
Reviewed-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/irq.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index f42e869ee3cc..4a5aa8ca97a5 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -814,8 +814,7 @@ void irq_dispose_mapping(unsigned int virq)
 		return;
 
 	host = irq_map[virq].host;
-	WARN_ON (host == NULL);
-	if (host == NULL)
+	if (WARN_ON(host == NULL))
 		return;
 
 	/* Never unmap legacy interrupts */
@@ -898,7 +897,8 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host,
 	struct irq_map_entry *ptr;
 	unsigned int virq;
 
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
+	if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_TREE))
+		return irq_find_mapping(host, hwirq);
 
 	/*
 	 * No rcu_read_lock(ing) needed, the ptr returned can't go under us
@@ -922,7 +922,8 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host,
 void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
 			     irq_hw_number_t hwirq)
 {
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
+	if (WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE))
+		return;
 
 	if (virq != NO_IRQ) {
 		mutex_lock(&revmap_trees_mutex);
@@ -937,7 +938,8 @@ unsigned int irq_linear_revmap(struct irq_host *host,
 {
 	unsigned int *revmap;
 
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR);
+	if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_LINEAR))
+		return irq_find_mapping(host, hwirq);
 
 	/* Check revmap bounds */
 	if (unlikely(hwirq >= host->revmap_data.linear.size))
-- 
cgit v1.2.3


From df74e70ac25fc4bf4036a2f9690b4e2e4520e65d Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:00 +0000
Subject: powerpc: Remove trival irq_host_ops.unmap

These all just clear chip or chipdata fields, which will be done
by the generic code when we call irq_free_descs.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/82xx/pq2ads-pci-pic.c     | 8 --------
 arch/powerpc/platforms/embedded6xx/flipper-pic.c | 7 -------
 arch/powerpc/platforms/embedded6xx/hlwd-pic.c    | 7 -------
 arch/powerpc/platforms/ps3/interrupt.c           | 6 ------
 4 files changed, 28 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
index 5d6c34ce0cba..8ccf9ed62fe2 100644
--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
@@ -112,16 +112,8 @@ static int pci_pic_host_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-static void pci_host_unmap(struct irq_host *h, unsigned int virq)
-{
-	/* remove chip and handler */
-	irq_set_chip_data(virq, NULL);
-	irq_set_chip(virq, NULL);
-}
-
 static struct irq_host_ops pci_pic_host_ops = {
 	.map = pci_pic_host_map,
-	.unmap = pci_host_unmap,
 };
 
 int __init pq2ads_pci_init_irq(void)
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index 77cbe4c8f953..f61a2dd96b99 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -107,12 +107,6 @@ static int flipper_pic_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-static void flipper_pic_unmap(struct irq_host *h, unsigned int irq)
-{
-	irq_set_chip_data(irq, NULL);
-	irq_set_chip(irq, NULL);
-}
-
 static int flipper_pic_match(struct irq_host *h, struct device_node *np)
 {
 	return 1;
@@ -121,7 +115,6 @@ static int flipper_pic_match(struct irq_host *h, struct device_node *np)
 
 static struct irq_host_ops flipper_irq_host_ops = {
 	.map = flipper_pic_map,
-	.unmap = flipper_pic_unmap,
 	.match = flipper_pic_match,
 };
 
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
index 44b398b0a2fd..e4919170c6bc 100644
--- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -100,15 +100,8 @@ static int hlwd_pic_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-static void hlwd_pic_unmap(struct irq_host *h, unsigned int irq)
-{
-	irq_set_chip_data(irq, NULL);
-	irq_set_chip(irq, NULL);
-}
-
 static struct irq_host_ops hlwd_irq_host_ops = {
 	.map = hlwd_pic_map,
-	.unmap = hlwd_pic_unmap,
 };
 
 static unsigned int __hlwd_pic_get_irq(struct irq_host *h)
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index 523bd0d34d9d..600ed2c0ed59 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -659,11 +659,6 @@ static void __maybe_unused _dump_mask(struct ps3_private *pd,
 static void dump_bmp(struct ps3_private* pd) {};
 #endif /* defined(DEBUG) */
 
-static void ps3_host_unmap(struct irq_host *h, unsigned int virq)
-{
-	irq_set_chip_data(virq, NULL);
-}
-
 static int ps3_host_map(struct irq_host *h, unsigned int virq,
 	irq_hw_number_t hwirq)
 {
@@ -683,7 +678,6 @@ static int ps3_host_match(struct irq_host *h, struct device_node *np)
 
 static struct irq_host_ops ps3_host_ops = {
 	.map = ps3_host_map,
-	.unmap = ps3_host_unmap,
 	.match = ps3_host_match,
 };
 
-- 
cgit v1.2.3


From 7ee342bdc3d7e2cba4be6d1eece56efec9d3809f Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:04 +0000
Subject: powerpc: Remove i8259 irq_host_ops->unmap

It was never called because the host is always IRQ_HOST_MAP_LEGACY.

And what it purported to do was mask the interrupt (which will already
have happend if we shutdown the interrupt), then synchronise_irq and
clear the chip pointer, both of which will have been be done by the
caller were we to call unmap on a legacy irq.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/sysdev/i8259.c | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 142770cb84b6..d18bb27e4df9 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -185,18 +185,6 @@ static int i8259_host_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-static void i8259_host_unmap(struct irq_host *h, unsigned int virq)
-{
-	/* Make sure irq is masked in hardware */
-	i8259_mask_irq(irq_get_irq_data(virq));
-
-	/* remove chip and handler */
-	irq_set_chip_and_handler(virq, NULL, NULL);
-
-	/* Make sure it's completed */
-	synchronize_irq(virq);
-}
-
 static int i8259_host_xlate(struct irq_host *h, struct device_node *ct,
 			    const u32 *intspec, unsigned int intsize,
 			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
@@ -220,7 +208,6 @@ static int i8259_host_xlate(struct irq_host *h, struct device_node *ct,
 static struct irq_host_ops i8259_host_ops = {
 	.match = i8259_host_match,
 	.map = i8259_host_map,
-	.unmap = i8259_host_unmap,
 	.xlate = i8259_host_xlate,
 };
 
-- 
cgit v1.2.3


From 6c4c82e20a1b476589c1e2b0b7c2adc783c93006 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:07 +0000
Subject: powerpc/fsl_msi: Don't abuse platform_data for driver_data

The msi platform device driver was abusing dev.platform_data for its
platform_driver_data.  Use the correct pointer for storage.

Platform_data is supposed to be for platforms to communicate to drivers
parameters that are not otherwise discoverable.  Its lifetime matches
the platform_device not the platform device driver.  It is generally
not needed for drivers that only support systems with device trees.

Signed-off-by: Milton Miller <miltonm@bga.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/sysdev/fsl_msi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index d5679dc1e20f..077776c06cd0 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -253,7 +253,7 @@ unlock:
 
 static int fsl_of_msi_remove(struct platform_device *ofdev)
 {
-	struct fsl_msi *msi = ofdev->dev.platform_data;
+	struct fsl_msi *msi = platform_get_drvdata(ofdev);
 	int virq, i;
 	struct fsl_msi_cascade_data *cascade_data;
 
@@ -327,7 +327,7 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev)
 		dev_err(&dev->dev, "No memory for MSI structure\n");
 		return -ENOMEM;
 	}
-	dev->dev.platform_data = msi;
+	platform_set_drvdata(dev, msi);
 
 	msi->irqhost = irq_alloc_host(dev->dev.of_node, IRQ_HOST_MAP_LINEAR,
 				      NR_MSI_IRQS, &fsl_msi_host_ops, 0);
-- 
cgit v1.2.3


From d1921bcdeee66c3d1704ef753dc74464856e4b7f Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:11 +0000
Subject: powerpc/fsl_msi: Use chip_data not handler_data

handler_data should be reserved for flow handlers on the dependent
irq, not consumed by the parent irq code that is part of the irq_chip
code.  The msi_data pointer was already set in msidesc->irqhost->hostdata
and being copied to irq_data->chipdata in the msidesc->irqhost->map()
method called via create_irq_mapping, so we can obtain the pointer
from there and free the instance it in teardown_msi_irqs.

Also remove the unnecessary cast of irq_get_handler_data in the
cascade handler, which is the demux flow handler of the parent
msi interrupt.  (This is the expected usage for handler_data).

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/sysdev/fsl_msi.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 077776c06cd0..067cc88ae376 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -110,7 +110,7 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev)
 	list_for_each_entry(entry, &pdev->msi_list, list) {
 		if (entry->irq == NO_IRQ)
 			continue;
-		msi_data = irq_get_handler_data(entry->irq);
+		msi_data = irq_get_chip_data(entry->irq);
 		irq_set_msi_desc(entry->irq, NULL);
 		msi_bitmap_free_hwirqs(&msi_data->bitmap,
 				       virq_to_hw(entry->irq), 1);
@@ -168,7 +168,7 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 			rc = -ENOSPC;
 			goto out_free;
 		}
-		irq_set_handler_data(virq, msi_data);
+		/* chip_data is msi_data via host->hostdata in host->map() */
 		irq_set_msi_desc(virq, entry);
 
 		fsl_compose_msi_msg(pdev, hwirq, &msg, msi_data);
@@ -193,7 +193,7 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
 	u32 have_shift = 0;
 	struct fsl_msi_cascade_data *cascade_data;
 
-	cascade_data = (struct fsl_msi_cascade_data *)irq_get_handler_data(irq);
+	cascade_data = irq_get_handler_data(irq);
 	msi_data = cascade_data->msi_data;
 
 	raw_spin_lock(&desc->lock);
-- 
cgit v1.2.3


From c42385cd4533b5ba9b9dd0034285cfcfd7982708 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:15 +0000
Subject: powerpc/mpc5121_ads_cpld: Remove use of NO_IRQ_IGNORE

As NO_IRQ_IGNORE is only used between the static function cpld_pic_get_irq
and its caller cpld_pic_cascade, and cpld_pic_cascade only uses it to
suppress calling handle_generic_irq, we can change these uses to NO_IRQ
and remove the extra tests and pathlength in cpld_pic_cascade.

Signed-off-by: Milton Miller <miltonm@bga.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/512x/mpc5121_ads_cpld.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index a8bc0d443934..9f09319352c0 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -97,7 +97,7 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
 	status |= (ignore | mask);
 
 	if (status == 0xff)
-		return NO_IRQ_IGNORE;
+		return NO_IRQ;
 
 	cpld_irq = ffz(status) + offset;
 
@@ -109,14 +109,14 @@ cpld_pic_cascade(unsigned int irq, struct irq_desc *desc)
 {
 	irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status,
 		&cpld_regs->pci_mask);
-	if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) {
+	if (irq != NO_IRQ) {
 		generic_handle_irq(irq);
 		return;
 	}
 
 	irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status,
 		&cpld_regs->misc_mask);
-	if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) {
+	if (irq != NO_IRQ) {
 		generic_handle_irq(irq);
 		return;
 	}
-- 
cgit v1.2.3


From 67347eba156d64150be73c9f5797342a66b123d9 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:18 +0000
Subject: powerpc/mpc62xx_pic: Fix get_irq handling of NO_IRQ

If none of irq category bits were set mpc52xx_get_irq() would pass
NO_IRQ_IGNORE (-1) to irq_linear_revmap, which does an unsigned compare
and declares the interrupt above the linear map range.  It then punts
to irq_find_mapping, which performs a linear search of all irqs,
which will likely miss and only then return NO_IRQ.

If no status bit is set, then we should return NO_IRQ directly.
The interrupt should not be suppressed from spurious counting, in fact
that is the definition of supurious.

Signed-off-by: Milton Miller <miltonm@bga.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/52xx/mpc52xx_pic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
index bb611819b832..1a9a49570579 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -486,7 +486,7 @@ void __init mpc52xx_init_irq(void)
 unsigned int mpc52xx_get_irq(void)
 {
 	u32 status;
-	int irq = NO_IRQ_IGNORE;
+	int irq;
 
 	status = in_be32(&intr->enc_status);
 	if (status & 0x00000400) {	/* critical */
@@ -509,6 +509,8 @@ unsigned int mpc52xx_get_irq(void)
 		} else {
 			irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET);
 		}
+	} else {
+		return NO_IRQ;
 	}
 
 	return irq_linear_revmap(mpc52xx_irqhost, irq);
-- 
cgit v1.2.3


From 23f73a5fb0dee5ab681bfeb8897bcfc57153ba9a Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:22 +0000
Subject: powerpc/psurge: Create a irq_host for secondary cpus

Create a dummy irq_host using the generic dummy irq chip for the secondary
cpus to use.  Create a direct irq mapping for the ipi and register the
ipi action handler against it.  If for some unlikely reason part of this
fails then don't detect the secondary cpus.

This removes another instance of NO_IRQ_IGNORE, records the ipi stats
for the secondary cpus, and runs the ipi on the interrupt stack.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/powermac/pic.c  |  9 +++----
 arch/powerpc/platforms/powermac/pmac.h |  1 +
 arch/powerpc/platforms/powermac/smp.c  | 49 +++++++++++++++++++++++++++++-----
 3 files changed, 46 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index b706cb3ad99c..360260d1352d 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -240,12 +240,9 @@ static unsigned int pmac_pic_get_irq(void)
 	unsigned long flags;
 
 #ifdef CONFIG_PPC_PMAC32_PSURGE
-	void psurge_smp_message_recv(void);
-
-       	/* IPI's are a hack on the powersurge -- Cort */
-       	if ( smp_processor_id() != 0 ) {
-		psurge_smp_message_recv();
-		return NO_IRQ_IGNORE;	/* ignore, already handled */
+	/* IPI's are a hack on the powersurge -- Cort */
+	if (smp_processor_id() != 0) {
+		return  psurge_secondary_virq;
         }
 #endif /* CONFIG_PPC_PMAC32_PSURGE */
 	raw_spin_lock_irqsave(&pmac_pic_lock, flags);
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
index 20468f49aec0..8327cce2bdb0 100644
--- a/arch/powerpc/platforms/powermac/pmac.h
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -33,6 +33,7 @@ extern void pmac_setup_pci_dma(void);
 extern void pmac_check_ht_link(void);
 
 extern void pmac_setup_smp(void);
+extern int psurge_secondary_virq;
 extern void low_cpu_die(void) __attribute__((noreturn));
 
 extern int pmac_nvram_init(void);
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 67b6e1432be2..db092d7c4c5b 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -124,6 +124,10 @@ static volatile u32 __iomem *psurge_start;
 /* what sort of powersurge board we have */
 static int psurge_type = PSURGE_NONE;
 
+/* irq for secondary cpus to report */
+static struct irq_host *psurge_host;
+int psurge_secondary_virq;
+
 /*
  * Set and clear IPIs for powersurge.
  */
@@ -159,15 +163,11 @@ static inline void psurge_clr_ipi(int cpu)
  * use the generic demux helpers
  *  -- paulus.
  */
-void psurge_smp_message_recv(void)
+static irqreturn_t psurge_ipi_intr(int irq, void *d)
 {
 	psurge_clr_ipi(smp_processor_id());
 	smp_ipi_demux();
-}
 
-irqreturn_t psurge_primary_intr(int irq, void *d)
-{
-	psurge_smp_message_recv();
 	return IRQ_HANDLED;
 }
 
@@ -176,6 +176,38 @@ static void smp_psurge_cause_ipi(int cpu, unsigned long data)
 	psurge_set_ipi(cpu);
 }
 
+static int psurge_host_map(struct irq_host *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_percpu_irq);
+
+	return 0;
+}
+
+struct irq_host_ops psurge_host_ops = {
+	.map	= psurge_host_map,
+};
+
+static int psurge_secondary_ipi_init(void)
+{
+	int rc = -ENOMEM;
+
+	psurge_host = irq_alloc_host(NULL, IRQ_HOST_MAP_NOMAP, 0,
+		&psurge_host_ops, 0);
+
+	if (psurge_host)
+		psurge_secondary_virq = irq_create_direct_mapping(psurge_host);
+
+	if (psurge_secondary_virq)
+		rc = request_irq(psurge_secondary_virq, psurge_ipi_intr,
+			IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
+
+	if (rc)
+		pr_err("Failed to setup secondary cpu IPI\n");
+
+	return rc;
+}
+
 /*
  * Determine a quad card presence. We read the board ID register, we
  * force the data bus to change to something else, and we read it again.
@@ -284,6 +316,9 @@ static int __init smp_psurge_probe(void)
 		ncpus = 2;
 	}
 
+	if (psurge_secondary_ipi_init())
+		return 1;
+
 	psurge_start = ioremap(PSURGE_START, 4);
 	psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4);
 
@@ -372,8 +407,8 @@ static int __init smp_psurge_kick_cpu(int nr)
 }
 
 static struct irqaction psurge_irqaction = {
-	.handler = psurge_primary_intr,
-	.flags = IRQF_DISABLED,
+	.handler = psurge_ipi_intr,
+	.flags = IRQF_DISABLED|IRQF_PERCPU,
 	.name = "primary IPI",
 };
 
-- 
cgit v1.2.3


From da0519800260a3c791b4fe3317a0c7560027a372 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:26 +0000
Subject: powerpc: Remove irq_host_ops->remap hook

It was called from irq_create_mapping if that was called for a host
and hwirq that was previously mapped, "to update the flags".  But the
only implementation was in beat_interrupt and all it did was repeat a
hypervisor call without error checking that was performed with error
checking at the beginning of the map hook.  In addition, the comment on
the beat remap hook says it will only called once for a given mapping,
which would apply to map not remap.

All flags should be known by the time the match hook is called, before
we call the map hook.  Removing this mostly unused hook will simpify
the requirements of irq_domain concept.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/irq.h               |  3 ---
 arch/powerpc/kernel/irq.c                    |  2 --
 arch/powerpc/platforms/cell/beat_interrupt.c | 11 -----------
 3 files changed, 16 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index e1983d577688..6f4a3efb59a9 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -88,9 +88,6 @@ struct irq_host_ops {
 	/* Dispose of such a mapping */
 	void (*unmap)(struct irq_host *h, unsigned int virq);
 
-	/* Update of such a mapping  */
-	void (*remap)(struct irq_host *h, unsigned int virq, irq_hw_number_t hw);
-
 	/* Translate device-tree interrupt specifier from raw format coming
 	 * from the firmware to a irq_hw_number_t (interrupt line number) and
 	 * type (sense) that can be passed to set_irq_type(). In the absence
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4a5aa8ca97a5..0715a09a4101 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -732,8 +732,6 @@ unsigned int irq_create_mapping(struct irq_host *host,
 	 */
 	virq = irq_find_mapping(host, hwirq);
 	if (virq != NO_IRQ) {
-		if (host->ops->remap)
-			host->ops->remap(host, virq, hwirq);
 		pr_debug("irq: -> existing mapping on virq %d\n", virq);
 		return virq;
 	}
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
index d46f7e47a1dc..55015e1f6939 100644
--- a/arch/powerpc/platforms/cell/beat_interrupt.c
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -147,16 +147,6 @@ static int beatic_pic_host_map(struct irq_host *h, unsigned int virq,
 	return 0;
 }
 
-/*
- * Update binding hardware IRQ number (hw) and Virtuql
- * IRQ number (virq). This is called only once for a given mapping.
- */
-static void beatic_pic_host_remap(struct irq_host *h, unsigned int virq,
-			       irq_hw_number_t hw)
-{
-	beat_construct_and_connect_irq_plug(virq, hw);
-}
-
 /*
  * Translate device-tree interrupt spec to irq_hw_number_t style (ulong),
  * to pass away to irq_create_mapping().
@@ -184,7 +174,6 @@ static int beatic_pic_host_match(struct irq_host *h, struct device_node *np)
 
 static struct irq_host_ops beatic_pic_host_ops = {
 	.map = beatic_pic_host_map,
-	.remap = beatic_pic_host_remap,
 	.unmap = beatic_pic_host_unmap,
 	.xlate = beatic_pic_host_xlate,
 	.match = beatic_pic_host_match,
-- 
cgit v1.2.3


From 6b0aea44d6b36b52010d206be69ce37c2f4f1bd1 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:29 +0000
Subject: powerpc/spider-pic: Get pic from chip_data instead of irq_map

Building on Grant's efforts to remove the irq_map array, this patch
moves spider-pics use of virq_to_host() to use irq_data_get_chip_data
and sets the irq chip data in the map call, like most other interrupt
controllers in powerpc.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/cell/spider-pic.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 34d2b99d10c3..442c28c00f88 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -68,9 +68,9 @@ struct spider_pic {
 };
 static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
 
-static struct spider_pic *spider_virq_to_pic(unsigned int virq)
+static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d)
 {
-	return virq_to_host(virq)->host_data;
+	return irq_data_get_irq_chip_data(d);
 }
 
 static void __iomem *spider_get_irq_config(struct spider_pic *pic,
@@ -81,7 +81,7 @@ static void __iomem *spider_get_irq_config(struct spider_pic *pic,
 
 static void spider_unmask_irq(struct irq_data *d)
 {
-	struct spider_pic *pic = spider_virq_to_pic(d->irq);
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
 	void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
 
 	out_be32(cfg, in_be32(cfg) | 0x30000000u);
@@ -89,7 +89,7 @@ static void spider_unmask_irq(struct irq_data *d)
 
 static void spider_mask_irq(struct irq_data *d)
 {
-	struct spider_pic *pic = spider_virq_to_pic(d->irq);
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
 	void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d));
 
 	out_be32(cfg, in_be32(cfg) & ~0x30000000u);
@@ -97,7 +97,7 @@ static void spider_mask_irq(struct irq_data *d)
 
 static void spider_ack_irq(struct irq_data *d)
 {
-	struct spider_pic *pic = spider_virq_to_pic(d->irq);
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
 	unsigned int src = irqd_to_hwirq(d);
 
 	/* Reset edge detection logic if necessary
@@ -116,7 +116,7 @@ static void spider_ack_irq(struct irq_data *d)
 static int spider_set_irq_type(struct irq_data *d, unsigned int type)
 {
 	unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
-	struct spider_pic *pic = spider_virq_to_pic(d->irq);
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
 	unsigned int hw = irqd_to_hwirq(d);
 	void __iomem *cfg = spider_get_irq_config(pic, hw);
 	u32 old_mask;
@@ -171,6 +171,7 @@ static struct irq_chip spider_pic = {
 static int spider_host_map(struct irq_host *h, unsigned int virq,
 			irq_hw_number_t hw)
 {
+	irq_set_chip_data(virq, h->host_data);
 	irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq);
 
 	/* Set default irq type */
-- 
cgit v1.2.3


From 9553361499f9f9e8ca8c9dae2e103f651fa48217 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:33 +0000
Subject: powerpc/axon_msi: Validate msi irq via chip_data

Instead of checking for rogue msi numbers via the irq_map host field
set the chip_data to h.host_data (which is the msic struct pointer)
at map and compare it in get_irq.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/cell/axon_msi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 1e3329e8578b..ac06903e136a 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -113,7 +113,7 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
 		pr_devel("axon_msi: woff %x roff %x msi %x\n",
 			  write_offset, msic->read_offset, msi);
 
-		if (msi < NR_IRQS && virq_to_host(msi) == msic->irq_host) {
+		if (msi < NR_IRQS && irq_get_chip_data(msi) == msic) {
 			generic_handle_irq(msi);
 			msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
 		} else {
@@ -320,6 +320,7 @@ static struct irq_chip msic_irq_chip = {
 static int msic_host_map(struct irq_host *h, unsigned int virq,
 			 irq_hw_number_t hw)
 {
+	irq_set_chip_data(virq, h->host_data);
 	irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
 
 	return 0;
-- 
cgit v1.2.3


From 3ee62d365b519c0c18c774049efcde84fe51c60c Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:36 +0000
Subject: powerpc: Add virq_is_host to reduce virq_to_host usage

Some irq_host implementations are using virq_to_host to check if
they are the irq_host for a virtual irq.  To allow us to make space
versus time tradeoffs, replace this usage with an assertive
virq_is_host that confirms or denies the irq is associated with the
given irq_host.

Signed-off-by: Milton Miller <miltonm@bga.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/irq.h         | 1 +
 arch/powerpc/kernel/irq.c              | 6 ++++++
 arch/powerpc/sysdev/xics/xics-common.c | 2 +-
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 6f4a3efb59a9..a65d1702643b 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -128,6 +128,7 @@ struct irq_host {
 struct irq_data;
 extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d);
 extern irq_hw_number_t virq_to_hw(unsigned int virq);
+extern bool virq_is_host(unsigned int virq, struct irq_host *host);
 extern struct irq_host *virq_to_host(unsigned int virq);
 
 /**
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 0715a09a4101..73cf29078fef 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -510,6 +510,12 @@ irq_hw_number_t virq_to_hw(unsigned int virq)
 }
 EXPORT_SYMBOL_GPL(virq_to_hw);
 
+bool virq_is_host(unsigned int virq, struct irq_host *host)
+{
+	return irq_map[virq].host == host;
+}
+EXPORT_SYMBOL_GPL(virq_is_host);
+
 struct irq_host *virq_to_host(unsigned int virq)
 {
 	return irq_map[virq].host;
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index 43b2a791e204..445c5a01b766 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -213,7 +213,7 @@ void xics_migrate_irqs_away(void)
 		/* We can't set affinity on ISA interrupts */
 		if (virq < NUM_ISA_INTERRUPTS)
 			continue;
-		if (virq_to_host(virq) != xics_host)
+		if (!virq_is_host(virq, xics_host))
 			continue;
 		irq = (unsigned int)virq_to_hw(virq);
 		/* We need to get IPIs still. */
-- 
cgit v1.2.3


From 1e8c23013ed0d535e531b3b9cc30200e884f3ff0 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:40 +0000
Subject: powerpc: Remove virq_to_host

The only references to the irq_map[].host field are internal to
arch/powerpc/kernel/irq.c

Signed-off-by: Milton Miller <miltonm@bga.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/irq.h | 1 -
 arch/powerpc/kernel/irq.c      | 6 ------
 2 files changed, 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index a65d1702643b..1bff591f7f72 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -129,7 +129,6 @@ struct irq_data;
 extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d);
 extern irq_hw_number_t virq_to_hw(unsigned int virq);
 extern bool virq_is_host(unsigned int virq, struct irq_host *host);
-extern struct irq_host *virq_to_host(unsigned int virq);
 
 /**
  * irq_alloc_host - Allocate a new irq_host data structure
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 73cf29078fef..4368b5ed5604 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -516,12 +516,6 @@ bool virq_is_host(unsigned int virq, struct irq_host *host)
 }
 EXPORT_SYMBOL_GPL(virq_is_host);
 
-struct irq_host *virq_to_host(unsigned int virq)
-{
-	return irq_map[virq].host;
-}
-EXPORT_SYMBOL_GPL(virq_to_host);
-
 static int default_irq_host_match(struct irq_host *h, struct device_node *np)
 {
 	return h->of_node != NULL && h->of_node == np;
-- 
cgit v1.2.3


From d36b4c4f3cc6caae6d4a12d9f995513e4c3acdd5 Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@kernel.crashing.org>
Date: Wed, 6 Apr 2011 00:18:48 -0500
Subject: powerpc/fsl-booke64: Add support for Debug Level exception handler

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/include/asm/cputable.h  |  4 ++-
 arch/powerpc/kernel/exceptions-64e.S | 65 ++++++++++++++++++++++++++++++++++--
 arch/powerpc/kernel/setup_64.c       |  8 +++++
 3 files changed, 73 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 4efbfb3f3254..c0d842cfd012 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -157,6 +157,7 @@ extern const char *powerpc_base_platform;
 #define CPU_FTR_476_DD2			ASM_CONST(0x0000000000010000)
 #define CPU_FTR_NEED_COHERENT		ASM_CONST(0x0000000000020000)
 #define CPU_FTR_NO_BTIC			ASM_CONST(0x0000000000040000)
+#define CPU_FTR_DEBUG_LVL_EXC		ASM_CONST(0x0000000000080000)
 #define CPU_FTR_NODSISRALIGN		ASM_CONST(0x0000000000100000)
 #define CPU_FTR_PPC_LE			ASM_CONST(0x0000000000200000)
 #define CPU_FTR_REAL_LE			ASM_CONST(0x0000000000400000)
@@ -385,7 +386,8 @@ extern const char *powerpc_base_platform;
 	    CPU_FTR_DBELL)
 #define CPU_FTRS_E5500	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
 	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
-	    CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
+	    CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+	    CPU_FTR_DEBUG_LVL_EXC)
 #define CPU_FTRS_GENERIC_32	(CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
 
 /* 64-bit CPUs */
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 4d0abb4930a1..cf27a8fa0d29 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -253,9 +253,6 @@ exception_marker:
 	.balign	0x1000
 	.globl interrupt_base_book3e
 interrupt_base_book3e:					/* fake trap */
-	/* Note: If real debug exceptions are supported by the HW, the vector
-	 * below will have to be patched up to point to an appropriate handler
-	 */
 	EXCEPTION_STUB(0x000, machine_check)		/* 0x0200 */
 	EXCEPTION_STUB(0x020, critical_input)		/* 0x0580 */
 	EXCEPTION_STUB(0x040, debug_crit)		/* 0x0d00 */
@@ -455,6 +452,68 @@ interrupt_end_book3e:
 kernel_dbg_exc:
 	b	.	/* NYI */
 
+/* Debug exception as a debug interrupt*/
+	START_EXCEPTION(debug_debug);
+	DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS)
+
+	/*
+	 * If there is a single step or branch-taken exception in an
+	 * exception entry sequence, it was probably meant to apply to
+	 * the code where the exception occurred (since exception entry
+	 * doesn't turn off DE automatically).  We simulate the effect
+	 * of turning off DE on entry to an exception handler by turning
+	 * off DE in the DSRR1 value and clearing the debug status.
+	 */
+
+	mfspr	r14,SPRN_DBSR		/* check single-step/branch taken */
+	andis.	r15,r14,DBSR_IC@h
+	beq+	1f
+
+	LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
+	LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
+	cmpld	cr0,r10,r14
+	cmpld	cr1,r10,r15
+	blt+	cr0,1f
+	bge+	cr1,1f
+
+	/* here it looks like we got an inappropriate debug exception. */
+	lis	r14,DBSR_IC@h		/* clear the IC event */
+	rlwinm	r11,r11,0,~MSR_DE	/* clear DE in the DSRR1 value */
+	mtspr	SPRN_DBSR,r14
+	mtspr	SPRN_DSRR1,r11
+	lwz	r10,PACA_EXDBG+EX_CR(r13)	/* restore registers */
+	ld	r1,PACA_EXDBG+EX_R1(r13)
+	ld	r14,PACA_EXDBG+EX_R14(r13)
+	ld	r15,PACA_EXDBG+EX_R15(r13)
+	mtcr	r10
+	ld	r10,PACA_EXDBG+EX_R10(r13)	/* restore registers */
+	ld	r11,PACA_EXDBG+EX_R11(r13)
+	mfspr	r13,SPRN_SPRG_DBG_SCRATCH
+	rfdi
+
+	/* Normal debug exception */
+	/* XXX We only handle coming from userspace for now since we can't
+	 *     quite save properly an interrupted kernel state yet
+	 */
+1:	andi.	r14,r11,MSR_PR;		/* check for userspace again */
+	beq	kernel_dbg_exc;		/* if from kernel mode */
+
+	/* Now we mash up things to make it look like we are coming on a
+	 * normal exception
+	 */
+	mfspr	r15,SPRN_SPRG_DBG_SCRATCH
+	mtspr	SPRN_SPRG_GEN_SCRATCH,r15
+	mfspr	r14,SPRN_DBSR
+	EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL)
+	std	r14,_DSISR(r1)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	mr	r4,r14
+	ld	r14,PACA_EXDBG+EX_R14(r13)
+	ld	r15,PACA_EXDBG+EX_R15(r13)
+	bl	.save_nvgprs
+	bl	.DebugException
+	b	.ret_from_except
+
 /* Doorbell interrupt */
 	MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE)
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index c2ec0a12e14f..a88bf2713d41 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -62,6 +62,7 @@
 #include <asm/udbg.h>
 #include <asm/kexec.h>
 #include <asm/mmu_context.h>
+#include <asm/code-patching.h>
 
 #include "setup.h"
 
@@ -477,6 +478,9 @@ static void __init irqstack_early_init(void)
 #ifdef CONFIG_PPC_BOOK3E
 static void __init exc_lvl_early_init(void)
 {
+	extern unsigned int interrupt_base_book3e;
+	extern unsigned int exc_debug_debug_book3e;
+
 	unsigned int i;
 
 	for_each_possible_cpu(i) {
@@ -487,6 +491,10 @@ static void __init exc_lvl_early_init(void)
 		mcheckirq_ctx[i] = (struct thread_info *)
 			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
 	}
+
+	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
+		patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1,
+			     (unsigned long)&exc_debug_debug_book3e, 0);
 }
 #else
 #define exc_lvl_early_init()
-- 
cgit v1.2.3


From 3a6e9bd7f60b29efc205485ceb11a768032c40d4 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Mon, 9 May 2011 16:26:00 -0500
Subject: powerpc/e5500: set non-base IVORs

Without this, we attempt to use doorbells for IPIs, and end up
branching to some bad address.  Plus, even for the exceptions
we don't implement, it's good to handle it and get a message out.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/include/asm/reg_booke.h      |  4 +++
 arch/powerpc/kernel/cpu_setup_fsl_booke.S |  3 ++
 arch/powerpc/kernel/exceptions-64e.S      | 47 +++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 817bd1ac1752..0f0ad9fa01c1 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -83,6 +83,10 @@
 #define SPRN_IVOR13	0x19D	/* Interrupt Vector Offset Register 13 */
 #define SPRN_IVOR14	0x19E	/* Interrupt Vector Offset Register 14 */
 #define SPRN_IVOR15	0x19F	/* Interrupt Vector Offset Register 15 */
+#define SPRN_IVOR38	0x1B0	/* Interrupt Vector Offset Register 38 */
+#define SPRN_IVOR39	0x1B1	/* Interrupt Vector Offset Register 39 */
+#define SPRN_IVOR40	0x1B2	/* Interrupt Vector Offset Register 40 */
+#define SPRN_IVOR41	0x1B3	/* Interrupt Vector Offset Register 41 */
 #define SPRN_SPEFSCR	0x200	/* SPE & Embedded FP Status & Control */
 #define SPRN_BBEAR	0x201	/* Branch Buffer Entry Address Register */
 #define SPRN_BBTAR	0x202	/* Branch Buffer Target Address Register */
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 913611105c1f..8053db02b85e 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -88,6 +88,9 @@ _GLOBAL(__setup_cpu_e5500)
 	bl	__e500_dcache_setup
 #ifdef CONFIG_PPC_BOOK3E_64
 	bl	.__setup_base_ivors
+	bl	.setup_perfmon_ivor
+	bl	.setup_doorbell_ivors
+	bl	.setup_ehv_ivors
 #else
 	bl	__setup_e500mc_ivors
 #endif
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index cf27a8fa0d29..d24d4400cc79 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -269,8 +269,13 @@ interrupt_base_book3e:					/* fake trap */
 	EXCEPTION_STUB(0x1a0, watchdog)			/* 0x09f0 */
 	EXCEPTION_STUB(0x1c0, data_tlb_miss)
 	EXCEPTION_STUB(0x1e0, instruction_tlb_miss)
+	EXCEPTION_STUB(0x260, perfmon)
 	EXCEPTION_STUB(0x280, doorbell)
 	EXCEPTION_STUB(0x2a0, doorbell_crit)
+	EXCEPTION_STUB(0x2c0, guest_doorbell)
+	EXCEPTION_STUB(0x2e0, guest_doorbell_crit)
+	EXCEPTION_STUB(0x300, hypercall)
+	EXCEPTION_STUB(0x320, ehpriv)
 
 	.globl interrupt_end_book3e
 interrupt_end_book3e:
@@ -514,6 +519,8 @@ kernel_dbg_exc:
 	bl	.DebugException
 	b	.ret_from_except
 
+	MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE)
+
 /* Doorbell interrupt */
 	MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE)
 
@@ -528,6 +535,11 @@ kernel_dbg_exc:
 //	b	ret_from_crit_except
 	b	.
 
+	MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE)
+	MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE)
+	MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE)
+	MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE)
+
 
 /*
  * An interrupt came in while soft-disabled; clear EE in SRR1,
@@ -647,7 +659,12 @@ fast_exception_return:
 BAD_STACK_TRAMPOLINE(0x000)
 BAD_STACK_TRAMPOLINE(0x100)
 BAD_STACK_TRAMPOLINE(0x200)
+BAD_STACK_TRAMPOLINE(0x260)
+BAD_STACK_TRAMPOLINE(0x2c0)
+BAD_STACK_TRAMPOLINE(0x2e0)
 BAD_STACK_TRAMPOLINE(0x300)
+BAD_STACK_TRAMPOLINE(0x310)
+BAD_STACK_TRAMPOLINE(0x320)
 BAD_STACK_TRAMPOLINE(0x400)
 BAD_STACK_TRAMPOLINE(0x500)
 BAD_STACK_TRAMPOLINE(0x600)
@@ -1183,3 +1200,33 @@ _GLOBAL(__setup_base_ivors)
 	sync
 
 	blr
+
+_GLOBAL(setup_perfmon_ivor)
+	SET_IVOR(35, 0x260) /* Performance Monitor */
+	blr
+
+_GLOBAL(setup_doorbell_ivors)
+	SET_IVOR(36, 0x280) /* Processor Doorbell */
+	SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */
+
+	/* Check MMUCFG[LPIDSIZE] to determine if we have category E.HV */
+	mfspr	r10,SPRN_MMUCFG
+	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
+	beqlr
+
+	SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */
+	SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */
+	blr
+
+_GLOBAL(setup_ehv_ivors)
+	/*
+	 * We may be running as a guest and lack E.HV even on a chip
+	 * that normally has it.
+	 */
+	mfspr	r10,SPRN_MMUCFG
+	rlwinm.	r10,r10,0,MMUCFG_LPIDSIZE
+	beqlr
+
+	SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */
+	SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */
+	blr
-- 
cgit v1.2.3


From e0be2c21649107282930837f0805c71dfe29e2cc Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 4 May 2011 16:29:31 +0200
Subject: powerpc/mpc8610_hpcd: Do not use "/" in interrupt names

It may trigger a warning in fs/proc/generic.c:__xlate_proc_name() when
trying to add an entry for the interrupt handler to sysfs.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/platforms/86xx/mpc8610_hpcd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 018cc67be426..efadd3be8e6e 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -66,7 +66,7 @@ static void __init mpc8610_suspend_init(void)
 		return;
 	}
 
-	ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9/wakeup", NULL);
+	ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9:wakeup", NULL);
 	if (ret) {
 		pr_err("%s: can't request pixis event IRQ: %d\n",
 		       __func__, ret);
-- 
cgit v1.2.3


From f46dad270b7f425d7d4ec08676f2513732d11c2b Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Mon, 9 May 2011 14:29:40 -0500
Subject: powerpc/86xx: don't pretend that we support 8-bit pixels on the
 MPC8610 HPCD

If the video mode is set to 16-, 24-, or 32-bit pixels, then the pixel data
contains actual levels of red, blue, and green.  However, if the video mode
is set to 8-bit pixels, then the 8-bit value represents an index into color
table.  This is called "palette mode" on the Freescale DIU video controller.

The DIU driver does not currently support palette mode, but the MPC8610 HPCD
board file returned a non-zero (although incorrect) pixel format value for
8-bit mode.

Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/platforms/86xx/mpc8610_hpcd.c | 97 ++++++++++++++++++++----------
 1 file changed, 64 insertions(+), 33 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index efadd3be8e6e..a896511690c2 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -105,45 +105,77 @@ machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices);
 
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 
-static u32 get_busfreq(void)
-{
-	struct device_node *node;
-
-	u32 fs_busfreq = 0;
-	node = of_find_node_by_type(NULL, "cpu");
-	if (node) {
-		unsigned int size;
-		const unsigned int *prop =
-			of_get_property(node, "bus-frequency", &size);
-		if (prop)
-			fs_busfreq = *prop;
-		of_node_put(node);
-	};
-	return fs_busfreq;
-}
+/*
+ * DIU Area Descriptor
+ *
+ * The MPC8610 reference manual shows the bits of the AD register in
+ * little-endian order, which causes the BLUE_C field to be split into two
+ * parts. To simplify the definition of the MAKE_AD() macro, we define the
+ * fields in big-endian order and byte-swap the result.
+ *
+ * So even though the registers don't look like they're in the
+ * same bit positions as they are on the P1022, the same value is written to
+ * the AD register on the MPC8610 and on the P1022.
+ */
+#define AD_BYTE_F		0x10000000
+#define AD_ALPHA_C_MASK		0x0E000000
+#define AD_ALPHA_C_SHIFT	25
+#define AD_BLUE_C_MASK		0x01800000
+#define AD_BLUE_C_SHIFT		23
+#define AD_GREEN_C_MASK		0x00600000
+#define AD_GREEN_C_SHIFT	21
+#define AD_RED_C_MASK		0x00180000
+#define AD_RED_C_SHIFT		19
+#define AD_PALETTE		0x00040000
+#define AD_PIXEL_S_MASK		0x00030000
+#define AD_PIXEL_S_SHIFT	16
+#define AD_COMP_3_MASK		0x0000F000
+#define AD_COMP_3_SHIFT		12
+#define AD_COMP_2_MASK		0x00000F00
+#define AD_COMP_2_SHIFT		8
+#define AD_COMP_1_MASK		0x000000F0
+#define AD_COMP_1_SHIFT		4
+#define AD_COMP_0_MASK		0x0000000F
+#define AD_COMP_0_SHIFT		0
+
+#define MAKE_AD(alpha, red, blue, green, size, c0, c1, c2, c3) \
+	cpu_to_le32(AD_BYTE_F | (alpha << AD_ALPHA_C_SHIFT) | \
+	(blue << AD_BLUE_C_SHIFT) | (green << AD_GREEN_C_SHIFT) | \
+	(red << AD_RED_C_SHIFT) | (c3 << AD_COMP_3_SHIFT) | \
+	(c2 << AD_COMP_2_SHIFT) | (c1 << AD_COMP_1_SHIFT) | \
+	(c0 << AD_COMP_0_SHIFT) | (size << AD_PIXEL_S_SHIFT))
 
 unsigned int mpc8610hpcd_get_pixel_format(unsigned int bits_per_pixel,
 						int monitor_port)
 {
 	static const unsigned long pixelformat[][3] = {
-		{0x88882317, 0x88083218, 0x65052119},
-		{0x88883316, 0x88082219, 0x65053118},
+		{
+			MAKE_AD(3, 0, 2, 1, 3, 8, 8, 8, 8),
+			MAKE_AD(4, 2, 0, 1, 2, 8, 8, 8, 0),
+			MAKE_AD(4, 0, 2, 1, 1, 5, 6, 5, 0)
+		},
+		{
+			MAKE_AD(3, 2, 0, 1, 3, 8, 8, 8, 8),
+			MAKE_AD(4, 0, 2, 1, 2, 8, 8, 8, 0),
+			MAKE_AD(4, 2, 0, 1, 1, 5, 6, 5, 0)
+		},
 	};
-	unsigned int pix_fmt, arch_monitor;
+	unsigned int arch_monitor;
 
+	/* The DVI port is mis-wired on revision 1 of this board. */
 	arch_monitor = ((*pixis_arch == 0x01) && (monitor_port == 0))? 0 : 1;
-		/* DVI port for board version 0x01 */
-
-	if (bits_per_pixel == 32)
-		pix_fmt = pixelformat[arch_monitor][0];
-	else if (bits_per_pixel == 24)
-		pix_fmt = pixelformat[arch_monitor][1];
-	else if (bits_per_pixel == 16)
-		pix_fmt = pixelformat[arch_monitor][2];
-	else
-		pix_fmt = pixelformat[1][0];
-
-	return pix_fmt;
+
+	switch (bits_per_pixel) {
+	case 32:
+		return pixelformat[arch_monitor][0];
+	case 24:
+		return pixelformat[arch_monitor][1];
+	case 16:
+		return pixelformat[arch_monitor][2];
+	default:
+		pr_err("fsl-diu: unsupported pixel depth %u\n", bits_per_pixel);
+		return 0;
+	}
 }
 
 void mpc8610hpcd_set_gamma_table(int monitor_port, char *gamma_table_base)
@@ -190,8 +222,7 @@ void mpc8610hpcd_set_pixel_clock(unsigned int pixclock)
 	}
 
 	/* Pixel Clock configuration */
-	pr_debug("DIU: Bus Frequency = %d\n", get_busfreq());
-	speed_ccb = get_busfreq();
+	speed_ccb = fsl_get_sys_freq();
 
 	/* Calculate the pixel clock with the smallest error */
 	/* calculate the following in steps to avoid overflow */
-- 
cgit v1.2.3


From c281739f5988af1f86ef06e92485aec25b8c8c4f Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Thu, 24 Mar 2011 16:43:52 -0500
Subject: powerpc/p1022ds: fix broken mpic timer node

There is no hardware interrupt 0xf7.  But now we can express the timer
interrupt using 4-cell interrupts.  This requires converting all of the
other interrupt specifiers in the tree as well.

Also add the second timer group, and fix the reg property to only
describe the timer registers.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/boot/dts/p1022ds.dts | 106 +++++++++++++++++++++-----------------
 1 file changed, 59 insertions(+), 47 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/boot/dts/p1022ds.dts b/arch/powerpc/boot/dts/p1022ds.dts
index 59ef405c1c91..4f685a779f4c 100644
--- a/arch/powerpc/boot/dts/p1022ds.dts
+++ b/arch/powerpc/boot/dts/p1022ds.dts
@@ -52,7 +52,7 @@
 		#size-cells = <1>;
 		compatible = "fsl,p1022-elbc", "fsl,elbc", "simple-bus";
 		reg = <0 0xffe05000 0 0x1000>;
-		interrupts = <19 2>;
+		interrupts = <19 2 0 0>;
 
 		ranges = <0x0 0x0 0xf 0xe8000000 0x08000000
 			  0x1 0x0 0xf 0xe0000000 0x08000000
@@ -157,7 +157,7 @@
 			 * IRQ8 is generated if the "EVENT" switch is pressed
 			 * and PX_CTL[EVESEL] is set to 00.
 			 */
-			interrupts = <8 8>;
+			interrupts = <8 8 0 0>;
 		};
 	};
 
@@ -178,13 +178,13 @@
 		ecm@1000 {
 			compatible = "fsl,p1022-ecm", "fsl,ecm";
 			reg = <0x1000 0x1000>;
-			interrupts = <16 2>;
+			interrupts = <16 2 0 0>;
 		};
 
 		memory-controller@2000 {
 			compatible = "fsl,p1022-memory-controller";
 			reg = <0x2000 0x1000>;
-			interrupts = <16 2>;
+			interrupts = <16 2 0 0>;
 		};
 
 		i2c@3000 {
@@ -193,7 +193,7 @@
 			cell-index = <0>;
 			compatible = "fsl-i2c";
 			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
+			interrupts = <43 2 0 0>;
 			dfsrr;
 		};
 
@@ -203,7 +203,7 @@
 			cell-index = <1>;
 			compatible = "fsl-i2c";
 			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
+			interrupts = <43 2 0 0>;
 			dfsrr;
 
 			wm8776:codec@1a {
@@ -220,7 +220,7 @@
 			compatible = "ns16550";
 			reg = <0x4500 0x100>;
 			clock-frequency = <0>;
-			interrupts = <42 2>;
+			interrupts = <42 2 0 0>;
 		};
 
 		serial1: serial@4600 {
@@ -229,7 +229,7 @@
 			compatible = "ns16550";
 			reg = <0x4600 0x100>;
 			clock-frequency = <0>;
-			interrupts = <42 2>;
+			interrupts = <42 2 0 0>;
 		};
 
 		spi@7000 {
@@ -238,7 +238,7 @@
 			#size-cells = <0>;
 			compatible = "fsl,espi";
 			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
+			interrupts = <59 0x2 0 0>;
 			espi,num-ss-bits = <4>;
 			mode = "cpu";
 
@@ -275,7 +275,7 @@
 			compatible = "fsl,mpc8610-ssi";
 			cell-index = <0>;
 			reg = <0x15000 0x100>;
-			interrupts = <75 2>;
+			interrupts = <75 2 0 0>;
 			fsl,mode = "i2s-slave";
 			codec-handle = <&wm8776>;
 			fsl,playback-dma = <&dma00>;
@@ -294,25 +294,25 @@
 				compatible = "fsl,ssi-dma-channel";
 				reg = <0x0 0x80>;
 				cell-index = <0>;
-				interrupts = <76 2>;
+				interrupts = <76 2 0 0>;
 			};
 			dma01: dma-channel@80 {
 				compatible = "fsl,ssi-dma-channel";
 				reg = <0x80 0x80>;
 				cell-index = <1>;
-				interrupts = <77 2>;
+				interrupts = <77 2 0 0>;
 			};
 			dma-channel@100 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x100 0x80>;
 				cell-index = <2>;
-				interrupts = <78 2>;
+				interrupts = <78 2 0 0>;
 			};
 			dma-channel@180 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x180 0x80>;
 				cell-index = <3>;
-				interrupts = <79 2>;
+				interrupts = <79 2 0 0>;
 			};
 		};
 
@@ -320,7 +320,7 @@
 			#gpio-cells = <2>;
 			compatible = "fsl,mpc8572-gpio";
 			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
+			interrupts = <47 0x2 0 0>;
 			gpio-controller;
 		};
 
@@ -329,7 +329,7 @@
 			reg = <0x20000 0x1000>;
 			cache-line-size = <32>;	// 32 bytes
 			cache-size = <0x40000>; // L2, 256K
-			interrupts = <16 2>;
+			interrupts = <16 2 0 0>;
 		};
 
 		dma@21300 {
@@ -343,25 +343,25 @@
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x0 0x80>;
 				cell-index = <0>;
-				interrupts = <20 2>;
+				interrupts = <20 2 0 0>;
 			};
 			dma-channel@80 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x80 0x80>;
 				cell-index = <1>;
-				interrupts = <21 2>;
+				interrupts = <21 2 0 0>;
 			};
 			dma-channel@100 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x100 0x80>;
 				cell-index = <2>;
-				interrupts = <22 2>;
+				interrupts = <22 2 0 0>;
 			};
 			dma-channel@180 {
 				compatible = "fsl,eloplus-dma-channel";
 				reg = <0x180 0x80>;
 				cell-index = <3>;
-				interrupts = <23 2>;
+				interrupts = <23 2 0 0>;
 			};
 		};
 
@@ -370,7 +370,7 @@
 			#size-cells = <0>;
 			compatible = "fsl-usb2-dr";
 			reg = <0x22000 0x1000>;
-			interrupts = <28 0x2>;
+			interrupts = <28 0x2 0 0>;
 			phy_type = "ulpi";
 		};
 
@@ -381,11 +381,11 @@
 			reg = <0x24000 0x1000 0xb0030 0x4>;
 
 			phy0: ethernet-phy@0 {
-				interrupts = <3 1>;
+				interrupts = <3 1 0 0>;
 				reg = <0x1>;
 			};
 			phy1: ethernet-phy@1 {
-				interrupts = <9 1>;
+				interrupts = <9 1 0 0>;
 				reg = <0x2>;
 			};
 		};
@@ -416,13 +416,13 @@
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0xB0000 0x1000>;
-				interrupts = <29 2 30 2 34 2>;
+				interrupts = <29 2 0 0 30 2 0 0 34 2 0 0>;
 			};
 			queue-group@1{
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0xB4000 0x1000>;
-				interrupts = <17 2 18 2 24 2>;
+				interrupts = <17 2 0 0 18 2 0 0 24 2 0 0>;
 			};
 		};
 
@@ -443,20 +443,20 @@
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0xB1000 0x1000>;
-				interrupts = <35 2 36 2 40 2>;
+				interrupts = <35 2 0 0 36 2 0 0 40 2 0 0>;
 			};
 			queue-group@1{
 				#address-cells = <1>;
 				#size-cells = <1>;
 				reg = <0xB5000 0x1000>;
-				interrupts = <51 2 52 2 67 2>;
+				interrupts = <51 2 0 0 52 2 0 0 67 2 0 0>;
 			};
 		};
 
 		sdhci@2e000 {
 			compatible = "fsl,p1022-esdhc", "fsl,esdhc";
 			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
+			interrupts = <72 0x2 0 0>;
 			fsl,sdhci-auto-cmd12;
 			/* Filled in by U-Boot */
 			clock-frequency = <0>;
@@ -467,7 +467,7 @@
 				     "fsl,sec2.4", "fsl,sec2.2", "fsl,sec2.1",
 				     "fsl,sec2.0";
 			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
+			interrupts = <45 2 0 0 58 2 0 0>;
 			fsl,num-channels = <4>;
 			fsl,channel-fifo-len = <24>;
 			fsl,exec-units-mask = <0x97c>;
@@ -478,14 +478,14 @@
 			compatible = "fsl,p1022-sata", "fsl,pq-sata-v2";
 			reg = <0x18000 0x1000>;
 			cell-index = <1>;
-			interrupts = <74 0x2>;
+			interrupts = <74 0x2 0 0>;
 		};
 
 		sata@19000 {
 			compatible = "fsl,p1022-sata", "fsl,pq-sata-v2";
 			reg = <0x19000 0x1000>;
 			cell-index = <2>;
-			interrupts = <41 0x2>;
+			interrupts = <41 0x2 0 0>;
 		};
 
 		power@e0070{
@@ -496,21 +496,33 @@
 		display@10000 {
 			compatible = "fsl,diu", "fsl,p1022-diu";
 			reg = <0x10000 1000>;
-			interrupts = <64 2>;
+			interrupts = <64 2 0 0>;
 		};
 
 		timer@41100 {
 			compatible = "fsl,mpic-global-timer";
-			reg = <0x41100 0x204>;
-			interrupts = <0xf7 0x2>;
+			reg = <0x41100 0x100 0x41300 4>;
+			interrupts = <0 0 3 0
+			              1 0 3 0
+			              2 0 3 0
+			              3 0 3 0>;
+		};
+
+		timer@42100 {
+			compatible = "fsl,mpic-global-timer";
+			reg = <0x42100 0x100 0x42300 4>;
+			interrupts = <4 0 3 0
+			              5 0 3 0
+			              6 0 3 0
+			              7 0 3 0>;
 		};
 
 		mpic: pic@40000 {
 			interrupt-controller;
 			#address-cells = <0>;
-			#interrupt-cells = <2>;
+			#interrupt-cells = <4>;
 			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
+			compatible = "fsl,mpic";
 			device_type = "open-pic";
 		};
 
@@ -519,14 +531,14 @@
 			reg = <0x41600 0x80>;
 			msi-available-ranges = <0 0x100>;
 			interrupts = <
-				0xe0 0
-				0xe1 0
-				0xe2 0
-				0xe3 0
-				0xe4 0
-				0xe5 0
-				0xe6 0
-				0xe7 0>;
+				0xe0 0 0 0
+				0xe1 0 0 0
+				0xe2 0 0 0
+				0xe3 0 0 0
+				0xe4 0 0 0
+				0xe5 0 0 0
+				0xe6 0 0 0
+				0xe7 0 0 0>;
 		};
 
 		global-utilities@e0000 {	//global utilities block
@@ -547,7 +559,7 @@
 		ranges = <0x2000000 0x0 0xa0000000 0xc 0x20000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0xf 0xffc10000 0x0 0x10000>;
 		clock-frequency = <33333333>;
-		interrupts = <16 2>;
+		interrupts = <16 2 0 0>;
 		interrupt-map-mask = <0xf800 0 0 7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
@@ -582,7 +594,7 @@
 		ranges = <0x2000000 0x0 0xc0000000 0xc 0x40000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0xf 0xffc20000 0x0 0x10000>;
 		clock-frequency = <33333333>;
-		interrupts = <16 2>;
+		interrupts = <16 2 0 0>;
 		interrupt-map-mask = <0xf800 0 0 7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
@@ -618,7 +630,7 @@
 		ranges = <0x2000000 0x0 0x80000000 0xc 0x00000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0xf 0xffc00000 0x0 0x10000>;
 		clock-frequency = <33333333>;
-		interrupts = <16 2>;
+		interrupts = <16 2 0 0>;
 		interrupt-map-mask = <0xf800 0 0 7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
-- 
cgit v1.2.3


From 22d168ce60272ca112e86e58c5ebde82f20f9c83 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Thu, 24 Mar 2011 16:43:54 -0500
Subject: powerpc/mpic: parse 4-cell intspec types other than zero

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/include/asm/mpic.h |  2 ++
 arch/powerpc/sysdev/mpic.c      | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index 7005ee0b074d..25a0cb3a79f3 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -371,6 +371,8 @@ struct mpic
  * NOTE: This flag trumps MPIC_WANTS_RESET.
  */
 #define MPIC_NO_RESET			0x00004000
+/* Freescale MPIC (compatible includes "fsl,mpic") */
+#define MPIC_FSL			0x00008000
 
 /* MPIC HW modification ID */
 #define MPIC_REGSET_MASK		0xf0000000
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 824a94fc413b..0a3c1c20115c 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -6,6 +6,7 @@
  *  with various broken implementations of this HW.
  *
  *  Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ *  Copyright 2010-2011 Freescale Semiconductor, Inc.
  *
  *  This file is subject to the terms and conditions of the GNU General Public
  *  License.  See the file COPYING in the main directory of this archive
@@ -1023,6 +1024,7 @@ static int mpic_host_xlate(struct irq_host *h, struct device_node *ct,
 			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 
 {
+	struct mpic *mpic = h->host_data;
 	static unsigned char map_mpic_senses[4] = {
 		IRQ_TYPE_EDGE_RISING,
 		IRQ_TYPE_LEVEL_LOW,
@@ -1031,7 +1033,38 @@ static int mpic_host_xlate(struct irq_host *h, struct device_node *ct,
 	};
 
 	*out_hwirq = intspec[0];
-	if (intsize > 1) {
+	if (intsize >= 4 && (mpic->flags & MPIC_FSL)) {
+		/*
+		 * Freescale MPIC with extended intspec:
+		 * First two cells are as usual.  Third specifies
+		 * an "interrupt type".  Fourth is type-specific data.
+		 *
+		 * See Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
+		 */
+		switch (intspec[2]) {
+		case 0:
+		case 1: /* no EISR/EIMR support for now, treat as shared IRQ */
+			break;
+		case 2:
+			if (intspec[0] >= ARRAY_SIZE(mpic->ipi_vecs))
+				return -EINVAL;
+
+			*out_hwirq = mpic->ipi_vecs[intspec[0]];
+			break;
+		case 3:
+			if (intspec[0] >= ARRAY_SIZE(mpic->timer_vecs))
+				return -EINVAL;
+
+			*out_hwirq = mpic->timer_vecs[intspec[0]];
+			break;
+		default:
+			pr_debug("%s: unknown irq type %u\n",
+				 __func__, intspec[2]);
+			return -EINVAL;
+		}
+
+		*out_flags = map_mpic_senses[intspec[1] & 3];
+	} else if (intsize > 1) {
 		u32 mask = 0x3;
 
 		/* Apple invented a new race of encoding on machines with
@@ -1130,6 +1163,8 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	/* Check for "big-endian" in device-tree */
 	if (node && of_get_property(node, "big-endian", NULL) != NULL)
 		mpic->flags |= MPIC_BIG_ENDIAN;
+	if (node && of_device_is_compatible(node, "fsl,mpic"))
+		mpic->flags |= MPIC_FSL;
 
 	/* Look for protected sources */
 	if (node) {
-- 
cgit v1.2.3


From ea94187face757e723aa461a60698ca43c09fbb9 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Thu, 24 Mar 2011 16:43:55 -0500
Subject: powerpc/mpic: add the mpic global timer support

Add support for MPIC timers as requestable interrupt sources.

Based on http://patchwork.ozlabs.org/patch/20941/ by Dave Liu.

Signed-off-by: Dave Liu <daveliu@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/include/asm/mpic.h |  3 +-
 arch/powerpc/sysdev/mpic.c      | 92 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 88 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index 25a0cb3a79f3..664bee6622e7 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -263,6 +263,7 @@ struct mpic
 #ifdef CONFIG_SMP
 	struct irq_chip		hc_ipi;
 #endif
+	struct irq_chip		hc_tm;
 	const char		*name;
 	/* Flags */
 	unsigned int		flags;
@@ -281,7 +282,7 @@ struct mpic
 
 	/* vector numbers used for internal sources (ipi/timers) */
 	unsigned int		ipi_vecs[4];
-	unsigned int		timer_vecs[4];
+	unsigned int		timer_vecs[8];
 
 	/* Spurious vector to program into unused sources */
 	unsigned int		spurious_vec;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 0a3c1c20115c..0e47bce5f696 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -219,6 +219,28 @@ static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 valu
 	_mpic_write(mpic->reg_type, &mpic->gregs, offset, value);
 }
 
+static inline u32 _mpic_tm_read(struct mpic *mpic, unsigned int tm)
+{
+	unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) +
+			      ((tm & 3) * MPIC_INFO(TIMER_STRIDE));
+
+	if (tm >= 4)
+		offset += 0x1000 / 4;
+
+	return _mpic_read(mpic->reg_type, &mpic->tmregs, offset);
+}
+
+static inline void _mpic_tm_write(struct mpic *mpic, unsigned int tm, u32 value)
+{
+	unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) +
+			      ((tm & 3) * MPIC_INFO(TIMER_STRIDE));
+
+	if (tm >= 4)
+		offset += 0x1000 / 4;
+
+	_mpic_write(mpic->reg_type, &mpic->tmregs, offset, value);
+}
+
 static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg)
 {
 	unsigned int cpu = mpic_processor_id(mpic);
@@ -269,6 +291,8 @@ static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no,
 #define mpic_write(b,r,v)	_mpic_write(mpic->reg_type,&(b),(r),(v))
 #define mpic_ipi_read(i)	_mpic_ipi_read(mpic,(i))
 #define mpic_ipi_write(i,v)	_mpic_ipi_write(mpic,(i),(v))
+#define mpic_tm_read(i)		_mpic_tm_read(mpic,(i))
+#define mpic_tm_write(i,v)	_mpic_tm_write(mpic,(i),(v))
 #define mpic_cpu_read(i)	_mpic_cpu_read(mpic,(i))
 #define mpic_cpu_write(i,v)	_mpic_cpu_write(mpic,(i),(v))
 #define mpic_irq_read(s,r)	_mpic_irq_read(mpic,(s),(r))
@@ -625,6 +649,13 @@ static unsigned int mpic_is_ipi(struct mpic *mpic, unsigned int irq)
 	return (src >= mpic->ipi_vecs[0] && src <= mpic->ipi_vecs[3]);
 }
 
+/* Determine if the linux irq is a timer */
+static unsigned int mpic_is_tm(struct mpic *mpic, unsigned int irq)
+{
+	unsigned int src = virq_to_hw(irq);
+
+	return (src >= mpic->timer_vecs[0] && src <= mpic->timer_vecs[7]);
+}
 
 /* Convert a cpu mask from logical to physical cpu numbers. */
 static inline u32 mpic_physmask(u32 cpumask)
@@ -811,6 +842,25 @@ static void mpic_end_ipi(struct irq_data *d)
 
 #endif /* CONFIG_SMP */
 
+static void mpic_unmask_tm(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0];
+
+	DBG("%s: enable_tm: %d (tm %d)\n", mpic->name, irq, src);
+	mpic_tm_write(src, mpic_tm_read(src) & ~MPIC_VECPRI_MASK);
+	mpic_tm_read(src);
+}
+
+static void mpic_mask_tm(struct irq_data *d)
+{
+	struct mpic *mpic = mpic_from_irq_data(d);
+	unsigned int src = virq_to_hw(d->irq) - mpic->timer_vecs[0];
+
+	mpic_tm_write(src, mpic_tm_read(src) | MPIC_VECPRI_MASK);
+	mpic_tm_read(src);
+}
+
 int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
 		      bool force)
 {
@@ -941,6 +991,12 @@ static struct irq_chip mpic_ipi_chip = {
 };
 #endif /* CONFIG_SMP */
 
+static struct irq_chip mpic_tm_chip = {
+	.irq_mask	= mpic_mask_tm,
+	.irq_unmask	= mpic_unmask_tm,
+	.irq_eoi	= mpic_end_irq,
+};
+
 #ifdef CONFIG_MPIC_U3_HT_IRQS
 static struct irq_chip mpic_irq_ht_chip = {
 	.irq_startup	= mpic_startup_ht_irq,
@@ -984,6 +1040,16 @@ static int mpic_host_map(struct irq_host *h, unsigned int virq,
 	}
 #endif /* CONFIG_SMP */
 
+	if (hw >= mpic->timer_vecs[0] && hw <= mpic->timer_vecs[7]) {
+		WARN_ON(!(mpic->flags & MPIC_PRIMARY));
+
+		DBG("mpic: mapping as timer\n");
+		irq_set_chip_data(virq, mpic);
+		irq_set_chip_and_handler(virq, &mpic->hc_tm,
+					 handle_fasteoi_irq);
+		return 0;
+	}
+
 	if (hw >= mpic->irq_count)
 		return -EINVAL;
 
@@ -1140,6 +1206,9 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	mpic->hc_ipi.name = name;
 #endif /* CONFIG_SMP */
 
+	mpic->hc_tm = mpic_tm_chip;
+	mpic->hc_tm.name = name;
+
 	mpic->flags = flags;
 	mpic->isu_size = isu_size;
 	mpic->irq_count = irq_count;
@@ -1150,10 +1219,14 @@ struct mpic * __init mpic_alloc(struct device_node *node,
 	else
 		intvec_top = 255;
 
-	mpic->timer_vecs[0] = intvec_top - 8;
-	mpic->timer_vecs[1] = intvec_top - 7;
-	mpic->timer_vecs[2] = intvec_top - 6;
-	mpic->timer_vecs[3] = intvec_top - 5;
+	mpic->timer_vecs[0] = intvec_top - 12;
+	mpic->timer_vecs[1] = intvec_top - 11;
+	mpic->timer_vecs[2] = intvec_top - 10;
+	mpic->timer_vecs[3] = intvec_top - 9;
+	mpic->timer_vecs[4] = intvec_top - 8;
+	mpic->timer_vecs[5] = intvec_top - 7;
+	mpic->timer_vecs[6] = intvec_top - 6;
+	mpic->timer_vecs[7] = intvec_top - 5;
 	mpic->ipi_vecs[0]   = intvec_top - 4;
 	mpic->ipi_vecs[1]   = intvec_top - 3;
 	mpic->ipi_vecs[2]   = intvec_top - 2;
@@ -1356,15 +1429,17 @@ void __init mpic_init(struct mpic *mpic)
 	/* Set current processor priority to max */
 	mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf);
 
-	/* Initialize timers: just disable them all */
+	/* Initialize timers to our reserved vectors and mask them for now */
 	for (i = 0; i < 4; i++) {
 		mpic_write(mpic->tmregs,
 			   i * MPIC_INFO(TIMER_STRIDE) +
-			   MPIC_INFO(TIMER_DESTINATION), 0);
+			   MPIC_INFO(TIMER_DESTINATION),
+			   1 << hard_smp_processor_id());
 		mpic_write(mpic->tmregs,
 			   i * MPIC_INFO(TIMER_STRIDE) +
 			   MPIC_INFO(TIMER_VECTOR_PRI),
 			   MPIC_VECPRI_MASK |
+			   (9 << MPIC_VECPRI_PRIORITY_SHIFT) |
 			   (mpic->timer_vecs[0] + i));
 	}
 
@@ -1473,6 +1548,11 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
 			~MPIC_VECPRI_PRIORITY_MASK;
 		mpic_ipi_write(src - mpic->ipi_vecs[0],
 			       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+	} else if (mpic_is_tm(mpic, irq)) {
+		reg = mpic_tm_read(src - mpic->timer_vecs[0]) &
+			~MPIC_VECPRI_PRIORITY_MASK;
+		mpic_tm_write(src - mpic->timer_vecs[0],
+			      reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
 	} else {
 		reg = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI))
 			& ~MPIC_VECPRI_PRIORITY_MASK;
-- 
cgit v1.2.3


From 5e8393ab311d2c34f2965be40ebec99c772284ab Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Tue, 10 May 2011 13:01:47 -0500
Subject: powerpc/e5500: add networking to defconfig

Even though support for the p5020's on-chip ethernet is not yet upstream,
it is not appropriate to disable all networking support (including
loopback, unix domain sockets, external ethernet devices, etc) in the
defconfig.  The networking settings are taken from mpc85xx_smp_defconfig,
minus the drivers for ethernet devices not found on any current e5500
chip.

The other changes are the result of running "make savedefconfig".

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/configs/e55xx_smp_defconfig | 38 ++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/configs/e55xx_smp_defconfig b/arch/powerpc/configs/e55xx_smp_defconfig
index 9fa1613e5e2b..f4c5780ea747 100644
--- a/arch/powerpc/configs/e55xx_smp_defconfig
+++ b/arch/powerpc/configs/e55xx_smp_defconfig
@@ -6,10 +6,10 @@ CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_SPARSE_IRQ=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_BLK_DEV_INITRD=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
@@ -25,8 +25,32 @@ CONFIG_P5020_DS=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BINFMT_MISC=m
-CONFIG_SPARSE_IRQ=y
 # CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NET_IPIP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_ARPD=y
+CONFIG_INET_ESP=y
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_IPV6=y
+CONFIG_IP_SCTP=m
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_LOOP=y
@@ -34,6 +58,9 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=131072
 CONFIG_MISC_DEVICES=y
 CONFIG_EEPROM_LEGACY=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=y
+CONFIG_NET_ETHERNET=y
 CONFIG_INPUT_FF_MEMLESS=m
 # CONFIG_INPUT_MOUSEDEV is not set
 # CONFIG_INPUT_KEYBOARD is not set
@@ -64,7 +91,6 @@ CONFIG_NLS=y
 CONFIG_NLS_UTF8=m
 CONFIG_CRC_T10DIF=y
 CONFIG_CRC_ITU_T=m
-CONFIG_LIBCRC32C=m
 CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
@@ -74,12 +100,6 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_VIRQ_DEBUG=y
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_SHA1=m
-CONFIG_CRYPTO_DES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_TALITOS=y
-- 
cgit v1.2.3


From b637cf7bf82d0692609821cd84e3a345e56cd96c Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Tue, 10 May 2011 13:02:06 -0500
Subject: powerpc/fsl: enable verbose bug output

This debug option has no overhead other than a slight increase in
kernel size, and makes bug reports more useful.  While some end users
may prefer to save the space, as a default on a kernel config aimed
primarily at development on reference boards, it should be enabled.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/configs/83xx/mpc8313_rdb_defconfig  | 1 -
 arch/powerpc/configs/83xx/mpc8315_rdb_defconfig  | 1 -
 arch/powerpc/configs/85xx/mpc8540_ads_defconfig  | 1 -
 arch/powerpc/configs/85xx/mpc8560_ads_defconfig  | 1 -
 arch/powerpc/configs/85xx/mpc85xx_cds_defconfig  | 1 -
 arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig | 1 -
 arch/powerpc/configs/e55xx_smp_defconfig         | 1 -
 arch/powerpc/configs/mpc85xx_defconfig           | 1 -
 arch/powerpc/configs/mpc85xx_smp_defconfig       | 1 -
 arch/powerpc/configs/mpc86xx_defconfig           | 1 -
 10 files changed, 10 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
index c683bce4c26e..126ef1b08a01 100644
--- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
@@ -104,7 +104,6 @@ CONFIG_ROOT_NFS=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
index a721cd3d793f..abcf00ad939e 100644
--- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
@@ -101,7 +101,6 @@ CONFIG_ROOT_NFS=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
index 55e0725500dc..11662c217ac0 100644
--- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
@@ -58,7 +58,6 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
index d724095530a6..ebe9b30b0721 100644
--- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
+++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
@@ -59,7 +59,6 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
index 4b44beaa21ae..eb25229b387a 100644
--- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
+++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
@@ -63,7 +63,6 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_MUTEXES=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
index b614508d6fd2..f51c7ebc181e 100644
--- a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
+++ b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
@@ -168,7 +168,6 @@ CONFIG_MAC_PARTITION=y
 CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/e55xx_smp_defconfig b/arch/powerpc/configs/e55xx_smp_defconfig
index f4c5780ea747..d32283555b53 100644
--- a/arch/powerpc/configs/e55xx_smp_defconfig
+++ b/arch/powerpc/configs/e55xx_smp_defconfig
@@ -95,7 +95,6 @@ CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig
index c06a86c33098..96b89df7752a 100644
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ b/arch/powerpc/configs/mpc85xx_defconfig
@@ -204,7 +204,6 @@ CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
index 942ced90557c..de65841aa04e 100644
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -206,7 +206,6 @@ CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig
index 038a308cbfc4..a1cc8179e9fd 100644
--- a/arch/powerpc/configs/mpc86xx_defconfig
+++ b/arch/powerpc/configs/mpc86xx_defconfig
@@ -171,7 +171,6 @@ CONFIG_MAC_PARTITION=y
 CONFIG_CRC_T10DIF=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
-- 
cgit v1.2.3


From b6e4df4dab86e3cd2475748c49aac16dadb8330e Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <prabhakar@freescale.com>
Date: Thu, 7 Apr 2011 14:40:55 +0530
Subject: powerpc/85xx: P1020 DTS : re-organize dts files

Creates P1020si.dtsi, containing information for the P1020 SoC. Modifies dts
files for P1020 based systems to use dtsi file

Signed-off-by: Prabhakar Kushwaha <prabhakar@freescale.com>
Acked-by: Grant Likely <grant.likelY@secretlab.ca>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/boot/dts/p1020rdb.dts | 316 +------------------------------
 arch/powerpc/boot/dts/p1020si.dtsi | 377 +++++++++++++++++++++++++++++++++++++
 2 files changed, 380 insertions(+), 313 deletions(-)
 create mode 100644 arch/powerpc/boot/dts/p1020si.dtsi

(limited to 'arch')

diff --git a/arch/powerpc/boot/dts/p1020rdb.dts b/arch/powerpc/boot/dts/p1020rdb.dts
index e0668f877794..7ed4793489ce 100644
--- a/arch/powerpc/boot/dts/p1020rdb.dts
+++ b/arch/powerpc/boot/dts/p1020rdb.dts
@@ -9,12 +9,11 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p1020si.dtsi"
+
 / {
-	model = "fsl,P1020";
+	model = "fsl,P1020RDB";
 	compatible = "fsl,P1020RDB";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		serial0 = &serial0;
@@ -26,34 +25,11 @@
 		pci1 = &pci1;
 	};
 
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P1020@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			next-level-cache = <&L2>;
-		};
-
-		PowerPC,P1020@1 {
-			device_type = "cpu";
-			reg = <0x1>;
-			next-level-cache = <&L2>;
-		};
-	};
-
 	memory {
 		device_type = "memory";
 	};
 
 	localbus@ffe05000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
-		compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus";
-		reg = <0 0xffe05000 0 0x1000>;
-		interrupts = <19 2>;
-		interrupt-parent = <&mpic>;
 
 		/* NOR, NAND Flashes and Vitesse 5 port L2 switch */
 		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
@@ -165,88 +141,14 @@
 	};
 
 	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p1020-immr", "simple-bus";
-		ranges = <0x0  0x0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <12>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,p1020-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <16 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,p1020-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
-		};
-
 		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
 			rtc@68 {
 				compatible = "dallas,ds1339";
 				reg = <0x68>;
 			};
 		};
 
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
 		spi@7000 {
-			cell-index = <0>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,espi";
-			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
-			interrupt-parent = <&mpic>;
-			mode = "cpu";
 
 			fsl_m25p80@0 {
 				#address-cells = <1>;
@@ -294,66 +196,7 @@
 			};
 		};
 
-		gpio: gpio-controller@f000 {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8572-gpio";
-			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
-			interrupt-parent = <&mpic>;
-			gpio-controller;
-		};
-
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,p1020-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <32>;	// 32 bytes
-			cache-size = <0x40000>; // L2,256K
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
-		};
-
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
-			};
-		};
-
 		mdio@24000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,etsec2-mdio";
-			reg = <0x24000 0x1000 0xb0030 0x4>;
 
 			phy0: ethernet-phy@0 {
 				interrupt-parent = <&mpic>;
@@ -369,10 +212,6 @@
 		};
 
 		mdio@25000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,etsec2-tbi";
-			reg = <0x25000 0x1000 0xb1030 0x4>;
 
 			tbi0: tbi-phy@11 {
 				reg = <0x11>;
@@ -381,97 +220,25 @@
 		};
 
 		enet0: ethernet@b0000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "fsl,etsec2";
-			fsl,num_rx_queues = <0x8>;
-			fsl,num_tx_queues = <0x8>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupt-parent = <&mpic>;
 			fixed-link = <1 1 1000 0 0>;
 			phy-connection-type = "rgmii-id";
 
-			queue-group@0 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb0000 0x1000>;
-				interrupts = <29 2 30 2 34 2>;
-			};
-
-			queue-group@1 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb4000 0x1000>;
-				interrupts = <17 2 18 2 24 2>;
-			};
 		};
 
 		enet1: ethernet@b1000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "fsl,etsec2";
-			fsl,num_rx_queues = <0x8>;
-			fsl,num_tx_queues = <0x8>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupt-parent = <&mpic>;
 			phy-handle = <&phy0>;
 			tbi-handle = <&tbi0>;
 			phy-connection-type = "sgmii";
 
-			queue-group@0 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb1000 0x1000>;
-				interrupts = <35 2 36 2 40 2>;
-			};
-
-			queue-group@1 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb5000 0x1000>;
-				interrupts = <51 2 52 2 67 2>;
-			};
 		};
 
 		enet2: ethernet@b2000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "fsl,etsec2";
-			fsl,num_rx_queues = <0x8>;
-			fsl,num_tx_queues = <0x8>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupt-parent = <&mpic>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 
-			queue-group@0 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb2000 0x1000>;
-				interrupts = <31 2 32 2 33 2>;
-			};
-
-			queue-group@1 {
-				#address-cells = <1>;
-				#size-cells = <1>;
-				reg = <0xb6000 0x1000>;
-				interrupts = <25 2 26 2 27 2>;
-			};
 		};
 
 		usb@22000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x22000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <28 0x2>;
 			phy_type = "ulpi";
 		};
 
@@ -481,82 +248,15 @@
 		   it enables USB2. OTOH, U-Boot does create a new node
 		   when there isn't any. So, just comment it out.
 		usb@23000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x23000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <46 0x2>;
 			phy_type = "ulpi";
 		};
 		*/
 
-		sdhci@2e000 {
-			compatible = "fsl,p1020-esdhc", "fsl,esdhc";
-			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
-			interrupt-parent = <&mpic>;
-			/* Filled in by U-Boot */
-			clock-frequency = <0>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
-				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xbfe>;
-			fsl,descriptor-types-mask = <0x3ab0ebf>;
-		};
-
-		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
-		};
-
-		msi@41600 {
-			compatible = "fsl,p1020-msi", "fsl,mpic-msi";
-			reg = <0x41600 0x80>;
-			msi-available-ranges = <0 0x100>;
-			interrupts = <
-				0xe0 0
-				0xe1 0
-				0xe2 0
-				0xe3 0
-				0xe4 0
-				0xe5 0
-				0xe6 0
-				0xe7 0>;
-			interrupt-parent = <&mpic>;
-		};
-
-		global-utilities@e0000 {	//global utilities block
-			compatible = "fsl,p1020-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
-		};
 	};
 
 	pci0: pcie@ffe09000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe09000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <16 2>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
@@ -573,18 +273,8 @@
 	};
 
 	pci1: pcie@ffe0a000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe0a000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <16 2>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p1020si.dtsi b/arch/powerpc/boot/dts/p1020si.dtsi
new file mode 100644
index 000000000000..f6f110096c5f
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020si.dtsi
@@ -0,0 +1,377 @@
+/*
+ * P1020si Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+/ {
+	compatible = "fsl,P1020";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P1020@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+
+		PowerPC,P1020@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	localbus@ffe05000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,p1020-elbc", "fsl,elbc", "simple-bus";
+		reg = <0 0xffe05000 0 0x1000>;
+		interrupts = <19 2>;
+		interrupt-parent = <&mpic>;
+	};
+
+	soc@ffe00000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,p1020-immr", "simple-bus";
+		ranges = <0x0  0x0 0xffe00000 0x100000>;
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,p1020-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <16 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,p1020-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,espi";
+			reg = <0x7000 0x1000>;
+			interrupts = <59 0x2>;
+			interrupt-parent = <&mpic>;
+			mode = "cpu";
+		};
+
+		gpio: gpio-controller@f000 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x100>;
+			interrupts = <47 0x2>;
+			interrupt-parent = <&mpic>;
+			gpio-controller;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,p1020-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x40000>; // L2,256K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		mdio@24000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,etsec2-mdio";
+			reg = <0x24000 0x1000 0xb0030 0x4>;
+
+		};
+
+		mdio@25000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,etsec2-tbi";
+			reg = <0x25000 0x1000 0xb1030 0x4>;
+
+		};
+
+		enet0: ethernet@b0000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "fsl,etsec2";
+			fsl,num_rx_queues = <0x8>;
+			fsl,num_tx_queues = <0x8>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupt-parent = <&mpic>;
+
+			queue-group@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb0000 0x1000>;
+				interrupts = <29 2 30 2 34 2>;
+			};
+
+			queue-group@1 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb4000 0x1000>;
+				interrupts = <17 2 18 2 24 2>;
+			};
+		};
+
+		enet1: ethernet@b1000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "fsl,etsec2";
+			fsl,num_rx_queues = <0x8>;
+			fsl,num_tx_queues = <0x8>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupt-parent = <&mpic>;
+
+			queue-group@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb1000 0x1000>;
+				interrupts = <35 2 36 2 40 2>;
+			};
+
+			queue-group@1 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb5000 0x1000>;
+				interrupts = <51 2 52 2 67 2>;
+			};
+		};
+
+		enet2: ethernet@b2000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "fsl,etsec2";
+			fsl,num_rx_queues = <0x8>;
+			fsl,num_tx_queues = <0x8>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupt-parent = <&mpic>;
+
+			queue-group@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb2000 0x1000>;
+				interrupts = <31 2 32 2 33 2>;
+			};
+
+			queue-group@1 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0xb6000 0x1000>;
+				interrupts = <25 2 26 2 27 2>;
+			};
+		};
+
+		usb@22000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x22000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <28 0x2>;
+		};
+
+		/* USB2 is shared with localbus, so it must be disabled
+		   by default. We can't put 'status = "disabled";' here
+		   since U-Boot doesn't clear the status property when
+		   it enables USB2. OTOH, U-Boot does create a new node
+		   when there isn't any. So, just comment it out.
+		usb@23000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x23000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <46 0x2>;
+			phy_type = "ulpi";
+		};
+		*/
+
+		sdhci@2e000 {
+			compatible = "fsl,p1020-esdhc", "fsl,esdhc";
+			reg = <0x2e000 0x1000>;
+			interrupts = <72 0x2>;
+			interrupt-parent = <&mpic>;
+			/* Filled in by U-Boot */
+			clock-frequency = <0>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
+				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2 58 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0xbfe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		msi@41600 {
+			compatible = "fsl,p1020-msi", "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;
+			interrupts = <
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			compatible = "fsl,p1020-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+	};
+
+	pci0: pcie@ffe09000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe09000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <16 2>;
+	};
+
+	pci1: pcie@ffe0a000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe0a000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <16 2>;
+	};
+};
-- 
cgit v1.2.3


From eb2c5d9965adec2d7cd1946fa39f2dece073dab7 Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <prabhakar@freescale.com>
Date: Fri, 8 Apr 2011 17:57:05 +0530
Subject: powerpc/85xx: P2020 DTS: re-organize dts files

Creates P2020si.dtsi, containing information for P2020 SoC. Modifies dts
files for P2020 based systems to use dtsi file.

Signed-off-by: Prabhakar Kushwaha <prabhakar@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/boot/dts/p2020ds.dts             | 374 ++-----------------------
 arch/powerpc/boot/dts/p2020rdb.dts            | 362 ++----------------------
 arch/powerpc/boot/dts/p2020rdb_camp_core0.dts | 237 +++-------------
 arch/powerpc/boot/dts/p2020rdb_camp_core1.dts | 142 ++++++----
 arch/powerpc/boot/dts/p2020si.dtsi            | 382 ++++++++++++++++++++++++++
 5 files changed, 564 insertions(+), 933 deletions(-)
 create mode 100644 arch/powerpc/boot/dts/p2020si.dtsi

(limited to 'arch')

diff --git a/arch/powerpc/boot/dts/p2020ds.dts b/arch/powerpc/boot/dts/p2020ds.dts
index 11019142813c..2bcf3683d223 100644
--- a/arch/powerpc/boot/dts/p2020ds.dts
+++ b/arch/powerpc/boot/dts/p2020ds.dts
@@ -1,7 +1,7 @@
 /*
  * P2020 DS Device Tree Source
  *
- * Copyright 2009 Freescale Semiconductor Inc.
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -9,12 +9,11 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
 / {
-	model = "fsl,P2020";
+	model = "fsl,P2020DS";
 	compatible = "fsl,P2020DS";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		ethernet0 = &enet0;
@@ -27,35 +26,13 @@
 		pci2 = &pci2;
 	};
 
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P2020@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			next-level-cache = <&L2>;
-		};
-
-		PowerPC,P2020@1 {
-			device_type = "cpu";
-			reg = <0x1>;
-			next-level-cache = <&L2>;
-		};
-	};
 
 	memory {
 		device_type = "memory";
 	};
 
 	localbus@ffe05000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
 		compatible = "fsl,elbc", "simple-bus";
-		reg = <0 0xffe05000 0 0x1000>;
-		interrupts = <19 2>;
-		interrupt-parent = <&mpic>;
-
 		ranges = <0x0 0x0 0x0 0xe8000000 0x08000000
 			  0x1 0x0 0x0 0xe0000000 0x08000000
 			  0x2 0x0 0x0 0xffa00000 0x00040000
@@ -158,352 +135,77 @@
 	};
 
 	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p2020-immr", "simple-bus";
-		ranges = <0x0 0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <12>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,p2020-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <17 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,p2020-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <18 2>;
-		};
-
-		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
 
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		spi@7000 {
-			compatible = "fsl,espi";
-			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
-			interrupt-parent = <&mpic>;
+		usb@22000 {
+			phy_type = "ulpi";
 		};
 
-		dma@c300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0xc300 0x4>;
-			ranges = <0x0 0xc100 0x200>;
-			cell-index = <1>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
+		mdio@24520 {
+			phy0: ethernet-phy@0 {
 				interrupt-parent = <&mpic>;
-				interrupts = <76 2>;
+				interrupts = <3 1>;
+				reg = <0x0>;
 			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
+			phy1: ethernet-phy@1 {
 				interrupt-parent = <&mpic>;
-				interrupts = <77 2>;
+				interrupts = <3 1>;
+				reg = <0x1>;
 			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
+			phy2: ethernet-phy@2 {
 				interrupt-parent = <&mpic>;
-				interrupts = <78 2>;
+				interrupts = <3 1>;
+				reg = <0x2>;
 			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <79 2>;
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
 			};
-		};
 
-		gpio: gpio-controller@f000 {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8572-gpio";
-			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
-			interrupt-parent = <&mpic>;
-			gpio-controller;
 		};
 
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,p2020-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <32>;	// 32 bytes
-			cache-size = <0x80000>; // L2, 512k
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
+		mdio@25520 {
+			tbi1: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
 		};
 
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
+		mdio@26520 {
+			tbi2: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
 			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
-			};
-		};
 
-		usb@22000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x22000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <28 0x2>;
-			phy_type = "ulpi";
 		};
 
 		enet0: ethernet@24000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <0>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x24000 0x1000>;
-			ranges = <0x0 0x24000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <29 2 30 2 34 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "rgmii-id";
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-mdio";
-				reg = <0x520 0x20>;
-
-				phy0: ethernet-phy@0 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x0>;
-				};
-				phy1: ethernet-phy@1 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x1>;
-				};
-				phy2: ethernet-phy@2 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x2>;
-				};
-				tbi0: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
 		};
 
 		enet1: ethernet@25000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x25000 0x1000>;
-			ranges = <0x0 0x25000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <35 2 36 2 40 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi1>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-tbi";
-				reg = <0x520 0x20>;
-
-				tbi1: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
 		};
 
 		enet2: ethernet@26000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <2>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x26000 0x1000>;
-			ranges = <0x0 0x26000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <31 2 32 2 33 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi2>;
 			phy-handle = <&phy2>;
 			phy-connection-type = "rgmii-id";
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-tbi";
-				reg = <0x520 0x20>;
-
-				tbi2: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
-		};
-
-		sdhci@2e000 {
-			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
-			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
-			interrupt-parent = <&mpic>;
-			/* Filled in by U-Boot */
-			clock-frequency = <0>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
-				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xbfe>;
-			fsl,descriptor-types-mask = <0x3ab0ebf>;
 		};
 
-		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
-		};
 
 		msi@41600 {
 			compatible = "fsl,mpic-msi";
-			reg = <0x41600 0x80>;
-			msi-available-ranges = <0 0x100>;
-			interrupts = <
-				0xe0 0
-				0xe1 0
-				0xe2 0
-				0xe3 0
-				0xe4 0
-				0xe5 0
-				0xe6 0
-				0xe7 0>;
-			interrupt-parent = <&mpic>;
-		};
-
-		global-utilities@e0000 {	//global utilities block
-			compatible = "fsl,p2020-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
 		};
 	};
 
 	pci0: pcie@ffe08000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe08000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <24 2>;
 		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
@@ -528,18 +230,8 @@
 	};
 
 	pci1: pcie@ffe09000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe09000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <25 2>;
 		interrupt-map-mask = <0xff00 0x0 0x0 0x7>;
 		interrupt-map = <
 
@@ -667,18 +359,8 @@
 	};
 
 	pci2: pcie@ffe0a000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe0a000 0 0x1000>;
-		bus-range = <0 255>;
 		ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <26 2>;
 		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
 		interrupt-map = <
 			/* IDSEL 0x0 */
diff --git a/arch/powerpc/boot/dts/p2020rdb.dts b/arch/powerpc/boot/dts/p2020rdb.dts
index e2d48fd4416e..60a0a8c725d6 100644
--- a/arch/powerpc/boot/dts/p2020rdb.dts
+++ b/arch/powerpc/boot/dts/p2020rdb.dts
@@ -9,12 +9,11 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
 / {
-	model = "fsl,P2020";
+	model = "fsl,P2020RDB";
 	compatible = "fsl,P2020RDB";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		ethernet0 = &enet0;
@@ -26,34 +25,11 @@
 		pci1 = &pci1;
 	};
 
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P2020@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			next-level-cache = <&L2>;
-		};
-
-		PowerPC,P2020@1 {
-			device_type = "cpu";
-			reg = <0x1>;
-			next-level-cache = <&L2>;
-		};
-	};
-
 	memory {
 		device_type = "memory";
 	};
 
 	localbus@ffe05000 {
-		#address-cells = <2>;
-		#size-cells = <1>;
-		compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus";
-		reg = <0 0xffe05000 0 0x1000>;
-		interrupts = <19 2>;
-		interrupt-parent = <&mpic>;
 
 		/* NOR and NAND Flashes */
 		ranges = <0x0 0x0 0x0 0xef000000 0x01000000
@@ -165,90 +141,16 @@
 	};
 
 	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p2020-immr", "simple-bus";
-		ranges = <0x0  0x0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <12>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,p2020-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <17 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,p2020-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <18 2>;
-		};
-
 		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
 			rtc@68 {
 				compatible = "dallas,ds1339";
 				reg = <0x68>;
 			};
 		};
 
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		serial1: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
-			interrupts = <42 2>;
-			interrupt-parent = <&mpic>;
-		};
+	spi@7000 {
 
-		spi@7000 {
-			cell-index = <0>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,espi";
-			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
-			interrupt-parent = <&mpic>;
-			mode = "cpu";
-
-			fsl_m25p80@0 {
+		fsl_m25p80@0 {
 				#address-cells = <1>;
 				#size-cells = <1>;
 				compatible = "fsl,espi-flash";
@@ -294,254 +196,60 @@
 			};
 		};
 
-		dma@c300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0xc300 0x4>;
-			ranges = <0x0 0xc100 0x200>;
-			cell-index = <1>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <76 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <77 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <78 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <79 2>;
-			};
-		};
-
-		gpio: gpio-controller@f000 {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8572-gpio";
-			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
-			interrupt-parent = <&mpic>;
-			gpio-controller;
-		};
-
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,p2020-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <32>;	// 32 bytes
-			cache-size = <0x80000>; // L2,512K
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
+		usb@22000 {
+			phy_type = "ulpi";
 		};
 
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
+		mdio@24520 {
+			phy0: ethernet-phy@0 {
 				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
+				interrupts = <3 1>;
+				reg = <0x0>;
+				};
+			phy1: ethernet-phy@1 {
 				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
+				interrupts = <3 1>;
+				reg = <0x1>;
+				};
+		};
+
+		mdio@25520 {
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
 			};
 		};
 
-		usb@22000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x22000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <28 0x2>;
-			phy_type = "ulpi";
+		mdio@26520 {
+			status = "disabled";
 		};
 
 		enet0: ethernet@24000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <0>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x24000 0x1000>;
-			ranges = <0x0 0x24000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <29 2 30 2 34 2>;
-			interrupt-parent = <&mpic>;
 			fixed-link = <1 1 1000 0 0>;
 			phy-connection-type = "rgmii-id";
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-mdio";
-				reg = <0x520 0x20>;
-
-				phy0: ethernet-phy@0 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x0>;
-				};
-				phy1: ethernet-phy@1 {
-					interrupt-parent = <&mpic>;
-					interrupts = <3 1>;
-					reg = <0x1>;
-				};
-			};
 		};
 
 		enet1: ethernet@25000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x25000 0x1000>;
-			ranges = <0x0 0x25000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <35 2 36 2 40 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "sgmii";
-
-			mdio@520 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-				compatible = "fsl,gianfar-tbi";
-				reg = <0x520 0x20>;
-
-				tbi0: tbi-phy@11 {
-					reg = <0x11>;
-					device_type = "tbi-phy";
-				};
-			};
 		};
 
 		enet2: ethernet@26000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <2>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x26000 0x1000>;
-			ranges = <0x0 0x26000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <31 2 32 2 33 2>;
-			interrupt-parent = <&mpic>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 		};
 
-		sdhci@2e000 {
-			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
-			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
-			interrupt-parent = <&mpic>;
-			/* Filled in by U-Boot */
-			clock-frequency = <0>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
-				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xbfe>;
-			fsl,descriptor-types-mask = <0x3ab0ebf>;
-		};
-
-		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
-		};
-
-		msi@41600 {
-			compatible = "fsl,p2020-msi", "fsl,mpic-msi";
-			reg = <0x41600 0x80>;
-			msi-available-ranges = <0 0x100>;
-			interrupts = <
-				0xe0 0
-				0xe1 0
-				0xe2 0
-				0xe3 0
-				0xe4 0
-				0xe5 0
-				0xe6 0
-				0xe7 0>;
-			interrupt-parent = <&mpic>;
-		};
+	};
 
-		global-utilities@e0000 {	//global utilities block
-			compatible = "fsl,p2020-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
-		};
+	pci0: pcie@ffe08000 {
+		status = "disabled";
 	};
 
-	pci0: pcie@ffe09000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe09000 0 0x1000>;
-		bus-range = <0 255>;
+	pci1: pcie@ffe09000 {
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <25 2>;
-		pcie@0 {
+			pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
 			#address-cells = <3>;
@@ -556,19 +264,9 @@
 		};
 	};
 
-	pci1: pcie@ffe0a000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe0a000 0 0x1000>;
-		bus-range = <0 255>;
+	pci2: pcie@ffe0a000 {
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <26 2>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
index b69c3a5dc858..72c912fd3dd9 100644
--- a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
+++ b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
@@ -14,12 +14,11 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
 / {
-	model = "fsl,P2020";
+	model = "fsl,P2020RDB";
 	compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		ethernet1 = &enet1;
@@ -29,91 +28,33 @@
 	};
 
 	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P2020@0 {
-			device_type = "cpu";
-			reg = <0x0>;
-			next-level-cache = <&L2>;
+		PowerPC,P2020@1 {
+		status = "disabled";
 		};
+
 	};
 
 	memory {
 		device_type = "memory";
 	};
 
-	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p2020-immr", "simple-bus";
-		ranges = <0x0  0x0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		ecm-law@0 {
-			compatible = "fsl,ecm-law";
-			reg = <0x0 0x1000>;
-			fsl,num-laws = <12>;
-		};
-
-		ecm@1000 {
-			compatible = "fsl,p2020-ecm", "fsl,ecm";
-			reg = <0x1000 0x1000>;
-			interrupts = <17 2>;
-			interrupt-parent = <&mpic>;
-		};
-
-		memory-controller@2000 {
-			compatible = "fsl,p2020-memory-controller";
-			reg = <0x2000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <18 2>;
-		};
+	localbus@ffe05000 {
+		status = "disabled";
+	};
 
+	soc@ffe00000 {
 		i2c@3000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <0>;
-			compatible = "fsl-i2c";
-			reg = <0x3000 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
 			rtc@68 {
 				compatible = "dallas,ds1339";
 				reg = <0x68>;
 			};
 		};
 
-		i2c@3100 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			cell-index = <1>;
-			compatible = "fsl-i2c";
-			reg = <0x3100 0x100>;
-			interrupts = <43 2>;
-			interrupt-parent = <&mpic>;
-			dfsrr;
-		};
-
-		serial0: serial@4500 {
-			cell-index = <0>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4500 0x100>;
-			clock-frequency = <0>;
+		serial1: serial@4600 {
+			status = "disabled";
 		};
 
 		spi@7000 {
-			cell-index = <0>;
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,espi";
-			reg = <0x7000 0x1000>;
-			interrupts = <59 0x2>;
-			interrupt-parent = <&mpic>;
-			mode = "cpu";
 
 			fsl_m25p80@0 {
 				#address-cells = <1>;
@@ -161,76 +102,15 @@
 			};
 		};
 
-		gpio: gpio-controller@f000 {
-			#gpio-cells = <2>;
-			compatible = "fsl,mpc8572-gpio";
-			reg = <0xf000 0x100>;
-			interrupts = <47 0x2>;
-			interrupt-parent = <&mpic>;
-			gpio-controller;
-		};
-
-		L2: l2-cache-controller@20000 {
-			compatible = "fsl,p2020-l2-cache-controller";
-			reg = <0x20000 0x1000>;
-			cache-line-size = <32>;	// 32 bytes
-			cache-size = <0x80000>; // L2,512K
-			interrupt-parent = <&mpic>;
-			interrupts = <16 2>;
-		};
-
-		dma@21300 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			compatible = "fsl,eloplus-dma";
-			reg = <0x21300 0x4>;
-			ranges = <0x0 0x21100 0x200>;
-			cell-index = <0>;
-			dma-channel@0 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x0 0x80>;
-				cell-index = <0>;
-				interrupt-parent = <&mpic>;
-				interrupts = <20 2>;
-			};
-			dma-channel@80 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x80 0x80>;
-				cell-index = <1>;
-				interrupt-parent = <&mpic>;
-				interrupts = <21 2>;
-			};
-			dma-channel@100 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x100 0x80>;
-				cell-index = <2>;
-				interrupt-parent = <&mpic>;
-				interrupts = <22 2>;
-			};
-			dma-channel@180 {
-				compatible = "fsl,eloplus-dma-channel";
-				reg = <0x180 0x80>;
-				cell-index = <3>;
-				interrupt-parent = <&mpic>;
-				interrupts = <23 2>;
-			};
+		dma@c300 {
+			status = "disabled";
 		};
 
 		usb@22000 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl-usb2-dr";
-			reg = <0x22000 0x1000>;
-			interrupt-parent = <&mpic>;
-			interrupts = <28 0x2>;
 			phy_type = "ulpi";
 		};
 
 		mdio@24520 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,gianfar-mdio";
-			reg = <0x24520 0x20>;
 
 			phy0: ethernet-phy@0 {
 				interrupt-parent = <&mpic>;
@@ -245,29 +125,21 @@
 		};
 
 		mdio@25520 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			compatible = "fsl,gianfar-tbi";
-			reg = <0x26520 0x20>;
-
 			tbi0: tbi-phy@11 {
 				reg = <0x11>;
 				device_type = "tbi-phy";
 			};
 		};
 
+		mdio@26520 {
+			status = "disabled";
+		};
+
+		enet0: ethernet@24000 {
+			status = "disabled";
+		};
+
 		enet1: ethernet@25000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <1>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x25000 0x1000>;
-			ranges = <0x0 0x25000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <35 2 36 2 40 2>;
-			interrupt-parent = <&mpic>;
 			tbi-handle = <&tbi0>;
 			phy-handle = <&phy0>;
 			phy-connection-type = "sgmii";
@@ -275,49 +147,12 @@
 		};
 
 		enet2: ethernet@26000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <2>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x26000 0x1000>;
-			ranges = <0x0 0x26000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <31 2 32 2 33 2>;
-			interrupt-parent = <&mpic>;
 			phy-handle = <&phy1>;
 			phy-connection-type = "rgmii-id";
 		};
 
-		sdhci@2e000 {
-			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
-			reg = <0x2e000 0x1000>;
-			interrupts = <72 0x2>;
-			interrupt-parent = <&mpic>;
-			/* Filled in by U-Boot */
-			clock-frequency = <0>;
-		};
-
-		crypto@30000 {
-			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
-				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
-			reg = <0x30000 0x10000>;
-			interrupts = <45 2 58 2>;
-			interrupt-parent = <&mpic>;
-			fsl,num-channels = <4>;
-			fsl,channel-fifo-len = <24>;
-			fsl,exec-units-mask = <0xbfe>;
-			fsl,descriptor-types-mask = <0x3ab0ebf>;
-		};
 
 		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
 			protected-sources = <
 			42 76 77 78 79 /* serial1 , dma2 */
 			29 30 34 26 /* enet0, pci1 */
@@ -326,26 +161,20 @@
 			>;
 		};
 
-		global-utilities@e0000 {
-			compatible = "fsl,p2020-guts";
-			reg = <0xe0000 0x1000>;
-			fsl,has-rstcr;
+		msi@41600 {
+			status = "disabled";
 		};
+
+
 	};
 
-	pci0: pcie@ffe09000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe09000 0 0x1000>;
-		bus-range = <0 255>;
+	pci0: pcie@ffe08000 {
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe09000 {
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <25 2>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
@@ -360,4 +189,8 @@
 				  0x0 0x100000>;
 		};
 	};
+
+	pci2: pcie@ffe0a000 {
+		status = "disabled";
+	};
 };
diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
index 7a31d46c01b0..eb572ccecb3d 100644
--- a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
+++ b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
@@ -15,27 +15,21 @@
  * option) any later version.
  */
 
-/dts-v1/;
+/include/ "p2020si.dtsi"
+
 / {
-	model = "fsl,P2020";
+	model = "fsl,P2020RDB";
 	compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP";
-	#address-cells = <2>;
-	#size-cells = <2>;
 
 	aliases {
 		ethernet0 = &enet0;
-		serial0 = &serial0;
+		serial0 = &serial1;
 		pci1 = &pci1;
 	};
 
 	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		PowerPC,P2020@1 {
-			device_type = "cpu";
-			reg = <0x1>;
-			next-level-cache = <&L2>;
+		PowerPC,P2020@0 {
+		status = "disabled";
 		};
 	};
 
@@ -43,20 +37,37 @@
 		device_type = "memory";
 	};
 
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
 	soc@ffe00000 {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		device_type = "soc";
-		compatible = "fsl,p2020-immr", "simple-bus";
-		ranges = <0x0  0x0 0xffe00000 0x100000>;
-		bus-frequency = <0>;		// Filled out by uboot.
-
-		serial0: serial@4600 {
-			cell-index = <1>;
-			device_type = "serial";
-			compatible = "ns16550";
-			reg = <0x4600 0x100>;
-			clock-frequency = <0>;
+		ecm-law@0 {
+			status = "disabled";
+		};
+
+		ecm@1000 {
+			status = "disabled";
+		};
+
+		memory-controller@2000 {
+			status = "disabled";
+		};
+
+		i2c@3000 {
+			status = "disabled";
+		};
+
+		i2c@3100 {
+			status = "disabled";
+		};
+
+		serial0: serial@4500 {
+			status = "disabled";
+		};
+
+		spi@7000 {
+			status = "disabled";
 		};
 
 		dma@c300 {
@@ -96,6 +107,10 @@
 			};
 		};
 
+		gpio: gpio-controller@f000 {
+			status = "disabled";
+		};
+
 		L2: l2-cache-controller@20000 {
 			compatible = "fsl,p2020-l2-cache-controller";
 			reg = <0x20000 0x1000>;
@@ -104,31 +119,49 @@
 			interrupt-parent = <&mpic>;
 		};
 
+		dma@21300 {
+			status = "disabled";
+		};
+
+		usb@22000 {
+			status = "disabled";
+		};
+
+		mdio@24520 {
+			status = "disabled";
+		};
+
+		mdio@25520 {
+			status = "disabled";
+		};
+
+		mdio@26520 {
+			status = "disabled";
+		};
 
 		enet0: ethernet@24000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
-			cell-index = <0>;
-			device_type = "network";
-			model = "eTSEC";
-			compatible = "gianfar";
-			reg = <0x24000 0x1000>;
-			ranges = <0x0 0x24000 0x1000>;
-			local-mac-address = [ 00 00 00 00 00 00 ];
-			interrupts = <29 2 30 2 34 2>;
-			interrupt-parent = <&mpic>;
 			fixed-link = <1 1 1000 0 0>;
 			phy-connection-type = "rgmii-id";
 
 		};
 
+		enet1: ethernet@25000 {
+			status = "disabled";
+		};
+
+		enet2: ethernet@26000 {
+			status = "disabled";
+		};
+
+		sdhci@2e000 {
+			status = "disabled";
+		};
+
+		crypto@30000 {
+			status = "disabled";
+		};
+
 		mpic: pic@40000 {
-			interrupt-controller;
-			#address-cells = <0>;
-			#interrupt-cells = <2>;
-			reg = <0x40000 0x40000>;
-			compatible = "chrp,open-pic";
-			device_type = "open-pic";
 			protected-sources = <
 			17 18 43 42 59 47 /*ecm, mem, i2c, serial0, spi,gpio */
 			16 20 21 22 23 28 	/* L2, dma1, USB */
@@ -152,21 +185,24 @@
 				0xe7 0>;
 			interrupt-parent = <&mpic>;
 		};
+
+		global-utilities@e0000 {	//global utilities block
+			status = "disabled";
+		};
+
+	};
+
+	pci0: pcie@ffe08000 {
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe09000 {
+		status = "disabled";
 	};
 
-	pci1: pcie@ffe0a000 {
-		compatible = "fsl,mpc8548-pcie";
-		device_type = "pci";
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		reg = <0 0xffe0a000 0 0x1000>;
-		bus-range = <0 255>;
+	pci2: pcie@ffe0a000 {
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
-		clock-frequency = <33333333>;
-		interrupt-parent = <&mpic>;
-		interrupts = <26 2>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p2020si.dtsi b/arch/powerpc/boot/dts/p2020si.dtsi
new file mode 100644
index 000000000000..6def17f265d3
--- /dev/null
+++ b/arch/powerpc/boot/dts/p2020si.dtsi
@@ -0,0 +1,382 @@
+/*
+ * P2020 Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/dts-v1/;
+/ {
+	compatible = "fsl,P2020";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		PowerPC,P2020@0 {
+			device_type = "cpu";
+			reg = <0x0>;
+			next-level-cache = <&L2>;
+		};
+
+		PowerPC,P2020@1 {
+			device_type = "cpu";
+			reg = <0x1>;
+			next-level-cache = <&L2>;
+		};
+	};
+
+	localbus@ffe05000 {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "fsl,p2020-elbc", "fsl,elbc", "simple-bus";
+		reg = <0 0xffe05000 0 0x1000>;
+		interrupts = <19 2>;
+		interrupt-parent = <&mpic>;
+	};
+
+	soc@ffe00000 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "soc";
+		compatible = "fsl,p2020-immr", "simple-bus";
+		ranges = <0x0  0x0 0xffe00000 0x100000>;
+		bus-frequency = <0>;		// Filled out by uboot.
+
+		ecm-law@0 {
+			compatible = "fsl,ecm-law";
+			reg = <0x0 0x1000>;
+			fsl,num-laws = <12>;
+		};
+
+		ecm@1000 {
+			compatible = "fsl,p2020-ecm", "fsl,ecm";
+			reg = <0x1000 0x1000>;
+			interrupts = <17 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		memory-controller@2000 {
+			compatible = "fsl,p2020-memory-controller";
+			reg = <0x2000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <18 2>;
+		};
+
+		i2c@3000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <0>;
+			compatible = "fsl-i2c";
+			reg = <0x3000 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		i2c@3100 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			cell-index = <1>;
+			compatible = "fsl-i2c";
+			reg = <0x3100 0x100>;
+			interrupts = <43 2>;
+			interrupt-parent = <&mpic>;
+			dfsrr;
+		};
+
+		serial0: serial@4500 {
+			cell-index = <0>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4500 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		serial1: serial@4600 {
+			cell-index = <1>;
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0x4600 0x100>;
+			clock-frequency = <0>;
+			interrupts = <42 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		spi@7000 {
+			cell-index = <0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,espi";
+			reg = <0x7000 0x1000>;
+			interrupts = <59 0x2>;
+			interrupt-parent = <&mpic>;
+			mode = "cpu";
+		};
+
+		dma@c300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0xc300 0x4>;
+			ranges = <0x0 0xc100 0x200>;
+			cell-index = <1>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <76 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <77 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <78 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <79 2>;
+			};
+		};
+
+		gpio: gpio-controller@f000 {
+			#gpio-cells = <2>;
+			compatible = "fsl,mpc8572-gpio";
+			reg = <0xf000 0x100>;
+			interrupts = <47 0x2>;
+			interrupt-parent = <&mpic>;
+			gpio-controller;
+		};
+
+		L2: l2-cache-controller@20000 {
+			compatible = "fsl,p2020-l2-cache-controller";
+			reg = <0x20000 0x1000>;
+			cache-line-size = <32>;	// 32 bytes
+			cache-size = <0x80000>; // L2,512K
+			interrupt-parent = <&mpic>;
+			interrupts = <16 2>;
+		};
+
+		dma@21300 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,eloplus-dma";
+			reg = <0x21300 0x4>;
+			ranges = <0x0 0x21100 0x200>;
+			cell-index = <0>;
+			dma-channel@0 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x0 0x80>;
+				cell-index = <0>;
+				interrupt-parent = <&mpic>;
+				interrupts = <20 2>;
+			};
+			dma-channel@80 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x80 0x80>;
+				cell-index = <1>;
+				interrupt-parent = <&mpic>;
+				interrupts = <21 2>;
+			};
+			dma-channel@100 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x100 0x80>;
+				cell-index = <2>;
+				interrupt-parent = <&mpic>;
+				interrupts = <22 2>;
+			};
+			dma-channel@180 {
+				compatible = "fsl,eloplus-dma-channel";
+				reg = <0x180 0x80>;
+				cell-index = <3>;
+				interrupt-parent = <&mpic>;
+				interrupts = <23 2>;
+			};
+		};
+
+		usb@22000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl-usb2-dr";
+			reg = <0x22000 0x1000>;
+			interrupt-parent = <&mpic>;
+			interrupts = <28 0x2>;
+		};
+
+		mdio@24520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-mdio";
+			reg = <0x24520 0x20>;
+		};
+
+		mdio@25520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x26520 0x20>;
+		};
+
+		mdio@26520 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "fsl,gianfar-tbi";
+			reg = <0x520 0x20>;
+		};
+
+		enet0: ethernet@24000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <0>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x24000 0x1000>;
+			ranges = <0x0 0x24000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <29 2 30 2 34 2>;
+			interrupt-parent = <&mpic>;
+		};
+
+		enet1: ethernet@25000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <1>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x25000 0x1000>;
+			ranges = <0x0 0x25000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <35 2 36 2 40 2>;
+			interrupt-parent = <&mpic>;
+
+		};
+
+		enet2: ethernet@26000 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			cell-index = <2>;
+			device_type = "network";
+			model = "eTSEC";
+			compatible = "gianfar";
+			reg = <0x26000 0x1000>;
+			ranges = <0x0 0x26000 0x1000>;
+			local-mac-address = [ 00 00 00 00 00 00 ];
+			interrupts = <31 2 32 2 33 2>;
+			interrupt-parent = <&mpic>;
+
+		};
+
+		sdhci@2e000 {
+			compatible = "fsl,p2020-esdhc", "fsl,esdhc";
+			reg = <0x2e000 0x1000>;
+			interrupts = <72 0x2>;
+			interrupt-parent = <&mpic>;
+			/* Filled in by U-Boot */
+			clock-frequency = <0>;
+		};
+
+		crypto@30000 {
+			compatible = "fsl,sec3.1", "fsl,sec3.0", "fsl,sec2.4",
+				     "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
+			reg = <0x30000 0x10000>;
+			interrupts = <45 2 58 2>;
+			interrupt-parent = <&mpic>;
+			fsl,num-channels = <4>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0xbfe>;
+			fsl,descriptor-types-mask = <0x3ab0ebf>;
+		};
+
+		mpic: pic@40000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <2>;
+			reg = <0x40000 0x40000>;
+			compatible = "chrp,open-pic";
+			device_type = "open-pic";
+		};
+
+		msi@41600 {
+			compatible = "fsl,p2020-msi", "fsl,mpic-msi";
+			reg = <0x41600 0x80>;
+			msi-available-ranges = <0 0x100>;
+			interrupts = <
+				0xe0 0
+				0xe1 0
+				0xe2 0
+				0xe3 0
+				0xe4 0
+				0xe5 0
+				0xe6 0
+				0xe7 0>;
+			interrupt-parent = <&mpic>;
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			compatible = "fsl,p2020-guts";
+			reg = <0xe0000 0x1000>;
+			fsl,has-rstcr;
+		};
+	};
+
+	pci0: pcie@ffe08000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe08000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <24 2>;
+	};
+
+	pci1: pcie@ffe09000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe09000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <25 2>;
+	};
+
+	pci2: pcie@ffe0a000 {
+		compatible = "fsl,mpc8548-pcie";
+		device_type = "pci";
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0 0xffe0a000 0 0x1000>;
+		bus-range = <0 255>;
+		clock-frequency = <33333333>;
+		interrupt-parent = <&mpic>;
+		interrupts = <26 2>;
+	};
+};
-- 
cgit v1.2.3


From bc99d09abe14b4841454701c47e45f22444a890a Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <prabhakar@freescale.com>
Date: Wed, 20 Apr 2011 09:42:44 +0530
Subject: powerpc/85xx: Fix PCIe IDSEL for Px020RDB

PCIe device in legacy mode can trigger interrupts using the wires #INTA,
#INTB ,#INTC and #INTD. PCI devices are obligated to use #INTx for
interrupts under legacy mode.  Each PCI slot or device is typically wired
to different inputs on the interrupt controller.

So, Define interrupt-map and interrupt-map-mask properties for device tree
to of map each PCI interrupt signal to the inputs of the interrupt
controller.

Signed-off-by: Prabhakar Kushwaha <prabhakar@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/boot/dts/p1020rdb.dts            | 16 ++++++++++++++++
 arch/powerpc/boot/dts/p2020rdb.dts            | 16 ++++++++++++++++
 arch/powerpc/boot/dts/p2020rdb_camp_core0.dts |  8 ++++++++
 arch/powerpc/boot/dts/p2020rdb_camp_core1.dts |  8 ++++++++
 4 files changed, 48 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/boot/dts/p1020rdb.dts b/arch/powerpc/boot/dts/p1020rdb.dts
index 7ed4793489ce..d6a8ae458137 100644
--- a/arch/powerpc/boot/dts/p1020rdb.dts
+++ b/arch/powerpc/boot/dts/p1020rdb.dts
@@ -257,6 +257,14 @@
 	pci0: pcie@ffe09000 {
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
@@ -275,6 +283,14 @@
 	pci1: pcie@ffe0a000 {
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p2020rdb.dts b/arch/powerpc/boot/dts/p2020rdb.dts
index 60a0a8c725d6..3782a58f13be 100644
--- a/arch/powerpc/boot/dts/p2020rdb.dts
+++ b/arch/powerpc/boot/dts/p2020rdb.dts
@@ -249,6 +249,14 @@
 	pci1: pcie@ffe09000 {
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
 			pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
@@ -267,6 +275,14 @@
 	pci2: pcie@ffe0a000 {
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
index 72c912fd3dd9..fc8ddddfccb6 100644
--- a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
+++ b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts
@@ -175,6 +175,14 @@
 	pci1: pcie@ffe09000 {
 		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
index eb572ccecb3d..261c34ba45ec 100644
--- a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
+++ b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts
@@ -203,6 +203,14 @@
 	pci2: pcie@ffe0a000 {
 		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
 			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
 		pcie@0 {
 			reg = <0x0 0x0 0x0 0x0 0x0>;
 			#size-cells = <2>;
-- 
cgit v1.2.3


From 41cd08560bbf8371bbd00e783e992b0dc7e7c83d Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <prabhakar@freescale.com>
Date: Thu, 28 Apr 2011 12:30:00 +0530
Subject: powerpc/85xx: Create dts of each core in CAMP mode for P1020RDB

Create the dts files for each core and splits the devices between the two
cores for P1020RDB.

Core0 has core0 to have memory, l2, i2c, spi, gpio, tdm, dma, usb, eth1,
eth2, sdhc, crypto, global-util, message, pci0, pci1, msi.
Core1 has l2, eth0, crypto.

MPIC is shared between two cores but each core will protect its interrupts
from other core by using "protected-sources" of mpic.

Fix compatible property for global-util node of P1020si.dtsi.

Signed-off-by: Prabhakar Kushwaha <prabhakar@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/boot/dts/p1020rdb_camp_core0.dts | 213 ++++++++++++++++++++++++++
 arch/powerpc/boot/dts/p1020rdb_camp_core1.dts | 148 ++++++++++++++++++
 arch/powerpc/boot/dts/p1020si.dtsi            |   2 +-
 3 files changed, 362 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/boot/dts/p1020rdb_camp_core0.dts
 create mode 100644 arch/powerpc/boot/dts/p1020rdb_camp_core1.dts

(limited to 'arch')

diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts
new file mode 100644
index 000000000000..f0bf7f42f097
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts
@@ -0,0 +1,213 @@
+/*
+ * P1020 RDB  Core0 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts file allows core0 to have memory, l2, i2c, spi, gpio, tdm, dma, usb,
+ * eth1, eth2, sdhc, crypto, global-util, message, pci0, pci1, msi.
+ *
+ * Please note to add "-b 0" for core0's dts compiling.
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/include/ "p1020si.dtsi"
+
+/ {
+	model = "fsl,P1020RDB";
+	compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP";
+
+	aliases {
+		ethernet1 = &enet1;
+		ethernet2 = &enet2;
+		serial0 = &serial0;
+		pci0 = &pci0;
+		pci1 = &pci1;
+	};
+
+	cpus {
+		PowerPC,P1020@1 {
+		status = "disabled";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
+	soc@ffe00000 {
+		i2c@3000 {
+			rtc@68 {
+				compatible = "dallas,ds1339";
+				reg = <0x68>;
+			};
+		};
+
+		serial1: serial@4600 {
+			status = "disabled";
+		};
+
+		spi@7000 {
+			fsl_m25p80@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "fsl,espi-flash";
+				reg = <0>;
+				linux,modalias = "fsl_m25p80";
+				spi-max-frequency = <40000000>;
+
+				partition@0 {
+					/* 512KB for u-boot Bootloader Image */
+					reg = <0x0 0x00080000>;
+					label = "SPI (RO) U-Boot Image";
+					read-only;
+				};
+
+				partition@80000 {
+					/* 512KB for DTB Image */
+					reg = <0x00080000 0x00080000>;
+					label = "SPI (RO) DTB Image";
+					read-only;
+				};
+
+				partition@100000 {
+					/* 4MB for Linux Kernel Image */
+					reg = <0x00100000 0x00400000>;
+					label = "SPI (RO) Linux Kernel Image";
+					read-only;
+				};
+
+				partition@500000 {
+					/* 4MB for Compressed RFS Image */
+					reg = <0x00500000 0x00400000>;
+					label = "SPI (RO) Compressed RFS Image";
+					read-only;
+				};
+
+				partition@900000 {
+					/* 7MB for JFFS2 based RFS */
+					reg = <0x00900000 0x00700000>;
+					label = "SPI (RW) JFFS2 RFS";
+				};
+			};
+		};
+
+		mdio@24000 {
+			phy0: ethernet-phy@0 {
+				interrupt-parent = <&mpic>;
+				interrupts = <3 1>;
+				reg = <0x0>;
+			};
+			phy1: ethernet-phy@1 {
+				interrupt-parent = <&mpic>;
+				interrupts = <2 1>;
+				reg = <0x1>;
+			};
+		};
+
+		mdio@25000 {
+			tbi0: tbi-phy@11 {
+				reg = <0x11>;
+				device_type = "tbi-phy";
+			};
+		};
+
+		enet0: ethernet@b0000 {
+			status = "disabled";
+		};
+
+		enet1: ethernet@b1000 {
+			phy-handle = <&phy0>;
+			tbi-handle = <&tbi0>;
+			phy-connection-type = "sgmii";
+		};
+
+		enet2: ethernet@b2000 {
+			phy-handle = <&phy1>;
+			phy-connection-type = "rgmii-id";
+		};
+
+		usb@22000 {
+			phy_type = "ulpi";
+		};
+
+		/* USB2 is shared with localbus, so it must be disabled
+		   by default. We can't put 'status = "disabled";' here
+		   since U-Boot doesn't clear the status property when
+		   it enables USB2. OTOH, U-Boot does create a new node
+		   when there isn't any. So, just comment it out.
+		usb@23000 {
+			phy_type = "ulpi";
+		};
+		*/
+
+		mpic: pic@40000 {
+			protected-sources = <
+			42 29 30 34	/* serial1, enet0-queue-group0 */
+			17 18 24 45	/* enet0-queue-group1, crypto */
+			>;
+		};
+
+	};
+
+	pci0: pcie@ffe09000 {
+		ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x1
+			0000 0x0 0x0 0x2 &mpic 0x5 0x1
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0xa0000000
+				  0x2000000 0x0 0xa0000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+
+	pci1: pcie@ffe0a000 {
+		ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+		interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x1
+			0000 0x0 0x0 0x2 &mpic 0x1 0x1
+			0000 0x0 0x0 0x3 &mpic 0x2 0x1
+			0000 0x0 0x0 0x4 &mpic 0x3 0x1
+			>;
+		pcie@0 {
+			reg = <0x0 0x0 0x0 0x0 0x0>;
+			#size-cells = <2>;
+			#address-cells = <3>;
+			device_type = "pci";
+			ranges = <0x2000000 0x0 0x80000000
+				  0x2000000 0x0 0x80000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
+};
diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts
new file mode 100644
index 000000000000..6ec02204a44e
--- /dev/null
+++ b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts
@@ -0,0 +1,148 @@
+/*
+ * P1020 RDB Core1 Device Tree Source in CAMP mode.
+ *
+ * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache
+ * can be shared, all the other devices must be assigned to one core only.
+ * This dts allows core1 to have l2, eth0, crypto.
+ *
+ * Please note to add "-b 1" for core1's dts compiling.
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/include/ "p1020si.dtsi"
+
+/ {
+	model = "fsl,P1020RDB";
+	compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP";
+
+	aliases {
+		ethernet0 = &enet0;
+		serial0 = &serial1;
+		};
+
+	cpus {
+		PowerPC,P1020@0 {
+		status = "disabled";
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	localbus@ffe05000 {
+		status = "disabled";
+	};
+
+	soc@ffe00000 {
+		ecm-law@0 {
+			status = "disabled";
+		};
+
+		ecm@1000 {
+			status = "disabled";
+		};
+
+		memory-controller@2000 {
+			status = "disabled";
+		};
+
+		i2c@3000 {
+			status = "disabled";
+		};
+
+		i2c@3100 {
+			status = "disabled";
+		};
+
+		serial0: serial@4500 {
+			status = "disabled";
+		};
+
+		spi@7000 {
+			status = "disabled";
+		};
+
+		gpio: gpio-controller@f000 {
+			status = "disabled";
+		};
+
+		dma@21300 {
+			status = "disabled";
+		};
+
+		mdio@24000 {
+			status = "disabled";
+		};
+
+		mdio@25000 {
+			status = "disabled";
+		};
+
+		enet0: ethernet@b0000 {
+			fixed-link = <1 1 1000 0 0>;
+			phy-connection-type = "rgmii-id";
+
+		};
+
+		enet1: ethernet@b1000 {
+			status = "disabled";
+		};
+
+		enet2: ethernet@b2000 {
+			status = "disabled";
+		};
+
+		usb@22000 {
+			status = "disabled";
+		};
+
+		sdhci@2e000 {
+			status = "disabled";
+		};
+
+		mpic: pic@40000 {
+			protected-sources = <
+			16 		/* ecm, mem, L2, pci0, pci1 */
+			43 42 59	/* i2c, serial0, spi */
+			47 63 62 	/* gpio, tdm */
+			20 21 22 23	/* dma */
+			03 02 		/* mdio */
+			35 36 40	/* enet1-queue-group0 */
+			51 52 67	/* enet1-queue-group1 */
+			31 32 33	/* enet2-queue-group0 */
+			25 26 27	/* enet2-queue-group1 */
+			28 72 58 	/* usb, sdhci, crypto */
+			0xb0 0xb1 0xb2	/* message */
+			0xb3 0xb4 0xb5
+			0xb6 0xb7
+			0xe0 0xe1 0xe2	/* msi */
+			0xe3 0xe4 0xe5
+			0xe6 0xe7		/* sdhci, crypto , pci */
+			>;
+		};
+
+		msi@41600 {
+			status = "disabled";
+		};
+
+		global-utilities@e0000 {	//global utilities block
+			status = "disabled";
+		};
+
+	};
+
+	pci0: pcie@ffe09000 {
+		status = "disabled";
+	};
+
+	pci1: pcie@ffe0a000 {
+		status = "disabled";
+	};
+};
diff --git a/arch/powerpc/boot/dts/p1020si.dtsi b/arch/powerpc/boot/dts/p1020si.dtsi
index f6f110096c5f..5c5acb66c3fc 100644
--- a/arch/powerpc/boot/dts/p1020si.dtsi
+++ b/arch/powerpc/boot/dts/p1020si.dtsi
@@ -343,7 +343,7 @@
 		};
 
 		global-utilities@e0000 {	//global utilities block
-			compatible = "fsl,p1020-guts";
+			compatible = "fsl,p1020-guts","fsl,p2020-guts";
 			reg = <0xe0000 0x1000>;
 			fsl,has-rstcr;
 		};
-- 
cgit v1.2.3


From 41fb5e62604c5ddd00a784ffb7672dd8df5d76f2 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Tue, 10 May 2011 19:30:44 +0000
Subject: powerpc: Make IRQ_NOREQUEST last to clear, first to set

When creating an irq, don't allow a concurent driver request until
we have caled map, which will likley call set_chip_and_handler to
change the irq_chip and its operations.

Similarly, when tearing down an IRQ, make sure no new uses come
along while we change the irq back to the nop chip and then reset
the descriptor to freed status.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/irq.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4368b5ed5604..a24d37d4cf51 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -586,14 +586,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
 			irq_map[i].host = host;
 			smp_wmb();
 
-			/* Clear norequest flags */
-			irq_clear_status_flags(i, IRQ_NOREQUEST);
-
 			/* Legacy flags are left to default at this point,
 			 * one can then use irq_create_mapping() to
 			 * explicitly change them
 			 */
 			ops->map(host, i, i);
+
+			/* Clear norequest flags */
+			irq_clear_status_flags(i, IRQ_NOREQUEST);
 		}
 		break;
 	case IRQ_HOST_MAP_LINEAR:
@@ -664,8 +664,6 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq,
 		goto error;
 	}
 
-	irq_clear_status_flags(virq, IRQ_NOREQUEST);
-
 	/* map it */
 	smp_wmb();
 	irq_map[virq].hwirq = hwirq;
@@ -676,6 +674,8 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq,
 		goto errdesc;
 	}
 
+	irq_clear_status_flags(virq, IRQ_NOREQUEST);
+
 	return 0;
 
 errdesc:
@@ -819,6 +819,8 @@ void irq_dispose_mapping(unsigned int virq)
 	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
 		return;
 
+	irq_set_status_flags(virq, IRQ_NOREQUEST);
+
 	/* remove chip and handler */
 	irq_set_chip_and_handler(virq, NULL, NULL);
 
@@ -848,8 +850,6 @@ void irq_dispose_mapping(unsigned int virq)
 	smp_mb();
 	irq_map[virq].hwirq = host->inval_irq;
 
-	irq_set_status_flags(virq, IRQ_NOREQUEST);
-
 	irq_free_descs(virq, 1);
 	/* Free it */
 	irq_free_virt(virq, 1);
-- 
cgit v1.2.3


From 751e1f5099f1568444fe2485f2485ca541d4952e Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 19 May 2011 14:44:31 +1000
Subject: powerpc: Remove unused/obsolete CONFIG_XICS

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/Kconfig | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 83c704a637bb..f970ca2b180c 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -70,11 +70,6 @@ config PPC_UDBG_BEAT
 	depends on PPC_CELLEB
 	default n
 
-config XICS
-	depends on PPC_PSERIES
-	bool
-	default y
-
 config IPIC
 	bool
 	default n
-- 
cgit v1.2.3


From 448694a1d50432be63aafccb42d6f54d8cf3d02c Mon Sep 17 00:00:00 2001
From: Jan Glauber <jang@linux.vnet.ibm.com>
Date: Thu, 19 May 2011 16:55:26 -0600
Subject: module: undo module RONX protection correctly.

While debugging I stumbled over two problems in the code that protects module
pages.

First issue is that disabling the protection before freeing init or unload of
a module is not symmetric with the enablement. For instance, if pages are set
to RO the page range from module_core to module_core + core_ro_size is
protected. If a module is unloaded the page range from module_core to
module_core + core_size is set back to RW.
So pages that were not set to RO are also changed to RW.
This is not critical but IMHO it should be symmetric.

Second issue is that while set_memory_rw & set_memory_ro are used for
RO/RW changes only set_memory_nx is involved for NX/X. One would await that
the inverse function is called when the NX protection should be removed,
which is not the case here, unless I'm missing something.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/s390/include/asm/cacheflush.h |  1 +
 arch/s390/mm/pageattr.c            |  5 +++++
 kernel/module.c                    | 25 +++++++++++++------------
 3 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h
index 43a5c78046db..3e20383d0921 100644
--- a/arch/s390/include/asm/cacheflush.h
+++ b/arch/s390/include/asm/cacheflush.h
@@ -11,5 +11,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable);
 int set_memory_ro(unsigned long addr, int numpages);
 int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
 
 #endif /* _S390_CACHEFLUSH_H */
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 0607e4b14b27..f05edcc3beff 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -54,3 +54,8 @@ int set_memory_nx(unsigned long addr, int numpages)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(set_memory_nx);
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+	return 0;
+}
diff --git a/kernel/module.c b/kernel/module.c
index 92112c91b7e9..b99dcebc980d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1607,22 +1607,23 @@ static void set_section_ro_nx(void *base,
 	}
 }
 
-/* Setting memory back to RW+NX before releasing it */
+/* Setting memory back to W+X before releasing it */
 void unset_section_ro_nx(struct module *mod, void *module_region)
 {
-	unsigned long total_pages;
-
 	if (mod->module_core == module_region) {
-		/* Set core as NX+RW */
-		total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size);
-		set_memory_nx((unsigned long)mod->module_core, total_pages);
-		set_memory_rw((unsigned long)mod->module_core, total_pages);
-
+		set_page_attributes(mod->module_core + mod->core_text_size,
+			mod->module_core + mod->core_size,
+			set_memory_x);
+		set_page_attributes(mod->module_core,
+			mod->module_core + mod->core_ro_size,
+			set_memory_rw);
 	} else if (mod->module_init == module_region) {
-		/* Set init as NX+RW */
-		total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size);
-		set_memory_nx((unsigned long)mod->module_init, total_pages);
-		set_memory_rw((unsigned long)mod->module_init, total_pages);
+		set_page_attributes(mod->module_init + mod->init_text_size,
+			mod->module_init + mod->init_size,
+			set_memory_x);
+		set_page_attributes(mod->module_init,
+			mod->module_init + mod->init_ro_size,
+			set_memory_rw);
 	}
 }
 
-- 
cgit v1.2.3


From 23a6c484047bb8ac50e6d5bb718f8d178a4bf32e Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Wed, 11 May 2011 11:07:51 +0000
Subject: powerpc/pseries/iommu: Use correct return type in
 dupe_ddw_if_already_created

Otherwise we get silent truncations.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Milton Miller <miltonm@bga.com>
Cc: linuxppc-dev@ozlabs.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 48eec3b87026..44d47ac552a9 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -693,7 +693,7 @@ static void remove_ddw(struct device_node *np)
 }
 
 
-static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn)
+static u64 dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn)
 {
 	struct device_node *dn;
 	struct pci_dn *pcidn;
-- 
cgit v1.2.3


From 64ac822fb4554fb516bce123a38b35e04e41fff5 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Wed, 11 May 2011 12:24:57 +0000
Subject: powerpc/pseries/iommu: Add additional checks when changing iommu mask

Do not check dma supported until we have chosen the right dma ops.
Check that the device is pci before treating it as such.

Check the mask is supported by the selected dma ops before
committing it.

We only need to set iommu ops if it is not the current ops; this
avoids searching the tree for the iommu table unnecessarily.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/iommu.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 44d47ac552a9..05c101e7dcd7 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -1026,9 +1026,12 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
 	const void *dma_window = NULL;
 	u64 dma_offset;
 
-	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+	if (!dev->dma_mask)
 		return -EIO;
 
+	if (!dev_is_pci(dev))
+		goto check_mask;
+
 	pdev = to_pci_dev(dev);
 
 	/* only attempt to use a new window if 64-bit DMA is requested */
@@ -1059,13 +1062,17 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
 		}
 	}
 
-	/* fall-through to iommu ops */
-	if (!ddw_enabled) {
-		dev_info(dev, "Using 32-bit DMA via iommu\n");
+	/* fall back on iommu ops, restore table pointer with ops */
+	if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) {
+		dev_info(dev, "Restoring 32-bit DMA via iommu\n");
 		set_dma_ops(dev, &dma_iommu_ops);
 		pci_dma_dev_setup_pSeriesLP(pdev);
 	}
 
+check_mask:
+	if (!dma_supported(dev, dma_mask))
+		return -EIO;
+
 	*dev->dma_mask = dma_mask;
 	return 0;
 }
-- 
cgit v1.2.3


From 2573f6842201a00f139237e4b42ab16711b582af Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Wed, 11 May 2011 12:24:58 +0000
Subject: powerpc/pseries/iommu: Remove ddw property when destroying window

If we destroy the window, we need to remove the property recording that
we setup the window.  Otherwise the next kernel we kexec will be
confused.

Also we should remove the property if even if we don't find the
ibm,ddw-applicable window or if one of the property sizes is unexpected;
presumably these came from a prior kernel via kexec, and we will not be
maintaining the window with respect to memory hotplug.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/iommu.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 05c101e7dcd7..a0421ac46d4e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -665,9 +665,12 @@ static void remove_ddw(struct device_node *np)
 
 	ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len);
 	win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
-	if (!win64 || !ddr_avail || len < 3 * sizeof(u32))
+	if (!win64)
 		return;
 
+	if (!ddr_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp))
+		goto delprop;
+
 	dwp = win64->value;
 	liobn = (u64)be32_to_cpu(dwp->liobn);
 
@@ -690,8 +693,13 @@ static void remove_ddw(struct device_node *np)
 		pr_debug("%s: successfully removed direct window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
 			np->full_name, ret, ddr_avail[2], liobn);
-}
 
+delprop:
+	ret = of_remove_property(np, win64);
+	if (ret)
+		pr_warning("%s: failed to remove direct window property: %d\n"
+			np->full_name, ret);
+}
 
 static u64 dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn)
 {
-- 
cgit v1.2.3


From c85667802bb5093c4054f8a887a90dd0acf82d3e Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Wed, 11 May 2011 12:24:59 +0000
Subject: powerpc/pseries/iommu: Find windows after kexec during boot

Move the discovery of windows previously setup from when the pci driver
calls set_dma_mask to an arch_initcall.

When kexecing into a kernel with dynamic dma windows allocated, we need
to find the windows early so that memory hot remove will be able to
delete the tces mapping the to be removed memory and memory hotplug add
will map the new memory into the window.  We should not wait for the
driver to be loaded and the device to be probed.  The iommu init hooks
are before kmalloc is setup, so defer to arch_initcall.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/iommu.c | 52 ++++++++++++++++------------------
 1 file changed, 24 insertions(+), 28 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index a0421ac46d4e..a48f12644239 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -695,9 +695,9 @@ static void remove_ddw(struct device_node *np)
 			np->full_name, ret, ddr_avail[2], liobn);
 
 delprop:
-	ret = of_remove_property(np, win64);
+	ret = prom_remove_property(np, win64);
 	if (ret)
-		pr_warning("%s: failed to remove direct window property: %d\n"
+		pr_warning("%s: failed to remove direct window property: %d\n",
 			np->full_name, ret);
 }
 
@@ -725,38 +725,38 @@ static u64 dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *
 	return dma_addr;
 }
 
-static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn)
+static int find_existing_ddw_windows(void)
 {
-	struct device_node *dn;
-	struct pci_dn *pcidn;
 	int len;
+	struct device_node *pdn;
 	struct direct_window *window;
 	const struct dynamic_dma_window_prop *direct64;
-	u64 dma_addr = 0;
 
-	dn = pci_device_to_OF_node(dev);
-	pcidn = PCI_DN(dn);
-	direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
-	if (direct64) {
-		if (len < sizeof(struct dynamic_dma_window_prop)) {
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
+		direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
+		if (!direct64)
+			continue;
+
+		window = kzalloc(sizeof(*window), GFP_KERNEL);
+		if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
+			kfree(window);
 			remove_ddw(pdn);
-		} else {
-			window = kzalloc(sizeof(*window), GFP_KERNEL);
-			if (!window) {
-				remove_ddw(pdn);
-			} else {
-				window->device = pdn;
-				window->prop = direct64;
-				spin_lock(&direct_window_list_lock);
-				list_add(&window->list, &direct_window_list);
-				spin_unlock(&direct_window_list_lock);
-				dma_addr = direct64->dma_base;
-			}
+			continue;
 		}
+
+		window->device = pdn;
+		window->prop = direct64;
+		spin_lock(&direct_window_list_lock);
+		list_add(&window->list, &direct_window_list);
+		spin_unlock(&direct_window_list_lock);
 	}
 
-	return dma_addr;
+	return 0;
 }
+machine_arch_initcall(pseries, find_existing_ddw_windows);
 
 static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
 			struct ddw_query_response *query)
@@ -854,10 +854,6 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	if (dma_addr != 0)
 		goto out_unlock;
 
-	dma_addr = dupe_ddw_if_kexec(dev, pdn);
-	if (dma_addr != 0)
-		goto out_unlock;
-
 	/*
 	 * the ibm,ddw-applicable property holds the tokens for:
 	 * ibm,query-pe-dma-window
-- 
cgit v1.2.3


From b73a635f348610304eee543d733a6277f67ba178 Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Wed, 11 May 2011 12:25:00 +0000
Subject: powerpc/pseries/iommu: Cleanup ddw naming

When using a property refering to the availibily of dynamic dma windows
call it ddw_avail not ddr_avail.

dupe_ddw_if_already_created does not dupilcate anything, it only finds
and reuses the windows we already created, so rename it to
find_existing_ddw.  Also, it does not need the pci device node, so
remove that argument.

Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/iommu.c | 42 +++++++++++++++-------------------
 1 file changed, 19 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index a48f12644239..01faab9456ca 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -659,16 +659,16 @@ static void remove_ddw(struct device_node *np)
 {
 	struct dynamic_dma_window_prop *dwp;
 	struct property *win64;
-	const u32 *ddr_avail;
+	const u32 *ddw_avail;
 	u64 liobn;
 	int len, ret;
 
-	ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len);
+	ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len);
 	win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
 	if (!win64)
 		return;
 
-	if (!ddr_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp))
+	if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp))
 		goto delprop;
 
 	dwp = win64->value;
@@ -684,15 +684,15 @@ static void remove_ddw(struct device_node *np)
 		pr_debug("%s successfully cleared tces in window.\n",
 			 np->full_name);
 
-	ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn);
+	ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
 	if (ret)
 		pr_warning("%s: failed to remove direct window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
-			np->full_name, ret, ddr_avail[2], liobn);
+			np->full_name, ret, ddw_avail[2], liobn);
 	else
 		pr_debug("%s: successfully removed direct window: rtas returned "
 			"%d to ibm,remove-pe-dma-window(%x) %llx\n",
-			np->full_name, ret, ddr_avail[2], liobn);
+			np->full_name, ret, ddw_avail[2], liobn);
 
 delprop:
 	ret = prom_remove_property(np, win64);
@@ -701,16 +701,12 @@ delprop:
 			np->full_name, ret);
 }
 
-static u64 dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn)
+static u64 find_existing_ddw(struct device_node *pdn)
 {
-	struct device_node *dn;
-	struct pci_dn *pcidn;
 	struct direct_window *window;
 	const struct dynamic_dma_window_prop *direct64;
 	u64 dma_addr = 0;
 
-	dn = pci_device_to_OF_node(dev);
-	pcidn = PCI_DN(dn);
 	spin_lock(&direct_window_list_lock);
 	/* check if we already created a window and dupe that config if so */
 	list_for_each_entry(window, &direct_window_list, list) {
@@ -758,7 +754,7 @@ static int find_existing_ddw_windows(void)
 }
 machine_arch_initcall(pseries, find_existing_ddw_windows);
 
-static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
+static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
 			struct ddw_query_response *query)
 {
 	struct device_node *dn;
@@ -779,15 +775,15 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
 	if (pcidn->eeh_pe_config_addr)
 		cfg_addr = pcidn->eeh_pe_config_addr;
 	buid = pcidn->phb->buid;
-	ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query,
+	ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
 		  cfg_addr, BUID_HI(buid), BUID_LO(buid));
 	dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
-		" returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid),
+		" returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
 		BUID_LO(buid), ret);
 	return ret;
 }
 
-static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
+static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
 			struct ddw_create_response *create, int page_shift,
 			int window_shift)
 {
@@ -812,12 +808,12 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
 
 	do {
 		/* extra outputs are LIOBN and dma-addr (hi, lo) */
-		ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr,
+		ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr,
 				BUID_HI(buid), BUID_LO(buid), page_shift, window_shift);
 	} while (rtas_busy_delay(ret));
 	dev_info(&dev->dev,
 		"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
-		"(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1],
+		"(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
 		 cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
 		 window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
 
@@ -843,14 +839,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	int page_shift;
 	u64 dma_addr, max_addr;
 	struct device_node *dn;
-	const u32 *uninitialized_var(ddr_avail);
+	const u32 *uninitialized_var(ddw_avail);
 	struct direct_window *window;
 	struct property *win64;
 	struct dynamic_dma_window_prop *ddwprop;
 
 	mutex_lock(&direct_window_init_mutex);
 
-	dma_addr = dupe_ddw_if_already_created(dev, pdn);
+	dma_addr = find_existing_ddw(pdn);
 	if (dma_addr != 0)
 		goto out_unlock;
 
@@ -862,8 +858,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	 * for the given node in that order.
 	 * the property is actually in the parent, not the PE
 	 */
-	ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
-	if (!ddr_avail || len < 3 * sizeof(u32))
+	ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
+	if (!ddw_avail || len < 3 * sizeof(u32))
 		goto out_unlock;
 
        /*
@@ -873,7 +869,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	 * of page sizes: supported and supported for migrate-dma.
 	 */
 	dn = pci_device_to_OF_node(dev);
-	ret = query_ddw(dev, ddr_avail, &query);
+	ret = query_ddw(dev, ddw_avail, &query);
 	if (ret != 0)
 		goto out_unlock;
 
@@ -922,7 +918,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 		goto out_free_prop;
 	}
 
-	ret = create_ddw(dev, ddr_avail, &create, page_shift, len);
+	ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
 	if (ret != 0)
 		goto out_free_prop;
 
-- 
cgit v1.2.3


From 03bf469add176afd8a1a4c493d9f4e0e520db12b Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 11 May 2011 20:58:18 +0000
Subject: powerpc: Make early memory scan more resilient to out of order nodes

We keep track of the size of the lowest block of memory and call
setup_initial_memory_limit() only after we've parsed them all

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Milton Miller <miltonm@bga.com>
---
 arch/powerpc/kernel/prom.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 5f5e6aed2b70..5311a26dcf46 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -68,6 +68,7 @@ int __initdata iommu_force_on;
 unsigned long tce_alloc_start, tce_alloc_end;
 u64 ppc64_rma_size;
 #endif
+static phys_addr_t first_memblock_size;
 
 static int __init early_parse_mem(char *p)
 {
@@ -505,11 +506,14 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 			size = 0x80000000ul - base;
 	}
 #endif
-
-	/* First MEMBLOCK added, do some special initializations */
-	if (memstart_addr == ~(phys_addr_t)0)
-		setup_initial_memory_limit(base, size);
-	memstart_addr = min((u64)memstart_addr, base);
+	/* Keep track of the beginning of memory -and- the size of
+	 * the very first block in the device-tree as it represents
+	 * the RMA on ppc64 server
+	 */
+	if (base < memstart_addr) {
+		memstart_addr = base;
+		first_memblock_size = size;
+	}
 
 	/* Add the chunk to the MEMBLOCK list */
 	memblock_add(base, size);
@@ -694,6 +698,7 @@ void __init early_init_devtree(void *params)
 
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+	setup_initial_memory_limit(memstart_addr, first_memblock_size);
 
 	/* Save command line for /proc/cmdline and then parse parameters */
 	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
-- 
cgit v1.2.3


From 2c78027a62ea38585da1ff944afdc6146335cb7c Mon Sep 17 00:00:00 2001
From: Gabriel Paubert <paubert@iram.es>
Date: Fri, 13 May 2011 01:03:13 +0000
Subject: powerpc: Fix for Pegasos keyboard and mouse

[See http://lists.ozlabs.org/pipermail/linuxppc-dev/2010-October/086424.html
and followups. Part of the commit message is directly copied from that.]

Commit 540c6c392f01887dcc96bef0a41e63e6c1334f01 tries to find i8042 IRQs in
the device-tree but doesn't fall back to the old hardcoded 1 and 12 in all
failure cases.

Specifically, the case where the device-tree contains nothing matching
pnpPNP,303 or pnpPNP,f03 doesn't seem to be handled well. It sort of falls
through to the old code, but leaves the IRQs set to 0.

Signed-off-by: Gabriel Paubert <paubert@iram.es>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/setup-common.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index ef33a084fcf4..79fca2651b65 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -602,6 +602,10 @@ int check_legacy_ioport(unsigned long base_port)
 		 * name instead */
 		if (!np)
 			np = of_find_node_by_name(NULL, "8042");
+		if (np) {
+			of_i8042_kbd_irq = 1;
+			of_i8042_aux_irq = 12;
+		}
 		break;
 	case FDC_BASE: /* FDC1 */
 		np = of_find_node_by_type(NULL, "fdc");
-- 
cgit v1.2.3


From f38aa708776aefd9e3ba7ec1211c07efe9fa3227 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <sebastian@breakpoint.cc>
Date: Mon, 16 May 2011 08:58:13 +0000
Subject: powerpc: Remove last piece of GEMINI

It seems that Adrian is getting old. He removed almost everything of
GEMINI in commit c53653130 ("[POWERPC] Remove the broken Gemini
support") except this piece.

Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/head_32.S | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 98c4b29a56f4..ba250d505e07 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -805,19 +805,6 @@ _ENTRY(copy_and_flush)
 	blr
 
 #ifdef CONFIG_SMP
-#ifdef CONFIG_GEMINI
-	.globl	__secondary_start_gemini
-__secondary_start_gemini:
-        mfspr   r4,SPRN_HID0
-        ori     r4,r4,HID0_ICFI
-        li      r3,0
-        ori     r3,r3,HID0_ICE
-        andc    r4,r4,r3
-        mtspr   SPRN_HID0,r4
-        sync
-        b       __secondary_start
-#endif /* CONFIG_GEMINI */
-
 	.globl __secondary_start_mpc86xx
 __secondary_start_mpc86xx:
 	mfspr	r3, SPRN_PIR
-- 
cgit v1.2.3


From a7117c6bddcbfff2fa237a14a853b32cb94bf59a Mon Sep 17 00:00:00 2001
From: Jayachandran C <jayachandranc@netlogicmicro.com>
Date: Wed, 11 May 2011 12:04:58 +0530
Subject: MIPS: Netlogic XLR/XLS processor IDs.

Add Netlogic Microsystems company ID and processor IDs for XLR
and XLS processors for CPU probe. Add CPU_XLR to cpu_type_enum.

Signed-off-by: Jayachandran C <jayachandranc@netlogicmicro.com>
Cc:     linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2367/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/cpu.h  | 27 +++++++++++++++++++++
 arch/mips/kernel/cpu-probe.c | 56 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 86877539c6e8..34c0d3cb116f 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -33,6 +33,7 @@
 #define PRID_COMP_TOSHIBA	0x070000
 #define PRID_COMP_LSI		0x080000
 #define PRID_COMP_LEXRA		0x0b0000
+#define PRID_COMP_NETLOGIC	0x0c0000
 #define PRID_COMP_CAVIUM	0x0d0000
 #define PRID_COMP_INGENIC	0xd00000
 
@@ -141,6 +142,31 @@
 
 #define PRID_IMP_JZRISC        0x0200
 
+/*
+ * These are the PRID's for when 23:16 == PRID_COMP_NETLOGIC
+ */
+#define PRID_IMP_NETLOGIC_XLR732	0x0000
+#define PRID_IMP_NETLOGIC_XLR716	0x0200
+#define PRID_IMP_NETLOGIC_XLR532	0x0900
+#define PRID_IMP_NETLOGIC_XLR308	0x0600
+#define PRID_IMP_NETLOGIC_XLR532C	0x0800
+#define PRID_IMP_NETLOGIC_XLR516C	0x0a00
+#define PRID_IMP_NETLOGIC_XLR508C	0x0b00
+#define PRID_IMP_NETLOGIC_XLR308C	0x0f00
+#define PRID_IMP_NETLOGIC_XLS608	0x8000
+#define PRID_IMP_NETLOGIC_XLS408	0x8800
+#define PRID_IMP_NETLOGIC_XLS404	0x8c00
+#define PRID_IMP_NETLOGIC_XLS208	0x8e00
+#define PRID_IMP_NETLOGIC_XLS204	0x8f00
+#define PRID_IMP_NETLOGIC_XLS108	0xce00
+#define PRID_IMP_NETLOGIC_XLS104	0xcf00
+#define PRID_IMP_NETLOGIC_XLS616B	0x4000
+#define PRID_IMP_NETLOGIC_XLS608B	0x4a00
+#define PRID_IMP_NETLOGIC_XLS416B	0x4400
+#define PRID_IMP_NETLOGIC_XLS412B	0x4c00
+#define PRID_IMP_NETLOGIC_XLS408B	0x4e00
+#define PRID_IMP_NETLOGIC_XLS404B	0x4f00
+
 /*
  * Definitions for 7:0 on legacy processors
  */
@@ -234,6 +260,7 @@ enum cpu_type_enum {
 	 */
 	CPU_5KC, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
 	CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2,
+	CPU_XLR,
 
 	CPU_LAST
 };
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index f65d4c8c65a6..c7b7eb24e277 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -988,6 +988,59 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
 	}
 }
 
+static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu)
+{
+	decode_configs(c);
+
+	c->options = (MIPS_CPU_TLB       |
+			MIPS_CPU_4KEX    |
+			MIPS_CPU_COUNTER |
+			MIPS_CPU_DIVEC   |
+			MIPS_CPU_WATCH   |
+			MIPS_CPU_EJTAG   |
+			MIPS_CPU_LLSC);
+
+	switch (c->processor_id & 0xff00) {
+	case PRID_IMP_NETLOGIC_XLR732:
+	case PRID_IMP_NETLOGIC_XLR716:
+	case PRID_IMP_NETLOGIC_XLR532:
+	case PRID_IMP_NETLOGIC_XLR308:
+	case PRID_IMP_NETLOGIC_XLR532C:
+	case PRID_IMP_NETLOGIC_XLR516C:
+	case PRID_IMP_NETLOGIC_XLR508C:
+	case PRID_IMP_NETLOGIC_XLR308C:
+		c->cputype = CPU_XLR;
+		__cpu_name[cpu] = "Netlogic XLR";
+		break;
+
+	case PRID_IMP_NETLOGIC_XLS608:
+	case PRID_IMP_NETLOGIC_XLS408:
+	case PRID_IMP_NETLOGIC_XLS404:
+	case PRID_IMP_NETLOGIC_XLS208:
+	case PRID_IMP_NETLOGIC_XLS204:
+	case PRID_IMP_NETLOGIC_XLS108:
+	case PRID_IMP_NETLOGIC_XLS104:
+	case PRID_IMP_NETLOGIC_XLS616B:
+	case PRID_IMP_NETLOGIC_XLS608B:
+	case PRID_IMP_NETLOGIC_XLS416B:
+	case PRID_IMP_NETLOGIC_XLS412B:
+	case PRID_IMP_NETLOGIC_XLS408B:
+	case PRID_IMP_NETLOGIC_XLS404B:
+		c->cputype = CPU_XLR;
+		__cpu_name[cpu] = "Netlogic XLS";
+		break;
+
+	default:
+		printk(KERN_INFO "Unknown Netlogic chip id [%02x]!\n",
+		       c->processor_id);
+		c->cputype = CPU_XLR;
+		break;
+	}
+
+	c->isa_level = MIPS_CPU_ISA_M64R1;
+	c->tlbsize = ((read_c0_config1() >> 25) & 0x3f) + 1;
+}
+
 #ifdef CONFIG_64BIT
 /* For use by uaccess.h */
 u64 __ua_limit;
@@ -1035,6 +1088,9 @@ __cpuinit void cpu_probe(void)
 	case PRID_COMP_INGENIC:
 		cpu_probe_ingenic(c, cpu);
 		break;
+	case PRID_COMP_NETLOGIC:
+		cpu_probe_netlogic(c, cpu);
+		break;
 	}
 
 	BUG_ON(!__cpu_name[cpu]);
-- 
cgit v1.2.3


From 3c595a515dbb61ae96e8f5607d895820aa06e870 Mon Sep 17 00:00:00 2001
From: Jayachandran C <jayachandranc@netlogicmicro.com>
Date: Sat, 7 May 2011 01:36:05 +0530
Subject: MIPS: Netlogic: mach-netlogic include files

Add war.h and irq.h with XLR/XLS definitions.

Signed-off-by: Jayachandran C <jayachandranc@netlogicmicro.com>
To: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2331/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 .../asm/mach-netlogic/cpu-feature-overrides.h      | 47 ++++++++++++++++++++++
 arch/mips/include/asm/mach-netlogic/irq.h          | 14 +++++++
 arch/mips/include/asm/mach-netlogic/war.h          | 26 ++++++++++++
 3 files changed, 87 insertions(+)
 create mode 100644 arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h
 create mode 100644 arch/mips/include/asm/mach-netlogic/irq.h
 create mode 100644 arch/mips/include/asm/mach-netlogic/war.h

(limited to 'arch')

diff --git a/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h b/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h
new file mode 100644
index 000000000000..3b728275b9b0
--- /dev/null
+++ b/arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h
@@ -0,0 +1,47 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Netlogic Microsystems
+ * Copyright (C) 2003 Ralf Baechle
+ */
+#ifndef __ASM_MACH_NETLOGIC_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_NETLOGIC_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_4kex		1
+#define cpu_has_4k_cache	1
+#define cpu_has_watch		1
+#define cpu_has_mips16		0
+#define cpu_has_counter		1
+#define cpu_has_divec		1
+#define cpu_has_vce		0
+#define cpu_has_cache_cdex_p	0
+#define cpu_has_cache_cdex_s	0
+#define cpu_has_prefetch	1
+#define cpu_has_mcheck		1
+#define cpu_has_ejtag		1
+
+#define cpu_has_llsc		1
+#define cpu_has_vtag_icache	0
+#define cpu_has_dc_aliases	0
+#define cpu_has_ic_fills_f_dc	0
+#define cpu_has_dsp		0
+#define cpu_has_mipsmt		0
+#define cpu_has_userlocal	0
+#define cpu_icache_snoops_remote_store	0
+
+#define cpu_has_nofpuex		0
+#define cpu_has_64bits		1
+
+#define cpu_has_mips32r1	1
+#define cpu_has_mips32r2	0
+#define cpu_has_mips64r1	1
+#define cpu_has_mips64r2	0
+
+#define cpu_has_inclusive_pcaches	0
+
+#define cpu_dcache_line_size()	32
+#define cpu_icache_line_size()	32
+
+#endif /* __ASM_MACH_NETLOGIC_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-netlogic/irq.h b/arch/mips/include/asm/mach-netlogic/irq.h
new file mode 100644
index 000000000000..b5902458e7c1
--- /dev/null
+++ b/arch/mips/include/asm/mach-netlogic/irq.h
@@ -0,0 +1,14 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Netlogic Microsystems.
+ */
+#ifndef __ASM_NETLOGIC_IRQ_H
+#define __ASM_NETLOGIC_IRQ_H
+
+#define NR_IRQS			64
+#define MIPS_CPU_IRQ_BASE	0
+
+#endif /* __ASM_NETLOGIC_IRQ_H */
diff --git a/arch/mips/include/asm/mach-netlogic/war.h b/arch/mips/include/asm/mach-netlogic/war.h
new file mode 100644
index 000000000000..22da89327352
--- /dev/null
+++ b/arch/mips/include/asm/mach-netlogic/war.h
@@ -0,0 +1,26 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Netlogic Microsystems.
+ * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
+ */
+#ifndef __ASM_MIPS_MACH_NLM_WAR_H
+#define __ASM_MIPS_MACH_NLM_WAR_H
+
+#define R4600_V1_INDEX_ICACHEOP_WAR	0
+#define R4600_V1_HIT_CACHEOP_WAR	0
+#define R4600_V2_HIT_CACHEOP_WAR	0
+#define R5432_CP0_INTERRUPT_WAR		0
+#define BCM1250_M3_WAR			0
+#define SIBYTE_1956_WAR			0
+#define MIPS4K_ICACHE_REFILL_WAR	0
+#define MIPS_CACHE_SYNC_WAR		0
+#define TX49XX_ICACHE_INDEX_INV_WAR	0
+#define RM9000_CDEX_SMP_WAR		0
+#define ICACHE_REFILLS_WORKAROUND_WAR	0
+#define R10000_LLSC_WAR			0
+#define MIPS34K_MISSED_ITLB_WAR		0
+
+#endif /* __ASM_MIPS_MACH_NLM_WAR_H */
-- 
cgit v1.2.3


From efa0f81c11021c95b1e72c65868115b6fb4ecc6a Mon Sep 17 00:00:00 2001
From: Jayachandran C <jayachandranc@netlogicmicro.com>
Date: Sat, 7 May 2011 01:36:21 +0530
Subject: MIPS: Netlogic: Cache, TLB support and feature overrides for XLR

CPU_XLR case added to mm/tlbex.c
CPU_XLR case added to mm/c-r4k.c for PINDEX attribute
Feature overrides for XLR cpu.

Signed-off-by: Jayachandran C <jayachandranc@netlogicmicro.com>
To: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2333/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/module.h | 2 ++
 arch/mips/mm/c-r4k.c           | 1 +
 arch/mips/mm/tlbex.c           | 1 +
 3 files changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h
index d94085a3eafb..bc01a02cacd8 100644
--- a/arch/mips/include/asm/module.h
+++ b/arch/mips/include/asm/module.h
@@ -118,6 +118,8 @@ search_module_dbetables(unsigned long addr)
 #define MODULE_PROC_FAMILY "LOONGSON2 "
 #elif defined CONFIG_CPU_CAVIUM_OCTEON
 #define MODULE_PROC_FAMILY "OCTEON "
+#elif defined CONFIG_CPU_XLR
+#define MODULE_PROC_FAMILY "XLR "
 #else
 #error MODULE_PROC_FAMILY undefined for your processor configuration
 #endif
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 71bddf8f7d25..d9bc5d3593b6 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1006,6 +1006,7 @@ static void __cpuinit probe_pcache(void)
 	case CPU_25KF:
 	case CPU_SB1:
 	case CPU_SB1A:
+	case CPU_XLR:
 		c->dcache.flags |= MIPS_CACHE_PINDEX;
 		break;
 
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index f5734c2c8097..424ed4b92e6d 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -404,6 +404,7 @@ static void __cpuinit build_tlb_write_entry(u32 **p, struct uasm_label **l,
 	case CPU_5KC:
 	case CPU_TX49XX:
 	case CPU_PR4450:
+	case CPU_XLR:
 		uasm_i_nop(p);
 		tlbw(p);
 		break;
-- 
cgit v1.2.3


From 5c642506740ecbf20fb7a9e482287e4e5c639e5c Mon Sep 17 00:00:00 2001
From: Jayachandran C <jayachandranc@netlogicmicro.com>
Date: Sat, 7 May 2011 01:36:40 +0530
Subject: MIPS: Platform files for XLR/XLS processor support

* include/asm/netlogic added with files common for all Netlogic processors
  (common with XLP which will be added later)
* include/asm/netlogic/xlr for XLR/XLS chip specific files
* netlogic/xlr for XLR/XLS platform files

Signed-off-by: Jayachandran C <jayachandranc@netlogicmicro.com>
To: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2334/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/netlogic/interrupt.h    |  45 +++++
 arch/mips/include/asm/netlogic/mips-extns.h   |  76 +++++++++
 arch/mips/include/asm/netlogic/psb-bootinfo.h | 109 ++++++++++++
 arch/mips/include/asm/netlogic/xlr/gpio.h     |  73 ++++++++
 arch/mips/include/asm/netlogic/xlr/iomap.h    | 131 +++++++++++++++
 arch/mips/include/asm/netlogic/xlr/pic.h      | 231 ++++++++++++++++++++++++++
 arch/mips/include/asm/netlogic/xlr/xlr.h      |  54 ++++++
 arch/mips/netlogic/xlr/irq.c                  | 216 ++++++++++++++++++++++++
 arch/mips/netlogic/xlr/platform.c             |  98 +++++++++++
 arch/mips/netlogic/xlr/setup.c                | 188 +++++++++++++++++++++
 arch/mips/netlogic/xlr/smp.c                  | 225 +++++++++++++++++++++++++
 arch/mips/netlogic/xlr/smpboot.S              |  94 +++++++++++
 arch/mips/netlogic/xlr/time.c                 |  51 ++++++
 arch/mips/netlogic/xlr/xlr_console.c          |  46 +++++
 14 files changed, 1637 insertions(+)
 create mode 100644 arch/mips/include/asm/netlogic/interrupt.h
 create mode 100644 arch/mips/include/asm/netlogic/mips-extns.h
 create mode 100644 arch/mips/include/asm/netlogic/psb-bootinfo.h
 create mode 100644 arch/mips/include/asm/netlogic/xlr/gpio.h
 create mode 100644 arch/mips/include/asm/netlogic/xlr/iomap.h
 create mode 100644 arch/mips/include/asm/netlogic/xlr/pic.h
 create mode 100644 arch/mips/include/asm/netlogic/xlr/xlr.h
 create mode 100644 arch/mips/netlogic/xlr/irq.c
 create mode 100644 arch/mips/netlogic/xlr/platform.c
 create mode 100644 arch/mips/netlogic/xlr/setup.c
 create mode 100644 arch/mips/netlogic/xlr/smp.c
 create mode 100644 arch/mips/netlogic/xlr/smpboot.S
 create mode 100644 arch/mips/netlogic/xlr/time.c
 create mode 100644 arch/mips/netlogic/xlr/xlr_console.c

(limited to 'arch')

diff --git a/arch/mips/include/asm/netlogic/interrupt.h b/arch/mips/include/asm/netlogic/interrupt.h
new file mode 100644
index 000000000000..a85aadb6cfd7
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/interrupt.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_INTERRUPT_H
+#define _ASM_NLM_INTERRUPT_H
+
+/* Defines for the IRQ numbers */
+
+#define IRQ_IPI_SMP_FUNCTION	3
+#define IRQ_IPI_SMP_RESCHEDULE	4
+#define IRQ_MSGRING		6
+#define IRQ_TIMER		7
+
+#endif
diff --git a/arch/mips/include/asm/netlogic/mips-extns.h b/arch/mips/include/asm/netlogic/mips-extns.h
new file mode 100644
index 000000000000..8c53d0ba4bf2
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/mips-extns.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_MIPS_EXTS_H
+#define _ASM_NLM_MIPS_EXTS_H
+
+/*
+ * XLR and XLP interrupt request and interrupt mask registers
+ */
+#define read_c0_eirr()		__read_64bit_c0_register($9, 6)
+#define read_c0_eimr()		__read_64bit_c0_register($9, 7)
+#define write_c0_eirr(val)	__write_64bit_c0_register($9, 6, val)
+
+/*
+ * Writing EIMR in 32 bit is a special case, the lower 8 bit of the
+ * EIMR is shadowed in the status register, so we cannot save and
+ * restore status register for split read.
+ */
+#define write_c0_eimr(val)						\
+do {									\
+	if (sizeof(unsigned long) == 4)	{				\
+		unsigned long __flags;					\
+									\
+		local_irq_save(__flags);				\
+		__asm__ __volatile__(					\
+			".set\tmips64\n\t"				\
+			"dsll\t%L0, %L0, 32\n\t"			\
+			"dsrl\t%L0, %L0, 32\n\t"			\
+			"dsll\t%M0, %M0, 32\n\t"			\
+			"or\t%L0, %L0, %M0\n\t"				\
+			"dmtc0\t%L0, $9, 7\n\t"				\
+			".set\tmips0"					\
+			: : "r" (val));					\
+		__flags = (__flags & 0xffff00ff) | (((val) & 0xff) << 8);\
+		local_irq_restore(__flags);				\
+	} else								\
+		__write_64bit_c0_register($9, 7, (val));		\
+} while (0)
+
+static inline int hard_smp_processor_id(void)
+{
+	return __read_32bit_c0_register($15, 1) & 0x3ff;
+}
+
+#endif /*_ASM_NLM_MIPS_EXTS_H */
diff --git a/arch/mips/include/asm/netlogic/psb-bootinfo.h b/arch/mips/include/asm/netlogic/psb-bootinfo.h
new file mode 100644
index 000000000000..6878307f0ee6
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/psb-bootinfo.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NETLOGIC_BOOTINFO_H
+#define _ASM_NETLOGIC_BOOTINFO_H
+
+struct psb_info {
+	uint64_t boot_level;
+	uint64_t io_base;
+	uint64_t output_device;
+	uint64_t uart_print;
+	uint64_t led_output;
+	uint64_t init;
+	uint64_t exit;
+	uint64_t warm_reset;
+	uint64_t wakeup;
+	uint64_t online_cpu_map;
+	uint64_t master_reentry_sp;
+	uint64_t master_reentry_gp;
+	uint64_t master_reentry_fn;
+	uint64_t slave_reentry_fn;
+	uint64_t magic_dword;
+	uint64_t uart_putchar;
+	uint64_t size;
+	uint64_t uart_getchar;
+	uint64_t nmi_handler;
+	uint64_t psb_version;
+	uint64_t mac_addr;
+	uint64_t cpu_frequency;
+	uint64_t board_version;
+	uint64_t malloc;
+	uint64_t free;
+	uint64_t global_shmem_addr;
+	uint64_t global_shmem_size;
+	uint64_t psb_os_cpu_map;
+	uint64_t userapp_cpu_map;
+	uint64_t wakeup_os;
+	uint64_t psb_mem_map;
+	uint64_t board_major_version;
+	uint64_t board_minor_version;
+	uint64_t board_manf_revision;
+	uint64_t board_serial_number;
+	uint64_t psb_physaddr_map;
+	uint64_t xlr_loaderip_config;
+	uint64_t bldr_envp;
+	uint64_t avail_mem_map;
+};
+
+enum {
+	NETLOGIC_IO_SPACE = 0x10,
+	PCIX_IO_SPACE,
+	PCIX_CFG_SPACE,
+	PCIX_MEMORY_SPACE,
+	HT_IO_SPACE,
+	HT_CFG_SPACE,
+	HT_MEMORY_SPACE,
+	SRAM_SPACE,
+	FLASH_CONTROLLER_SPACE
+};
+
+#define NLM_MAX_ARGS	64
+#define NLM_MAX_ENVS	32
+
+/* This is what netlboot passes and linux boot_mem_map is subtly different */
+#define NLM_BOOT_MEM_MAP_MAX	32
+struct nlm_boot_mem_map {
+	int nr_map;
+	struct nlm_boot_mem_map_entry {
+		uint64_t addr;		/* start of memory segment */
+		uint64_t size;		/* size of memory segment */
+		uint32_t type;		/* type of memory segment */
+	} map[NLM_BOOT_MEM_MAP_MAX];
+};
+
+/* Pointer to saved boot loader info */
+extern struct psb_info nlm_prom_info;
+
+#endif
diff --git a/arch/mips/include/asm/netlogic/xlr/gpio.h b/arch/mips/include/asm/netlogic/xlr/gpio.h
new file mode 100644
index 000000000000..51f6ad4aeb14
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/xlr/gpio.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_GPIO_H
+#define _ASM_NLM_GPIO_H
+
+#define NETLOGIC_GPIO_INT_EN_REG		0
+#define NETLOGIC_GPIO_INPUT_INVERSION_REG	1
+#define NETLOGIC_GPIO_IO_DIR_REG		2
+#define NETLOGIC_GPIO_IO_DATA_WR_REG		3
+#define NETLOGIC_GPIO_IO_DATA_RD_REG		4
+
+#define NETLOGIC_GPIO_SWRESET_REG		8
+#define NETLOGIC_GPIO_DRAM1_CNTRL_REG		9
+#define NETLOGIC_GPIO_DRAM1_RATIO_REG		10
+#define NETLOGIC_GPIO_DRAM1_RESET_REG		11
+#define NETLOGIC_GPIO_DRAM1_STATUS_REG		12
+#define NETLOGIC_GPIO_DRAM2_CNTRL_REG		13
+#define NETLOGIC_GPIO_DRAM2_RATIO_REG		14
+#define NETLOGIC_GPIO_DRAM2_RESET_REG		15
+#define NETLOGIC_GPIO_DRAM2_STATUS_REG		16
+
+#define NETLOGIC_GPIO_PWRON_RESET_CFG_REG	21
+#define NETLOGIC_GPIO_BIST_ALL_GO_STATUS_REG	24
+#define NETLOGIC_GPIO_BIST_CPU_GO_STATUS_REG	25
+#define NETLOGIC_GPIO_BIST_DEV_GO_STATUS_REG	26
+
+#define NETLOGIC_GPIO_FUSE_BANK_REG		35
+#define NETLOGIC_GPIO_CPU_RESET_REG		40
+#define NETLOGIC_GPIO_RNG_REG			43
+
+#define NETLOGIC_PWRON_RESET_PCMCIA_BOOT	17
+#define NETLOGIC_GPIO_LED_BITMAP	0x1700000
+#define NETLOGIC_GPIO_LED_0_SHIFT		20
+#define NETLOGIC_GPIO_LED_1_SHIFT		24
+
+#define NETLOGIC_GPIO_LED_OUTPUT_CODE_RESET	0x01
+#define NETLOGIC_GPIO_LED_OUTPUT_CODE_HARD_RESET 0x02
+#define NETLOGIC_GPIO_LED_OUTPUT_CODE_SOFT_RESET 0x03
+#define NETLOGIC_GPIO_LED_OUTPUT_CODE_MAIN	0x04
+
+#endif
diff --git a/arch/mips/include/asm/netlogic/xlr/iomap.h b/arch/mips/include/asm/netlogic/xlr/iomap.h
new file mode 100644
index 000000000000..2e3a4dd53045
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/xlr/iomap.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_IOMAP_H
+#define _ASM_NLM_IOMAP_H
+
+#define DEFAULT_NETLOGIC_IO_BASE           CKSEG1ADDR(0x1ef00000)
+#define NETLOGIC_IO_DDR2_CHN0_OFFSET       0x01000
+#define NETLOGIC_IO_DDR2_CHN1_OFFSET       0x02000
+#define NETLOGIC_IO_DDR2_CHN2_OFFSET       0x03000
+#define NETLOGIC_IO_DDR2_CHN3_OFFSET       0x04000
+#define NETLOGIC_IO_PIC_OFFSET             0x08000
+#define NETLOGIC_IO_UART_0_OFFSET          0x14000
+#define NETLOGIC_IO_UART_1_OFFSET          0x15100
+
+#define NETLOGIC_IO_SIZE                   0x1000
+
+#define NETLOGIC_IO_BRIDGE_OFFSET          0x00000
+
+#define NETLOGIC_IO_RLD2_CHN0_OFFSET       0x05000
+#define NETLOGIC_IO_RLD2_CHN1_OFFSET       0x06000
+
+#define NETLOGIC_IO_SRAM_OFFSET            0x07000
+
+#define NETLOGIC_IO_PCIX_OFFSET            0x09000
+#define NETLOGIC_IO_HT_OFFSET              0x0A000
+
+#define NETLOGIC_IO_SECURITY_OFFSET        0x0B000
+
+#define NETLOGIC_IO_GMAC_0_OFFSET          0x0C000
+#define NETLOGIC_IO_GMAC_1_OFFSET          0x0D000
+#define NETLOGIC_IO_GMAC_2_OFFSET          0x0E000
+#define NETLOGIC_IO_GMAC_3_OFFSET          0x0F000
+
+/* XLS devices */
+#define NETLOGIC_IO_GMAC_4_OFFSET          0x20000
+#define NETLOGIC_IO_GMAC_5_OFFSET          0x21000
+#define NETLOGIC_IO_GMAC_6_OFFSET          0x22000
+#define NETLOGIC_IO_GMAC_7_OFFSET          0x23000
+
+#define NETLOGIC_IO_PCIE_0_OFFSET          0x1E000
+#define NETLOGIC_IO_PCIE_1_OFFSET          0x1F000
+#define NETLOGIC_IO_SRIO_0_OFFSET          0x1E000
+#define NETLOGIC_IO_SRIO_1_OFFSET          0x1F000
+
+#define NETLOGIC_IO_USB_0_OFFSET           0x24000
+#define NETLOGIC_IO_USB_1_OFFSET           0x25000
+
+#define NETLOGIC_IO_COMP_OFFSET            0x1D000
+/* end XLS devices */
+
+/* XLR devices */
+#define NETLOGIC_IO_SPI4_0_OFFSET          0x10000
+#define NETLOGIC_IO_XGMAC_0_OFFSET         0x11000
+#define NETLOGIC_IO_SPI4_1_OFFSET          0x12000
+#define NETLOGIC_IO_XGMAC_1_OFFSET         0x13000
+/* end XLR devices */
+
+#define NETLOGIC_IO_I2C_0_OFFSET           0x16000
+#define NETLOGIC_IO_I2C_1_OFFSET           0x17000
+
+#define NETLOGIC_IO_GPIO_OFFSET            0x18000
+#define NETLOGIC_IO_FLASH_OFFSET           0x19000
+#define NETLOGIC_IO_TB_OFFSET              0x1C000
+
+#define NETLOGIC_CPLD_OFFSET               KSEG1ADDR(0x1d840000)
+
+/*
+ * Base Address (Virtual) of the PCI Config address space
+ * For now, choose 256M phys in kseg1 = 0xA0000000 + (1<<28)
+ * Config space spans 256 (num of buses) * 256 (num functions) * 256 bytes
+ * ie 1<<24 = 16M
+ */
+#define DEFAULT_PCI_CONFIG_BASE         0x18000000
+#define DEFAULT_HT_TYPE0_CFG_BASE       0x16000000
+#define DEFAULT_HT_TYPE1_CFG_BASE       0x17000000
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+typedef volatile __u32 nlm_reg_t;
+extern unsigned long netlogic_io_base;
+
+/* FIXME read once in write_reg */
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define netlogic_read_reg(base, offset)		((base)[(offset)])
+#define netlogic_write_reg(base, offset, value)	((base)[(offset)] = (value))
+#else
+#define netlogic_read_reg(base, offset)		(be32_to_cpu((base)[(offset)]))
+#define netlogic_write_reg(base, offset, value) \
+				((base)[(offset)] = cpu_to_be32((value)))
+#endif
+
+#define netlogic_read_reg_le32(base, offset) (le32_to_cpu((base)[(offset)]))
+#define netlogic_write_reg_le32(base, offset, value) \
+				((base)[(offset)] = cpu_to_le32((value)))
+#define netlogic_io_mmio(offset) ((nlm_reg_t *)(netlogic_io_base+(offset)))
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/mips/include/asm/netlogic/xlr/pic.h b/arch/mips/include/asm/netlogic/xlr/pic.h
new file mode 100644
index 000000000000..5cceb746f080
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/xlr/pic.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_XLR_PIC_H
+#define _ASM_NLM_XLR_PIC_H
+
+#define PIC_CLKS_PER_SEC		66666666ULL
+/* PIC hardware interrupt numbers */
+#define PIC_IRT_WD_INDEX		0
+#define PIC_IRT_TIMER_0_INDEX		1
+#define PIC_IRT_TIMER_1_INDEX		2
+#define PIC_IRT_TIMER_2_INDEX		3
+#define PIC_IRT_TIMER_3_INDEX		4
+#define PIC_IRT_TIMER_4_INDEX		5
+#define PIC_IRT_TIMER_5_INDEX		6
+#define PIC_IRT_TIMER_6_INDEX		7
+#define PIC_IRT_TIMER_7_INDEX		8
+#define PIC_IRT_CLOCK_INDEX		PIC_IRT_TIMER_7_INDEX
+#define PIC_IRT_UART_0_INDEX		9
+#define PIC_IRT_UART_1_INDEX		10
+#define PIC_IRT_I2C_0_INDEX		11
+#define PIC_IRT_I2C_1_INDEX		12
+#define PIC_IRT_PCMCIA_INDEX		13
+#define PIC_IRT_GPIO_INDEX		14
+#define PIC_IRT_HYPER_INDEX		15
+#define PIC_IRT_PCIX_INDEX		16
+/* XLS */
+#define PIC_IRT_CDE_INDEX		15
+#define PIC_IRT_BRIDGE_TB_XLS_INDEX	16
+/* XLS */
+#define PIC_IRT_GMAC0_INDEX		17
+#define PIC_IRT_GMAC1_INDEX		18
+#define PIC_IRT_GMAC2_INDEX		19
+#define PIC_IRT_GMAC3_INDEX		20
+#define PIC_IRT_XGS0_INDEX		21
+#define PIC_IRT_XGS1_INDEX		22
+#define PIC_IRT_HYPER_FATAL_INDEX	23
+#define PIC_IRT_PCIX_FATAL_INDEX	24
+#define PIC_IRT_BRIDGE_AERR_INDEX	25
+#define PIC_IRT_BRIDGE_BERR_INDEX	26
+#define PIC_IRT_BRIDGE_TB_XLR_INDEX	27
+#define PIC_IRT_BRIDGE_AERR_NMI_INDEX	28
+/* XLS */
+#define PIC_IRT_GMAC4_INDEX		21
+#define PIC_IRT_GMAC5_INDEX		22
+#define PIC_IRT_GMAC6_INDEX		23
+#define PIC_IRT_GMAC7_INDEX		24
+#define PIC_IRT_BRIDGE_ERR_INDEX	25
+#define PIC_IRT_PCIE_LINK0_INDEX	26
+#define PIC_IRT_PCIE_LINK1_INDEX	27
+#define PIC_IRT_PCIE_LINK2_INDEX	23
+#define PIC_IRT_PCIE_LINK3_INDEX	24
+#define PIC_IRT_PCIE_XLSB0_LINK2_INDEX	28
+#define PIC_IRT_PCIE_XLSB0_LINK3_INDEX	29
+#define PIC_IRT_SRIO_LINK0_INDEX	26
+#define PIC_IRT_SRIO_LINK1_INDEX	27
+#define PIC_IRT_SRIO_LINK2_INDEX	28
+#define PIC_IRT_SRIO_LINK3_INDEX	29
+#define PIC_IRT_PCIE_INT_INDEX		28
+#define PIC_IRT_PCIE_FATAL_INDEX	29
+#define PIC_IRT_GPIO_B_INDEX		30
+#define PIC_IRT_USB_INDEX		31
+/* XLS */
+#define PIC_NUM_IRTS			32
+
+
+#define PIC_CLOCK_TIMER			7
+
+/* PIC Registers */
+#define PIC_CTRL			0x00
+#define PIC_IPI				0x04
+#define PIC_INT_ACK			0x06
+
+#define WD_MAX_VAL_0			0x08
+#define WD_MAX_VAL_1			0x09
+#define WD_MASK_0			0x0a
+#define WD_MASK_1			0x0b
+#define WD_HEARBEAT_0			0x0c
+#define WD_HEARBEAT_1			0x0d
+
+#define PIC_IRT_0_BASE			0x40
+#define PIC_IRT_1_BASE			0x80
+#define PIC_TIMER_MAXVAL_0_BASE		0x100
+#define PIC_TIMER_MAXVAL_1_BASE		0x110
+#define PIC_TIMER_COUNT_0_BASE		0x120
+#define PIC_TIMER_COUNT_1_BASE		0x130
+
+#define PIC_IRT_0(picintr)      (PIC_IRT_0_BASE + (picintr))
+#define PIC_IRT_1(picintr)	(PIC_IRT_1_BASE + (picintr))
+
+#define PIC_TIMER_MAXVAL_0(i)	(PIC_TIMER_MAXVAL_0_BASE + (i))
+#define PIC_TIMER_MAXVAL_1(i)	(PIC_TIMER_MAXVAL_1_BASE + (i))
+#define PIC_TIMER_COUNT_0(i)	(PIC_TIMER_COUNT_0_BASE + (i))
+#define PIC_TIMER_COUNT_1(i)	(PIC_TIMER_COUNT_0_BASE + (i))
+
+/*
+ * Mapping between hardware interrupt numbers and IRQs on CPU
+ * we use a simple scheme to map PIC interrupts 0-31 to IRQs
+ * 8-39. This leaves the IRQ 0-7 for cpu interrupts like
+ * count/compare and FMN
+ */
+#define PIC_IRQ_BASE            8
+#define PIC_INTR_TO_IRQ(i)      (PIC_IRQ_BASE + (i))
+#define PIC_IRQ_TO_INTR(i)      ((i) - PIC_IRQ_BASE)
+
+#define PIC_IRT_FIRST_IRQ	PIC_IRQ_BASE
+#define PIC_WD_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_WD_INDEX)
+#define PIC_TIMER_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_0_INDEX)
+#define PIC_TIMER_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_1_INDEX)
+#define PIC_TIMER_2_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_2_INDEX)
+#define PIC_TIMER_3_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_3_INDEX)
+#define PIC_TIMER_4_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_4_INDEX)
+#define PIC_TIMER_5_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_5_INDEX)
+#define PIC_TIMER_6_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_6_INDEX)
+#define PIC_TIMER_7_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_TIMER_7_INDEX)
+#define PIC_CLOCK_IRQ		(PIC_TIMER_7_IRQ)
+#define PIC_UART_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_UART_0_INDEX)
+#define PIC_UART_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_UART_1_INDEX)
+#define PIC_I2C_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_I2C_0_INDEX)
+#define PIC_I2C_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_I2C_1_INDEX)
+#define PIC_PCMCIA_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_PCMCIA_INDEX)
+#define PIC_GPIO_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GPIO_INDEX)
+#define PIC_HYPER_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_HYPER_INDEX)
+#define PIC_PCIX_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_PCIX_INDEX)
+/* XLS */
+#define PIC_CDE_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_CDE_INDEX)
+#define PIC_BRIDGE_TB_XLS_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_TB_XLS_INDEX)
+/* end XLS */
+#define PIC_GMAC_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC0_INDEX)
+#define PIC_GMAC_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC1_INDEX)
+#define PIC_GMAC_2_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC2_INDEX)
+#define PIC_GMAC_3_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC3_INDEX)
+#define PIC_XGS_0_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_XGS0_INDEX)
+#define PIC_XGS_1_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_XGS1_INDEX)
+#define PIC_HYPER_FATAL_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_HYPER_FATAL_INDEX)
+#define PIC_PCIX_FATAL_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIX_FATAL_INDEX)
+#define PIC_BRIDGE_AERR_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_AERR_INDEX)
+#define PIC_BRIDGE_BERR_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_BERR_INDEX)
+#define PIC_BRIDGE_TB_XLR_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_TB_XLR_INDEX)
+#define PIC_BRIDGE_AERR_NMI_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_AERR_NMI_INDEX)
+/* XLS defines */
+#define PIC_GMAC_4_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC4_INDEX)
+#define PIC_GMAC_5_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC5_INDEX)
+#define PIC_GMAC_6_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC6_INDEX)
+#define PIC_GMAC_7_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GMAC7_INDEX)
+#define PIC_BRIDGE_ERR_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_BRIDGE_ERR_INDEX)
+#define PIC_PCIE_LINK0_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK0_INDEX)
+#define PIC_PCIE_LINK1_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK1_INDEX)
+#define PIC_PCIE_LINK2_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK2_INDEX)
+#define PIC_PCIE_LINK3_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_LINK3_INDEX)
+#define PIC_PCIE_XLSB0_LINK2_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_XLSB0_LINK2_INDEX)
+#define PIC_PCIE_XLSB0_LINK3_IRQ PIC_INTR_TO_IRQ(PIC_IRT_PCIE_XLSB0_LINK3_INDEX)
+#define PIC_SRIO_LINK0_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK0_INDEX)
+#define PIC_SRIO_LINK1_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK1_INDEX)
+#define PIC_SRIO_LINK2_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK2_INDEX)
+#define PIC_SRIO_LINK3_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_SRIO_LINK3_INDEX)
+#define PIC_PCIE_INT_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_INT__INDEX)
+#define PIC_PCIE_FATAL_IRQ	PIC_INTR_TO_IRQ(PIC_IRT_PCIE_FATAL_INDEX)
+#define PIC_GPIO_B_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_GPIO_B_INDEX)
+#define PIC_USB_IRQ		PIC_INTR_TO_IRQ(PIC_IRT_USB_INDEX)
+#define PIC_IRT_LAST_IRQ	PIC_USB_IRQ
+/* end XLS */
+
+#ifndef __ASSEMBLY__
+static inline void pic_send_ipi(u32 ipi)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+
+	netlogic_write_reg(mmio, PIC_IPI, ipi);
+}
+
+static inline u32 pic_read_control(void)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+
+	return netlogic_read_reg(mmio, PIC_CTRL);
+}
+
+static inline void pic_write_control(u32 control)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+
+	netlogic_write_reg(mmio, PIC_CTRL, control);
+}
+
+static inline void pic_update_control(u32 control)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+
+	netlogic_write_reg(mmio, PIC_CTRL,
+		(control | netlogic_read_reg(mmio, PIC_CTRL)));
+}
+
+#define PIC_IRQ_IS_EDGE_TRIGGERED(irq)	(((irq) >= PIC_TIMER_0_IRQ) && \
+					((irq) <= PIC_TIMER_7_IRQ))
+#define PIC_IRQ_IS_IRT(irq)		(((irq) >= PIC_IRT_FIRST_IRQ) && \
+					((irq) <= PIC_IRT_LAST_IRQ))
+#endif
+
+#endif /* _ASM_NLM_XLR_PIC_H */
diff --git a/arch/mips/include/asm/netlogic/xlr/xlr.h b/arch/mips/include/asm/netlogic/xlr/xlr.h
new file mode 100644
index 000000000000..454c236d6854
--- /dev/null
+++ b/arch/mips/include/asm/netlogic/xlr/xlr.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ASM_NLM_XLR_H
+#define _ASM_NLM_XLR_H
+
+/* Platform UART functions */
+struct uart_port;
+unsigned int nlm_xlr_uart_in(struct uart_port *, int);
+void nlm_xlr_uart_out(struct uart_port *, int, int);
+
+/* SMP support functions */
+void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc);
+void nlm_smp_resched_ipi_handler(unsigned int irq, struct irq_desc *desc);
+int nlm_wakeup_secondary_cpus(u32 wakeup_mask);
+void nlm_smp_irq_init(void);
+void nlm_boot_smp_nmi(void);
+void prom_pre_boot_secondary_cpus(void);
+
+extern struct plat_smp_ops nlm_smp_ops;
+extern unsigned long nlm_common_ebase;
+
+#endif /* _ASM_NLM_XLR_H */
diff --git a/arch/mips/netlogic/xlr/irq.c b/arch/mips/netlogic/xlr/irq.c
new file mode 100644
index 000000000000..2033f5656f68
--- /dev/null
+++ b/arch/mips/netlogic/xlr/irq.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+
+#include <asm/mipsregs.h>
+
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/xlr.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/mips-extns.h>
+
+static u64 nlm_irq_mask;
+static DEFINE_SPINLOCK(nlm_pic_lock);
+
+static void xlr_pic_enable(struct irq_data *d)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+	unsigned long flags;
+	nlm_reg_t reg;
+	int irq = d->irq;
+
+	WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq);
+
+	spin_lock_irqsave(&nlm_pic_lock, flags);
+	reg = netlogic_read_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE);
+	netlogic_write_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE,
+			  reg | (1 << 6) | (1 << 30) | (1 << 31));
+	spin_unlock_irqrestore(&nlm_pic_lock, flags);
+}
+
+static void xlr_pic_mask(struct irq_data *d)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+	unsigned long flags;
+	nlm_reg_t reg;
+	int irq = d->irq;
+
+	WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq);
+
+	spin_lock_irqsave(&nlm_pic_lock, flags);
+	reg = netlogic_read_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE);
+	netlogic_write_reg(mmio, PIC_IRT_1_BASE + irq - PIC_IRQ_BASE,
+			  reg | (1 << 6) | (1 << 30) | (0 << 31));
+	spin_unlock_irqrestore(&nlm_pic_lock, flags);
+}
+
+static void xlr_pic_ack(struct irq_data *d)
+{
+	unsigned long flags;
+	nlm_reg_t *mmio;
+	int irq = d->irq;
+
+	WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq);
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+	spin_lock_irqsave(&nlm_pic_lock, flags);
+	netlogic_write_reg(mmio, PIC_INT_ACK, (1 << (irq - PIC_IRQ_BASE)));
+	spin_unlock_irqrestore(&nlm_pic_lock, flags);
+}
+
+/*
+ * This chip definition handles interrupts routed thru the XLR
+ * hardware PIC, currently IRQs 8-39 are mapped to hardware intr
+ * 0-31 wired the XLR PIC
+ */
+static struct irq_chip xlr_pic = {
+	.name		= "XLR-PIC",
+	.irq_enable	= xlr_pic_enable,
+	.irq_mask	= xlr_pic_mask,
+	.irq_ack	= xlr_pic_ack,
+};
+
+static void rsvd_irq_handler(struct irq_data *d)
+{
+	WARN(d->irq >= PIC_IRQ_BASE, "Bad irq %d", d->irq);
+}
+
+/*
+ * Chip definition for CPU originated interrupts(timer, msg) and
+ * IPIs
+ */
+struct irq_chip nlm_cpu_intr = {
+	.name		= "XLR-CPU-INTR",
+	.irq_enable	= rsvd_irq_handler,
+	.irq_mask	= rsvd_irq_handler,
+	.irq_ack	= rsvd_irq_handler,
+};
+
+void __init init_xlr_irqs(void)
+{
+	nlm_reg_t *mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
+	uint32_t thread_mask = 1;
+	int level, i;
+
+	pr_info("Interrupt thread mask [%x]\n", thread_mask);
+	for (i = 0; i < PIC_NUM_IRTS; i++) {
+		level = PIC_IRQ_IS_EDGE_TRIGGERED(i);
+
+		/* Bind all PIC irqs to boot cpu */
+		netlogic_write_reg(mmio, PIC_IRT_0_BASE + i, thread_mask);
+
+		/*
+		 * Use local scheduling and high polarity for all IRTs
+		 * Invalidate all IRTs, by default
+		 */
+		netlogic_write_reg(mmio, PIC_IRT_1_BASE + i,
+				(level << 30) | (1 << 6) | (PIC_IRQ_BASE + i));
+	}
+
+	/* Make all IRQs as level triggered by default */
+	for (i = 0; i < NR_IRQS; i++) {
+		if (PIC_IRQ_IS_IRT(i))
+			irq_set_chip_and_handler(i, &xlr_pic, handle_level_irq);
+		else
+			irq_set_chip_and_handler(i, &nlm_cpu_intr,
+						handle_level_irq);
+	}
+#ifdef CONFIG_SMP
+	irq_set_chip_and_handler(IRQ_IPI_SMP_FUNCTION, &nlm_cpu_intr,
+			 nlm_smp_function_ipi_handler);
+	irq_set_chip_and_handler(IRQ_IPI_SMP_RESCHEDULE, &nlm_cpu_intr,
+			 nlm_smp_resched_ipi_handler);
+	nlm_irq_mask |=
+	    ((1ULL << IRQ_IPI_SMP_FUNCTION) | (1ULL << IRQ_IPI_SMP_RESCHEDULE));
+#endif
+	/* unmask all PIC related interrupts. If no handler is installed by the
+	 * drivers, it'll just ack the interrupt and return
+	 */
+	for (i = PIC_IRT_FIRST_IRQ; i <= PIC_IRT_LAST_IRQ; i++)
+		nlm_irq_mask |= (1ULL << i);
+
+	nlm_irq_mask |= (1ULL << IRQ_TIMER);
+}
+
+void __init arch_init_irq(void)
+{
+	/* Initialize the irq descriptors */
+	init_xlr_irqs();
+	write_c0_eimr(nlm_irq_mask);
+}
+
+void __cpuinit nlm_smp_irq_init(void)
+{
+	/* set interrupt mask for non-zero cpus */
+	write_c0_eimr(nlm_irq_mask);
+}
+
+asmlinkage void plat_irq_dispatch(void)
+{
+	uint64_t eirr;
+	int i;
+
+	eirr = read_c0_eirr() & read_c0_eimr();
+	if (!eirr)
+		return;
+
+	/* no need of EIRR here, writing compare clears interrupt */
+	if (eirr & (1 << IRQ_TIMER)) {
+		do_IRQ(IRQ_TIMER);
+		return;
+	}
+
+	/* TODO use dcltz: optimize below code */
+	for (i = 63; i != -1; i--) {
+		if (eirr & (1ULL << i))
+			break;
+	}
+	if (i == -1) {
+		pr_err("no interrupt !!\n");
+		return;
+	}
+
+	/* Ack eirr */
+	write_c0_eirr(1ULL << i);
+
+	do_IRQ(i);
+}
diff --git a/arch/mips/netlogic/xlr/platform.c b/arch/mips/netlogic/xlr/platform.c
new file mode 100644
index 000000000000..609ec2534642
--- /dev/null
+++ b/arch/mips/netlogic/xlr/platform.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2011, Netlogic Microsystems.
+ * Copyright 2004, Matt Porter <mporter@kernel.crashing.org>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/resource.h>
+#include <linux/serial_8250.h>
+#include <linux/serial_reg.h>
+
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/xlr.h>
+
+unsigned int nlm_xlr_uart_in(struct uart_port *p, int offset)
+{
+	nlm_reg_t *mmio;
+	unsigned int value;
+
+	/* XLR uart does not need any mapping of regs */
+	mmio = (nlm_reg_t *)(p->membase + (offset << p->regshift));
+	value = netlogic_read_reg(mmio, 0);
+
+	/* See XLR/XLS errata */
+	if (offset == UART_MSR)
+		value ^= 0xF0;
+	else if (offset == UART_MCR)
+		value ^= 0x3;
+
+	return value;
+}
+
+void nlm_xlr_uart_out(struct uart_port *p, int offset, int value)
+{
+	nlm_reg_t *mmio;
+
+	/* XLR uart does not need any mapping of regs */
+	mmio = (nlm_reg_t *)(p->membase + (offset << p->regshift));
+
+	/* See XLR/XLS errata */
+	if (offset == UART_MSR)
+		value ^= 0xF0;
+	else if (offset == UART_MCR)
+		value ^= 0x3;
+
+	netlogic_write_reg(mmio, 0, value);
+}
+
+#define PORT(_irq)					\
+	{						\
+		.irq		= _irq,			\
+		.regshift	= 2,			\
+		.iotype		= UPIO_MEM32,		\
+		.flags		= (UPF_SKIP_TEST |	\
+			 UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF),\
+		.uartclk	= PIC_CLKS_PER_SEC,	\
+		.type		= PORT_16550A,		\
+		.serial_in	= nlm_xlr_uart_in,	\
+		.serial_out	= nlm_xlr_uart_out,	\
+	}
+
+static struct plat_serial8250_port xlr_uart_data[] = {
+	PORT(PIC_UART_0_IRQ),
+	PORT(PIC_UART_1_IRQ),
+	{},
+};
+
+static struct platform_device uart_device = {
+	.name		= "serial8250",
+	.id		= PLAT8250_DEV_PLATFORM,
+	.dev = {
+		.platform_data = xlr_uart_data,
+	},
+};
+
+static int __init nlm_uart_init(void)
+{
+	nlm_reg_t *mmio;
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_UART_0_OFFSET);
+	xlr_uart_data[0].membase = (void __iomem *)mmio;
+	xlr_uart_data[0].mapbase = CPHYSADDR((unsigned long)mmio);
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_UART_1_OFFSET);
+	xlr_uart_data[1].membase = (void __iomem *)mmio;
+	xlr_uart_data[1].mapbase = CPHYSADDR((unsigned long)mmio);
+
+	return platform_device_register(&uart_device);
+}
+
+arch_initcall(nlm_uart_init);
diff --git a/arch/mips/netlogic/xlr/setup.c b/arch/mips/netlogic/xlr/setup.c
new file mode 100644
index 000000000000..482802569e74
--- /dev/null
+++ b/arch/mips/netlogic/xlr/setup.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/serial_8250.h>
+#include <linux/pm.h>
+
+#include <asm/reboot.h>
+#include <asm/time.h>
+#include <asm/bootinfo.h>
+#include <asm/smp-ops.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/psb-bootinfo.h>
+
+#include <asm/netlogic/xlr/xlr.h>
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/gpio.h>
+
+unsigned long netlogic_io_base = (unsigned long)(DEFAULT_NETLOGIC_IO_BASE);
+unsigned long nlm_common_ebase = 0x0;
+struct psb_info nlm_prom_info;
+
+static void nlm_early_serial_setup(void)
+{
+	struct uart_port s;
+	nlm_reg_t *uart_base;
+
+	uart_base = netlogic_io_mmio(NETLOGIC_IO_UART_0_OFFSET);
+	memset(&s, 0, sizeof(s));
+	s.flags		= ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST;
+	s.iotype	= UPIO_MEM32;
+	s.regshift	= 2;
+	s.irq		= PIC_UART_0_IRQ;
+	s.uartclk	= PIC_CLKS_PER_SEC;
+	s.serial_in	= nlm_xlr_uart_in;
+	s.serial_out	= nlm_xlr_uart_out;
+	s.mapbase	= (unsigned long)uart_base;
+	s.membase	= (unsigned char __iomem *)uart_base;
+	early_serial_setup(&s);
+}
+
+static void nlm_linux_exit(void)
+{
+	nlm_reg_t *mmio;
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_GPIO_OFFSET);
+	/* trigger a chip reset by writing 1 to GPIO_SWRESET_REG */
+	netlogic_write_reg(mmio, NETLOGIC_GPIO_SWRESET_REG, 1);
+	for ( ; ; )
+		cpu_wait();
+}
+
+void __init plat_mem_setup(void)
+{
+	panic_timeout	= 5;
+	_machine_restart = (void (*)(char *))nlm_linux_exit;
+	_machine_halt	= nlm_linux_exit;
+	pm_power_off	= nlm_linux_exit;
+}
+
+const char *get_system_type(void)
+{
+	return "Netlogic XLR/XLS Series";
+}
+
+void __init prom_free_prom_memory(void)
+{
+	/* Nothing yet */
+}
+
+static void build_arcs_cmdline(int *argv)
+{
+	int i, remain, len;
+	char *arg;
+
+	remain = sizeof(arcs_cmdline) - 1;
+	arcs_cmdline[0] = '\0';
+	for (i = 0; argv[i] != 0; i++) {
+		arg = (char *)(long)argv[i];
+		len = strlen(arg);
+		if (len + 1 > remain)
+			break;
+		strcat(arcs_cmdline, arg);
+		strcat(arcs_cmdline, " ");
+		remain -=  len + 1;
+	}
+
+	/* Add the default options here */
+	if ((strstr(arcs_cmdline, "console=")) == NULL) {
+		arg = "console=ttyS0,38400 ";
+		len = strlen(arg);
+		if (len > remain)
+			goto fail;
+		strcat(arcs_cmdline, arg);
+		remain -= len;
+	}
+#ifdef CONFIG_BLK_DEV_INITRD
+	if ((strstr(arcs_cmdline, "rdinit=")) == NULL) {
+		arg = "rdinit=/sbin/init ";
+		len = strlen(arg);
+		if (len > remain)
+			goto fail;
+		strcat(arcs_cmdline, arg);
+		remain -= len;
+	}
+#endif
+	return;
+fail:
+	panic("Cannot add %s, command line too big!", arg);
+}
+
+static void prom_add_memory(void)
+{
+	struct nlm_boot_mem_map *bootm;
+	u64 start, size;
+	u64 pref_backup = 512;  /* avoid pref walking beyond end */
+	int i;
+
+	bootm = (void *)(long)nlm_prom_info.psb_mem_map;
+	for (i = 0; i < bootm->nr_map; i++) {
+		if (bootm->map[i].type != BOOT_MEM_RAM)
+			continue;
+		start = bootm->map[i].addr;
+		size   = bootm->map[i].size;
+
+		/* Work around for using bootloader mem */
+		if (i == 0 && start == 0 && size == 0x0c000000)
+			size = 0x0ff00000;
+
+		add_memory_region(start, size - pref_backup, BOOT_MEM_RAM);
+	}
+}
+
+void __init prom_init(void)
+{
+	int *argv, *envp;		/* passed as 32 bit ptrs */
+	struct psb_info *prom_infop;
+
+	/* truncate to 32 bit and sign extend all args */
+	argv = (int *)(long)(int)fw_arg1;
+	envp = (int *)(long)(int)fw_arg2;
+	prom_infop = (struct psb_info *)(long)(int)fw_arg3;
+
+	nlm_prom_info = *prom_infop;
+
+	nlm_early_serial_setup();
+	build_arcs_cmdline(argv);
+	nlm_common_ebase = read_c0_ebase() & (~((1 << 12) - 1));
+	prom_add_memory();
+
+#ifdef CONFIG_SMP
+	nlm_wakeup_secondary_cpus(nlm_prom_info.online_cpu_map);
+	register_smp_ops(&nlm_smp_ops);
+#endif
+}
diff --git a/arch/mips/netlogic/xlr/smp.c b/arch/mips/netlogic/xlr/smp.c
new file mode 100644
index 000000000000..b495a7f1433b
--- /dev/null
+++ b/arch/mips/netlogic/xlr/smp.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+
+#include <asm/mmu_context.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/mips-extns.h>
+
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/xlr.h>
+
+void core_send_ipi(int logical_cpu, unsigned int action)
+{
+	int cpu = cpu_logical_map(logical_cpu);
+	u32 tid = cpu & 0x3;
+	u32 pid = (cpu >> 2) & 0x07;
+	u32 ipi = (tid << 16) | (pid << 20);
+
+	if (action & SMP_CALL_FUNCTION)
+		ipi |= IRQ_IPI_SMP_FUNCTION;
+	else if (action & SMP_RESCHEDULE_YOURSELF)
+		ipi |= IRQ_IPI_SMP_RESCHEDULE;
+	else
+		return;
+
+	pic_send_ipi(ipi);
+}
+
+void nlm_send_ipi_single(int cpu, unsigned int action)
+{
+	core_send_ipi(cpu, action);
+}
+
+void nlm_send_ipi_mask(const struct cpumask *mask, unsigned int action)
+{
+	int cpu;
+
+	for_each_cpu(cpu, mask) {
+		core_send_ipi(cpu, action);
+	}
+}
+
+/* IRQ_IPI_SMP_FUNCTION Handler */
+void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc)
+{
+	smp_call_function_interrupt();
+}
+
+/* IRQ_IPI_SMP_RESCHEDULE  handler */
+void nlm_smp_resched_ipi_handler(unsigned int irq, struct irq_desc *desc)
+{
+	set_need_resched();
+}
+
+void nlm_common_ipi_handler(int irq, struct pt_regs *regs)
+{
+	if (irq == IRQ_IPI_SMP_FUNCTION) {
+		smp_call_function_interrupt();
+	} else {
+		/* Announce that we are for reschduling */
+		set_need_resched();
+	}
+}
+
+/*
+ * Called before going into mips code, early cpu init
+ */
+void nlm_early_init_secondary(void)
+{
+	write_c0_ebase((uint32_t)nlm_common_ebase);
+	/* TLB partition here later */
+}
+
+/*
+ * Code to run on secondary just after probing the CPU
+ */
+static void __cpuinit nlm_init_secondary(void)
+{
+	nlm_smp_irq_init();
+}
+
+void nlm_smp_finish(void)
+{
+#ifdef notyet
+	nlm_common_msgring_cpu_init();
+#endif
+}
+
+void nlm_cpus_done(void)
+{
+}
+
+/*
+ * Boot all other cpus in the system, initialize them, and bring them into
+ * the boot function
+ */
+int nlm_cpu_unblock[NR_CPUS];
+int nlm_cpu_ready[NR_CPUS];
+unsigned long nlm_next_gp;
+unsigned long nlm_next_sp;
+cpumask_t phys_cpu_present_map;
+
+void nlm_boot_secondary(int logical_cpu, struct task_struct *idle)
+{
+	unsigned long gp = (unsigned long)task_thread_info(idle);
+	unsigned long sp = (unsigned long)__KSTK_TOS(idle);
+	int cpu = cpu_logical_map(logical_cpu);
+
+	nlm_next_sp = sp;
+	nlm_next_gp = gp;
+
+	/* barrier */
+	__sync();
+	nlm_cpu_unblock[cpu] = 1;
+}
+
+void __init nlm_smp_setup(void)
+{
+	unsigned int boot_cpu;
+	int num_cpus, i;
+
+	boot_cpu = hard_smp_processor_id();
+	cpus_clear(phys_cpu_present_map);
+
+	cpu_set(boot_cpu, phys_cpu_present_map);
+	__cpu_number_map[boot_cpu] = 0;
+	__cpu_logical_map[0] = boot_cpu;
+	cpu_set(0, cpu_possible_map);
+
+	num_cpus = 1;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (nlm_cpu_ready[i]) {
+			cpu_set(i, phys_cpu_present_map);
+			__cpu_number_map[i] = num_cpus;
+			__cpu_logical_map[num_cpus] = i;
+			cpu_set(num_cpus, cpu_possible_map);
+			++num_cpus;
+		}
+	}
+
+	pr_info("Phys CPU present map: %lx, possible map %lx\n",
+		(unsigned long)phys_cpu_present_map.bits[0],
+		(unsigned long)cpu_possible_map.bits[0]);
+
+	pr_info("Detected %i Slave CPU(s)\n", num_cpus);
+}
+
+void nlm_prepare_cpus(unsigned int max_cpus)
+{
+}
+
+struct plat_smp_ops nlm_smp_ops = {
+	.send_ipi_single	= nlm_send_ipi_single,
+	.send_ipi_mask		= nlm_send_ipi_mask,
+	.init_secondary		= nlm_init_secondary,
+	.smp_finish		= nlm_smp_finish,
+	.cpus_done		= nlm_cpus_done,
+	.boot_secondary		= nlm_boot_secondary,
+	.smp_setup		= nlm_smp_setup,
+	.prepare_cpus		= nlm_prepare_cpus,
+};
+
+unsigned long secondary_entry_point;
+
+int nlm_wakeup_secondary_cpus(u32 wakeup_mask)
+{
+	unsigned int tid, pid, ipi, i, boot_cpu;
+	void *reset_vec;
+
+	secondary_entry_point = (unsigned long)prom_pre_boot_secondary_cpus;
+	reset_vec = (void *)CKSEG1ADDR(0x1fc00000);
+	memcpy(reset_vec, nlm_boot_smp_nmi, 0x80);
+	boot_cpu = hard_smp_processor_id();
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (i == boot_cpu)
+			continue;
+		if (wakeup_mask & (1u << i)) {
+			tid = i & 0x3;
+			pid = (i >> 2) & 0x7;
+			ipi = (tid << 16) | (pid << 20) | (1 << 8);
+			pic_send_ipi(ipi);
+		}
+	}
+
+	return 0;
+}
diff --git a/arch/mips/netlogic/xlr/smpboot.S b/arch/mips/netlogic/xlr/smpboot.S
new file mode 100644
index 000000000000..b8e074402c99
--- /dev/null
+++ b/arch/mips/netlogic/xlr/smpboot.S
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+
+
+/* Don't jump to linux function from Bootloader stack. Change it
+ * here. Kernel might allocate bootloader memory before all the CPUs are
+ * brought up (eg: Inode cache region) and we better don't overwrite this
+ * memory
+ */
+NESTED(prom_pre_boot_secondary_cpus, 16, sp)
+	.set	mips64
+	mfc0	t0, $15, 1	# read ebase
+	andi	t0, 0x1f	# t0 has the processor_id()
+	sll	t0, 2		# offset in cpu array
+
+	PTR_LA	t1, nlm_cpu_ready # mark CPU ready
+	PTR_ADDU t1, t0
+	li	t2, 1
+	sw	t2, 0(t1)
+
+	PTR_LA	t1, nlm_cpu_unblock
+	PTR_ADDU t1, t0
+1:	lw	t2, 0(t1)	# wait till unblocked
+	beqz	t2, 1b
+	nop
+
+	PTR_LA	t1, nlm_next_sp
+	PTR_L	sp, 0(t1)
+	PTR_LA	t1, nlm_next_gp
+	PTR_L	gp, 0(t1)
+
+	PTR_LA	t0, nlm_early_init_secondary
+	jalr	t0
+	nop
+
+	PTR_LA	t0, smp_bootstrap
+	jr	t0
+	nop
+END(prom_pre_boot_secondary_cpus)
+
+NESTED(nlm_boot_smp_nmi, 0, sp)
+	.set push
+	.set noat
+	.set mips64
+	.set noreorder
+
+	/* Clear the  NMI and BEV bits */
+	MFC0	k0, CP0_STATUS
+	li 	k1, 0xffb7ffff
+	and	k0, k0, k1
+	MTC0	k0, CP0_STATUS
+
+	PTR_LA  k1, secondary_entry_point
+	PTR_L	k0, 0(k1)
+	jr	k0
+	nop
+	.set pop
+END(nlm_boot_smp_nmi)
diff --git a/arch/mips/netlogic/xlr/time.c b/arch/mips/netlogic/xlr/time.c
new file mode 100644
index 000000000000..0d81b262593c
--- /dev/null
+++ b/arch/mips/netlogic/xlr/time.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/init.h>
+
+#include <asm/time.h>
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/psb-bootinfo.h>
+
+unsigned int __cpuinit get_c0_compare_int(void)
+{
+	return IRQ_TIMER;
+}
+
+void __init plat_time_init(void)
+{
+	mips_hpt_frequency = nlm_prom_info.cpu_frequency;
+	pr_info("MIPS counter frequency [%ld]\n",
+		(unsigned long)mips_hpt_frequency);
+}
diff --git a/arch/mips/netlogic/xlr/xlr_console.c b/arch/mips/netlogic/xlr/xlr_console.c
new file mode 100644
index 000000000000..759df0692201
--- /dev/null
+++ b/arch/mips/netlogic/xlr/xlr_console.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/types.h>
+#include <asm/netlogic/xlr/iomap.h>
+
+void prom_putchar(char c)
+{
+	nlm_reg_t *mmio;
+
+	mmio = netlogic_io_mmio(NETLOGIC_IO_UART_0_OFFSET);
+	while (netlogic_read_reg(mmio, 0x5) == 0)
+		;
+	netlogic_write_reg(mmio, 0x0, c);
+}
-- 
cgit v1.2.3


From 7f058e852b229ec77b37676b2b78baf2e78ffee8 Mon Sep 17 00:00:00 2001
From: Jayachandran C <jayachandranc@netlogicmicro.com>
Date: Sat, 7 May 2011 01:36:57 +0530
Subject: MIPS: Kconfig and Makefile update for Netlogic XLR/XLS

Add NLM_XLR_BOARD, CPU_XLR and other config options
Makefile updates, mostly based on r4k

Signed-off-by: Jayachandran C <jayachandranc@netlogicmicro.com>
To: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2334/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig               | 42 +++++++++++++++++++++++++++++++++++++++++
 arch/mips/Makefile              | 12 ++++++++++++
 arch/mips/kernel/Makefile       |  1 +
 arch/mips/lib/Makefile          |  1 +
 arch/mips/mm/Makefile           |  1 +
 arch/mips/netlogic/Kconfig      |  5 +++++
 arch/mips/netlogic/xlr/Makefile |  5 +++++
 7 files changed, 67 insertions(+)
 create mode 100644 arch/mips/netlogic/Kconfig
 create mode 100644 arch/mips/netlogic/xlr/Makefile

(limited to 'arch')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 351c80fbba7e..5016caac6b91 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -736,6 +736,33 @@ config CAVIUM_OCTEON_REFERENCE_BOARD
 		Hikari
 	  Say Y here for most Octeon reference boards.
 
+config NLM_XLR_BOARD
+	bool "Netlogic XLR/XLS based systems"
+	depends on EXPERIMENTAL
+	select BOOT_ELF32
+	select NLM_COMMON
+	select NLM_XLR
+	select SYS_HAS_CPU_XLR
+	select SYS_SUPPORTS_SMP
+	select HW_HAS_PCI
+	select SWAP_IO_SPACE
+	select SYS_SUPPORTS_32BIT_KERNEL
+	select SYS_SUPPORTS_64BIT_KERNEL
+	select 64BIT_PHYS_ADDR
+	select SYS_SUPPORTS_BIG_ENDIAN
+	select SYS_SUPPORTS_HIGHMEM
+	select DMA_COHERENT
+	select NR_CPUS_DEFAULT_32
+	select CEVT_R4K
+	select CSRC_R4K
+	select IRQ_CPU
+	select ZONE_DMA if 64BIT
+	select SYNC_R4K
+	select SYS_HAS_EARLY_PRINTK
+	help
+	  Support for systems based on Netlogic XLR and XLS processors.
+	  Say Y here if you have a XLR or XLS based board.
+
 endchoice
 
 source "arch/mips/alchemy/Kconfig"
@@ -752,6 +779,7 @@ source "arch/mips/txx9/Kconfig"
 source "arch/mips/vr41xx/Kconfig"
 source "arch/mips/cavium-octeon/Kconfig"
 source "arch/mips/loongson/Kconfig"
+source "arch/mips/netlogic/Kconfig"
 
 endmenu
 
@@ -1420,6 +1448,17 @@ config CPU_BMIPS5000
 	help
 	  Broadcom BMIPS5000 processors.
 
+config CPU_XLR
+	bool "Netlogic XLR SoC"
+	depends on SYS_HAS_CPU_XLR
+	select CPU_SUPPORTS_32BIT_KERNEL
+	select CPU_SUPPORTS_64BIT_KERNEL
+	select CPU_SUPPORTS_HIGHMEM
+	select WEAK_ORDERING
+	select WEAK_REORDERING_BEYOND_LLSC
+	select CPU_SUPPORTS_HUGEPAGES
+	help
+	  Netlogic Microsystems XLR/XLS processors.
 endchoice
 
 if CPU_LOONGSON2F
@@ -1550,6 +1589,9 @@ config SYS_HAS_CPU_BMIPS4380
 config SYS_HAS_CPU_BMIPS5000
 	bool
 
+config SYS_HAS_CPU_XLR
+	bool
+
 #
 # CPU may reorder R->R, R->W, W->R, W->W
 # Reordering beyond LL and SC is handled in WEAK_REORDERING_BEYOND_LLSC
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 53e3514ba10e..884819cd0607 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -191,6 +191,18 @@ endif
 #
 include $(srctree)/arch/mips/Kbuild.platforms
 
+#
+# NETLOGIC SOC Common (common)
+#
+cflags-$(CONFIG_NLM_COMMON)		+= -I$(srctree)/arch/mips/include/asm/mach-netlogic
+cflags-$(CONFIG_NLM_COMMON)		+= -I$(srctree)/arch/mips/include/asm/netlogic
+
+#
+# NETLOGIC XLR/XLS SoC, Simulator and boards
+#
+core-$(CONFIG_NLM_XLR)      		+= arch/mips/netlogic/xlr/
+load-$(CONFIG_NLM_XLR_BOARD)		+= 0xffffffff84000000
+
 cflags-y			+= -I$(srctree)/arch/mips/include/asm/mach-generic
 drivers-$(CONFIG_PCI)		+= arch/mips/pci/
 
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index cedee2bcbd18..83bba332bbfc 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_CPU_TX39XX)	+= r2300_fpu.o r2300_switch.o
 obj-$(CONFIG_CPU_TX49XX)	+= r4k_fpu.o r4k_switch.o
 obj-$(CONFIG_CPU_VR41XX)	+= r4k_fpu.o r4k_switch.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= octeon_switch.o
+obj-$(CONFIG_CPU_XLR)		+= r4k_fpu.o r4k_switch.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_SMP_UP)		+= smp-up.o
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 2adead5a8a37..b2cad4fd5fc4 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_CPU_TX39XX)	+= r3k_dump_tlb.o
 obj-$(CONFIG_CPU_TX49XX)	+= dump_tlb.o
 obj-$(CONFIG_CPU_VR41XX)	+= dump_tlb.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= dump_tlb.o
+obj-$(CONFIG_CPU_XLR)		+= dump_tlb.o
 
 # libgcc-style stuff needed in the kernel
 obj-y += ashldi3.o ashrdi3.o cmpdi2.o lshrdi3.o ucmpdi2.o
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
index d679c772d082..eb4463689faa 100644
--- a/arch/mips/mm/Makefile
+++ b/arch/mips/mm/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_CPU_TX39XX)	+= c-tx39.o tlb-r3k.o
 obj-$(CONFIG_CPU_TX49XX)	+= c-r4k.o cex-gen.o tlb-r4k.o
 obj-$(CONFIG_CPU_VR41XX)	+= c-r4k.o cex-gen.o tlb-r4k.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= c-octeon.o cex-oct.o tlb-r4k.o
+obj-$(CONFIG_CPU_XLR)		+= c-r4k.o tlb-r4k.o cex-gen.o
 
 obj-$(CONFIG_IP22_CPU_SCACHE)	+= sc-ip22.o
 obj-$(CONFIG_R5000_CPU_SCACHE)  += sc-r5k.o
diff --git a/arch/mips/netlogic/Kconfig b/arch/mips/netlogic/Kconfig
new file mode 100644
index 000000000000..a5ca743613f2
--- /dev/null
+++ b/arch/mips/netlogic/Kconfig
@@ -0,0 +1,5 @@
+config NLM_COMMON
+	bool
+
+config NLM_XLR
+	bool
diff --git a/arch/mips/netlogic/xlr/Makefile b/arch/mips/netlogic/xlr/Makefile
new file mode 100644
index 000000000000..9bd3f731f62e
--- /dev/null
+++ b/arch/mips/netlogic/xlr/Makefile
@@ -0,0 +1,5 @@
+obj-y				+= setup.o platform.o irq.o setup.o time.o
+obj-$(CONFIG_SMP)		+= smp.o smpboot.o
+obj-$(CONFIG_EARLY_PRINTK)	+= xlr_console.o
+
+EXTRA_CFLAGS			+= -Werror
-- 
cgit v1.2.3


From f9cab74fd9b0cf19f52a989694e7a1d8213af3a1 Mon Sep 17 00:00:00 2001
From: Jayachandran C <jayachandranc@netlogicmicro.com>
Date: Sat, 7 May 2011 01:37:14 +0530
Subject: MIPS: Add default configuration for XLR/XLS processors

Enable XLR CPU support, SMP, initramfs based root filesystem etc.

[ralf@linux-mips.org: shrink the defconfig file through make savedefconfig.]

Signed-off-by: Jayachandran C <jayachandranc@netlogicmicro.com>
To: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2338/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/configs/nlm_xlr_defconfig | 574 ++++++++++++++++++++++++++++++++++++
 1 file changed, 574 insertions(+)
 create mode 100644 arch/mips/configs/nlm_xlr_defconfig

(limited to 'arch')

diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig
new file mode 100644
index 000000000000..e4b399fdaa61
--- /dev/null
+++ b/arch/mips/configs/nlm_xlr_defconfig
@@ -0,0 +1,574 @@
+CONFIG_NLM_XLR_BOARD=y
+CONFIG_HIGHMEM=y
+CONFIG_KSM=y
+CONFIG_DEFAULT_MMAP_MIN_ADDR=65536
+CONFIG_SMP=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_KEXEC=y
+CONFIG_EXPERIMENTAL=y
+CONFIG_CROSS_COMPILE="mips64-unknown-linux-gnu-"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_AUDIT=y
+CONFIG_NAMESPACES=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="usr/dev_file_list usr/rootfs"
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
+CONFIG_INITRAMFS_COMPRESSION_GZIP=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_EXPERT=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+# CONFIG_PCSPKR_PLATFORM is not set
+# CONFIG_PERF_EVENTS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BINFMT_MISC=m
+CONFIG_PM_RUNTIME=y
+CONFIG_PM_DEBUG=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_TCP_MD5SIG=y
+CONFIG_IPV6=y
+CONFIG_IPV6_PRIVACY=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NETLABEL=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NETFILTER_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_IPV6=y
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_IP6_NF_QUEUE=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_DECNET_NF_GRABULATOR=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_IP6=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_BRIDGE_EBT_ULOG=m
+CONFIG_BRIDGE_EBT_NFLOG=m
+CONFIG_IP_DCCP=m
+CONFIG_RDS=m
+CONFIG_RDS_TCP=m
+CONFIG_TIPC=m
+CONFIG_ATM=m
+CONFIG_ATM_CLIP=m
+CONFIG_ATM_LANE=m
+CONFIG_ATM_MPOA=m
+CONFIG_ATM_BR2684=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_DECNET=m
+CONFIG_LLC2=m
+CONFIG_IPX=m
+CONFIG_ATALK=m
+CONFIG_DEV_APPLETALK=m
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_IPDDP_DECAP=y
+CONFIG_X25=m
+CONFIG_LAPB=m
+CONFIG_ECONET=m
+CONFIG_ECONET_AUNUDP=y
+CONFIG_ECONET_NATIVE=y
+CONFIG_WAN_ROUTER=m
+CONFIG_PHONET=m
+CONFIG_IEEE802154=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_ATM=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_DCB=y
+CONFIG_NET_PKTGEN=m
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+CONFIG_CONNECTOR=y
+CONFIG_MTD=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_OSD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_CDROM_PKTCDVD=y
+CONFIG_MISC_DEVICES=y
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_SCSI_TGT=m
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_TGT_ATTRS=y
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_SCSI_SRP_TGT_ATTRS=y
+CONFIG_ISCSI_TCP=m
+CONFIG_LIBFCOE=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_EVBUG=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO_I8042 is not set
+CONFIG_SERIO_SERPORT=m
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SERIO_RAW=m
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_N_HDLC=m
+# CONFIG_DEVKMEM is not set
+CONFIG_STALDRV=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=48
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_TIMERIOMEM=m
+CONFIG_RAW_DRIVER=m
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_UIO=y
+CONFIG_UIO_PDRV=m
+CONFIG_UIO_PDRV_GENIRQ=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+# CONFIG_PRINT_QUOTA_WARNING is not set
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_FSCACHE_STATS=y
+CONFIG_FSCACHE_HISTOGRAM=y
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_CONFIGFS_FS=y
+CONFIG_ADFS_FS=m
+CONFIG_AFFS_FS=m
+CONFIG_ECRYPT_FS=y
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_BEFS_FS=m
+CONFIG_BFS_FS=m
+CONFIG_EFS_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_VXFS_FS=m
+CONFIG_MINIX_FS=m
+CONFIG_OMFS_FS=m
+CONFIG_HPFS_FS=m
+CONFIG_QNX4FS_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+CONFIG_EXOFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_FSCACHE=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_CIFS=m
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_CIFS_EXPERIMENTAL=y
+CONFIG_NCP_FS=m
+CONFIG_NCPFS_PACKET_SIGNING=y
+CONFIG_NCPFS_IOCTL_LOCKING=y
+CONFIG_NCPFS_STRONG=y
+CONFIG_NCPFS_NFS_NS=y
+CONFIG_NCPFS_OS2_NS=y
+CONFIG_NCPFS_NLS=y
+CONFIG_NCPFS_EXTRAS=y
+CONFIG_CODA_FS=m
+CONFIG_AFS_FS=m
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ACORN_PARTITION=y
+CONFIG_ACORN_PARTITION_ICS=y
+CONFIG_ACORN_PARTITION_RISCIX=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_ATARI_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_LDM_PARTITION=y
+CONFIG_SGI_PARTITION=y
+CONFIG_ULTRIX_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_SYSV68_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="cp437"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_PRINTK_TIME=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_SCHEDSTATS=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_KGDB=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_LSM_MMAP_MIN_ADDR=0
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_SECURITY_SMACK=y
+CONFIG_SECURITY_TOMOYO=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRC_CCITT=m
+CONFIG_CRC7=m
-- 
cgit v1.2.3


From 9b130f8004e51c65b20b0f0e17cdee073a719047 Mon Sep 17 00:00:00 2001
From: Jayachandran C <jayachandranc@netlogicmicro.com>
Date: Sat, 7 May 2011 01:37:31 +0530
Subject: MIPS: XLR, XLS: Add PCI support.

Adds pci/pci-xlr.c to support for XLR PCI/PCI-X interface and XLS PCIe
interface.
Update irq.c to ack PCI interrupts, use irq handler data to do the
PCI/PCIe bus ack.

Signed-off-by: Jayachandran C <jayachandranc@netlogicmicro.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2337/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/netlogic/xlr/xlr.h |  21 +++
 arch/mips/netlogic/xlr/irq.c             |  86 ++++++++++++-
 arch/mips/pci/Makefile                   |   1 +
 arch/mips/pci/pci-xlr.c                  | 214 +++++++++++++++++++++++++++++++
 4 files changed, 321 insertions(+), 1 deletion(-)
 create mode 100644 arch/mips/pci/pci-xlr.c

(limited to 'arch')

diff --git a/arch/mips/include/asm/netlogic/xlr/xlr.h b/arch/mips/include/asm/netlogic/xlr/xlr.h
index 454c236d6854..3e6372692a04 100644
--- a/arch/mips/include/asm/netlogic/xlr/xlr.h
+++ b/arch/mips/include/asm/netlogic/xlr/xlr.h
@@ -41,6 +41,7 @@ unsigned int nlm_xlr_uart_in(struct uart_port *, int);
 void nlm_xlr_uart_out(struct uart_port *, int, int);
 
 /* SMP support functions */
+struct irq_desc;
 void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc);
 void nlm_smp_resched_ipi_handler(unsigned int irq, struct irq_desc *desc);
 int nlm_wakeup_secondary_cpus(u32 wakeup_mask);
@@ -51,4 +52,24 @@ void prom_pre_boot_secondary_cpus(void);
 extern struct plat_smp_ops nlm_smp_ops;
 extern unsigned long nlm_common_ebase;
 
+/* XLS B silicon "Rook" */
+static inline unsigned int nlm_chip_is_xls_b(void)
+{
+	uint32_t prid = read_c0_prid();
+
+	return ((prid & 0xf000) == 0x4000);
+}
+
+/*
+ *  XLR chip types
+ */
+ /* The XLS product line has chip versions 0x[48c]? */
+static inline unsigned int nlm_chip_is_xls(void)
+{
+	uint32_t prid = read_c0_prid();
+
+	return ((prid & 0xf000) == 0x8000 || (prid & 0xf000) == 0x4000 ||
+		(prid & 0xf000) == 0xc000);
+}
+
 #endif /* _ASM_NLM_XLR_H */
diff --git a/arch/mips/netlogic/xlr/irq.c b/arch/mips/netlogic/xlr/irq.c
index 2033f5656f68..1446d58e364c 100644
--- a/arch/mips/netlogic/xlr/irq.c
+++ b/arch/mips/netlogic/xlr/irq.c
@@ -83,14 +83,71 @@ static void xlr_pic_mask(struct irq_data *d)
 	spin_unlock_irqrestore(&nlm_pic_lock, flags);
 }
 
+#ifdef CONFIG_PCI
+/* Extra ACK needed for XLR on chip PCI controller */
+static void xlr_pci_ack(struct irq_data *d)
+{
+	nlm_reg_t *pci_mmio = netlogic_io_mmio(NETLOGIC_IO_PCIX_OFFSET);
+
+	netlogic_read_reg(pci_mmio, (0x140 >> 2));
+}
+
+/* Extra ACK needed for XLS on chip PCIe controller */
+static void xls_pcie_ack(struct irq_data *d)
+{
+	nlm_reg_t *pcie_mmio_le = netlogic_io_mmio(NETLOGIC_IO_PCIE_1_OFFSET);
+
+	switch (d->irq) {
+	case PIC_PCIE_LINK0_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x90 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_LINK1_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x94 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_LINK2_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x190 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_LINK3_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x194 >> 2), 0xffffffff);
+		break;
+	}
+}
+
+/* For XLS B silicon, the 3,4 PCI interrupts are different */
+static void xls_pcie_ack_b(struct irq_data *d)
+{
+	nlm_reg_t *pcie_mmio_le = netlogic_io_mmio(NETLOGIC_IO_PCIE_1_OFFSET);
+
+	switch (d->irq) {
+	case PIC_PCIE_LINK0_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x90 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_LINK1_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x94 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_XLSB0_LINK2_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x190 >> 2), 0xffffffff);
+		break;
+	case PIC_PCIE_XLSB0_LINK3_IRQ:
+		netlogic_write_reg(pcie_mmio_le, (0x194 >> 2), 0xffffffff);
+		break;
+	}
+}
+#endif
+
 static void xlr_pic_ack(struct irq_data *d)
 {
 	unsigned long flags;
 	nlm_reg_t *mmio;
 	int irq = d->irq;
+	void *hd = irq_data_get_irq_handler_data(d);
 
 	WARN(!PIC_IRQ_IS_IRT(irq), "Bad irq %d", irq);
 
+	if (hd) {
+		void (*extra_ack)(void *) = hd;
+		extra_ack(d);
+	}
 	mmio = netlogic_io_mmio(NETLOGIC_IO_PIC_OFFSET);
 	spin_lock_irqsave(&nlm_pic_lock, flags);
 	netlogic_write_reg(mmio, PIC_INT_ACK, (1 << (irq - PIC_IRQ_BASE)));
@@ -162,6 +219,33 @@ void __init init_xlr_irqs(void)
 	nlm_irq_mask |=
 	    ((1ULL << IRQ_IPI_SMP_FUNCTION) | (1ULL << IRQ_IPI_SMP_RESCHEDULE));
 #endif
+
+#ifdef CONFIG_PCI
+	/*
+	 * For PCI interrupts, we need to ack the PIC controller too, overload
+	 * irq handler data to do this
+	 */
+	if (nlm_chip_is_xls()) {
+		if (nlm_chip_is_xls_b()) {
+			irq_set_handler_data(PIC_PCIE_LINK0_IRQ,
+							xls_pcie_ack_b);
+			irq_set_handler_data(PIC_PCIE_LINK1_IRQ,
+							xls_pcie_ack_b);
+			irq_set_handler_data(PIC_PCIE_XLSB0_LINK2_IRQ,
+							xls_pcie_ack_b);
+			irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ,
+							xls_pcie_ack_b);
+		} else {
+			irq_set_handler_data(PIC_PCIE_LINK0_IRQ, xls_pcie_ack);
+			irq_set_handler_data(PIC_PCIE_LINK1_IRQ, xls_pcie_ack);
+			irq_set_handler_data(PIC_PCIE_LINK2_IRQ, xls_pcie_ack);
+			irq_set_handler_data(PIC_PCIE_LINK3_IRQ, xls_pcie_ack);
+		}
+	} else {
+		/* XLR PCI controller ACK */
+		irq_set_handler_data(PIC_PCIE_XLSB0_LINK3_IRQ, xlr_pci_ack);
+	}
+#endif
 	/* unmask all PIC related interrupts. If no handler is installed by the
 	 * drivers, it'll just ack the interrupt and return
 	 */
@@ -199,7 +283,7 @@ asmlinkage void plat_irq_dispatch(void)
 		return;
 	}
 
-	/* TODO use dcltz: optimize below code */
+	/* use dcltz: optimize below code */
 	for (i = 63; i != -1; i--) {
 		if (eirr & (1ULL << i))
 			break;
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index c9209ca6c8e7..f0d5329289d1 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_ZAO_CAPCELLA)	+= fixup-capcella.o
 obj-$(CONFIG_WR_PPMC)		+= fixup-wrppmc.o
 obj-$(CONFIG_MIKROTIK_RB532)	+= pci-rc32434.o ops-rc32434.o fixup-rc32434.o
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= pci-octeon.o pcie-octeon.o
+obj-$(CONFIG_NLM_XLR)		+= pci-xlr.o
 
 ifdef CONFIG_PCI_MSI
 obj-$(CONFIG_CPU_CAVIUM_OCTEON)	+= msi-octeon.o
diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c
new file mode 100644
index 000000000000..38fece16c435
--- /dev/null
+++ b/arch/mips/pci/pci-xlr.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright 2003-2011 NetLogic Microsystems, Inc. (NetLogic). All rights
+ * reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the NetLogic
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETLOGIC ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/console.h>
+
+#include <asm/io.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/xlr/iomap.h>
+#include <asm/netlogic/xlr/pic.h>
+#include <asm/netlogic/xlr/xlr.h>
+
+static void *pci_config_base;
+
+#define	pci_cfg_addr(bus, devfn, off) (((bus) << 16) | ((devfn) << 8) | (off))
+
+/* PCI ops */
+static inline u32 pci_cfg_read_32bit(struct pci_bus *bus, unsigned int devfn,
+	int where)
+{
+	u32 data;
+	u32 *cfgaddr;
+
+	cfgaddr = (u32 *)(pci_config_base +
+			pci_cfg_addr(bus->number, devfn, where & ~3));
+	data = *cfgaddr;
+	return cpu_to_le32(data);
+}
+
+static inline void pci_cfg_write_32bit(struct pci_bus *bus, unsigned int devfn,
+	int where, u32 data)
+{
+	u32 *cfgaddr;
+
+	cfgaddr = (u32 *)(pci_config_base +
+			pci_cfg_addr(bus->number, devfn, where & ~3));
+	*cfgaddr = cpu_to_le32(data);
+}
+
+static int nlm_pcibios_read(struct pci_bus *bus, unsigned int devfn,
+	int where, int size, u32 *val)
+{
+	u32 data;
+
+	if ((size == 2) && (where & 1))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	else if ((size == 4) && (where & 3))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	data = pci_cfg_read_32bit(bus, devfn, where);
+
+	if (size == 1)
+		*val = (data >> ((where & 3) << 3)) & 0xff;
+	else if (size == 2)
+		*val = (data >> ((where & 3) << 3)) & 0xffff;
+	else
+		*val = data;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+
+static int nlm_pcibios_write(struct pci_bus *bus, unsigned int devfn,
+		int where, int size, u32 val)
+{
+	u32 data;
+
+	if ((size == 2) && (where & 1))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	else if ((size == 4) && (where & 3))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	data = pci_cfg_read_32bit(bus, devfn, where);
+
+	if (size == 1)
+		data = (data & ~(0xff << ((where & 3) << 3))) |
+			(val << ((where & 3) << 3));
+	else if (size == 2)
+		data = (data & ~(0xffff << ((where & 3) << 3))) |
+			(val << ((where & 3) << 3));
+	else
+		data = val;
+
+	pci_cfg_write_32bit(bus, devfn, where, data);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops nlm_pci_ops = {
+	.read  = nlm_pcibios_read,
+	.write = nlm_pcibios_write
+};
+
+static struct resource nlm_pci_mem_resource = {
+	.name           = "XLR PCI MEM",
+	.start          = 0xd0000000UL,	/* 256MB PCI mem @ 0xd000_0000 */
+	.end            = 0xdfffffffUL,
+	.flags          = IORESOURCE_MEM,
+};
+
+static struct resource nlm_pci_io_resource = {
+	.name           = "XLR IO MEM",
+	.start          = 0x10000000UL,	/* 16MB PCI IO @ 0x1000_0000 */
+	.end            = 0x100fffffUL,
+	.flags          = IORESOURCE_IO,
+};
+
+struct pci_controller nlm_pci_controller = {
+	.index          = 0,
+	.pci_ops        = &nlm_pci_ops,
+	.mem_resource   = &nlm_pci_mem_resource,
+	.mem_offset     = 0x00000000UL,
+	.io_resource    = &nlm_pci_io_resource,
+	.io_offset      = 0x00000000UL,
+};
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	if (!nlm_chip_is_xls())
+		return	PIC_PCIX_IRQ;	/* for XLR just one IRQ*/
+
+	/*
+	 * For XLS PCIe, there is an IRQ per Link, find out which
+	 * link the device is on to assign interrupts
+	*/
+	if (dev->bus->self == NULL)
+		return 0;
+
+	switch	(dev->bus->self->devfn) {
+	case 0x0:
+		return PIC_PCIE_LINK0_IRQ;
+	case 0x8:
+		return PIC_PCIE_LINK1_IRQ;
+	case 0x10:
+		if (nlm_chip_is_xls_b())
+			return PIC_PCIE_XLSB0_LINK2_IRQ;
+		else
+			return PIC_PCIE_LINK2_IRQ;
+	case 0x18:
+		if (nlm_chip_is_xls_b())
+			return PIC_PCIE_XLSB0_LINK3_IRQ;
+		else
+			return PIC_PCIE_LINK3_IRQ;
+	}
+	WARN(1, "Unexpected devfn %d\n", dev->bus->self->devfn);
+	return 0;
+}
+
+/* Do platform specific device initialization at pci_enable_device() time */
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+	return 0;
+}
+
+static int __init pcibios_init(void)
+{
+	/* PSB assigns PCI resources */
+	pci_probe_only = 1;
+	pci_config_base = ioremap(DEFAULT_PCI_CONFIG_BASE, 16 << 20);
+
+	/* Extend IO port for memory mapped io */
+	ioport_resource.start =  0;
+	ioport_resource.end   = ~0;
+
+	set_io_port_base(CKSEG1);
+	nlm_pci_controller.io_map_base = CKSEG1;
+
+	pr_info("Registering XLR/XLS PCIX/PCIE Controller.\n");
+	register_pci_controller(&nlm_pci_controller);
+
+	return 0;
+}
+
+arch_initcall(pcibios_init);
+
+struct pci_fixup pcibios_fixups[] = {
+	{0}
+};
-- 
cgit v1.2.3


From c0a5afb9bcf6b5aa5685e4fcf1282cad5fab3d91 Mon Sep 17 00:00:00 2001
From: Maxin John <maxin.john@gmail.com>
Date: Tue, 29 Mar 2011 00:15:55 +0300
Subject: MIPS: Enable kmemleak for MIPS

Signed-off-by: Maxin B. John <maxin.john@gmail.com>
To: Catalin Marinas <catalin.marinas@arm.com>
Cc: Daniel Baluta <dbaluta@ixiacom.com>
Cc: naveen yadav <yad.naveen@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
Patchwork: https://patchwork.linux-mips.org/patch/2244/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/vmlinux.lds.S | 1 +
 lib/Kconfig.debug              | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index e4b0b0bec039..cd2ca544454b 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -68,6 +68,7 @@ SECTIONS
 	RODATA
 
 	/* writeable */
+	_sdata = .;				/* Start of data section */
 	.data : {	/* Data */
 		. = . + DATAOFFSET;		/* for CONFIG_MAPPED_KERNEL */
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c768bcdda1b7..f0aa00ba3fac 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -398,7 +398,7 @@ config SLUB_STATS
 config DEBUG_KMEMLEAK
 	bool "Kernel memory leak detector"
 	depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
-		(X86 || ARM || PPC || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE)
+		(X86 || ARM || PPC || MIPS || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE)
 
 	select DEBUG_FS if SYSFS
 	select STACKTRACE if STACKTRACE_SUPPORT
-- 
cgit v1.2.3


From 171bb2f19ed6f3627f4f783f658f2f475b2fbd50 Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Wed, 30 Mar 2011 09:27:47 +0200
Subject: MIPS: Lantiq: Add initial support for Lantiq SoCs

Add initial support for Mips based SoCs made by Lantiq. This series will add
support for the XWAY family.

The series allows booting a minimal system using a initramfs or NOR. Missing
drivers and support for Amazon and GPON family will be provided in a later
series.

[Ralf: Remove some cargo cult programming and fixed formatting.]

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Ralph Hempel <ralph.hempel@lantiq.com>
Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2252/
Patchwork: https://patchwork.linux-mips.org/patch/2371/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kbuild.platforms                 |   1 +
 arch/mips/Kconfig                          |  17 ++
 arch/mips/include/asm/mach-lantiq/lantiq.h |  63 ++++++
 arch/mips/include/asm/mach-lantiq/war.h    |  24 +++
 arch/mips/lantiq/Makefile                  |   9 +
 arch/mips/lantiq/Platform                  |   7 +
 arch/mips/lantiq/clk.c                     | 140 +++++++++++++
 arch/mips/lantiq/clk.h                     |  18 ++
 arch/mips/lantiq/early_printk.c            |  33 +++
 arch/mips/lantiq/irq.c                     | 326 +++++++++++++++++++++++++++++
 arch/mips/lantiq/prom.c                    |  71 +++++++
 arch/mips/lantiq/prom.h                    |  24 +++
 arch/mips/lantiq/setup.c                   |  41 ++++
 13 files changed, 774 insertions(+)
 create mode 100644 arch/mips/include/asm/mach-lantiq/lantiq.h
 create mode 100644 arch/mips/include/asm/mach-lantiq/war.h
 create mode 100644 arch/mips/lantiq/Makefile
 create mode 100644 arch/mips/lantiq/Platform
 create mode 100644 arch/mips/lantiq/clk.c
 create mode 100644 arch/mips/lantiq/clk.h
 create mode 100644 arch/mips/lantiq/early_printk.c
 create mode 100644 arch/mips/lantiq/irq.c
 create mode 100644 arch/mips/lantiq/prom.c
 create mode 100644 arch/mips/lantiq/prom.h
 create mode 100644 arch/mips/lantiq/setup.c

(limited to 'arch')

diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index 7ff9b5492041..aef6c917b45a 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -11,6 +11,7 @@ platforms += dec
 platforms += emma
 platforms += jazz
 platforms += jz4740
+platforms += lantiq
 platforms += lasat
 platforms += loongson
 platforms += mipssim
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 5016caac6b91..1787572a76c3 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -212,6 +212,23 @@ config MACH_JZ4740
 	select HAVE_PWM
 	select HAVE_CLK
 
+config LANTIQ
+	bool "Lantiq based platforms"
+	select DMA_NONCOHERENT
+	select IRQ_CPU
+	select CEVT_R4K
+	select CSRC_R4K
+	select SYS_HAS_CPU_MIPS32_R1
+	select SYS_HAS_CPU_MIPS32_R2
+	select SYS_SUPPORTS_BIG_ENDIAN
+	select SYS_SUPPORTS_32BIT_KERNEL
+	select SYS_SUPPORTS_MULTITHREADING
+	select SYS_HAS_EARLY_PRINTK
+	select ARCH_REQUIRE_GPIOLIB
+	select SWAP_IO_SPACE
+	select BOOT_RAW
+	select HAVE_CLK
+
 config LASAT
 	bool "LASAT Networks platforms"
 	select CEVT_R4K
diff --git a/arch/mips/include/asm/mach-lantiq/lantiq.h b/arch/mips/include/asm/mach-lantiq/lantiq.h
new file mode 100644
index 000000000000..ce2f02929d22
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/lantiq.h
@@ -0,0 +1,63 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+#ifndef _LANTIQ_H__
+#define _LANTIQ_H__
+
+#include <linux/irq.h>
+
+/* generic reg access functions */
+#define ltq_r32(reg)		__raw_readl(reg)
+#define ltq_w32(val, reg)	__raw_writel(val, reg)
+#define ltq_w32_mask(clear, set, reg)	\
+	ltq_w32((ltq_r32(reg) & ~(clear)) | (set), reg)
+#define ltq_r8(reg)		__raw_readb(reg)
+#define ltq_w8(val, reg)	__raw_writeb(val, reg)
+
+/* register access macros for EBU and CGU */
+#define ltq_ebu_w32(x, y)	ltq_w32((x), ltq_ebu_membase + (y))
+#define ltq_ebu_r32(x)		ltq_r32(ltq_ebu_membase + (x))
+#define ltq_cgu_w32(x, y)	ltq_w32((x), ltq_cgu_membase + (y))
+#define ltq_cgu_r32(x)		ltq_r32(ltq_cgu_membase + (x))
+
+extern __iomem void *ltq_ebu_membase;
+extern __iomem void *ltq_cgu_membase;
+
+extern unsigned int ltq_get_cpu_ver(void);
+extern unsigned int ltq_get_soc_type(void);
+
+/* clock speeds */
+#define CLOCK_60M	60000000
+#define CLOCK_83M	83333333
+#define CLOCK_111M	111111111
+#define CLOCK_133M	133333333
+#define CLOCK_167M	166666667
+#define CLOCK_200M	200000000
+#define CLOCK_266M	266666666
+#define CLOCK_333M	333333333
+#define CLOCK_400M	400000000
+
+/* spinlock all ebu i/o */
+extern spinlock_t ebu_lock;
+
+/* some irq helpers */
+extern void ltq_disable_irq(struct irq_data *data);
+extern void ltq_mask_and_ack_irq(struct irq_data *data);
+extern void ltq_enable_irq(struct irq_data *data);
+
+/* find out what caused the last cpu reset */
+extern int ltq_reset_cause(void);
+#define LTQ_RST_CAUSE_WDTRST	0x20
+
+#define IOPORT_RESOURCE_START	0x10000000
+#define IOPORT_RESOURCE_END	0xffffffff
+#define IOMEM_RESOURCE_START	0x10000000
+#define IOMEM_RESOURCE_END	0xffffffff
+#define LTQ_FLASH_START		0x10000000
+#define LTQ_FLASH_MAX		0x04000000
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/war.h b/arch/mips/include/asm/mach-lantiq/war.h
new file mode 100644
index 000000000000..01b08ef368d1
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/war.h
@@ -0,0 +1,24 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ */
+#ifndef __ASM_MIPS_MACH_LANTIQ_WAR_H
+#define __ASM_MIPS_MACH_LANTIQ_WAR_H
+
+#define R4600_V1_INDEX_ICACHEOP_WAR     0
+#define R4600_V1_HIT_CACHEOP_WAR        0
+#define R4600_V2_HIT_CACHEOP_WAR        0
+#define R5432_CP0_INTERRUPT_WAR         0
+#define BCM1250_M3_WAR                  0
+#define SIBYTE_1956_WAR                 0
+#define MIPS4K_ICACHE_REFILL_WAR        0
+#define MIPS_CACHE_SYNC_WAR             0
+#define TX49XX_ICACHE_INDEX_INV_WAR     0
+#define RM9000_CDEX_SMP_WAR             0
+#define ICACHE_REFILLS_WORKAROUND_WAR   0
+#define R10000_LLSC_WAR                 0
+#define MIPS34K_MISSED_ITLB_WAR         0
+
+#endif
diff --git a/arch/mips/lantiq/Makefile b/arch/mips/lantiq/Makefile
new file mode 100644
index 000000000000..6a30de671f23
--- /dev/null
+++ b/arch/mips/lantiq/Makefile
@@ -0,0 +1,9 @@
+# Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 as published
+# by the Free Software Foundation.
+
+obj-y := irq.o setup.o clk.o prom.o
+
+obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
diff --git a/arch/mips/lantiq/Platform b/arch/mips/lantiq/Platform
new file mode 100644
index 000000000000..eef587f2d872
--- /dev/null
+++ b/arch/mips/lantiq/Platform
@@ -0,0 +1,7 @@
+#
+# Lantiq
+#
+
+platform-$(CONFIG_LANTIQ)	+= lantiq/
+cflags-$(CONFIG_LANTIQ)		+= -I$(srctree)/arch/mips/include/asm/mach-lantiq
+load-$(CONFIG_LANTIQ)		= 0xffffffff80002000
diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c
new file mode 100644
index 000000000000..94560899d13e
--- /dev/null
+++ b/arch/mips/lantiq/clk.c
@@ -0,0 +1,140 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 Thomas Langer <thomas.langer@lantiq.com>
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/list.h>
+
+#include <asm/time.h>
+#include <asm/irq.h>
+#include <asm/div64.h>
+
+#include <lantiq_soc.h>
+
+#include "clk.h"
+
+struct clk {
+	const char *name;
+	unsigned long rate;
+	unsigned long (*get_rate) (void);
+};
+
+static struct clk *cpu_clk;
+static int cpu_clk_cnt;
+
+/* lantiq socs have 3 static clocks */
+static struct clk cpu_clk_generic[] = {
+	{
+		.name = "cpu",
+		.get_rate = ltq_get_cpu_hz,
+	}, {
+		.name = "fpi",
+		.get_rate = ltq_get_fpi_hz,
+	}, {
+		.name = "io",
+		.get_rate = ltq_get_io_region_clock,
+	},
+};
+
+static struct resource ltq_cgu_resource = {
+	.name	= "cgu",
+	.start	= LTQ_CGU_BASE_ADDR,
+	.end	= LTQ_CGU_BASE_ADDR + LTQ_CGU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+/* remapped clock register range */
+void __iomem *ltq_cgu_membase;
+
+void clk_init(void)
+{
+	cpu_clk = cpu_clk_generic;
+	cpu_clk_cnt = ARRAY_SIZE(cpu_clk_generic);
+}
+
+static inline int clk_good(struct clk *clk)
+{
+	return clk && !IS_ERR(clk);
+}
+
+unsigned long clk_get_rate(struct clk *clk)
+{
+	if (unlikely(!clk_good(clk)))
+		return 0;
+
+	if (clk->rate != 0)
+		return clk->rate;
+
+	if (clk->get_rate != NULL)
+		return clk->get_rate();
+
+	return 0;
+}
+EXPORT_SYMBOL(clk_get_rate);
+
+struct clk *clk_get(struct device *dev, const char *id)
+{
+	int i;
+
+	for (i = 0; i < cpu_clk_cnt; i++)
+		if (!strcmp(id, cpu_clk[i].name))
+			return &cpu_clk[i];
+	BUG();
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL(clk_get);
+
+void clk_put(struct clk *clk)
+{
+	/* not used */
+}
+EXPORT_SYMBOL(clk_put);
+
+static inline u32 ltq_get_counter_resolution(void)
+{
+	u32 res;
+
+	__asm__ __volatile__(
+		".set   push\n"
+		".set   mips32r2\n"
+		"rdhwr  %0, $3\n"
+		".set pop\n"
+		: "=&r" (res)
+		: /* no input */
+		: "memory");
+
+	return res;
+}
+
+void __init plat_time_init(void)
+{
+	struct clk *clk;
+
+	if (insert_resource(&iomem_resource, &ltq_cgu_resource) < 0)
+		panic("Failed to insert cgu memory\n");
+
+	if (request_mem_region(ltq_cgu_resource.start,
+			resource_size(&ltq_cgu_resource), "cgu") < 0)
+		panic("Failed to request cgu memory\n");
+
+	ltq_cgu_membase = ioremap_nocache(ltq_cgu_resource.start,
+				resource_size(&ltq_cgu_resource));
+	if (!ltq_cgu_membase) {
+		pr_err("Failed to remap cgu memory\n");
+		unreachable();
+	}
+	clk = clk_get(0, "cpu");
+	mips_hpt_frequency = clk_get_rate(clk) / ltq_get_counter_resolution();
+	write_c0_compare(read_c0_count());
+	clk_put(clk);
+}
diff --git a/arch/mips/lantiq/clk.h b/arch/mips/lantiq/clk.h
new file mode 100644
index 000000000000..3328925f2c3f
--- /dev/null
+++ b/arch/mips/lantiq/clk.h
@@ -0,0 +1,18 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_CLK_H__
+#define _LTQ_CLK_H__
+
+extern void clk_init(void);
+
+extern unsigned long ltq_get_cpu_hz(void);
+extern unsigned long ltq_get_fpi_hz(void);
+extern unsigned long ltq_get_io_region_clock(void);
+
+#endif
diff --git a/arch/mips/lantiq/early_printk.c b/arch/mips/lantiq/early_printk.c
new file mode 100644
index 000000000000..972e05f87631
--- /dev/null
+++ b/arch/mips/lantiq/early_printk.c
@@ -0,0 +1,33 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/cpu.h>
+
+#include <lantiq.h>
+#include <lantiq_soc.h>
+
+/* no ioremap possible at this early stage, lets use KSEG1 instead  */
+#define LTQ_ASC_BASE	KSEG1ADDR(LTQ_ASC1_BASE_ADDR)
+#define ASC_BUF		1024
+#define LTQ_ASC_FSTAT	((u32 *)(LTQ_ASC_BASE + 0x0048))
+#define LTQ_ASC_TBUF	((u32 *)(LTQ_ASC_BASE + 0x0020))
+#define TXMASK		0x3F00
+#define TXOFFSET	8
+
+void prom_putchar(char c)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	do { } while ((ltq_r32(LTQ_ASC_FSTAT) & TXMASK) >> TXOFFSET);
+	if (c == '\n')
+		ltq_w32('\r', LTQ_ASC_TBUF);
+	ltq_w32(c, LTQ_ASC_TBUF);
+	local_irq_restore(flags);
+}
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
new file mode 100644
index 000000000000..fc89795cafdb
--- /dev/null
+++ b/arch/mips/lantiq/irq.c
@@ -0,0 +1,326 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2010 Thomas Langer <thomas.langer@lantiq.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+
+#include <asm/bootinfo.h>
+#include <asm/irq_cpu.h>
+
+#include <lantiq_soc.h>
+#include <irq.h>
+
+/* register definitions */
+#define LTQ_ICU_IM0_ISR		0x0000
+#define LTQ_ICU_IM0_IER		0x0008
+#define LTQ_ICU_IM0_IOSR	0x0010
+#define LTQ_ICU_IM0_IRSR	0x0018
+#define LTQ_ICU_IM0_IMR		0x0020
+#define LTQ_ICU_IM1_ISR		0x0028
+#define LTQ_ICU_OFFSET		(LTQ_ICU_IM1_ISR - LTQ_ICU_IM0_ISR)
+
+#define LTQ_EIU_EXIN_C		0x0000
+#define LTQ_EIU_EXIN_INIC	0x0004
+#define LTQ_EIU_EXIN_INEN	0x000C
+
+/* irq numbers used by the external interrupt unit (EIU) */
+#define LTQ_EIU_IR0		(INT_NUM_IM4_IRL0 + 30)
+#define LTQ_EIU_IR1		(INT_NUM_IM3_IRL0 + 31)
+#define LTQ_EIU_IR2		(INT_NUM_IM1_IRL0 + 26)
+#define LTQ_EIU_IR3		INT_NUM_IM1_IRL0
+#define LTQ_EIU_IR4		(INT_NUM_IM1_IRL0 + 1)
+#define LTQ_EIU_IR5		(INT_NUM_IM1_IRL0 + 2)
+#define LTQ_EIU_IR6		(INT_NUM_IM2_IRL0 + 30)
+
+#define MAX_EIU			6
+
+/* irqs generated by device attached to the EBU need to be acked in
+ * a special manner
+ */
+#define LTQ_ICU_EBU_IRQ		22
+
+#define ltq_icu_w32(x, y)	ltq_w32((x), ltq_icu_membase + (y))
+#define ltq_icu_r32(x)		ltq_r32(ltq_icu_membase + (x))
+
+#define ltq_eiu_w32(x, y)	ltq_w32((x), ltq_eiu_membase + (y))
+#define ltq_eiu_r32(x)		ltq_r32(ltq_eiu_membase + (x))
+
+static unsigned short ltq_eiu_irq[MAX_EIU] = {
+	LTQ_EIU_IR0,
+	LTQ_EIU_IR1,
+	LTQ_EIU_IR2,
+	LTQ_EIU_IR3,
+	LTQ_EIU_IR4,
+	LTQ_EIU_IR5,
+};
+
+static struct resource ltq_icu_resource = {
+	.name	= "icu",
+	.start	= LTQ_ICU_BASE_ADDR,
+	.end	= LTQ_ICU_BASE_ADDR + LTQ_ICU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct resource ltq_eiu_resource = {
+	.name	= "eiu",
+	.start	= LTQ_EIU_BASE_ADDR,
+	.end	= LTQ_EIU_BASE_ADDR + LTQ_ICU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static void __iomem *ltq_icu_membase;
+static void __iomem *ltq_eiu_membase;
+
+void ltq_disable_irq(struct irq_data *d)
+{
+	u32 ier = LTQ_ICU_IM0_IER;
+	int irq_nr = d->irq - INT_NUM_IRQ0;
+
+	ier += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET);
+	irq_nr %= INT_NUM_IM_OFFSET;
+	ltq_icu_w32(ltq_icu_r32(ier) & ~(1 << irq_nr), ier);
+}
+
+void ltq_mask_and_ack_irq(struct irq_data *d)
+{
+	u32 ier = LTQ_ICU_IM0_IER;
+	u32 isr = LTQ_ICU_IM0_ISR;
+	int irq_nr = d->irq - INT_NUM_IRQ0;
+
+	ier += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET);
+	isr += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET);
+	irq_nr %= INT_NUM_IM_OFFSET;
+	ltq_icu_w32(ltq_icu_r32(ier) & ~(1 << irq_nr), ier);
+	ltq_icu_w32((1 << irq_nr), isr);
+}
+
+static void ltq_ack_irq(struct irq_data *d)
+{
+	u32 isr = LTQ_ICU_IM0_ISR;
+	int irq_nr = d->irq - INT_NUM_IRQ0;
+
+	isr += LTQ_ICU_OFFSET * (irq_nr / INT_NUM_IM_OFFSET);
+	irq_nr %= INT_NUM_IM_OFFSET;
+	ltq_icu_w32((1 << irq_nr), isr);
+}
+
+void ltq_enable_irq(struct irq_data *d)
+{
+	u32 ier = LTQ_ICU_IM0_IER;
+	int irq_nr = d->irq - INT_NUM_IRQ0;
+
+	ier += LTQ_ICU_OFFSET  * (irq_nr / INT_NUM_IM_OFFSET);
+	irq_nr %= INT_NUM_IM_OFFSET;
+	ltq_icu_w32(ltq_icu_r32(ier) | (1 << irq_nr), ier);
+}
+
+static unsigned int ltq_startup_eiu_irq(struct irq_data *d)
+{
+	int i;
+	int irq_nr = d->irq - INT_NUM_IRQ0;
+
+	ltq_enable_irq(d);
+	for (i = 0; i < MAX_EIU; i++) {
+		if (irq_nr == ltq_eiu_irq[i]) {
+			/* low level - we should really handle set_type */
+			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_C) |
+				(0x6 << (i * 4)), LTQ_EIU_EXIN_C);
+			/* clear all pending */
+			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INIC) & ~(1 << i),
+				LTQ_EIU_EXIN_INIC);
+			/* enable */
+			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) | (1 << i),
+				LTQ_EIU_EXIN_INEN);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static void ltq_shutdown_eiu_irq(struct irq_data *d)
+{
+	int i;
+	int irq_nr = d->irq - INT_NUM_IRQ0;
+
+	ltq_disable_irq(d);
+	for (i = 0; i < MAX_EIU; i++) {
+		if (irq_nr == ltq_eiu_irq[i]) {
+			/* disable */
+			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) & ~(1 << i),
+				LTQ_EIU_EXIN_INEN);
+			break;
+		}
+	}
+}
+
+static struct irq_chip ltq_irq_type = {
+	"icu",
+	.irq_enable = ltq_enable_irq,
+	.irq_disable = ltq_disable_irq,
+	.irq_unmask = ltq_enable_irq,
+	.irq_ack = ltq_ack_irq,
+	.irq_mask = ltq_disable_irq,
+	.irq_mask_ack = ltq_mask_and_ack_irq,
+};
+
+static struct irq_chip ltq_eiu_type = {
+	"eiu",
+	.irq_startup = ltq_startup_eiu_irq,
+	.irq_shutdown = ltq_shutdown_eiu_irq,
+	.irq_enable = ltq_enable_irq,
+	.irq_disable = ltq_disable_irq,
+	.irq_unmask = ltq_enable_irq,
+	.irq_ack = ltq_ack_irq,
+	.irq_mask = ltq_disable_irq,
+	.irq_mask_ack = ltq_mask_and_ack_irq,
+};
+
+static void ltq_hw_irqdispatch(int module)
+{
+	u32 irq;
+
+	irq = ltq_icu_r32(LTQ_ICU_IM0_IOSR + (module * LTQ_ICU_OFFSET));
+	if (irq == 0)
+		return;
+
+	/* silicon bug causes only the msb set to 1 to be valid. all
+	 * other bits might be bogus
+	 */
+	irq = __fls(irq);
+	do_IRQ((int)irq + INT_NUM_IM0_IRL0 + (INT_NUM_IM_OFFSET * module));
+
+	/* if this is a EBU irq, we need to ack it or get a deadlock */
+	if ((irq == LTQ_ICU_EBU_IRQ) && (module == 0))
+		ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_ISTAT) | 0x10,
+			LTQ_EBU_PCC_ISTAT);
+}
+
+#define DEFINE_HWx_IRQDISPATCH(x)					\
+	static void ltq_hw ## x ## _irqdispatch(void)			\
+	{								\
+		ltq_hw_irqdispatch(x);					\
+	}
+DEFINE_HWx_IRQDISPATCH(0)
+DEFINE_HWx_IRQDISPATCH(1)
+DEFINE_HWx_IRQDISPATCH(2)
+DEFINE_HWx_IRQDISPATCH(3)
+DEFINE_HWx_IRQDISPATCH(4)
+
+static void ltq_hw5_irqdispatch(void)
+{
+	do_IRQ(MIPS_CPU_TIMER_IRQ);
+}
+
+asmlinkage void plat_irq_dispatch(void)
+{
+	unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
+	unsigned int i;
+
+	if (pending & CAUSEF_IP7) {
+		do_IRQ(MIPS_CPU_TIMER_IRQ);
+		goto out;
+	} else {
+		for (i = 0; i < 5; i++) {
+			if (pending & (CAUSEF_IP2 << i)) {
+				ltq_hw_irqdispatch(i);
+				goto out;
+			}
+		}
+	}
+	pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status());
+
+out:
+	return;
+}
+
+static struct irqaction cascade = {
+	.handler = no_action,
+	.flags = IRQF_DISABLED,
+	.name = "cascade",
+};
+
+void __init arch_init_irq(void)
+{
+	int i;
+
+	if (insert_resource(&iomem_resource, &ltq_icu_resource) < 0)
+		panic("Failed to insert icu memory\n");
+
+	if (request_mem_region(ltq_icu_resource.start,
+			resource_size(&ltq_icu_resource), "icu") < 0)
+		panic("Failed to request icu memory\n");
+
+	ltq_icu_membase = ioremap_nocache(ltq_icu_resource.start,
+				resource_size(&ltq_icu_resource));
+	if (!ltq_icu_membase)
+		panic("Failed to remap icu memory\n");
+
+	if (insert_resource(&iomem_resource, &ltq_eiu_resource) < 0)
+		panic("Failed to insert eiu memory\n");
+
+	if (request_mem_region(ltq_eiu_resource.start,
+			resource_size(&ltq_eiu_resource), "eiu") < 0)
+		panic("Failed to request eiu memory\n");
+
+	ltq_eiu_membase = ioremap_nocache(ltq_eiu_resource.start,
+				resource_size(&ltq_eiu_resource));
+	if (!ltq_eiu_membase)
+		panic("Failed to remap eiu memory\n");
+
+	/* make sure all irqs are turned off by default */
+	for (i = 0; i < 5; i++)
+		ltq_icu_w32(0, LTQ_ICU_IM0_IER + (i * LTQ_ICU_OFFSET));
+
+	/* clear all possibly pending interrupts */
+	ltq_icu_w32(~0, LTQ_ICU_IM0_ISR + (i * LTQ_ICU_OFFSET));
+
+	mips_cpu_irq_init();
+
+	for (i = 2; i <= 6; i++)
+		setup_irq(i, &cascade);
+
+	if (cpu_has_vint) {
+		pr_info("Setting up vectored interrupts\n");
+		set_vi_handler(2, ltq_hw0_irqdispatch);
+		set_vi_handler(3, ltq_hw1_irqdispatch);
+		set_vi_handler(4, ltq_hw2_irqdispatch);
+		set_vi_handler(5, ltq_hw3_irqdispatch);
+		set_vi_handler(6, ltq_hw4_irqdispatch);
+		set_vi_handler(7, ltq_hw5_irqdispatch);
+	}
+
+	for (i = INT_NUM_IRQ0;
+		i <= (INT_NUM_IRQ0 + (5 * INT_NUM_IM_OFFSET)); i++)
+		if ((i == LTQ_EIU_IR0) || (i == LTQ_EIU_IR1) ||
+			(i == LTQ_EIU_IR2))
+			irq_set_chip_and_handler(i, &ltq_eiu_type,
+				handle_level_irq);
+		/* EIU3-5 only exist on ar9 and vr9 */
+		else if (((i == LTQ_EIU_IR3) || (i == LTQ_EIU_IR4) ||
+			(i == LTQ_EIU_IR5)) && (ltq_is_ar9() || ltq_is_vr9()))
+			irq_set_chip_and_handler(i, &ltq_eiu_type,
+				handle_level_irq);
+		else
+			irq_set_chip_and_handler(i, &ltq_irq_type,
+				handle_level_irq);
+
+#if !defined(CONFIG_MIPS_MT_SMP) && !defined(CONFIG_MIPS_MT_SMTC)
+	set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 |
+		IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
+#else
+	set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ0 | IE_IRQ1 |
+		IE_IRQ2 | IE_IRQ3 | IE_IRQ4 | IE_IRQ5);
+#endif
+}
+
+unsigned int __cpuinit get_c0_compare_int(void)
+{
+	return CP0_LEGACY_COMPARE_IRQ;
+}
diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c
new file mode 100644
index 000000000000..56ba007bf1e5
--- /dev/null
+++ b/arch/mips/lantiq/prom.c
@@ -0,0 +1,71 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <asm/bootinfo.h>
+#include <asm/time.h>
+
+#include <lantiq.h>
+
+#include "prom.h"
+#include "clk.h"
+
+static struct ltq_soc_info soc_info;
+
+unsigned int ltq_get_cpu_ver(void)
+{
+	return soc_info.rev;
+}
+EXPORT_SYMBOL(ltq_get_cpu_ver);
+
+unsigned int ltq_get_soc_type(void)
+{
+	return soc_info.type;
+}
+EXPORT_SYMBOL(ltq_get_soc_type);
+
+const char *get_system_type(void)
+{
+	return soc_info.sys_type;
+}
+
+void prom_free_prom_memory(void)
+{
+}
+
+static void __init prom_init_cmdline(void)
+{
+	int argc = fw_arg0;
+	char **argv = (char **) KSEG1ADDR(fw_arg1);
+	int i;
+
+	for (i = 0; i < argc; i++) {
+		char *p = (char *)  KSEG1ADDR(argv[i]);
+
+		if (p && *p) {
+			strlcat(arcs_cmdline, p, sizeof(arcs_cmdline));
+			strlcat(arcs_cmdline, " ", sizeof(arcs_cmdline));
+		}
+	}
+}
+
+void __init prom_init(void)
+{
+	struct clk *clk;
+
+	ltq_soc_detect(&soc_info);
+	clk_init();
+	clk = clk_get(0, "cpu");
+	snprintf(soc_info.sys_type, LTQ_SYS_TYPE_LEN - 1, "%s rev1.%d",
+		soc_info.name, soc_info.rev);
+	clk_put(clk);
+	soc_info.sys_type[LTQ_SYS_TYPE_LEN - 1] = '\0';
+	pr_info("SoC: %s\n", soc_info.sys_type);
+	prom_init_cmdline();
+}
diff --git a/arch/mips/lantiq/prom.h b/arch/mips/lantiq/prom.h
new file mode 100644
index 000000000000..4165ad156782
--- /dev/null
+++ b/arch/mips/lantiq/prom.h
@@ -0,0 +1,24 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_PROM_H__
+#define _LTQ_PROM_H__
+
+#define LTQ_SYS_TYPE_LEN	0x100
+
+struct ltq_soc_info {
+	unsigned char *name;
+	unsigned int rev;
+	unsigned int partnum;
+	unsigned int type;
+	unsigned char sys_type[LTQ_SYS_TYPE_LEN];
+};
+
+extern void ltq_soc_detect(struct ltq_soc_info *i);
+
+#endif
diff --git a/arch/mips/lantiq/setup.c b/arch/mips/lantiq/setup.c
new file mode 100644
index 000000000000..79a2b0c5cc65
--- /dev/null
+++ b/arch/mips/lantiq/setup.c
@@ -0,0 +1,41 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <asm/bootinfo.h>
+
+#include <lantiq_soc.h>
+
+void __init plat_mem_setup(void)
+{
+	/* assume 16M as default incase uboot fails to pass proper ramsize */
+	unsigned long memsize = 16;
+	char **envp = (char **) KSEG1ADDR(fw_arg2);
+
+	ioport_resource.start = IOPORT_RESOURCE_START;
+	ioport_resource.end = IOPORT_RESOURCE_END;
+	iomem_resource.start = IOMEM_RESOURCE_START;
+	iomem_resource.end = IOMEM_RESOURCE_END;
+
+	set_io_port_base((unsigned long) KSEG1);
+
+	while (*envp) {
+		char *e = (char *)KSEG1ADDR(*envp);
+		if (!strncmp(e, "memsize=", 8)) {
+			e += 8;
+			if (strict_strtoul(e, 0, &memsize))
+				pr_warn("bad memsize specified\n");
+		}
+		envp++;
+	}
+	memsize *= 1024 * 1024;
+	add_memory_region(0x00000000, memsize, BOOT_MEM_RAM);
+}
-- 
cgit v1.2.3


From 8ec6d93508f705dacafd5fcd058c69ef405002f9 Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Wed, 30 Mar 2011 09:27:48 +0200
Subject: MIPS: Lantiq: add SoC specific code for XWAY family

Add support for the Lantiq XWAY family of Mips24KEc SoCs.

* Danube (PSB50702)
* Twinpass (PSB4000)
* AR9 (PSB50802)
* Amazon SE (PSB5061)

The Amazon SE is a lightweight SoC and has no PCI as well as a different
clock. We split the code out into seperate files to handle this.

The GPIO pins on the SoCs are multi function and there are several bits
we can use to configure the pins. To be as compatible as possible to
GPIOLIB we add a function

int lq_gpio_request(unsigned int pin, unsigned int alt0,
        unsigned int alt1, unsigned int dir, const char *name);

which lets you configure the 2 "alternate function" bits. This way drivers like
PCI can make use of GPIOLIB without a cubersome wrapper.

The PLL code inside arch/mips/lantiq/xway/clk-xway.c is voodoo to me. It was
taken from a 2.4.20 source tree and was never really changed by me since then.

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Ralph Hempel <ralph.hempel@lantiq.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2249/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig                                  |   1 +
 arch/mips/include/asm/mach-lantiq/xway/irq.h       |  18 ++
 .../mips/include/asm/mach-lantiq/xway/lantiq_irq.h |  66 ++++++
 .../mips/include/asm/mach-lantiq/xway/lantiq_soc.h | 140 +++++++++++++
 arch/mips/lantiq/Kconfig                           |  21 ++
 arch/mips/lantiq/Makefile                          |   2 +
 arch/mips/lantiq/Platform                          |   1 +
 arch/mips/lantiq/xway/Makefile                     |   4 +
 arch/mips/lantiq/xway/clk-ase.c                    |  48 +++++
 arch/mips/lantiq/xway/clk-xway.c                   | 223 +++++++++++++++++++++
 arch/mips/lantiq/xway/ebu.c                        |  53 +++++
 arch/mips/lantiq/xway/gpio.c                       | 195 ++++++++++++++++++
 arch/mips/lantiq/xway/pmu.c                        |  70 +++++++
 arch/mips/lantiq/xway/prom-ase.c                   |  39 ++++
 arch/mips/lantiq/xway/prom-xway.c                  |  54 +++++
 arch/mips/lantiq/xway/reset.c                      |  91 +++++++++
 16 files changed, 1026 insertions(+)
 create mode 100644 arch/mips/include/asm/mach-lantiq/xway/irq.h
 create mode 100644 arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h
 create mode 100644 arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
 create mode 100644 arch/mips/lantiq/Kconfig
 create mode 100644 arch/mips/lantiq/xway/Makefile
 create mode 100644 arch/mips/lantiq/xway/clk-ase.c
 create mode 100644 arch/mips/lantiq/xway/clk-xway.c
 create mode 100644 arch/mips/lantiq/xway/ebu.c
 create mode 100644 arch/mips/lantiq/xway/gpio.c
 create mode 100644 arch/mips/lantiq/xway/pmu.c
 create mode 100644 arch/mips/lantiq/xway/prom-ase.c
 create mode 100644 arch/mips/lantiq/xway/prom-xway.c
 create mode 100644 arch/mips/lantiq/xway/reset.c

(limited to 'arch')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 1787572a76c3..b3b49996462e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -787,6 +787,7 @@ source "arch/mips/ath79/Kconfig"
 source "arch/mips/bcm63xx/Kconfig"
 source "arch/mips/jazz/Kconfig"
 source "arch/mips/jz4740/Kconfig"
+source "arch/mips/lantiq/Kconfig"
 source "arch/mips/lasat/Kconfig"
 source "arch/mips/pmc-sierra/Kconfig"
 source "arch/mips/powertv/Kconfig"
diff --git a/arch/mips/include/asm/mach-lantiq/xway/irq.h b/arch/mips/include/asm/mach-lantiq/xway/irq.h
new file mode 100644
index 000000000000..a1471d2dd0d2
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/xway/irq.h
@@ -0,0 +1,18 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef __LANTIQ_IRQ_H
+#define __LANTIQ_IRQ_H
+
+#include <lantiq_irq.h>
+
+#define NR_IRQS 256
+
+#include_next <irq.h>
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h
new file mode 100644
index 000000000000..b4465a888e20
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h
@@ -0,0 +1,66 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LANTIQ_XWAY_IRQ_H__
+#define _LANTIQ_XWAY_IRQ_H__
+
+#define INT_NUM_IRQ0		8
+#define INT_NUM_IM0_IRL0	(INT_NUM_IRQ0 + 0)
+#define INT_NUM_IM1_IRL0	(INT_NUM_IRQ0 + 32)
+#define INT_NUM_IM2_IRL0	(INT_NUM_IRQ0 + 64)
+#define INT_NUM_IM3_IRL0	(INT_NUM_IRQ0 + 96)
+#define INT_NUM_IM4_IRL0	(INT_NUM_IRQ0 + 128)
+#define INT_NUM_IM_OFFSET	(INT_NUM_IM1_IRL0 - INT_NUM_IM0_IRL0)
+
+#define LTQ_ASC_TIR(x)		(INT_NUM_IM3_IRL0 + (x * 8))
+#define LTQ_ASC_RIR(x)		(INT_NUM_IM3_IRL0 + (x * 8) + 1)
+#define LTQ_ASC_EIR(x)		(INT_NUM_IM3_IRL0 + (x * 8) + 2)
+
+#define LTQ_ASC_ASE_TIR		INT_NUM_IM2_IRL0
+#define LTQ_ASC_ASE_RIR		(INT_NUM_IM2_IRL0 + 2)
+#define LTQ_ASC_ASE_EIR		(INT_NUM_IM2_IRL0 + 3)
+
+#define LTQ_SSC_TIR		(INT_NUM_IM0_IRL0 + 15)
+#define LTQ_SSC_RIR		(INT_NUM_IM0_IRL0 + 14)
+#define LTQ_SSC_EIR		(INT_NUM_IM0_IRL0 + 16)
+
+#define LTQ_MEI_DYING_GASP_INT	(INT_NUM_IM1_IRL0 + 21)
+#define LTQ_MEI_INT		(INT_NUM_IM1_IRL0 + 23)
+
+#define LTQ_TIMER6_INT		(INT_NUM_IM1_IRL0 + 23)
+#define LTQ_USB_INT		(INT_NUM_IM1_IRL0 + 22)
+#define LTQ_USB_OC_INT		(INT_NUM_IM4_IRL0 + 23)
+
+#define MIPS_CPU_TIMER_IRQ		7
+
+#define LTQ_DMA_CH0_INT		(INT_NUM_IM2_IRL0)
+#define LTQ_DMA_CH1_INT		(INT_NUM_IM2_IRL0 + 1)
+#define LTQ_DMA_CH2_INT		(INT_NUM_IM2_IRL0 + 2)
+#define LTQ_DMA_CH3_INT		(INT_NUM_IM2_IRL0 + 3)
+#define LTQ_DMA_CH4_INT		(INT_NUM_IM2_IRL0 + 4)
+#define LTQ_DMA_CH5_INT		(INT_NUM_IM2_IRL0 + 5)
+#define LTQ_DMA_CH6_INT		(INT_NUM_IM2_IRL0 + 6)
+#define LTQ_DMA_CH7_INT		(INT_NUM_IM2_IRL0 + 7)
+#define LTQ_DMA_CH8_INT		(INT_NUM_IM2_IRL0 + 8)
+#define LTQ_DMA_CH9_INT		(INT_NUM_IM2_IRL0 + 9)
+#define LTQ_DMA_CH10_INT	(INT_NUM_IM2_IRL0 + 10)
+#define LTQ_DMA_CH11_INT	(INT_NUM_IM2_IRL0 + 11)
+#define LTQ_DMA_CH12_INT	(INT_NUM_IM2_IRL0 + 25)
+#define LTQ_DMA_CH13_INT	(INT_NUM_IM2_IRL0 + 26)
+#define LTQ_DMA_CH14_INT	(INT_NUM_IM2_IRL0 + 27)
+#define LTQ_DMA_CH15_INT	(INT_NUM_IM2_IRL0 + 28)
+#define LTQ_DMA_CH16_INT	(INT_NUM_IM2_IRL0 + 29)
+#define LTQ_DMA_CH17_INT	(INT_NUM_IM2_IRL0 + 30)
+#define LTQ_DMA_CH18_INT	(INT_NUM_IM2_IRL0 + 16)
+#define LTQ_DMA_CH19_INT	(INT_NUM_IM2_IRL0 + 21)
+
+#define LTQ_PPE_MBOX_INT	(INT_NUM_IM2_IRL0 + 24)
+
+#define INT_NUM_IM4_IRL14	(INT_NUM_IM4_IRL0 + 14)
+
+#endif
diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
new file mode 100644
index 000000000000..343e82cbf601
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
@@ -0,0 +1,140 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_XWAY_H__
+#define _LTQ_XWAY_H__
+
+#ifdef CONFIG_SOC_TYPE_XWAY
+
+#include <lantiq.h>
+
+/* Chip IDs */
+#define SOC_ID_DANUBE1		0x129
+#define SOC_ID_DANUBE2		0x12B
+#define SOC_ID_TWINPASS		0x12D
+#define SOC_ID_AMAZON_SE	0x152
+#define SOC_ID_ARX188		0x16C
+#define SOC_ID_ARX168		0x16D
+#define SOC_ID_ARX182		0x16F
+
+/* SoC Types */
+#define SOC_TYPE_DANUBE		0x01
+#define SOC_TYPE_TWINPASS	0x02
+#define SOC_TYPE_AR9		0x03
+#define SOC_TYPE_VR9		0x04
+#define SOC_TYPE_AMAZON_SE	0x05
+
+/* ASC0/1 - serial port */
+#define LTQ_ASC0_BASE_ADDR	0x1E100400
+#define LTQ_ASC1_BASE_ADDR	0x1E100C00
+#define LTQ_ASC_SIZE		0x400
+
+/* RCU - reset control unit */
+#define LTQ_RCU_BASE_ADDR	0x1F203000
+#define LTQ_RCU_SIZE		0x1000
+
+/* GPTU - general purpose timer unit */
+#define LTQ_GPTU_BASE_ADDR	0x18000300
+#define LTQ_GPTU_SIZE		0x100
+
+/* EBU - external bus unit */
+#define LTQ_EBU_GPIO_START	0x14000000
+#define LTQ_EBU_GPIO_SIZE	0x1000
+
+#define LTQ_EBU_BASE_ADDR	0x1E105300
+#define LTQ_EBU_SIZE		0x100
+
+#define LTQ_EBU_BUSCON0		0x0060
+#define LTQ_EBU_PCC_CON		0x0090
+#define LTQ_EBU_PCC_IEN		0x00A4
+#define LTQ_EBU_PCC_ISTAT	0x00A0
+#define LTQ_EBU_BUSCON1		0x0064
+#define LTQ_EBU_ADDRSEL1	0x0024
+#define EBU_WRDIS		0x80000000
+
+/* CGU - clock generation unit */
+#define LTQ_CGU_BASE_ADDR	0x1F103000
+#define LTQ_CGU_SIZE		0x1000
+
+/* ICU - interrupt control unit */
+#define LTQ_ICU_BASE_ADDR	0x1F880200
+#define LTQ_ICU_SIZE		0x100
+
+/* EIU - external interrupt unit */
+#define LTQ_EIU_BASE_ADDR	0x1F101000
+#define LTQ_EIU_SIZE		0x1000
+
+/* PMU - power management unit */
+#define LTQ_PMU_BASE_ADDR	0x1F102000
+#define LTQ_PMU_SIZE		0x1000
+
+#define PMU_DMA			0x0020
+#define PMU_USB			0x8041
+#define PMU_LED			0x0800
+#define PMU_GPT			0x1000
+#define PMU_PPE			0x2000
+#define PMU_FPI			0x4000
+#define PMU_SWITCH		0x10000000
+
+/* ETOP - ethernet */
+#define LTQ_PPE32_BASE_ADDR	0xBE180000
+#define LTQ_PPE32_SIZE		0x40000
+
+/* DMA */
+#define LTQ_DMA_BASE_ADDR	0xBE104100
+
+/* PCI */
+#define PCI_CR_BASE_ADDR	0x1E105400
+#define PCI_CR_SIZE		0x400
+
+/* WDT */
+#define LTQ_WDT_BASE_ADDR	0x1F8803F0
+#define LTQ_WDT_SIZE		0x10
+
+/* STP - serial to parallel conversion unit */
+#define LTQ_STP_BASE_ADDR	0x1E100BB0
+#define LTQ_STP_SIZE		0x40
+
+/* GPIO */
+#define LTQ_GPIO0_BASE_ADDR	0x1E100B10
+#define LTQ_GPIO1_BASE_ADDR	0x1E100B40
+#define LTQ_GPIO2_BASE_ADDR	0x1E100B70
+#define LTQ_GPIO_SIZE		0x30
+
+/* SSC */
+#define LTQ_SSC_BASE_ADDR	0x1e100800
+#define LTQ_SSC_SIZE		0x100
+
+/* MEI - dsl core */
+#define LTQ_MEI_BASE_ADDR	0x1E116000
+
+/* DEU - data encryption unit */
+#define LTQ_DEU_BASE_ADDR	0x1E103100
+
+/* MPS - multi processor unit (voice) */
+#define LTQ_MPS_BASE_ADDR	(KSEG1 + 0x1F107000)
+#define LTQ_MPS_CHIPID		((u32 *)(LTQ_MPS_BASE_ADDR + 0x0344))
+
+/* request a non-gpio and set the PIO config */
+extern int  ltq_gpio_request(unsigned int pin, unsigned int alt0,
+	unsigned int alt1, unsigned int dir, const char *name);
+extern void ltq_pmu_enable(unsigned int module);
+extern void ltq_pmu_disable(unsigned int module);
+
+static inline int ltq_is_ar9(void)
+{
+	return (ltq_get_soc_type() == SOC_TYPE_AR9);
+}
+
+static inline int ltq_is_vr9(void)
+{
+	return (ltq_get_soc_type() == SOC_TYPE_VR9);
+}
+
+#endif /* CONFIG_SOC_TYPE_XWAY */
+#endif /* _LTQ_XWAY_H__ */
diff --git a/arch/mips/lantiq/Kconfig b/arch/mips/lantiq/Kconfig
new file mode 100644
index 000000000000..2780461e3258
--- /dev/null
+++ b/arch/mips/lantiq/Kconfig
@@ -0,0 +1,21 @@
+if LANTIQ
+
+config SOC_TYPE_XWAY
+	bool
+	default n
+
+choice
+	prompt "SoC Type"
+	default SOC_XWAY
+
+config SOC_AMAZON_SE
+	bool "Amazon SE"
+	select SOC_TYPE_XWAY
+
+config SOC_XWAY
+	bool "XWAY"
+	select SOC_TYPE_XWAY
+	select HW_HAS_PCI
+endchoice
+
+endif
diff --git a/arch/mips/lantiq/Makefile b/arch/mips/lantiq/Makefile
index 6a30de671f23..a268391eb45e 100644
--- a/arch/mips/lantiq/Makefile
+++ b/arch/mips/lantiq/Makefile
@@ -7,3 +7,5 @@
 obj-y := irq.o setup.o clk.o prom.o
 
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+
+obj-$(CONFIG_SOC_TYPE_XWAY) += xway/
diff --git a/arch/mips/lantiq/Platform b/arch/mips/lantiq/Platform
index eef587f2d872..f3dff05722de 100644
--- a/arch/mips/lantiq/Platform
+++ b/arch/mips/lantiq/Platform
@@ -5,3 +5,4 @@
 platform-$(CONFIG_LANTIQ)	+= lantiq/
 cflags-$(CONFIG_LANTIQ)		+= -I$(srctree)/arch/mips/include/asm/mach-lantiq
 load-$(CONFIG_LANTIQ)		= 0xffffffff80002000
+cflags-$(CONFIG_SOC_TYPE_XWAY)	+= -I$(srctree)/arch/mips/include/asm/mach-lantiq/xway
diff --git a/arch/mips/lantiq/xway/Makefile b/arch/mips/lantiq/xway/Makefile
new file mode 100644
index 000000000000..9c85ff9184c6
--- /dev/null
+++ b/arch/mips/lantiq/xway/Makefile
@@ -0,0 +1,4 @@
+obj-y := pmu.o ebu.o reset.o gpio.o
+
+obj-$(CONFIG_SOC_XWAY) += clk-xway.o prom-xway.o
+obj-$(CONFIG_SOC_AMAZON_SE) += clk-ase.o prom-ase.o
diff --git a/arch/mips/lantiq/xway/clk-ase.c b/arch/mips/lantiq/xway/clk-ase.c
new file mode 100644
index 000000000000..22d823acd536
--- /dev/null
+++ b/arch/mips/lantiq/xway/clk-ase.c
@@ -0,0 +1,48 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/clk.h>
+
+#include <asm/time.h>
+#include <asm/irq.h>
+#include <asm/div64.h>
+
+#include <lantiq_soc.h>
+
+/* cgu registers */
+#define LTQ_CGU_SYS	0x0010
+
+unsigned int ltq_get_io_region_clock(void)
+{
+	return CLOCK_133M;
+}
+EXPORT_SYMBOL(ltq_get_io_region_clock);
+
+unsigned int ltq_get_fpi_bus_clock(int fpi)
+{
+	return CLOCK_133M;
+}
+EXPORT_SYMBOL(ltq_get_fpi_bus_clock);
+
+unsigned int ltq_get_cpu_hz(void)
+{
+	if (ltq_cgu_r32(LTQ_CGU_SYS) & (1 << 5))
+		return CLOCK_266M;
+	else
+		return CLOCK_133M;
+}
+EXPORT_SYMBOL(ltq_get_cpu_hz);
+
+unsigned int ltq_get_fpi_hz(void)
+{
+	return CLOCK_133M;
+}
+EXPORT_SYMBOL(ltq_get_fpi_hz);
diff --git a/arch/mips/lantiq/xway/clk-xway.c b/arch/mips/lantiq/xway/clk-xway.c
new file mode 100644
index 000000000000..ddd39593c581
--- /dev/null
+++ b/arch/mips/lantiq/xway/clk-xway.c
@@ -0,0 +1,223 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/clk.h>
+
+#include <asm/time.h>
+#include <asm/irq.h>
+#include <asm/div64.h>
+
+#include <lantiq_soc.h>
+
+static unsigned int ltq_ram_clocks[] = {
+	CLOCK_167M, CLOCK_133M, CLOCK_111M, CLOCK_83M };
+#define DDR_HZ ltq_ram_clocks[ltq_cgu_r32(LTQ_CGU_SYS) & 0x3]
+
+#define BASIC_FREQUENCY_1	35328000
+#define BASIC_FREQUENCY_2	36000000
+#define BASIS_REQUENCY_USB	12000000
+
+#define GET_BITS(x, msb, lsb) \
+	(((x) & ((1 << ((msb) + 1)) - 1)) >> (lsb))
+
+#define LTQ_CGU_PLL0_CFG	0x0004
+#define LTQ_CGU_PLL1_CFG	0x0008
+#define LTQ_CGU_PLL2_CFG	0x000C
+#define LTQ_CGU_SYS		0x0010
+#define LTQ_CGU_UPDATE		0x0014
+#define LTQ_CGU_IF_CLK		0x0018
+#define LTQ_CGU_OSC_CON		0x001C
+#define LTQ_CGU_SMD		0x0020
+#define LTQ_CGU_CT1SR		0x0028
+#define LTQ_CGU_CT2SR		0x002C
+#define LTQ_CGU_PCMCR		0x0030
+#define LTQ_CGU_PCI_CR		0x0034
+#define LTQ_CGU_PD_PC		0x0038
+#define LTQ_CGU_FMR		0x003C
+
+#define CGU_PLL0_PHASE_DIVIDER_ENABLE	\
+	(ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 31))
+#define CGU_PLL0_BYPASS			\
+	(ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 30))
+#define CGU_PLL0_CFG_DSMSEL		\
+	(ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 28))
+#define CGU_PLL0_CFG_FRAC_EN		\
+	(ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & (1 << 27))
+#define CGU_PLL1_SRC			\
+	(ltq_cgu_r32(LTQ_CGU_PLL1_CFG) & (1 << 31))
+#define CGU_PLL2_PHASE_DIVIDER_ENABLE	\
+	(ltq_cgu_r32(LTQ_CGU_PLL2_CFG) & (1 << 20))
+#define CGU_SYS_FPI_SEL			(1 << 6)
+#define CGU_SYS_DDR_SEL			0x3
+#define CGU_PLL0_SRC			(1 << 29)
+
+#define CGU_PLL0_CFG_PLLK	GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL0_CFG), 26, 17)
+#define CGU_PLL0_CFG_PLLN	GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL0_CFG), 12, 6)
+#define CGU_PLL0_CFG_PLLM	GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL0_CFG), 5, 2)
+#define CGU_PLL2_SRC		GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL2_CFG), 18, 17)
+#define CGU_PLL2_CFG_INPUT_DIV	GET_BITS(ltq_cgu_r32(LTQ_CGU_PLL2_CFG), 16, 13)
+
+static unsigned int ltq_get_pll0_fdiv(void);
+
+static inline unsigned int get_input_clock(int pll)
+{
+	switch (pll) {
+	case 0:
+		if (ltq_cgu_r32(LTQ_CGU_PLL0_CFG) & CGU_PLL0_SRC)
+			return BASIS_REQUENCY_USB;
+		else if (CGU_PLL0_PHASE_DIVIDER_ENABLE)
+			return BASIC_FREQUENCY_1;
+		else
+			return BASIC_FREQUENCY_2;
+	case 1:
+		if (CGU_PLL1_SRC)
+			return BASIS_REQUENCY_USB;
+		else if (CGU_PLL0_PHASE_DIVIDER_ENABLE)
+			return BASIC_FREQUENCY_1;
+		else
+			return BASIC_FREQUENCY_2;
+	case 2:
+		switch (CGU_PLL2_SRC) {
+		case 0:
+			return ltq_get_pll0_fdiv();
+		case 1:
+			return CGU_PLL2_PHASE_DIVIDER_ENABLE ?
+				BASIC_FREQUENCY_1 :
+				BASIC_FREQUENCY_2;
+		case 2:
+			return BASIS_REQUENCY_USB;
+		}
+	default:
+		return 0;
+	}
+}
+
+static inline unsigned int cal_dsm(int pll, unsigned int num, unsigned int den)
+{
+	u64 res, clock = get_input_clock(pll);
+
+	res = num * clock;
+	do_div(res, den);
+	return res;
+}
+
+static inline unsigned int mash_dsm(int pll, unsigned int M, unsigned int N,
+	unsigned int K)
+{
+	unsigned int num = ((N + 1) << 10) + K;
+	unsigned int den = (M + 1) << 10;
+
+	return cal_dsm(pll, num, den);
+}
+
+static inline unsigned int ssff_dsm_1(int pll, unsigned int M, unsigned int N,
+	unsigned int K)
+{
+	unsigned int num = ((N + 1) << 11) + K + 512;
+	unsigned int den = (M + 1) << 11;
+
+	return cal_dsm(pll, num, den);
+}
+
+static inline unsigned int ssff_dsm_2(int pll, unsigned int M, unsigned int N,
+	unsigned int K)
+{
+	unsigned int num = K >= 512 ?
+		((N + 1) << 12) + K - 512 : ((N + 1) << 12) + K + 3584;
+	unsigned int den = (M + 1) << 12;
+
+	return cal_dsm(pll, num, den);
+}
+
+static inline unsigned int dsm(int pll, unsigned int M, unsigned int N,
+	unsigned int K, unsigned int dsmsel, unsigned int phase_div_en)
+{
+	if (!dsmsel)
+		return mash_dsm(pll, M, N, K);
+	else if (!phase_div_en)
+		return mash_dsm(pll, M, N, K);
+	else
+		return ssff_dsm_2(pll, M, N, K);
+}
+
+static inline unsigned int ltq_get_pll0_fosc(void)
+{
+	if (CGU_PLL0_BYPASS)
+		return get_input_clock(0);
+	else
+		return !CGU_PLL0_CFG_FRAC_EN
+			? dsm(0, CGU_PLL0_CFG_PLLM, CGU_PLL0_CFG_PLLN, 0,
+				CGU_PLL0_CFG_DSMSEL,
+				CGU_PLL0_PHASE_DIVIDER_ENABLE)
+			: dsm(0, CGU_PLL0_CFG_PLLM, CGU_PLL0_CFG_PLLN,
+				CGU_PLL0_CFG_PLLK, CGU_PLL0_CFG_DSMSEL,
+				CGU_PLL0_PHASE_DIVIDER_ENABLE);
+}
+
+static unsigned int ltq_get_pll0_fdiv(void)
+{
+	unsigned int div = CGU_PLL2_CFG_INPUT_DIV + 1;
+
+	return (ltq_get_pll0_fosc() + (div >> 1)) / div;
+}
+
+unsigned int ltq_get_io_region_clock(void)
+{
+	unsigned int ret = ltq_get_pll0_fosc();
+
+	switch (ltq_cgu_r32(LTQ_CGU_PLL2_CFG) & CGU_SYS_DDR_SEL) {
+	default:
+	case 0:
+		return (ret + 1) / 2;
+	case 1:
+		return (ret * 2 + 2) / 5;
+	case 2:
+		return (ret + 1) / 3;
+	case 3:
+		return (ret + 2) / 4;
+	}
+}
+EXPORT_SYMBOL(ltq_get_io_region_clock);
+
+unsigned int ltq_get_fpi_bus_clock(int fpi)
+{
+	unsigned int ret = ltq_get_io_region_clock();
+
+	if ((fpi == 2) && (ltq_cgu_r32(LTQ_CGU_SYS) & CGU_SYS_FPI_SEL))
+		ret >>= 1;
+	return ret;
+}
+EXPORT_SYMBOL(ltq_get_fpi_bus_clock);
+
+unsigned int ltq_get_cpu_hz(void)
+{
+	switch (ltq_cgu_r32(LTQ_CGU_SYS) & 0xc) {
+	case 0:
+		return CLOCK_333M;
+	case 4:
+		return DDR_HZ;
+	case 8:
+		return DDR_HZ << 1;
+	default:
+		return DDR_HZ >> 1;
+	}
+}
+EXPORT_SYMBOL(ltq_get_cpu_hz);
+
+unsigned int ltq_get_fpi_hz(void)
+{
+	unsigned int ddr_clock = DDR_HZ;
+
+	if (ltq_cgu_r32(LTQ_CGU_SYS) & 0x40)
+		return ddr_clock >> 1;
+	return ddr_clock;
+}
+EXPORT_SYMBOL(ltq_get_fpi_hz);
diff --git a/arch/mips/lantiq/xway/ebu.c b/arch/mips/lantiq/xway/ebu.c
new file mode 100644
index 000000000000..66eb52fa50a1
--- /dev/null
+++ b/arch/mips/lantiq/xway/ebu.c
@@ -0,0 +1,53 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  EBU - the external bus unit attaches PCI, NOR and NAND
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/ioport.h>
+
+#include <lantiq_soc.h>
+
+/* all access to the ebu must be locked */
+DEFINE_SPINLOCK(ebu_lock);
+EXPORT_SYMBOL_GPL(ebu_lock);
+
+static struct resource ltq_ebu_resource = {
+	.name	= "ebu",
+	.start	= LTQ_EBU_BASE_ADDR,
+	.end	= LTQ_EBU_BASE_ADDR + LTQ_EBU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+/* remapped base addr of the clock unit and external bus unit */
+void __iomem *ltq_ebu_membase;
+
+static int __init lantiq_ebu_init(void)
+{
+	/* insert and request the memory region */
+	if (insert_resource(&iomem_resource, &ltq_ebu_resource) < 0)
+		panic("Failed to insert ebu memory\n");
+
+	if (request_mem_region(ltq_ebu_resource.start,
+			resource_size(&ltq_ebu_resource), "ebu") < 0)
+		panic("Failed to request ebu memory\n");
+
+	/* remap ebu register range */
+	ltq_ebu_membase = ioremap_nocache(ltq_ebu_resource.start,
+				resource_size(&ltq_ebu_resource));
+	if (!ltq_ebu_membase)
+		panic("Failed to remap ebu memory\n");
+
+	/* make sure to unprotect the memory region where flash is located */
+	ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_BUSCON0) & ~EBU_WRDIS, LTQ_EBU_BUSCON0);
+	return 0;
+}
+
+postcore_initcall(lantiq_ebu_init);
diff --git a/arch/mips/lantiq/xway/gpio.c b/arch/mips/lantiq/xway/gpio.c
new file mode 100644
index 000000000000..a321451a5455
--- /dev/null
+++ b/arch/mips/lantiq/xway/gpio.c
@@ -0,0 +1,195 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+
+#include <lantiq_soc.h>
+
+#define LTQ_GPIO_OUT		0x00
+#define LTQ_GPIO_IN		0x04
+#define LTQ_GPIO_DIR		0x08
+#define LTQ_GPIO_ALTSEL0	0x0C
+#define LTQ_GPIO_ALTSEL1	0x10
+#define LTQ_GPIO_OD		0x14
+
+#define PINS_PER_PORT		16
+#define MAX_PORTS		3
+
+#define ltq_gpio_getbit(m, r, p)	(!!(ltq_r32(m + r) & (1 << p)))
+#define ltq_gpio_setbit(m, r, p)	ltq_w32_mask(0, (1 << p), m + r)
+#define ltq_gpio_clearbit(m, r, p)	ltq_w32_mask((1 << p), 0, m + r)
+
+struct ltq_gpio {
+	void __iomem *membase;
+	struct gpio_chip chip;
+};
+
+static struct ltq_gpio ltq_gpio_port[MAX_PORTS];
+
+int gpio_to_irq(unsigned int gpio)
+{
+	return -EINVAL;
+}
+EXPORT_SYMBOL(gpio_to_irq);
+
+int irq_to_gpio(unsigned int gpio)
+{
+	return -EINVAL;
+}
+EXPORT_SYMBOL(irq_to_gpio);
+
+int ltq_gpio_request(unsigned int pin, unsigned int alt0,
+	unsigned int alt1, unsigned int dir, const char *name)
+{
+	int id = 0;
+
+	if (pin >= (MAX_PORTS * PINS_PER_PORT))
+		return -EINVAL;
+	if (gpio_request(pin, name)) {
+		pr_err("failed to setup lantiq gpio: %s\n", name);
+		return -EBUSY;
+	}
+	if (dir)
+		gpio_direction_output(pin, 1);
+	else
+		gpio_direction_input(pin);
+	while (pin >= PINS_PER_PORT) {
+		pin -= PINS_PER_PORT;
+		id++;
+	}
+	if (alt0)
+		ltq_gpio_setbit(ltq_gpio_port[id].membase,
+			LTQ_GPIO_ALTSEL0, pin);
+	else
+		ltq_gpio_clearbit(ltq_gpio_port[id].membase,
+			LTQ_GPIO_ALTSEL0, pin);
+	if (alt1)
+		ltq_gpio_setbit(ltq_gpio_port[id].membase,
+			LTQ_GPIO_ALTSEL1, pin);
+	else
+		ltq_gpio_clearbit(ltq_gpio_port[id].membase,
+			LTQ_GPIO_ALTSEL1, pin);
+	return 0;
+}
+EXPORT_SYMBOL(ltq_gpio_request);
+
+static void ltq_gpio_set(struct gpio_chip *chip, unsigned int offset, int value)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	if (value)
+		ltq_gpio_setbit(ltq_gpio->membase, LTQ_GPIO_OUT, offset);
+	else
+		ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_OUT, offset);
+}
+
+static int ltq_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	return ltq_gpio_getbit(ltq_gpio->membase, LTQ_GPIO_IN, offset);
+}
+
+static int ltq_gpio_direction_input(struct gpio_chip *chip, unsigned int offset)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_OD, offset);
+	ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_DIR, offset);
+
+	return 0;
+}
+
+static int ltq_gpio_direction_output(struct gpio_chip *chip,
+	unsigned int offset, int value)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	ltq_gpio_setbit(ltq_gpio->membase, LTQ_GPIO_OD, offset);
+	ltq_gpio_setbit(ltq_gpio->membase, LTQ_GPIO_DIR, offset);
+	ltq_gpio_set(chip, offset, value);
+
+	return 0;
+}
+
+static int ltq_gpio_req(struct gpio_chip *chip, unsigned offset)
+{
+	struct ltq_gpio *ltq_gpio = container_of(chip, struct ltq_gpio, chip);
+
+	ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_ALTSEL0, offset);
+	ltq_gpio_clearbit(ltq_gpio->membase, LTQ_GPIO_ALTSEL1, offset);
+	return 0;
+}
+
+static int ltq_gpio_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+
+	if (pdev->id >= MAX_PORTS) {
+		dev_err(&pdev->dev, "invalid gpio port %d\n",
+			pdev->id);
+		return -EINVAL;
+	}
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "failed to get memory for gpio port %d\n",
+			pdev->id);
+		return -ENOENT;
+	}
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		resource_size(res), dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev,
+			"failed to request memory for gpio port %d\n",
+			pdev->id);
+		return -EBUSY;
+	}
+	ltq_gpio_port[pdev->id].membase = devm_ioremap_nocache(&pdev->dev,
+		res->start, resource_size(res));
+	if (!ltq_gpio_port[pdev->id].membase) {
+		dev_err(&pdev->dev, "failed to remap memory for gpio port %d\n",
+			pdev->id);
+		return -ENOMEM;
+	}
+	ltq_gpio_port[pdev->id].chip.label = "ltq_gpio";
+	ltq_gpio_port[pdev->id].chip.direction_input = ltq_gpio_direction_input;
+	ltq_gpio_port[pdev->id].chip.direction_output =
+		ltq_gpio_direction_output;
+	ltq_gpio_port[pdev->id].chip.get = ltq_gpio_get;
+	ltq_gpio_port[pdev->id].chip.set = ltq_gpio_set;
+	ltq_gpio_port[pdev->id].chip.request = ltq_gpio_req;
+	ltq_gpio_port[pdev->id].chip.base = PINS_PER_PORT * pdev->id;
+	ltq_gpio_port[pdev->id].chip.ngpio = PINS_PER_PORT;
+	platform_set_drvdata(pdev, &ltq_gpio_port[pdev->id]);
+	return gpiochip_add(&ltq_gpio_port[pdev->id].chip);
+}
+
+static struct platform_driver
+ltq_gpio_driver = {
+	.probe = ltq_gpio_probe,
+	.driver = {
+		.name = "ltq_gpio",
+		.owner = THIS_MODULE,
+	},
+};
+
+int __init ltq_gpio_init(void)
+{
+	int ret = platform_driver_register(&ltq_gpio_driver);
+
+	if (ret)
+		pr_info("ltq_gpio : Error registering platfom driver!");
+	return ret;
+}
+
+postcore_initcall(ltq_gpio_init);
diff --git a/arch/mips/lantiq/xway/pmu.c b/arch/mips/lantiq/xway/pmu.c
new file mode 100644
index 000000000000..9d69f01e352b
--- /dev/null
+++ b/arch/mips/lantiq/xway/pmu.c
@@ -0,0 +1,70 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/ioport.h>
+
+#include <lantiq_soc.h>
+
+/* PMU - the power management unit allows us to turn part of the core
+ * on and off
+ */
+
+/* the enable / disable registers */
+#define LTQ_PMU_PWDCR	0x1C
+#define LTQ_PMU_PWDSR	0x20
+
+#define ltq_pmu_w32(x, y)	ltq_w32((x), ltq_pmu_membase + (y))
+#define ltq_pmu_r32(x)		ltq_r32(ltq_pmu_membase + (x))
+
+static struct resource ltq_pmu_resource = {
+	.name	= "pmu",
+	.start	= LTQ_PMU_BASE_ADDR,
+	.end	= LTQ_PMU_BASE_ADDR + LTQ_PMU_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static void __iomem *ltq_pmu_membase;
+
+void ltq_pmu_enable(unsigned int module)
+{
+	int err = 1000000;
+
+	ltq_pmu_w32(ltq_pmu_r32(LTQ_PMU_PWDCR) & ~module, LTQ_PMU_PWDCR);
+	do {} while (--err && (ltq_pmu_r32(LTQ_PMU_PWDSR) & module));
+
+	if (!err)
+		panic("activating PMU module failed!\n");
+}
+EXPORT_SYMBOL(ltq_pmu_enable);
+
+void ltq_pmu_disable(unsigned int module)
+{
+	ltq_pmu_w32(ltq_pmu_r32(LTQ_PMU_PWDCR) | module, LTQ_PMU_PWDCR);
+}
+EXPORT_SYMBOL(ltq_pmu_disable);
+
+int __init ltq_pmu_init(void)
+{
+	if (insert_resource(&iomem_resource, &ltq_pmu_resource) < 0)
+		panic("Failed to insert pmu memory\n");
+
+	if (request_mem_region(ltq_pmu_resource.start,
+			resource_size(&ltq_pmu_resource), "pmu") < 0)
+		panic("Failed to request pmu memory\n");
+
+	ltq_pmu_membase = ioremap_nocache(ltq_pmu_resource.start,
+				resource_size(&ltq_pmu_resource));
+	if (!ltq_pmu_membase)
+		panic("Failed to remap pmu memory\n");
+	return 0;
+}
+
+core_initcall(ltq_pmu_init);
diff --git a/arch/mips/lantiq/xway/prom-ase.c b/arch/mips/lantiq/xway/prom-ase.c
new file mode 100644
index 000000000000..abe49f4db57f
--- /dev/null
+++ b/arch/mips/lantiq/xway/prom-ase.c
@@ -0,0 +1,39 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <asm/bootinfo.h>
+#include <asm/time.h>
+
+#include <lantiq_soc.h>
+
+#include "../prom.h"
+
+#define SOC_AMAZON_SE	"Amazon_SE"
+
+#define PART_SHIFT	12
+#define PART_MASK	0x0FFFFFFF
+#define REV_SHIFT	28
+#define REV_MASK	0xF0000000
+
+void __init ltq_soc_detect(struct ltq_soc_info *i)
+{
+	i->partnum = (ltq_r32(LTQ_MPS_CHIPID) & PART_MASK) >> PART_SHIFT;
+	i->rev = (ltq_r32(LTQ_MPS_CHIPID) & REV_MASK) >> REV_SHIFT;
+	switch (i->partnum) {
+	case SOC_ID_AMAZON_SE:
+		i->name = SOC_AMAZON_SE;
+		i->type = SOC_TYPE_AMAZON_SE;
+		break;
+
+	default:
+		unreachable();
+		break;
+	}
+}
diff --git a/arch/mips/lantiq/xway/prom-xway.c b/arch/mips/lantiq/xway/prom-xway.c
new file mode 100644
index 000000000000..1686692ac24d
--- /dev/null
+++ b/arch/mips/lantiq/xway/prom-xway.c
@@ -0,0 +1,54 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <asm/bootinfo.h>
+#include <asm/time.h>
+
+#include <lantiq_soc.h>
+
+#include "../prom.h"
+
+#define SOC_DANUBE	"Danube"
+#define SOC_TWINPASS	"Twinpass"
+#define SOC_AR9		"AR9"
+
+#define PART_SHIFT	12
+#define PART_MASK	0x0FFFFFFF
+#define REV_SHIFT	28
+#define REV_MASK	0xF0000000
+
+void __init ltq_soc_detect(struct ltq_soc_info *i)
+{
+	i->partnum = (ltq_r32(LTQ_MPS_CHIPID) & PART_MASK) >> PART_SHIFT;
+	i->rev = (ltq_r32(LTQ_MPS_CHIPID) & REV_MASK) >> REV_SHIFT;
+	switch (i->partnum) {
+	case SOC_ID_DANUBE1:
+	case SOC_ID_DANUBE2:
+		i->name = SOC_DANUBE;
+		i->type = SOC_TYPE_DANUBE;
+		break;
+
+	case SOC_ID_TWINPASS:
+		i->name = SOC_TWINPASS;
+		i->type = SOC_TYPE_DANUBE;
+		break;
+
+	case SOC_ID_ARX188:
+	case SOC_ID_ARX168:
+	case SOC_ID_ARX182:
+		i->name = SOC_AR9;
+		i->type = SOC_TYPE_AR9;
+		break;
+
+	default:
+		unreachable();
+		break;
+	}
+}
diff --git a/arch/mips/lantiq/xway/reset.c b/arch/mips/lantiq/xway/reset.c
new file mode 100644
index 000000000000..a1be36d0e490
--- /dev/null
+++ b/arch/mips/lantiq/xway/reset.c
@@ -0,0 +1,91 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/pm.h>
+#include <linux/module.h>
+#include <asm/reboot.h>
+
+#include <lantiq_soc.h>
+
+#define ltq_rcu_w32(x, y)	ltq_w32((x), ltq_rcu_membase + (y))
+#define ltq_rcu_r32(x)		ltq_r32(ltq_rcu_membase + (x))
+
+/* register definitions */
+#define LTQ_RCU_RST		0x0010
+#define LTQ_RCU_RST_ALL		0x40000000
+
+#define LTQ_RCU_RST_STAT	0x0014
+#define LTQ_RCU_STAT_SHIFT	26
+
+static struct resource ltq_rcu_resource = {
+	.name   = "rcu",
+	.start  = LTQ_RCU_BASE_ADDR,
+	.end    = LTQ_RCU_BASE_ADDR + LTQ_RCU_SIZE - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+/* remapped base addr of the reset control unit */
+static void __iomem *ltq_rcu_membase;
+
+/* This function is used by the watchdog driver */
+int ltq_reset_cause(void)
+{
+	u32 val = ltq_rcu_r32(LTQ_RCU_RST_STAT);
+	return val >> LTQ_RCU_STAT_SHIFT;
+}
+EXPORT_SYMBOL_GPL(ltq_reset_cause);
+
+static void ltq_machine_restart(char *command)
+{
+	pr_notice("System restart\n");
+	local_irq_disable();
+	ltq_rcu_w32(ltq_rcu_r32(LTQ_RCU_RST) | LTQ_RCU_RST_ALL, LTQ_RCU_RST);
+	unreachable();
+}
+
+static void ltq_machine_halt(void)
+{
+	pr_notice("System halted.\n");
+	local_irq_disable();
+	unreachable();
+}
+
+static void ltq_machine_power_off(void)
+{
+	pr_notice("Please turn off the power now.\n");
+	local_irq_disable();
+	unreachable();
+}
+
+static int __init mips_reboot_setup(void)
+{
+	/* insert and request the memory region */
+	if (insert_resource(&iomem_resource, &ltq_rcu_resource) < 0)
+		panic("Failed to insert rcu memory\n");
+
+	if (request_mem_region(ltq_rcu_resource.start,
+			resource_size(&ltq_rcu_resource), "rcu") < 0)
+		panic("Failed to request rcu memory\n");
+
+	/* remap rcu register range */
+	ltq_rcu_membase = ioremap_nocache(ltq_rcu_resource.start,
+				resource_size(&ltq_rcu_resource));
+	if (!ltq_rcu_membase)
+		panic("Failed to remap rcu memory\n");
+
+	_machine_restart = ltq_machine_restart;
+	_machine_halt = ltq_machine_halt;
+	pm_power_off = ltq_machine_power_off;
+
+	return 0;
+}
+
+arch_initcall(mips_reboot_setup);
-- 
cgit v1.2.3


From e47d488935ed0b2dd3d59d3ba4e13956ff6849c0 Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Wed, 30 Mar 2011 09:27:49 +0200
Subject: MIPS: Lantiq: Add PCI controller support.

The Lantiq family of SoCs have a EBU (External Bus Unit). This patch adds
the driver that allows us to use the EBU as a PCI controller. In order for
PCI to work the EBU is set to endianess swap all the data. In addition we
need to make use of SWAP_IO_SPACE for device->host DMA to work.

The clock of the PCI works in several modes (internal/external). If this
is not configured correctly the SoC will hang.

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Ralph Hempel <ralph.hempel@lantiq.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2250/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 .../mips/include/asm/mach-lantiq/lantiq_platform.h |  46 ++++
 arch/mips/pci/Makefile                             |   1 +
 arch/mips/pci/ops-lantiq.c                         | 116 ++++++++
 arch/mips/pci/pci-lantiq.c                         | 297 +++++++++++++++++++++
 arch/mips/pci/pci-lantiq.h                         |  18 ++
 5 files changed, 478 insertions(+)
 create mode 100644 arch/mips/include/asm/mach-lantiq/lantiq_platform.h
 create mode 100644 arch/mips/pci/ops-lantiq.c
 create mode 100644 arch/mips/pci/pci-lantiq.c
 create mode 100644 arch/mips/pci/pci-lantiq.h

(limited to 'arch')

diff --git a/arch/mips/include/asm/mach-lantiq/lantiq_platform.h b/arch/mips/include/asm/mach-lantiq/lantiq_platform.h
new file mode 100644
index 000000000000..1f1dba6d0736
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/lantiq_platform.h
@@ -0,0 +1,46 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LANTIQ_PLATFORM_H__
+#define _LANTIQ_PLATFORM_H__
+
+#include <linux/mtd/partitions.h>
+
+/* struct used to pass info to the pci core */
+enum {
+	PCI_CLOCK_INT = 0,
+	PCI_CLOCK_EXT
+};
+
+#define PCI_EXIN0	0x0001
+#define PCI_EXIN1	0x0002
+#define PCI_EXIN2	0x0004
+#define PCI_EXIN3	0x0008
+#define PCI_EXIN4	0x0010
+#define PCI_EXIN5	0x0020
+#define PCI_EXIN_MAX	6
+
+#define PCI_GNT1	0x0040
+#define PCI_GNT2	0x0080
+#define PCI_GNT3	0x0100
+#define PCI_GNT4	0x0200
+
+#define PCI_REQ1	0x0400
+#define PCI_REQ2	0x0800
+#define PCI_REQ3	0x1000
+#define PCI_REQ4	0x2000
+#define PCI_REQ_SHIFT	10
+#define PCI_REQ_MASK	0xf
+
+struct ltq_pci_data {
+	int clock;
+	int gpio;
+	int irq[16];
+};
+
+#endif
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index f0d5329289d1..4df879937446 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_SIBYTE_SB1250)	+= fixup-sb1250.o pci-sb1250.o
 obj-$(CONFIG_SIBYTE_BCM112X)	+= fixup-sb1250.o pci-sb1250.o
 obj-$(CONFIG_SIBYTE_BCM1x80)	+= pci-bcm1480.o pci-bcm1480ht.o
 obj-$(CONFIG_SNI_RM)		+= fixup-sni.o ops-sni.o
+obj-$(CONFIG_SOC_XWAY)		+= pci-lantiq.o ops-lantiq.o
 obj-$(CONFIG_TANBAC_TB0219)	+= fixup-tb0219.o
 obj-$(CONFIG_TANBAC_TB0226)	+= fixup-tb0226.o
 obj-$(CONFIG_TANBAC_TB0287)	+= fixup-tb0287.o
diff --git a/arch/mips/pci/ops-lantiq.c b/arch/mips/pci/ops-lantiq.c
new file mode 100644
index 000000000000..1f2afb55cc71
--- /dev/null
+++ b/arch/mips/pci/ops-lantiq.c
@@ -0,0 +1,116 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <asm/addrspace.h>
+#include <linux/vmalloc.h>
+
+#include <lantiq_soc.h>
+
+#include "pci-lantiq.h"
+
+#define LTQ_PCI_CFG_BUSNUM_SHF 16
+#define LTQ_PCI_CFG_DEVNUM_SHF 11
+#define LTQ_PCI_CFG_FUNNUM_SHF 8
+
+#define PCI_ACCESS_READ  0
+#define PCI_ACCESS_WRITE 1
+
+static int ltq_pci_config_access(unsigned char access_type, struct pci_bus *bus,
+	unsigned int devfn, unsigned int where, u32 *data)
+{
+	unsigned long cfg_base;
+	unsigned long flags;
+	u32 temp;
+
+	/* we support slot from 0 to 15 dev_fn & 0x68 (AD29) is the
+	   SoC itself */
+	if ((bus->number != 0) || ((devfn & 0xf8) > 0x78)
+		|| ((devfn & 0xf8) == 0) || ((devfn & 0xf8) == 0x68))
+		return 1;
+
+	spin_lock_irqsave(&ebu_lock, flags);
+
+	cfg_base = (unsigned long) ltq_pci_mapped_cfg;
+	cfg_base |= (bus->number << LTQ_PCI_CFG_BUSNUM_SHF) | (devfn <<
+			LTQ_PCI_CFG_FUNNUM_SHF) | (where & ~0x3);
+
+	/* Perform access */
+	if (access_type == PCI_ACCESS_WRITE) {
+		ltq_w32(swab32(*data), ((u32 *)cfg_base));
+	} else {
+		*data = ltq_r32(((u32 *)(cfg_base)));
+		*data = swab32(*data);
+	}
+	wmb();
+
+	/* clean possible Master abort */
+	cfg_base = (unsigned long) ltq_pci_mapped_cfg;
+	cfg_base |= (0x0 << LTQ_PCI_CFG_FUNNUM_SHF) + 4;
+	temp = ltq_r32(((u32 *)(cfg_base)));
+	temp = swab32(temp);
+	cfg_base = (unsigned long) ltq_pci_mapped_cfg;
+	cfg_base |= (0x68 << LTQ_PCI_CFG_FUNNUM_SHF) + 4;
+	ltq_w32(temp, ((u32 *)cfg_base));
+
+	spin_unlock_irqrestore(&ebu_lock, flags);
+
+	if (((*data) == 0xffffffff) && (access_type == PCI_ACCESS_READ))
+		return 1;
+
+	return 0;
+}
+
+int ltq_pci_read_config_dword(struct pci_bus *bus, unsigned int devfn,
+	int where, int size, u32 *val)
+{
+	u32 data = 0;
+
+	if (ltq_pci_config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (size == 1)
+		*val = (data >> ((where & 3) << 3)) & 0xff;
+	else if (size == 2)
+		*val = (data >> ((where & 3) << 3)) & 0xffff;
+	else
+		*val = data;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+int ltq_pci_write_config_dword(struct pci_bus *bus, unsigned int devfn,
+	int where, int size, u32 val)
+{
+	u32 data = 0;
+
+	if (size == 4) {
+		data = val;
+	} else {
+		if (ltq_pci_config_access(PCI_ACCESS_READ, bus,
+				devfn, where, &data))
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+		if (size == 1)
+			data = (data & ~(0xff << ((where & 3) << 3))) |
+				(val << ((where & 3) << 3));
+		else if (size == 2)
+			data = (data & ~(0xffff << ((where & 3) << 3))) |
+				(val << ((where & 3) << 3));
+	}
+
+	if (ltq_pci_config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return PCIBIOS_SUCCESSFUL;
+}
diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c
new file mode 100644
index 000000000000..603d7493e966
--- /dev/null
+++ b/arch/mips/pci/pci-lantiq.c
@@ -0,0 +1,297 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/platform_device.h>
+
+#include <asm/pci.h>
+#include <asm/gpio.h>
+#include <asm/addrspace.h>
+
+#include <lantiq_soc.h>
+#include <lantiq_irq.h>
+#include <lantiq_platform.h>
+
+#include "pci-lantiq.h"
+
+#define LTQ_PCI_CFG_BASE		0x17000000
+#define LTQ_PCI_CFG_SIZE		0x00008000
+#define LTQ_PCI_MEM_BASE		0x18000000
+#define LTQ_PCI_MEM_SIZE		0x02000000
+#define LTQ_PCI_IO_BASE			0x1AE00000
+#define LTQ_PCI_IO_SIZE			0x00200000
+
+#define PCI_CR_FCI_ADDR_MAP0		0x00C0
+#define PCI_CR_FCI_ADDR_MAP1		0x00C4
+#define PCI_CR_FCI_ADDR_MAP2		0x00C8
+#define PCI_CR_FCI_ADDR_MAP3		0x00CC
+#define PCI_CR_FCI_ADDR_MAP4		0x00D0
+#define PCI_CR_FCI_ADDR_MAP5		0x00D4
+#define PCI_CR_FCI_ADDR_MAP6		0x00D8
+#define PCI_CR_FCI_ADDR_MAP7		0x00DC
+#define PCI_CR_CLK_CTRL			0x0000
+#define PCI_CR_PCI_MOD			0x0030
+#define PCI_CR_PC_ARB			0x0080
+#define PCI_CR_FCI_ADDR_MAP11hg		0x00E4
+#define PCI_CR_BAR11MASK		0x0044
+#define PCI_CR_BAR12MASK		0x0048
+#define PCI_CR_BAR13MASK		0x004C
+#define PCI_CS_BASE_ADDR1		0x0010
+#define PCI_CR_PCI_ADDR_MAP11		0x0064
+#define PCI_CR_FCI_BURST_LENGTH		0x00E8
+#define PCI_CR_PCI_EOI			0x002C
+#define PCI_CS_STS_CMD			0x0004
+
+#define PCI_MASTER0_REQ_MASK_2BITS	8
+#define PCI_MASTER1_REQ_MASK_2BITS	10
+#define PCI_MASTER2_REQ_MASK_2BITS	12
+#define INTERNAL_ARB_ENABLE_BIT		0
+
+#define LTQ_CGU_IFCCR		0x0018
+#define LTQ_CGU_PCICR		0x0034
+
+#define ltq_pci_w32(x, y)	ltq_w32((x), ltq_pci_membase + (y))
+#define ltq_pci_r32(x)		ltq_r32(ltq_pci_membase + (x))
+
+#define ltq_pci_cfg_w32(x, y)	ltq_w32((x), ltq_pci_mapped_cfg + (y))
+#define ltq_pci_cfg_r32(x)	ltq_r32(ltq_pci_mapped_cfg + (x))
+
+struct ltq_pci_gpio_map {
+	int pin;
+	int alt0;
+	int alt1;
+	int dir;
+	char *name;
+};
+
+/* the pci core can make use of the following gpios */
+static struct ltq_pci_gpio_map ltq_pci_gpio_map[] = {
+	{ 0, 1, 0, 0, "pci-exin0" },
+	{ 1, 1, 0, 0, "pci-exin1" },
+	{ 2, 1, 0, 0, "pci-exin2" },
+	{ 39, 1, 0, 0, "pci-exin3" },
+	{ 10, 1, 0, 0, "pci-exin4" },
+	{ 9, 1, 0, 0, "pci-exin5" },
+	{ 30, 1, 0, 1, "pci-gnt1" },
+	{ 23, 1, 0, 1, "pci-gnt2" },
+	{ 19, 1, 0, 1, "pci-gnt3" },
+	{ 38, 1, 0, 1, "pci-gnt4" },
+	{ 29, 1, 0, 0, "pci-req1" },
+	{ 31, 1, 0, 0, "pci-req2" },
+	{ 3, 1, 0, 0, "pci-req3" },
+	{ 37, 1, 0, 0, "pci-req4" },
+};
+
+__iomem void *ltq_pci_mapped_cfg;
+static __iomem void *ltq_pci_membase;
+
+int (*ltqpci_plat_dev_init)(struct pci_dev *dev) = NULL;
+
+/* Since the PCI REQ pins can be reused for other functionality, make it
+   possible to exclude those from interpretation by the PCI controller */
+static int ltq_pci_req_mask = 0xf;
+
+static int *ltq_pci_irq_map;
+
+struct pci_ops ltq_pci_ops = {
+	.read	= ltq_pci_read_config_dword,
+	.write	= ltq_pci_write_config_dword
+};
+
+static struct resource pci_io_resource = {
+	.name	= "pci io space",
+	.start	= LTQ_PCI_IO_BASE,
+	.end	= LTQ_PCI_IO_BASE + LTQ_PCI_IO_SIZE - 1,
+	.flags	= IORESOURCE_IO
+};
+
+static struct resource pci_mem_resource = {
+	.name	= "pci memory space",
+	.start	= LTQ_PCI_MEM_BASE,
+	.end	= LTQ_PCI_MEM_BASE + LTQ_PCI_MEM_SIZE - 1,
+	.flags	= IORESOURCE_MEM
+};
+
+static struct pci_controller ltq_pci_controller = {
+	.pci_ops	= &ltq_pci_ops,
+	.mem_resource	= &pci_mem_resource,
+	.mem_offset	= 0x00000000UL,
+	.io_resource	= &pci_io_resource,
+	.io_offset	= 0x00000000UL,
+};
+
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+	if (ltqpci_plat_dev_init)
+		return ltqpci_plat_dev_init(dev);
+
+	return 0;
+}
+
+static u32 ltq_calc_bar11mask(void)
+{
+	u32 mem, bar11mask;
+
+	/* BAR11MASK value depends on available memory on system. */
+	mem = num_physpages * PAGE_SIZE;
+	bar11mask = (0x0ffffff0 & ~((1 << (fls(mem) - 1)) - 1)) | 8;
+
+	return bar11mask;
+}
+
+static void ltq_pci_setup_gpio(int gpio)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(ltq_pci_gpio_map); i++) {
+		if (gpio & (1 << i)) {
+			ltq_gpio_request(ltq_pci_gpio_map[i].pin,
+				ltq_pci_gpio_map[i].alt0,
+				ltq_pci_gpio_map[i].alt1,
+				ltq_pci_gpio_map[i].dir,
+				ltq_pci_gpio_map[i].name);
+		}
+	}
+	ltq_gpio_request(21, 0, 0, 1, "pci-reset");
+	ltq_pci_req_mask = (gpio >> PCI_REQ_SHIFT) & PCI_REQ_MASK;
+}
+
+static int __devinit ltq_pci_startup(struct ltq_pci_data *conf)
+{
+	u32 temp_buffer;
+
+	/* set clock to 33Mhz */
+	ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~0xf00000, LTQ_CGU_IFCCR);
+	ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | 0x800000, LTQ_CGU_IFCCR);
+
+	/* external or internal clock ? */
+	if (conf->clock) {
+		ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~(1 << 16),
+			LTQ_CGU_IFCCR);
+		ltq_cgu_w32((1 << 30), LTQ_CGU_PCICR);
+	} else {
+		ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | (1 << 16),
+			LTQ_CGU_IFCCR);
+		ltq_cgu_w32((1 << 31) | (1 << 30), LTQ_CGU_PCICR);
+	}
+
+	/* setup pci clock and gpis used by pci */
+	ltq_pci_setup_gpio(conf->gpio);
+
+	/* enable auto-switching between PCI and EBU */
+	ltq_pci_w32(0xa, PCI_CR_CLK_CTRL);
+
+	/* busy, i.e. configuration is not done, PCI access has to be retried */
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_PCI_MOD) & ~(1 << 24), PCI_CR_PCI_MOD);
+	wmb();
+	/* BUS Master/IO/MEM access */
+	ltq_pci_cfg_w32(ltq_pci_cfg_r32(PCI_CS_STS_CMD) | 7, PCI_CS_STS_CMD);
+
+	/* enable external 2 PCI masters */
+	temp_buffer = ltq_pci_r32(PCI_CR_PC_ARB);
+	temp_buffer &= (~(ltq_pci_req_mask << 16));
+	/* enable internal arbiter */
+	temp_buffer |= (1 << INTERNAL_ARB_ENABLE_BIT);
+	/* enable internal PCI master reqest */
+	temp_buffer &= (~(3 << PCI_MASTER0_REQ_MASK_2BITS));
+
+	/* enable EBU request */
+	temp_buffer &= (~(3 << PCI_MASTER1_REQ_MASK_2BITS));
+
+	/* enable all external masters request */
+	temp_buffer &= (~(3 << PCI_MASTER2_REQ_MASK_2BITS));
+	ltq_pci_w32(temp_buffer, PCI_CR_PC_ARB);
+	wmb();
+
+	/* setup BAR memory regions */
+	ltq_pci_w32(0x18000000, PCI_CR_FCI_ADDR_MAP0);
+	ltq_pci_w32(0x18400000, PCI_CR_FCI_ADDR_MAP1);
+	ltq_pci_w32(0x18800000, PCI_CR_FCI_ADDR_MAP2);
+	ltq_pci_w32(0x18c00000, PCI_CR_FCI_ADDR_MAP3);
+	ltq_pci_w32(0x19000000, PCI_CR_FCI_ADDR_MAP4);
+	ltq_pci_w32(0x19400000, PCI_CR_FCI_ADDR_MAP5);
+	ltq_pci_w32(0x19800000, PCI_CR_FCI_ADDR_MAP6);
+	ltq_pci_w32(0x19c00000, PCI_CR_FCI_ADDR_MAP7);
+	ltq_pci_w32(0x1ae00000, PCI_CR_FCI_ADDR_MAP11hg);
+	ltq_pci_w32(ltq_calc_bar11mask(), PCI_CR_BAR11MASK);
+	ltq_pci_w32(0, PCI_CR_PCI_ADDR_MAP11);
+	ltq_pci_w32(0, PCI_CS_BASE_ADDR1);
+	/* both TX and RX endian swap are enabled */
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_PCI_EOI) | 3, PCI_CR_PCI_EOI);
+	wmb();
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_BAR12MASK) | 0x80000000,
+		PCI_CR_BAR12MASK);
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_BAR13MASK) | 0x80000000,
+		PCI_CR_BAR13MASK);
+	/*use 8 dw burst length */
+	ltq_pci_w32(0x303, PCI_CR_FCI_BURST_LENGTH);
+	ltq_pci_w32(ltq_pci_r32(PCI_CR_PCI_MOD) | (1 << 24), PCI_CR_PCI_MOD);
+	wmb();
+
+	/* setup irq line */
+	ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_CON) | 0xc, LTQ_EBU_PCC_CON);
+	ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_IEN) | 0x10, LTQ_EBU_PCC_IEN);
+
+	/* toggle reset pin */
+	__gpio_set_value(21, 0);
+	wmb();
+	mdelay(1);
+	__gpio_set_value(21, 1);
+	return 0;
+}
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	if (ltq_pci_irq_map[slot])
+		return ltq_pci_irq_map[slot];
+	printk(KERN_ERR "lq_pci: trying to map irq for unknown slot %d\n",
+		slot);
+
+	return 0;
+}
+
+static int __devinit ltq_pci_probe(struct platform_device *pdev)
+{
+	struct ltq_pci_data *ltq_pci_data =
+		(struct ltq_pci_data *) pdev->dev.platform_data;
+	pci_probe_only = 0;
+	ltq_pci_irq_map = ltq_pci_data->irq;
+	ltq_pci_membase = ioremap_nocache(PCI_CR_BASE_ADDR, PCI_CR_SIZE);
+	ltq_pci_mapped_cfg =
+		ioremap_nocache(LTQ_PCI_CFG_BASE, LTQ_PCI_CFG_BASE);
+	ltq_pci_controller.io_map_base =
+		(unsigned long)ioremap(LTQ_PCI_IO_BASE, LTQ_PCI_IO_SIZE - 1);
+	ltq_pci_startup(ltq_pci_data);
+	register_pci_controller(&ltq_pci_controller);
+
+	return 0;
+}
+
+static struct platform_driver
+ltq_pci_driver = {
+	.probe = ltq_pci_probe,
+	.driver = {
+		.name = "ltq_pci",
+		.owner = THIS_MODULE,
+	},
+};
+
+int __init pcibios_init(void)
+{
+	int ret = platform_driver_register(&ltq_pci_driver);
+	if (ret)
+		printk(KERN_INFO "ltq_pci: Error registering platfom driver!");
+	return ret;
+}
+
+arch_initcall(pcibios_init);
diff --git a/arch/mips/pci/pci-lantiq.h b/arch/mips/pci/pci-lantiq.h
new file mode 100644
index 000000000000..66bf6cd6be3c
--- /dev/null
+++ b/arch/mips/pci/pci-lantiq.h
@@ -0,0 +1,18 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_PCI_H__
+#define _LTQ_PCI_H__
+
+extern __iomem void *ltq_pci_mapped_cfg;
+extern int ltq_pci_read_config_dword(struct pci_bus *bus,
+	unsigned int devfn, int where, int size, u32 *val);
+extern int ltq_pci_write_config_dword(struct pci_bus *bus,
+	unsigned int devfn, int where, int size, u32 val);
+
+#endif
-- 
cgit v1.2.3


From 24aff71fa8df0d6a73dab17f3f2285a24b8f658f Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Wed, 30 Mar 2011 09:27:53 +0200
Subject: MIPS: Lantiq: Add platform device support

This patch adds the wrappers for registering our platform devices.

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Ralph Hempel <ralph.hempel@lantiq.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2254/
Patchwork: https://patchwork.linux-mips.org/patch/2360/
Patchwork: https://patchwork.linux-mips.org/patch/2359/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/lantiq/Makefile       |   2 +-
 arch/mips/lantiq/devices.c      | 122 ++++++++++++++++++++++++++++++++++++++++
 arch/mips/lantiq/devices.h      |  23 ++++++++
 arch/mips/lantiq/xway/Makefile  |   2 +-
 arch/mips/lantiq/xway/devices.c |  98 ++++++++++++++++++++++++++++++++
 arch/mips/lantiq/xway/devices.h |  18 ++++++
 6 files changed, 263 insertions(+), 2 deletions(-)
 create mode 100644 arch/mips/lantiq/devices.c
 create mode 100644 arch/mips/lantiq/devices.h
 create mode 100644 arch/mips/lantiq/xway/devices.c
 create mode 100644 arch/mips/lantiq/xway/devices.h

(limited to 'arch')

diff --git a/arch/mips/lantiq/Makefile b/arch/mips/lantiq/Makefile
index a268391eb45e..e5dae0e24b00 100644
--- a/arch/mips/lantiq/Makefile
+++ b/arch/mips/lantiq/Makefile
@@ -4,7 +4,7 @@
 # under the terms of the GNU General Public License version 2 as published
 # by the Free Software Foundation.
 
-obj-y := irq.o setup.o clk.o prom.o
+obj-y := irq.o setup.o clk.o prom.o devices.o
 
 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 
diff --git a/arch/mips/lantiq/devices.c b/arch/mips/lantiq/devices.c
new file mode 100644
index 000000000000..7b82c34cb169
--- /dev/null
+++ b/arch/mips/lantiq/devices.c
@@ -0,0 +1,122 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/platform_device.h>
+#include <linux/leds.h>
+#include <linux/etherdevice.h>
+#include <linux/reboot.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/leds.h>
+
+#include <asm/bootinfo.h>
+#include <asm/irq.h>
+
+#include <lantiq_soc.h>
+
+#include "devices.h"
+
+/* nor flash */
+static struct resource ltq_nor_resource = {
+	.name	= "nor",
+	.start	= LTQ_FLASH_START,
+	.end	= LTQ_FLASH_START + LTQ_FLASH_MAX - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+static struct platform_device ltq_nor = {
+	.name		= "ltq_nor",
+	.resource	= &ltq_nor_resource,
+	.num_resources	= 1,
+};
+
+void __init ltq_register_nor(struct physmap_flash_data *data)
+{
+	ltq_nor.dev.platform_data = data;
+	platform_device_register(&ltq_nor);
+}
+
+/* watchdog */
+static struct resource ltq_wdt_resource = {
+	.name	= "watchdog",
+	.start  = LTQ_WDT_BASE_ADDR,
+	.end    = LTQ_WDT_BASE_ADDR + LTQ_WDT_SIZE - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+void __init ltq_register_wdt(void)
+{
+	platform_device_register_simple("ltq_wdt", 0, &ltq_wdt_resource, 1);
+}
+
+/* asc ports */
+static struct resource ltq_asc0_resources[] = {
+	{
+		.name	= "asc0",
+		.start  = LTQ_ASC0_BASE_ADDR,
+		.end    = LTQ_ASC0_BASE_ADDR + LTQ_ASC_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	IRQ_RES(tx, LTQ_ASC_TIR(0)),
+	IRQ_RES(rx, LTQ_ASC_RIR(0)),
+	IRQ_RES(err, LTQ_ASC_EIR(0)),
+};
+
+static struct resource ltq_asc1_resources[] = {
+	{
+		.name	= "asc1",
+		.start  = LTQ_ASC1_BASE_ADDR,
+		.end    = LTQ_ASC1_BASE_ADDR + LTQ_ASC_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	IRQ_RES(tx, LTQ_ASC_TIR(1)),
+	IRQ_RES(rx, LTQ_ASC_RIR(1)),
+	IRQ_RES(err, LTQ_ASC_EIR(1)),
+};
+
+void __init ltq_register_asc(int port)
+{
+	switch (port) {
+	case 0:
+		platform_device_register_simple("ltq_asc", 0,
+			ltq_asc0_resources, ARRAY_SIZE(ltq_asc0_resources));
+		break;
+	case 1:
+		platform_device_register_simple("ltq_asc", 1,
+			ltq_asc1_resources, ARRAY_SIZE(ltq_asc1_resources));
+		break;
+	default:
+		break;
+	}
+}
+
+#ifdef CONFIG_PCI
+/* pci */
+static struct platform_device ltq_pci = {
+	.name		= "ltq_pci",
+	.num_resources	= 0,
+};
+
+void __init ltq_register_pci(struct ltq_pci_data *data)
+{
+	ltq_pci.dev.platform_data = data;
+	platform_device_register(&ltq_pci);
+}
+#else
+void __init ltq_register_pci(struct ltq_pci_data *data)
+{
+	pr_err("kernel is compiled without PCI support\n");
+}
+#endif
diff --git a/arch/mips/lantiq/devices.h b/arch/mips/lantiq/devices.h
new file mode 100644
index 000000000000..2947bb19a528
--- /dev/null
+++ b/arch/mips/lantiq/devices.h
@@ -0,0 +1,23 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_DEVICES_H__
+#define _LTQ_DEVICES_H__
+
+#include <lantiq_platform.h>
+#include <linux/mtd/physmap.h>
+
+#define IRQ_RES(resname, irq) \
+	{.name = #resname, .start = (irq), .flags = IORESOURCE_IRQ}
+
+extern void ltq_register_nor(struct physmap_flash_data *data);
+extern void ltq_register_wdt(void);
+extern void ltq_register_asc(int port);
+extern void ltq_register_pci(struct ltq_pci_data *data);
+
+#endif
diff --git a/arch/mips/lantiq/xway/Makefile b/arch/mips/lantiq/xway/Makefile
index 9c85ff9184c6..74ce438ad8e7 100644
--- a/arch/mips/lantiq/xway/Makefile
+++ b/arch/mips/lantiq/xway/Makefile
@@ -1,4 +1,4 @@
-obj-y := pmu.o ebu.o reset.o gpio.o
+obj-y := pmu.o ebu.o reset.o gpio.o devices.o
 
 obj-$(CONFIG_SOC_XWAY) += clk-xway.o prom-xway.o
 obj-$(CONFIG_SOC_AMAZON_SE) += clk-ase.o prom-ase.o
diff --git a/arch/mips/lantiq/xway/devices.c b/arch/mips/lantiq/xway/devices.c
new file mode 100644
index 000000000000..a71b3b532a01
--- /dev/null
+++ b/arch/mips/lantiq/xway/devices.c
@@ -0,0 +1,98 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/mtd/physmap.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/platform_device.h>
+#include <linux/leds.h>
+#include <linux/etherdevice.h>
+#include <linux/reboot.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/leds.h>
+
+#include <asm/bootinfo.h>
+#include <asm/irq.h>
+
+#include <lantiq_soc.h>
+#include <lantiq_irq.h>
+#include <lantiq_platform.h>
+
+#include "devices.h"
+
+/* gpio */
+static struct resource ltq_gpio_resource[] = {
+	{
+		.name	= "gpio0",
+		.start  = LTQ_GPIO0_BASE_ADDR,
+		.end    = LTQ_GPIO0_BASE_ADDR + LTQ_GPIO_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	}, {
+		.name	= "gpio1",
+		.start  = LTQ_GPIO1_BASE_ADDR,
+		.end    = LTQ_GPIO1_BASE_ADDR + LTQ_GPIO_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	}, {
+		.name	= "gpio2",
+		.start  = LTQ_GPIO2_BASE_ADDR,
+		.end    = LTQ_GPIO2_BASE_ADDR + LTQ_GPIO_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	}
+};
+
+void __init ltq_register_gpio(void)
+{
+	platform_device_register_simple("ltq_gpio", 0,
+		&ltq_gpio_resource[0], 1);
+	platform_device_register_simple("ltq_gpio", 1,
+		&ltq_gpio_resource[1], 1);
+
+	/* AR9 and VR9 have an extra gpio block */
+	if (ltq_is_ar9() || ltq_is_vr9()) {
+		platform_device_register_simple("ltq_gpio", 2,
+			&ltq_gpio_resource[2], 1);
+	}
+}
+
+/* serial to parallel conversion */
+static struct resource ltq_stp_resource = {
+	.name   = "stp",
+	.start  = LTQ_STP_BASE_ADDR,
+	.end    = LTQ_STP_BASE_ADDR + LTQ_STP_SIZE - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+void __init ltq_register_gpio_stp(void)
+{
+	platform_device_register_simple("ltq_stp", 0, &ltq_stp_resource, 1);
+}
+
+/* asc ports - amazon se has its own serial mapping */
+static struct resource ltq_ase_asc_resources[] = {
+	{
+		.name	= "asc0",
+		.start  = LTQ_ASC1_BASE_ADDR,
+		.end    = LTQ_ASC1_BASE_ADDR + LTQ_ASC_SIZE - 1,
+		.flags  = IORESOURCE_MEM,
+	},
+	IRQ_RES(tx, LTQ_ASC_ASE_TIR),
+	IRQ_RES(rx, LTQ_ASC_ASE_RIR),
+	IRQ_RES(err, LTQ_ASC_ASE_EIR),
+};
+
+void __init ltq_register_ase_asc(void)
+{
+	platform_device_register_simple("ltq_asc", 0,
+		ltq_ase_asc_resources, ARRAY_SIZE(ltq_ase_asc_resources));
+}
diff --git a/arch/mips/lantiq/xway/devices.h b/arch/mips/lantiq/xway/devices.h
new file mode 100644
index 000000000000..51f56b5a9fbd
--- /dev/null
+++ b/arch/mips/lantiq/xway/devices.h
@@ -0,0 +1,18 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LTQ_DEVICES_XWAY_H__
+#define _LTQ_DEVICES_XWAY_H__
+
+#include "../devices.h"
+
+extern void ltq_register_gpio(void);
+extern void ltq_register_gpio_stp(void);
+extern void ltq_register_ase_asc(void);
+
+#endif
-- 
cgit v1.2.3


From a053ac17024561f3a2fd02424b5f92823282b5ad Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Wed, 30 Mar 2011 09:27:54 +0200
Subject: MIPS: Lantiq: Add mips_machine support

This patch adds support for Gabor's mips_machine patch.

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Ralph Hempel <ralph.hempel@lantiq.com>
Cc: Gabor Juhos <juhosg@openwrt.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2251/
Patchwork: https://patchwork.linux-mips.org/patch/2358/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/Kconfig                  |  1 +
 arch/mips/lantiq/machtypes.h       | 18 ++++++++++++++++++
 arch/mips/lantiq/prom.h            |  1 +
 arch/mips/lantiq/setup.c           | 25 +++++++++++++++++++++++++
 arch/mips/lantiq/xway/Makefile     |  4 ++--
 arch/mips/lantiq/xway/setup-ase.c  | 19 +++++++++++++++++++
 arch/mips/lantiq/xway/setup-xway.c | 20 ++++++++++++++++++++
 7 files changed, 86 insertions(+), 2 deletions(-)
 create mode 100644 arch/mips/lantiq/machtypes.h
 create mode 100644 arch/mips/lantiq/xway/setup-ase.c
 create mode 100644 arch/mips/lantiq/xway/setup-xway.c

(limited to 'arch')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b3b49996462e..2d1cf9740953 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -228,6 +228,7 @@ config LANTIQ
 	select SWAP_IO_SPACE
 	select BOOT_RAW
 	select HAVE_CLK
+	select MIPS_MACHINE
 
 config LASAT
 	bool "LASAT Networks platforms"
diff --git a/arch/mips/lantiq/machtypes.h b/arch/mips/lantiq/machtypes.h
new file mode 100644
index 000000000000..ffcacfc0e5ed
--- /dev/null
+++ b/arch/mips/lantiq/machtypes.h
@@ -0,0 +1,18 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _LANTIQ_MACH_H__
+#define _LANTIQ_MACH_H__
+
+#include <asm/mips_machine.h>
+
+enum lantiq_mach_type {
+	LTQ_MACH_GENERIC = 0,
+};
+
+#endif
diff --git a/arch/mips/lantiq/prom.h b/arch/mips/lantiq/prom.h
index 4165ad156782..b4229d94280f 100644
--- a/arch/mips/lantiq/prom.h
+++ b/arch/mips/lantiq/prom.h
@@ -20,5 +20,6 @@ struct ltq_soc_info {
 };
 
 extern void ltq_soc_detect(struct ltq_soc_info *i);
+extern void ltq_soc_setup(void);
 
 #endif
diff --git a/arch/mips/lantiq/setup.c b/arch/mips/lantiq/setup.c
index 79a2b0c5cc65..9b8af77ed0f9 100644
--- a/arch/mips/lantiq/setup.c
+++ b/arch/mips/lantiq/setup.c
@@ -14,6 +14,10 @@
 
 #include <lantiq_soc.h>
 
+#include "machtypes.h"
+#include "devices.h"
+#include "prom.h"
+
 void __init plat_mem_setup(void)
 {
 	/* assume 16M as default incase uboot fails to pass proper ramsize */
@@ -39,3 +43,24 @@ void __init plat_mem_setup(void)
 	memsize *= 1024 * 1024;
 	add_memory_region(0x00000000, memsize, BOOT_MEM_RAM);
 }
+
+static int __init
+lantiq_setup(void)
+{
+	ltq_soc_setup();
+	mips_machine_setup();
+	return 0;
+}
+
+arch_initcall(lantiq_setup);
+
+static void __init
+lantiq_generic_init(void)
+{
+	/* Nothing to do */
+}
+
+MIPS_MACHINE(LTQ_MACH_GENERIC,
+	     "Generic",
+	     "Generic Lantiq based board",
+	     lantiq_generic_init);
diff --git a/arch/mips/lantiq/xway/Makefile b/arch/mips/lantiq/xway/Makefile
index 74ce438ad8e7..8c06a97b5ca6 100644
--- a/arch/mips/lantiq/xway/Makefile
+++ b/arch/mips/lantiq/xway/Makefile
@@ -1,4 +1,4 @@
 obj-y := pmu.o ebu.o reset.o gpio.o devices.o
 
-obj-$(CONFIG_SOC_XWAY) += clk-xway.o prom-xway.o
-obj-$(CONFIG_SOC_AMAZON_SE) += clk-ase.o prom-ase.o
+obj-$(CONFIG_SOC_XWAY) += clk-xway.o prom-xway.o setup-xway.o
+obj-$(CONFIG_SOC_AMAZON_SE) += clk-ase.o prom-ase.o setup-ase.o
diff --git a/arch/mips/lantiq/xway/setup-ase.c b/arch/mips/lantiq/xway/setup-ase.c
new file mode 100644
index 000000000000..f6f326798a39
--- /dev/null
+++ b/arch/mips/lantiq/xway/setup-ase.c
@@ -0,0 +1,19 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <lantiq_soc.h>
+
+#include "../prom.h"
+#include "devices.h"
+
+void __init ltq_soc_setup(void)
+{
+	ltq_register_ase_asc();
+	ltq_register_gpio();
+	ltq_register_wdt();
+}
diff --git a/arch/mips/lantiq/xway/setup-xway.c b/arch/mips/lantiq/xway/setup-xway.c
new file mode 100644
index 000000000000..c292f643a858
--- /dev/null
+++ b/arch/mips/lantiq/xway/setup-xway.c
@@ -0,0 +1,20 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <lantiq_soc.h>
+
+#include "../prom.h"
+#include "devices.h"
+
+void __init ltq_soc_setup(void)
+{
+	ltq_register_asc(0);
+	ltq_register_asc(1);
+	ltq_register_gpio();
+	ltq_register_wdt();
+}
-- 
cgit v1.2.3


From 973c32eb7f2d5c45d0e68b0083ead9ee763d9a6f Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Wed, 30 Mar 2011 09:27:55 +0200
Subject: MIPS: Lantiq: Add machtypes for lantiq eval kits

This patch adds mach specific code for the Lantiq EASY50712/50601 evaluation
boards

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Ralph Hempel <ralph.hempel@lantiq.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2255/
Patchwork: https://patchwork.linux-mips.org/patch/2361/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/lantiq/Kconfig               |  2 +
 arch/mips/lantiq/machtypes.h           |  2 +
 arch/mips/lantiq/xway/Kconfig          | 23 ++++++++++++
 arch/mips/lantiq/xway/Makefile         |  3 ++
 arch/mips/lantiq/xway/mach-easy50601.c | 57 ++++++++++++++++++++++++++++
 arch/mips/lantiq/xway/mach-easy50712.c | 68 ++++++++++++++++++++++++++++++++++
 6 files changed, 155 insertions(+)
 create mode 100644 arch/mips/lantiq/xway/Kconfig
 create mode 100644 arch/mips/lantiq/xway/mach-easy50601.c
 create mode 100644 arch/mips/lantiq/xway/mach-easy50712.c

(limited to 'arch')

diff --git a/arch/mips/lantiq/Kconfig b/arch/mips/lantiq/Kconfig
index 2780461e3258..3fccf2104513 100644
--- a/arch/mips/lantiq/Kconfig
+++ b/arch/mips/lantiq/Kconfig
@@ -18,4 +18,6 @@ config SOC_XWAY
 	select HW_HAS_PCI
 endchoice
 
+source "arch/mips/lantiq/xway/Kconfig"
+
 endif
diff --git a/arch/mips/lantiq/machtypes.h b/arch/mips/lantiq/machtypes.h
index ffcacfc0e5ed..7e01b8c484eb 100644
--- a/arch/mips/lantiq/machtypes.h
+++ b/arch/mips/lantiq/machtypes.h
@@ -13,6 +13,8 @@
 
 enum lantiq_mach_type {
 	LTQ_MACH_GENERIC = 0,
+	LTQ_MACH_EASY50712,	/* Danube evaluation board */
+	LTQ_MACH_EASY50601,	/* Amazon SE evaluation board */
 };
 
 #endif
diff --git a/arch/mips/lantiq/xway/Kconfig b/arch/mips/lantiq/xway/Kconfig
new file mode 100644
index 000000000000..2b857de36620
--- /dev/null
+++ b/arch/mips/lantiq/xway/Kconfig
@@ -0,0 +1,23 @@
+if SOC_XWAY
+
+menu "MIPS Machine"
+
+config LANTIQ_MACH_EASY50712
+	bool "Easy50712 - Danube"
+	default y
+
+endmenu
+
+endif
+
+if SOC_AMAZON_SE
+
+menu "MIPS Machine"
+
+config LANTIQ_MACH_EASY50601
+	bool "Easy50601 - Amazon SE"
+	default y
+
+endmenu
+
+endif
diff --git a/arch/mips/lantiq/xway/Makefile b/arch/mips/lantiq/xway/Makefile
index 8c06a97b5ca6..b1d3640c3531 100644
--- a/arch/mips/lantiq/xway/Makefile
+++ b/arch/mips/lantiq/xway/Makefile
@@ -2,3 +2,6 @@ obj-y := pmu.o ebu.o reset.o gpio.o devices.o
 
 obj-$(CONFIG_SOC_XWAY) += clk-xway.o prom-xway.o setup-xway.o
 obj-$(CONFIG_SOC_AMAZON_SE) += clk-ase.o prom-ase.o setup-ase.o
+
+obj-$(CONFIG_LANTIQ_MACH_EASY50712) += mach-easy50712.o
+obj-$(CONFIG_LANTIQ_MACH_EASY50601) += mach-easy50601.o
diff --git a/arch/mips/lantiq/xway/mach-easy50601.c b/arch/mips/lantiq/xway/mach-easy50601.c
new file mode 100644
index 000000000000..d5aaf637ab19
--- /dev/null
+++ b/arch/mips/lantiq/xway/mach-easy50601.c
@@ -0,0 +1,57 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
+#include <linux/input.h>
+
+#include <lantiq.h>
+
+#include "../machtypes.h"
+#include "devices.h"
+
+static struct mtd_partition easy50601_partitions[] = {
+	{
+		.name	= "uboot",
+		.offset	= 0x0,
+		.size	= 0x10000,
+	},
+	{
+		.name	= "uboot_env",
+		.offset	= 0x10000,
+		.size	= 0x10000,
+	},
+	{
+		.name	= "linux",
+		.offset	= 0x20000,
+		.size	= 0xE0000,
+	},
+	{
+		.name	= "rootfs",
+		.offset	= 0x100000,
+		.size	= 0x300000,
+	},
+};
+
+static struct physmap_flash_data easy50601_flash_data = {
+	.nr_parts	= ARRAY_SIZE(easy50601_partitions),
+	.parts		= easy50601_partitions,
+};
+
+static void __init easy50601_init(void)
+{
+	ltq_register_nor(&easy50601_flash_data);
+}
+
+MIPS_MACHINE(LTQ_MACH_EASY50601,
+			"EASY50601",
+			"EASY50601 Eval Board",
+			easy50601_init);
diff --git a/arch/mips/lantiq/xway/mach-easy50712.c b/arch/mips/lantiq/xway/mach-easy50712.c
new file mode 100644
index 000000000000..e5e7e09b7c14
--- /dev/null
+++ b/arch/mips/lantiq/xway/mach-easy50712.c
@@ -0,0 +1,68 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
+#include <linux/input.h>
+
+#include <lantiq_soc.h>
+#include <irq.h>
+
+#include "../machtypes.h"
+#include "devices.h"
+
+static struct mtd_partition easy50712_partitions[] = {
+	{
+		.name	= "uboot",
+		.offset	= 0x0,
+		.size	= 0x10000,
+	},
+	{
+		.name	= "uboot_env",
+		.offset	= 0x10000,
+		.size	= 0x10000,
+	},
+	{
+		.name	= "linux",
+		.offset	= 0x20000,
+		.size	= 0xe0000,
+	},
+	{
+		.name	= "rootfs",
+		.offset	= 0x100000,
+		.size	= 0x300000,
+	},
+};
+
+static struct physmap_flash_data easy50712_flash_data = {
+	.nr_parts	= ARRAY_SIZE(easy50712_partitions),
+	.parts		= easy50712_partitions,
+};
+
+static struct ltq_pci_data ltq_pci_data = {
+	.clock	= PCI_CLOCK_INT,
+	.gpio	= PCI_GNT1 | PCI_REQ1,
+	.irq	= {
+		[14] = INT_NUM_IM0_IRL0 + 22,
+	},
+};
+
+static void __init easy50712_init(void)
+{
+	ltq_register_gpio_stp();
+	ltq_register_nor(&easy50712_flash_data);
+	ltq_register_pci(&ltq_pci_data);
+}
+
+MIPS_MACHINE(LTQ_MACH_EASY50712,
+	     "EASY50712",
+	     "EASY50712 Eval Board",
+	      easy50712_init);
-- 
cgit v1.2.3


From 935c500c377d8e414bbe08e0e169f6c85d2a4273 Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Wed, 30 Mar 2011 09:27:56 +0200
Subject: MIPS: Lantiq: Add more gpio drivers

The XWAY family allows to extend the number of gpios by using shift
registers or latches. This patch adds the 2 drivers needed for this. The
extended gpios are output only.

[ralf@linux-mips.org: Fixed ltq_stp_probe section() attributes.]

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Ralph Hempel <ralph.hempel@lantiq.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2258/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/lantiq/xway/Makefile   |   2 +-
 arch/mips/lantiq/xway/gpio_ebu.c | 126 +++++++++++++++++++++++++++++++
 arch/mips/lantiq/xway/gpio_stp.c | 157 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 284 insertions(+), 1 deletion(-)
 create mode 100644 arch/mips/lantiq/xway/gpio_ebu.c
 create mode 100644 arch/mips/lantiq/xway/gpio_stp.c

(limited to 'arch')

diff --git a/arch/mips/lantiq/xway/Makefile b/arch/mips/lantiq/xway/Makefile
index b1d3640c3531..6b5e07e03387 100644
--- a/arch/mips/lantiq/xway/Makefile
+++ b/arch/mips/lantiq/xway/Makefile
@@ -1,4 +1,4 @@
-obj-y := pmu.o ebu.o reset.o gpio.o devices.o
+obj-y := pmu.o ebu.o reset.o gpio.o gpio_stp.o gpio_ebu.o devices.o
 
 obj-$(CONFIG_SOC_XWAY) += clk-xway.o prom-xway.o setup-xway.o
 obj-$(CONFIG_SOC_AMAZON_SE) += clk-ase.o prom-ase.o setup-ase.o
diff --git a/arch/mips/lantiq/xway/gpio_ebu.c b/arch/mips/lantiq/xway/gpio_ebu.c
new file mode 100644
index 000000000000..a479355abdb9
--- /dev/null
+++ b/arch/mips/lantiq/xway/gpio_ebu.c
@@ -0,0 +1,126 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2010 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+
+#include <lantiq_soc.h>
+
+/*
+ * By attaching hardware latches to the EBU it is possible to create output
+ * only gpios. This driver configures a special memory address, which when
+ * written to outputs 16 bit to the latches.
+ */
+
+#define LTQ_EBU_BUSCON	0x1e7ff		/* 16 bit access, slowest timing */
+#define LTQ_EBU_WP	0x80000000	/* write protect bit */
+
+/* we keep a shadow value of the last value written to the ebu */
+static int ltq_ebu_gpio_shadow = 0x0;
+static void __iomem *ltq_ebu_gpio_membase;
+
+static void ltq_ebu_apply(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ebu_lock, flags);
+	ltq_ebu_w32(LTQ_EBU_BUSCON, LTQ_EBU_BUSCON1);
+	*((__u16 *)ltq_ebu_gpio_membase) = ltq_ebu_gpio_shadow;
+	ltq_ebu_w32(LTQ_EBU_BUSCON | LTQ_EBU_WP, LTQ_EBU_BUSCON1);
+	spin_unlock_irqrestore(&ebu_lock, flags);
+}
+
+static void ltq_ebu_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	if (value)
+		ltq_ebu_gpio_shadow |= (1 << offset);
+	else
+		ltq_ebu_gpio_shadow &= ~(1 << offset);
+	ltq_ebu_apply();
+}
+
+static int ltq_ebu_direction_output(struct gpio_chip *chip, unsigned offset,
+	int value)
+{
+	ltq_ebu_set(chip, offset, value);
+
+	return 0;
+}
+
+static struct gpio_chip ltq_ebu_chip = {
+	.label = "ltq_ebu",
+	.direction_output = ltq_ebu_direction_output,
+	.set = ltq_ebu_set,
+	.base = 72,
+	.ngpio = 16,
+	.can_sleep = 1,
+	.owner = THIS_MODULE,
+};
+
+static int ltq_ebu_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	if (!res) {
+		dev_err(&pdev->dev, "failed to get memory resource\n");
+		return -ENOENT;
+	}
+
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		resource_size(res), dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev, "failed to request memory resource\n");
+		return -EBUSY;
+	}
+
+	ltq_ebu_gpio_membase = devm_ioremap_nocache(&pdev->dev, res->start,
+		resource_size(res));
+	if (!ltq_ebu_gpio_membase) {
+		dev_err(&pdev->dev, "Failed to ioremap mem region\n");
+		return -ENOMEM;
+	}
+
+	/* grab the default shadow value passed form the platform code */
+	ltq_ebu_gpio_shadow = (unsigned int) pdev->dev.platform_data;
+
+	/* tell the ebu controller which memory address we will be using */
+	ltq_ebu_w32(pdev->resource->start | 0x1, LTQ_EBU_ADDRSEL1);
+
+	/* write protect the region */
+	ltq_ebu_w32(LTQ_EBU_BUSCON | LTQ_EBU_WP, LTQ_EBU_BUSCON1);
+
+	ret = gpiochip_add(&ltq_ebu_chip);
+	if (!ret)
+		ltq_ebu_apply();
+	return ret;
+}
+
+static struct platform_driver ltq_ebu_driver = {
+	.probe = ltq_ebu_probe,
+	.driver = {
+		.name = "ltq_ebu",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init ltq_ebu_init(void)
+{
+	int ret = platform_driver_register(&ltq_ebu_driver);
+
+	if (ret)
+		pr_info("ltq_ebu : Error registering platfom driver!");
+	return ret;
+}
+
+postcore_initcall(ltq_ebu_init);
diff --git a/arch/mips/lantiq/xway/gpio_stp.c b/arch/mips/lantiq/xway/gpio_stp.c
new file mode 100644
index 000000000000..67d59d690340
--- /dev/null
+++ b/arch/mips/lantiq/xway/gpio_stp.c
@@ -0,0 +1,157 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2007 John Crispin <blogic@openwrt.org>
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+
+#include <lantiq_soc.h>
+
+#define LTQ_STP_CON0		0x00
+#define LTQ_STP_CON1		0x04
+#define LTQ_STP_CPU0		0x08
+#define LTQ_STP_CPU1		0x0C
+#define LTQ_STP_AR		0x10
+
+#define LTQ_STP_CON_SWU		(1 << 31)
+#define LTQ_STP_2HZ		0
+#define LTQ_STP_4HZ		(1 << 23)
+#define LTQ_STP_8HZ		(2 << 23)
+#define LTQ_STP_10HZ		(3 << 23)
+#define LTQ_STP_SPEED_MASK	(0xf << 23)
+#define LTQ_STP_UPD_FPI		(1 << 31)
+#define LTQ_STP_UPD_MASK	(3 << 30)
+#define LTQ_STP_ADSL_SRC	(3 << 24)
+
+#define LTQ_STP_GROUP0		(1 << 0)
+
+#define LTQ_STP_RISING		0
+#define LTQ_STP_FALLING		(1 << 26)
+#define LTQ_STP_EDGE_MASK	(1 << 26)
+
+#define ltq_stp_r32(reg)	__raw_readl(ltq_stp_membase + reg)
+#define ltq_stp_w32(val, reg)	__raw_writel(val, ltq_stp_membase + reg)
+#define ltq_stp_w32_mask(clear, set, reg) \
+		ltq_w32((ltq_r32(ltq_stp_membase + reg) & ~(clear)) | (set), \
+		ltq_stp_membase + (reg))
+
+static int ltq_stp_shadow = 0xffff;
+static void __iomem *ltq_stp_membase;
+
+static void ltq_stp_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	if (value)
+		ltq_stp_shadow |= (1 << offset);
+	else
+		ltq_stp_shadow &= ~(1 << offset);
+	ltq_stp_w32(ltq_stp_shadow, LTQ_STP_CPU0);
+}
+
+static int ltq_stp_direction_output(struct gpio_chip *chip, unsigned offset,
+	int value)
+{
+	ltq_stp_set(chip, offset, value);
+
+	return 0;
+}
+
+static struct gpio_chip ltq_stp_chip = {
+	.label = "ltq_stp",
+	.direction_output = ltq_stp_direction_output,
+	.set = ltq_stp_set,
+	.base = 48,
+	.ngpio = 24,
+	.can_sleep = 1,
+	.owner = THIS_MODULE,
+};
+
+static int ltq_stp_hw_init(void)
+{
+	/* the 3 pins used to control the external stp */
+	ltq_gpio_request(4, 1, 0, 1, "stp-st");
+	ltq_gpio_request(5, 1, 0, 1, "stp-d");
+	ltq_gpio_request(6, 1, 0, 1, "stp-sh");
+
+	/* sane defaults */
+	ltq_stp_w32(0, LTQ_STP_AR);
+	ltq_stp_w32(0, LTQ_STP_CPU0);
+	ltq_stp_w32(0, LTQ_STP_CPU1);
+	ltq_stp_w32(LTQ_STP_CON_SWU, LTQ_STP_CON0);
+	ltq_stp_w32(0, LTQ_STP_CON1);
+
+	/* rising or falling edge */
+	ltq_stp_w32_mask(LTQ_STP_EDGE_MASK, LTQ_STP_FALLING, LTQ_STP_CON0);
+
+	/* per default stp 15-0 are set */
+	ltq_stp_w32_mask(0, LTQ_STP_GROUP0, LTQ_STP_CON1);
+
+	/* stp are update periodically by the FPI bus */
+	ltq_stp_w32_mask(LTQ_STP_UPD_MASK, LTQ_STP_UPD_FPI, LTQ_STP_CON1);
+
+	/* set stp update speed */
+	ltq_stp_w32_mask(LTQ_STP_SPEED_MASK, LTQ_STP_8HZ, LTQ_STP_CON1);
+
+	/* tell the hardware that pin (led) 0 and 1 are controlled
+	 *  by the dsl arc
+	 */
+	ltq_stp_w32_mask(0, LTQ_STP_ADSL_SRC, LTQ_STP_CON0);
+
+	ltq_pmu_enable(PMU_LED);
+	return 0;
+}
+
+static int __devinit ltq_stp_probe(struct platform_device *pdev)
+{
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	int ret = 0;
+
+	if (!res)
+		return -ENOENT;
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		resource_size(res), dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev, "failed to request STP memory\n");
+		return -EBUSY;
+	}
+	ltq_stp_membase = devm_ioremap_nocache(&pdev->dev, res->start,
+		resource_size(res));
+	if (!ltq_stp_membase) {
+		dev_err(&pdev->dev, "failed to remap STP memory\n");
+		return -ENOMEM;
+	}
+	ret = gpiochip_add(&ltq_stp_chip);
+	if (!ret)
+		ret = ltq_stp_hw_init();
+
+	return ret;
+}
+
+static struct platform_driver ltq_stp_driver = {
+	.probe = ltq_stp_probe,
+	.driver = {
+		.name = "ltq_stp",
+		.owner = THIS_MODULE,
+	},
+};
+
+int __init ltq_stp_init(void)
+{
+	int ret = platform_driver_register(&ltq_stp_driver);
+
+	if (ret)
+		pr_info("ltq_stp: error registering platfom driver");
+	return ret;
+}
+
+postcore_initcall(ltq_stp_init);
-- 
cgit v1.2.3


From dfec1a827d2bdc35d0990afd100f79a685ec0985 Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Fri, 6 May 2011 00:10:00 +0200
Subject: MIPS: Lantiq: Add DMA support

This patch adds support for the DMA engine found inside the XWAY family of
SoCs. The engine has 5 ports and 20 channels.

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Ralph Hempel <ralph.hempel@lantiq.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2355/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 .../mips/include/asm/mach-lantiq/xway/lantiq_soc.h |   3 +-
 arch/mips/include/asm/mach-lantiq/xway/xway_dma.h  |  60 +++++
 arch/mips/lantiq/xway/Makefile                     |   2 +-
 arch/mips/lantiq/xway/devices.h                    |   1 +
 arch/mips/lantiq/xway/dma.c                        | 253 +++++++++++++++++++++
 5 files changed, 317 insertions(+), 2 deletions(-)
 create mode 100644 arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
 create mode 100644 arch/mips/lantiq/xway/dma.c

(limited to 'arch')

diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
index 343e82cbf601..4827afbe3739 100644
--- a/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
+++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
@@ -86,7 +86,8 @@
 #define LTQ_PPE32_SIZE		0x40000
 
 /* DMA */
-#define LTQ_DMA_BASE_ADDR	0xBE104100
+#define LTQ_DMA_BASE_ADDR	0x1E104100
+#define LTQ_DMA_SIZE		0x800
 
 /* PCI */
 #define PCI_CR_BASE_ADDR	0x1E105400
diff --git a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
new file mode 100644
index 000000000000..872943a4b90e
--- /dev/null
+++ b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
@@ -0,0 +1,60 @@
+/*
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms of the GNU General Public License version 2 as published
+ *   by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *   Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef LTQ_DMA_H__
+#define LTQ_DMA_H__
+
+#define LTQ_DESC_SIZE		0x08	/* each descriptor is 64bit */
+#define LTQ_DESC_NUM		0x40	/* 64 descriptors / channel */
+
+#define LTQ_DMA_OWN		BIT(31)	/* owner bit */
+#define LTQ_DMA_C		BIT(30) /* complete bit */
+#define LTQ_DMA_SOP		BIT(29) /* start of packet */
+#define LTQ_DMA_EOP		BIT(28) /* end of packet */
+#define LTQ_DMA_TX_OFFSET(x)	((x & 0x1f) << 23) /* data bytes offset */
+#define LTQ_DMA_RX_OFFSET(x)	((x & 0x7) << 23) /* data bytes offset */
+#define LTQ_DMA_SIZE_MASK	(0xffff) /* the size field is 16 bit */
+
+struct ltq_dma_desc {
+	u32 ctl;
+	u32 addr;
+};
+
+struct ltq_dma_channel {
+	int nr;				/* the channel number */
+	int irq;			/* the mapped irq */
+	int desc;			/* the current descriptor */
+	struct ltq_dma_desc *desc_base;	/* the descriptor base */
+	int phys;			/* physical addr */
+};
+
+enum {
+	DMA_PORT_ETOP = 0,
+	DMA_PORT_DEU,
+};
+
+extern void ltq_dma_enable_irq(struct ltq_dma_channel *ch);
+extern void ltq_dma_disable_irq(struct ltq_dma_channel *ch);
+extern void ltq_dma_ack_irq(struct ltq_dma_channel *ch);
+extern void ltq_dma_open(struct ltq_dma_channel *ch);
+extern void ltq_dma_close(struct ltq_dma_channel *ch);
+extern void ltq_dma_alloc_tx(struct ltq_dma_channel *ch);
+extern void ltq_dma_alloc_rx(struct ltq_dma_channel *ch);
+extern void ltq_dma_free(struct ltq_dma_channel *ch);
+extern void ltq_dma_init_port(int p);
+
+#endif
diff --git a/arch/mips/lantiq/xway/Makefile b/arch/mips/lantiq/xway/Makefile
index 6b5e07e03387..c517f2e77563 100644
--- a/arch/mips/lantiq/xway/Makefile
+++ b/arch/mips/lantiq/xway/Makefile
@@ -1,4 +1,4 @@
-obj-y := pmu.o ebu.o reset.o gpio.o gpio_stp.o gpio_ebu.o devices.o
+obj-y := pmu.o ebu.o reset.o gpio.o gpio_stp.o gpio_ebu.o devices.o dma.o
 
 obj-$(CONFIG_SOC_XWAY) += clk-xway.o prom-xway.o setup-xway.o
 obj-$(CONFIG_SOC_AMAZON_SE) += clk-ase.o prom-ase.o setup-ase.o
diff --git a/arch/mips/lantiq/xway/devices.h b/arch/mips/lantiq/xway/devices.h
index 51f56b5a9fbd..d57308423408 100644
--- a/arch/mips/lantiq/xway/devices.h
+++ b/arch/mips/lantiq/xway/devices.h
@@ -10,6 +10,7 @@
 #define _LTQ_DEVICES_XWAY_H__
 
 #include "../devices.h"
+#include <linux/phy.h>
 
 extern void ltq_register_gpio(void);
 extern void ltq_register_gpio_stp(void);
diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c
new file mode 100644
index 000000000000..4278a459d6c4
--- /dev/null
+++ b/arch/mips/lantiq/xway/dma.c
@@ -0,0 +1,253 @@
+/*
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms of the GNU General Public License version 2 as published
+ *   by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *   Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/dma-mapping.h>
+
+#include <lantiq_soc.h>
+#include <xway_dma.h>
+
+#define LTQ_DMA_CTRL		0x10
+#define LTQ_DMA_CPOLL		0x14
+#define LTQ_DMA_CS		0x18
+#define LTQ_DMA_CCTRL		0x1C
+#define LTQ_DMA_CDBA		0x20
+#define LTQ_DMA_CDLEN		0x24
+#define LTQ_DMA_CIS		0x28
+#define LTQ_DMA_CIE		0x2C
+#define LTQ_DMA_PS		0x40
+#define LTQ_DMA_PCTRL		0x44
+#define LTQ_DMA_IRNEN		0xf4
+
+#define DMA_DESCPT		BIT(3)		/* descriptor complete irq */
+#define DMA_TX			BIT(8)		/* TX channel direction */
+#define DMA_CHAN_ON		BIT(0)		/* channel on / off bit */
+#define DMA_PDEN		BIT(6)		/* enable packet drop */
+#define DMA_CHAN_RST		BIT(1)		/* channel on / off bit */
+#define DMA_RESET		BIT(0)		/* channel on / off bit */
+#define DMA_IRQ_ACK		0x7e		/* IRQ status register */
+#define DMA_POLL		BIT(31)		/* turn on channel polling */
+#define DMA_CLK_DIV4		BIT(6)		/* polling clock divider */
+#define DMA_2W_BURST		BIT(1)		/* 2 word burst length */
+#define DMA_MAX_CHANNEL		20		/* the soc has 20 channels */
+#define DMA_ETOP_ENDIANESS	(0xf << 8) /* endianess swap etop channels */
+#define DMA_WEIGHT	(BIT(17) | BIT(16))	/* default channel wheight */
+
+#define ltq_dma_r32(x)			ltq_r32(ltq_dma_membase + (x))
+#define ltq_dma_w32(x, y)		ltq_w32(x, ltq_dma_membase + (y))
+#define ltq_dma_w32_mask(x, y, z)	ltq_w32_mask(x, y, \
+						ltq_dma_membase + (z))
+
+static struct resource ltq_dma_resource = {
+	.name	= "dma",
+	.start	= LTQ_DMA_BASE_ADDR,
+	.end	= LTQ_DMA_BASE_ADDR + LTQ_DMA_SIZE - 1,
+	.flags  = IORESOURCE_MEM,
+};
+
+static void __iomem *ltq_dma_membase;
+
+void
+ltq_dma_enable_irq(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);
+	ltq_dma_w32_mask(0, 1 << ch->nr, LTQ_DMA_IRNEN);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_enable_irq);
+
+void
+ltq_dma_disable_irq(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);
+	ltq_dma_w32_mask(1 << ch->nr, 0, LTQ_DMA_IRNEN);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_disable_irq);
+
+void
+ltq_dma_ack_irq(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);
+	ltq_dma_w32(DMA_IRQ_ACK, LTQ_DMA_CIS);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_ack_irq);
+
+void
+ltq_dma_open(struct ltq_dma_channel *ch)
+{
+	unsigned long flag;
+
+	local_irq_save(flag);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);
+	ltq_dma_w32_mask(0, DMA_CHAN_ON, LTQ_DMA_CCTRL);
+	ltq_dma_enable_irq(ch);
+	local_irq_restore(flag);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_open);
+
+void
+ltq_dma_close(struct ltq_dma_channel *ch)
+{
+	unsigned long flag;
+
+	local_irq_save(flag);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);
+	ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL);
+	ltq_dma_disable_irq(ch);
+	local_irq_restore(flag);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_close);
+
+static void
+ltq_dma_alloc(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	ch->desc = 0;
+	ch->desc_base = dma_alloc_coherent(NULL,
+				LTQ_DESC_NUM * LTQ_DESC_SIZE,
+				&ch->phys, GFP_ATOMIC);
+	memset(ch->desc_base, 0, LTQ_DESC_NUM * LTQ_DESC_SIZE);
+
+	local_irq_save(flags);
+	ltq_dma_w32(ch->nr, LTQ_DMA_CS);
+	ltq_dma_w32(ch->phys, LTQ_DMA_CDBA);
+	ltq_dma_w32(LTQ_DESC_NUM, LTQ_DMA_CDLEN);
+	ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL);
+	wmb();
+	ltq_dma_w32_mask(0, DMA_CHAN_RST, LTQ_DMA_CCTRL);
+	while (ltq_dma_r32(LTQ_DMA_CCTRL) & DMA_CHAN_RST)
+		;
+	local_irq_restore(flags);
+}
+
+void
+ltq_dma_alloc_tx(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	ltq_dma_alloc(ch);
+
+	local_irq_save(flags);
+	ltq_dma_w32(DMA_DESCPT, LTQ_DMA_CIE);
+	ltq_dma_w32_mask(0, 1 << ch->nr, LTQ_DMA_IRNEN);
+	ltq_dma_w32(DMA_WEIGHT | DMA_TX, LTQ_DMA_CCTRL);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_alloc_tx);
+
+void
+ltq_dma_alloc_rx(struct ltq_dma_channel *ch)
+{
+	unsigned long flags;
+
+	ltq_dma_alloc(ch);
+
+	local_irq_save(flags);
+	ltq_dma_w32(DMA_DESCPT, LTQ_DMA_CIE);
+	ltq_dma_w32_mask(0, 1 << ch->nr, LTQ_DMA_IRNEN);
+	ltq_dma_w32(DMA_WEIGHT, LTQ_DMA_CCTRL);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_alloc_rx);
+
+void
+ltq_dma_free(struct ltq_dma_channel *ch)
+{
+	if (!ch->desc_base)
+		return;
+	ltq_dma_close(ch);
+	dma_free_coherent(NULL, LTQ_DESC_NUM * LTQ_DESC_SIZE,
+		ch->desc_base, ch->phys);
+}
+EXPORT_SYMBOL_GPL(ltq_dma_free);
+
+void
+ltq_dma_init_port(int p)
+{
+	ltq_dma_w32(p, LTQ_DMA_PS);
+	switch (p) {
+	case DMA_PORT_ETOP:
+		/*
+		 * Tell the DMA engine to swap the endianess of data frames and
+		 * drop packets if the channel arbitration fails.
+		 */
+		ltq_dma_w32_mask(0, DMA_ETOP_ENDIANESS | DMA_PDEN,
+			LTQ_DMA_PCTRL);
+		break;
+
+	case DMA_PORT_DEU:
+		ltq_dma_w32((DMA_2W_BURST << 4) | (DMA_2W_BURST << 2),
+			LTQ_DMA_PCTRL);
+		break;
+
+	default:
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(ltq_dma_init_port);
+
+int __init
+ltq_dma_init(void)
+{
+	int i;
+
+	/* insert and request the memory region */
+	if (insert_resource(&iomem_resource, &ltq_dma_resource) < 0)
+		panic("Failed to insert dma memory\n");
+
+	if (request_mem_region(ltq_dma_resource.start,
+			resource_size(&ltq_dma_resource), "dma") < 0)
+		panic("Failed to request dma memory\n");
+
+	/* remap dma register range */
+	ltq_dma_membase = ioremap_nocache(ltq_dma_resource.start,
+				resource_size(&ltq_dma_resource));
+	if (!ltq_dma_membase)
+		panic("Failed to remap dma memory\n");
+
+	/* power up and reset the dma engine */
+	ltq_pmu_enable(PMU_DMA);
+	ltq_dma_w32_mask(0, DMA_RESET, LTQ_DMA_CTRL);
+
+	/* disable all interrupts */
+	ltq_dma_w32(0, LTQ_DMA_IRNEN);
+
+	/* reset/configure each channel */
+	for (i = 0; i < DMA_MAX_CHANNEL; i++) {
+		ltq_dma_w32(i, LTQ_DMA_CS);
+		ltq_dma_w32(DMA_CHAN_RST, LTQ_DMA_CCTRL);
+		ltq_dma_w32(DMA_POLL | DMA_CLK_DIV4, LTQ_DMA_CPOLL);
+		ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL);
+	}
+	return 0;
+}
+
+postcore_initcall(ltq_dma_init);
-- 
cgit v1.2.3


From 504d4721ee8e432af4b5f196a08af38bc4dac5fe Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Fri, 6 May 2011 00:10:01 +0200
Subject: MIPS: Lantiq: Add ethernet driver

This patch adds the driver for the ETOP Packet Processing Engine (PPE32)
found inside the XWAY family of Lantiq MIPS SoCs. This driver makes 100MBit
ethernet work. Support for all 8 dma channels, gbit and the embedded switch
found on the ar9/vr9 still needs to be implemented.

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Ralph Hempel <ralph.hempel@lantiq.com>
Cc: linux-mips@linux-mips.org
Cc: netdev@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/2357/
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 .../mips/include/asm/mach-lantiq/lantiq_platform.h |   7 +
 .../mips/include/asm/mach-lantiq/xway/lantiq_soc.h |   4 +-
 arch/mips/lantiq/xway/devices.c                    |  23 +
 arch/mips/lantiq/xway/devices.h                    |   1 +
 drivers/net/Kconfig                                |   7 +
 drivers/net/Makefile                               |   1 +
 drivers/net/lantiq_etop.c                          | 805 +++++++++++++++++++++
 7 files changed, 846 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/lantiq_etop.c

(limited to 'arch')

diff --git a/arch/mips/include/asm/mach-lantiq/lantiq_platform.h b/arch/mips/include/asm/mach-lantiq/lantiq_platform.h
index 1f1dba6d0736..a305f1d0259e 100644
--- a/arch/mips/include/asm/mach-lantiq/lantiq_platform.h
+++ b/arch/mips/include/asm/mach-lantiq/lantiq_platform.h
@@ -10,6 +10,7 @@
 #define _LANTIQ_PLATFORM_H__
 
 #include <linux/mtd/partitions.h>
+#include <linux/socket.h>
 
 /* struct used to pass info to the pci core */
 enum {
@@ -43,4 +44,10 @@ struct ltq_pci_data {
 	int irq[16];
 };
 
+/* struct used to pass info to network drivers */
+struct ltq_eth_data {
+	struct sockaddr mac;
+	int mii_mode;
+};
+
 #endif
diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
index 4827afbe3739..8a3c6be669d2 100644
--- a/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
+++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_soc.h
@@ -82,8 +82,8 @@
 #define PMU_SWITCH		0x10000000
 
 /* ETOP - ethernet */
-#define LTQ_PPE32_BASE_ADDR	0xBE180000
-#define LTQ_PPE32_SIZE		0x40000
+#define LTQ_ETOP_BASE_ADDR	0x1E180000
+#define LTQ_ETOP_SIZE		0x40000
 
 /* DMA */
 #define LTQ_DMA_BASE_ADDR	0x1E104100
diff --git a/arch/mips/lantiq/xway/devices.c b/arch/mips/lantiq/xway/devices.c
index a71b3b532a01..e09e789dfc27 100644
--- a/arch/mips/lantiq/xway/devices.c
+++ b/arch/mips/lantiq/xway/devices.c
@@ -96,3 +96,26 @@ void __init ltq_register_ase_asc(void)
 	platform_device_register_simple("ltq_asc", 0,
 		ltq_ase_asc_resources, ARRAY_SIZE(ltq_ase_asc_resources));
 }
+
+/* ethernet */
+static struct resource ltq_etop_resources = {
+	.name	= "etop",
+	.start	= LTQ_ETOP_BASE_ADDR,
+	.end	= LTQ_ETOP_BASE_ADDR + LTQ_ETOP_SIZE - 1,
+	.flags	= IORESOURCE_MEM,
+};
+
+static struct platform_device ltq_etop = {
+	.name		= "ltq_etop",
+	.resource	= &ltq_etop_resources,
+	.num_resources	= 1,
+};
+
+void __init
+ltq_register_etop(struct ltq_eth_data *eth)
+{
+	if (eth) {
+		ltq_etop.dev.platform_data = eth;
+		platform_device_register(&ltq_etop);
+	}
+}
diff --git a/arch/mips/lantiq/xway/devices.h b/arch/mips/lantiq/xway/devices.h
index d57308423408..e90493471bc1 100644
--- a/arch/mips/lantiq/xway/devices.h
+++ b/arch/mips/lantiq/xway/devices.h
@@ -15,5 +15,6 @@
 extern void ltq_register_gpio(void);
 extern void ltq_register_gpio_stp(void);
 extern void ltq_register_ase_asc(void);
+extern void ltq_register_etop(struct ltq_eth_data *eth);
 
 #endif
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 6c884ef1b069..19f04a34783a 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2017,6 +2017,13 @@ config FTMAC100
 	  from Faraday. It is used on Faraday A320, Andes AG101 and some
 	  other ARM/NDS32 SoC's.
 
+config LANTIQ_ETOP
+	tristate "Lantiq SoC ETOP driver"
+	depends on SOC_TYPE_XWAY
+	help
+	  Support for the MII0 inside the Lantiq SoC
+
+
 source "drivers/net/fs_enet/Kconfig"
 
 source "drivers/net/octeon/Kconfig"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index e5a7375685ad..209fbb70619b 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -259,6 +259,7 @@ obj-$(CONFIG_MLX4_CORE) += mlx4/
 obj-$(CONFIG_ENC28J60) += enc28j60.o
 obj-$(CONFIG_ETHOC) += ethoc.o
 obj-$(CONFIG_GRETH) += greth.o
+obj-$(CONFIG_LANTIQ_ETOP) += lantiq_etop.o
 
 obj-$(CONFIG_XTENSA_XT2000_SONIC) += xtsonic.o
 
diff --git a/drivers/net/lantiq_etop.c b/drivers/net/lantiq_etop.c
new file mode 100644
index 000000000000..45f252b7da30
--- /dev/null
+++ b/drivers/net/lantiq_etop.c
@@ -0,0 +1,805 @@
+/*
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms of the GNU General Public License version 2 as published
+ *   by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *   Copyright (C) 2011 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+#include <linux/in.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/phy.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/skbuff.h>
+#include <linux/mm.h>
+#include <linux/platform_device.h>
+#include <linux/ethtool.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include <asm/checksum.h>
+
+#include <lantiq_soc.h>
+#include <xway_dma.h>
+#include <lantiq_platform.h>
+
+#define LTQ_ETOP_MDIO		0x11804
+#define MDIO_REQUEST		0x80000000
+#define MDIO_READ		0x40000000
+#define MDIO_ADDR_MASK		0x1f
+#define MDIO_ADDR_OFFSET	0x15
+#define MDIO_REG_MASK		0x1f
+#define MDIO_REG_OFFSET		0x10
+#define MDIO_VAL_MASK		0xffff
+
+#define PPE32_CGEN		0x800
+#define LQ_PPE32_ENET_MAC_CFG	0x1840
+
+#define LTQ_ETOP_ENETS0		0x11850
+#define LTQ_ETOP_MAC_DA0	0x1186C
+#define LTQ_ETOP_MAC_DA1	0x11870
+#define LTQ_ETOP_CFG		0x16020
+#define LTQ_ETOP_IGPLEN		0x16080
+
+#define MAX_DMA_CHAN		0x8
+#define MAX_DMA_CRC_LEN		0x4
+#define MAX_DMA_DATA_LEN	0x600
+
+#define ETOP_FTCU		BIT(28)
+#define ETOP_MII_MASK		0xf
+#define ETOP_MII_NORMAL		0xd
+#define ETOP_MII_REVERSE	0xe
+#define ETOP_PLEN_UNDER		0x40
+#define ETOP_CGEN		0x800
+
+/* use 2 static channels for TX/RX */
+#define LTQ_ETOP_TX_CHANNEL	1
+#define LTQ_ETOP_RX_CHANNEL	6
+#define IS_TX(x)		(x == LTQ_ETOP_TX_CHANNEL)
+#define IS_RX(x)		(x == LTQ_ETOP_RX_CHANNEL)
+
+#define ltq_etop_r32(x)		ltq_r32(ltq_etop_membase + (x))
+#define ltq_etop_w32(x, y)	ltq_w32(x, ltq_etop_membase + (y))
+#define ltq_etop_w32_mask(x, y, z)	\
+		ltq_w32_mask(x, y, ltq_etop_membase + (z))
+
+#define DRV_VERSION	"1.0"
+
+static void __iomem *ltq_etop_membase;
+
+struct ltq_etop_chan {
+	int idx;
+	int tx_free;
+	struct net_device *netdev;
+	struct napi_struct napi;
+	struct ltq_dma_channel dma;
+	struct sk_buff *skb[LTQ_DESC_NUM];
+};
+
+struct ltq_etop_priv {
+	struct net_device *netdev;
+	struct ltq_eth_data *pldata;
+	struct resource *res;
+
+	struct mii_bus *mii_bus;
+	struct phy_device *phydev;
+
+	struct ltq_etop_chan ch[MAX_DMA_CHAN];
+	int tx_free[MAX_DMA_CHAN >> 1];
+
+	spinlock_t lock;
+};
+
+static int
+ltq_etop_alloc_skb(struct ltq_etop_chan *ch)
+{
+	ch->skb[ch->dma.desc] = dev_alloc_skb(MAX_DMA_DATA_LEN);
+	if (!ch->skb[ch->dma.desc])
+		return -ENOMEM;
+	ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(NULL,
+		ch->skb[ch->dma.desc]->data, MAX_DMA_DATA_LEN,
+		DMA_FROM_DEVICE);
+	ch->dma.desc_base[ch->dma.desc].addr =
+		CPHYSADDR(ch->skb[ch->dma.desc]->data);
+	ch->dma.desc_base[ch->dma.desc].ctl =
+		LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) |
+		MAX_DMA_DATA_LEN;
+	skb_reserve(ch->skb[ch->dma.desc], NET_IP_ALIGN);
+	return 0;
+}
+
+static void
+ltq_etop_hw_receive(struct ltq_etop_chan *ch)
+{
+	struct ltq_etop_priv *priv = netdev_priv(ch->netdev);
+	struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
+	struct sk_buff *skb = ch->skb[ch->dma.desc];
+	int len = (desc->ctl & LTQ_DMA_SIZE_MASK) - MAX_DMA_CRC_LEN;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (ltq_etop_alloc_skb(ch)) {
+		netdev_err(ch->netdev,
+			"failed to allocate new rx buffer, stopping DMA\n");
+		ltq_dma_close(&ch->dma);
+	}
+	ch->dma.desc++;
+	ch->dma.desc %= LTQ_DESC_NUM;
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	skb_put(skb, len);
+	skb->dev = ch->netdev;
+	skb->protocol = eth_type_trans(skb, ch->netdev);
+	netif_receive_skb(skb);
+}
+
+static int
+ltq_etop_poll_rx(struct napi_struct *napi, int budget)
+{
+	struct ltq_etop_chan *ch = container_of(napi,
+				struct ltq_etop_chan, napi);
+	int rx = 0;
+	int complete = 0;
+
+	while ((rx < budget) && !complete) {
+		struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
+
+		if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) == LTQ_DMA_C) {
+			ltq_etop_hw_receive(ch);
+			rx++;
+		} else {
+			complete = 1;
+		}
+	}
+	if (complete || !rx) {
+		napi_complete(&ch->napi);
+		ltq_dma_ack_irq(&ch->dma);
+	}
+	return rx;
+}
+
+static int
+ltq_etop_poll_tx(struct napi_struct *napi, int budget)
+{
+	struct ltq_etop_chan *ch =
+		container_of(napi, struct ltq_etop_chan, napi);
+	struct ltq_etop_priv *priv = netdev_priv(ch->netdev);
+	struct netdev_queue *txq =
+		netdev_get_tx_queue(ch->netdev, ch->idx >> 1);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	while ((ch->dma.desc_base[ch->tx_free].ctl &
+			(LTQ_DMA_OWN | LTQ_DMA_C)) == LTQ_DMA_C) {
+		dev_kfree_skb_any(ch->skb[ch->tx_free]);
+		ch->skb[ch->tx_free] = NULL;
+		memset(&ch->dma.desc_base[ch->tx_free], 0,
+			sizeof(struct ltq_dma_desc));
+		ch->tx_free++;
+		ch->tx_free %= LTQ_DESC_NUM;
+	}
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	if (netif_tx_queue_stopped(txq))
+		netif_tx_start_queue(txq);
+	napi_complete(&ch->napi);
+	ltq_dma_ack_irq(&ch->dma);
+	return 1;
+}
+
+static irqreturn_t
+ltq_etop_dma_irq(int irq, void *_priv)
+{
+	struct ltq_etop_priv *priv = _priv;
+	int ch = irq - LTQ_DMA_CH0_INT;
+
+	napi_schedule(&priv->ch[ch].napi);
+	return IRQ_HANDLED;
+}
+
+static void
+ltq_etop_free_channel(struct net_device *dev, struct ltq_etop_chan *ch)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+
+	ltq_dma_free(&ch->dma);
+	if (ch->dma.irq)
+		free_irq(ch->dma.irq, priv);
+	if (IS_RX(ch->idx)) {
+		int desc;
+		for (desc = 0; desc < LTQ_DESC_NUM; desc++)
+			dev_kfree_skb_any(ch->skb[ch->dma.desc]);
+	}
+}
+
+static void
+ltq_etop_hw_exit(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	int i;
+
+	ltq_pmu_disable(PMU_PPE);
+	for (i = 0; i < MAX_DMA_CHAN; i++)
+		if (IS_TX(i) || IS_RX(i))
+			ltq_etop_free_channel(dev, &priv->ch[i]);
+}
+
+static int
+ltq_etop_hw_init(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	int i;
+
+	ltq_pmu_enable(PMU_PPE);
+
+	switch (priv->pldata->mii_mode) {
+	case PHY_INTERFACE_MODE_RMII:
+		ltq_etop_w32_mask(ETOP_MII_MASK,
+			ETOP_MII_REVERSE, LTQ_ETOP_CFG);
+		break;
+
+	case PHY_INTERFACE_MODE_MII:
+		ltq_etop_w32_mask(ETOP_MII_MASK,
+			ETOP_MII_NORMAL, LTQ_ETOP_CFG);
+		break;
+
+	default:
+		netdev_err(dev, "unknown mii mode %d\n",
+			priv->pldata->mii_mode);
+		return -ENOTSUPP;
+	}
+
+	/* enable crc generation */
+	ltq_etop_w32(PPE32_CGEN, LQ_PPE32_ENET_MAC_CFG);
+
+	ltq_dma_init_port(DMA_PORT_ETOP);
+
+	for (i = 0; i < MAX_DMA_CHAN; i++) {
+		int irq = LTQ_DMA_CH0_INT + i;
+		struct ltq_etop_chan *ch = &priv->ch[i];
+
+		ch->idx = ch->dma.nr = i;
+
+		if (IS_TX(i)) {
+			ltq_dma_alloc_tx(&ch->dma);
+			request_irq(irq, ltq_etop_dma_irq, IRQF_DISABLED,
+				"etop_tx", priv);
+		} else if (IS_RX(i)) {
+			ltq_dma_alloc_rx(&ch->dma);
+			for (ch->dma.desc = 0; ch->dma.desc < LTQ_DESC_NUM;
+					ch->dma.desc++)
+				if (ltq_etop_alloc_skb(ch))
+					return -ENOMEM;
+			ch->dma.desc = 0;
+			request_irq(irq, ltq_etop_dma_irq, IRQF_DISABLED,
+				"etop_rx", priv);
+		}
+		ch->dma.irq = irq;
+	}
+	return 0;
+}
+
+static void
+ltq_etop_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+	strcpy(info->driver, "Lantiq ETOP");
+	strcpy(info->bus_info, "internal");
+	strcpy(info->version, DRV_VERSION);
+}
+
+static int
+ltq_etop_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+
+	return phy_ethtool_gset(priv->phydev, cmd);
+}
+
+static int
+ltq_etop_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+
+	return phy_ethtool_sset(priv->phydev, cmd);
+}
+
+static int
+ltq_etop_nway_reset(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+
+	return phy_start_aneg(priv->phydev);
+}
+
+static const struct ethtool_ops ltq_etop_ethtool_ops = {
+	.get_drvinfo = ltq_etop_get_drvinfo,
+	.get_settings = ltq_etop_get_settings,
+	.set_settings = ltq_etop_set_settings,
+	.nway_reset = ltq_etop_nway_reset,
+};
+
+static int
+ltq_etop_mdio_wr(struct mii_bus *bus, int phy_addr, int phy_reg, u16 phy_data)
+{
+	u32 val = MDIO_REQUEST |
+		((phy_addr & MDIO_ADDR_MASK) << MDIO_ADDR_OFFSET) |
+		((phy_reg & MDIO_REG_MASK) << MDIO_REG_OFFSET) |
+		phy_data;
+
+	while (ltq_etop_r32(LTQ_ETOP_MDIO) & MDIO_REQUEST)
+		;
+	ltq_etop_w32(val, LTQ_ETOP_MDIO);
+	return 0;
+}
+
+static int
+ltq_etop_mdio_rd(struct mii_bus *bus, int phy_addr, int phy_reg)
+{
+	u32 val = MDIO_REQUEST | MDIO_READ |
+		((phy_addr & MDIO_ADDR_MASK) << MDIO_ADDR_OFFSET) |
+		((phy_reg & MDIO_REG_MASK) << MDIO_REG_OFFSET);
+
+	while (ltq_etop_r32(LTQ_ETOP_MDIO) & MDIO_REQUEST)
+		;
+	ltq_etop_w32(val, LTQ_ETOP_MDIO);
+	while (ltq_etop_r32(LTQ_ETOP_MDIO) & MDIO_REQUEST)
+		;
+	val = ltq_etop_r32(LTQ_ETOP_MDIO) & MDIO_VAL_MASK;
+	return val;
+}
+
+static void
+ltq_etop_mdio_link(struct net_device *dev)
+{
+	/* nothing to do  */
+}
+
+static int
+ltq_etop_mdio_probe(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	struct phy_device *phydev = NULL;
+	int phy_addr;
+
+	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) {
+		if (priv->mii_bus->phy_map[phy_addr]) {
+			phydev = priv->mii_bus->phy_map[phy_addr];
+			break;
+		}
+	}
+
+	if (!phydev) {
+		netdev_err(dev, "no PHY found\n");
+		return -ENODEV;
+	}
+
+	phydev = phy_connect(dev, dev_name(&phydev->dev), &ltq_etop_mdio_link,
+			0, priv->pldata->mii_mode);
+
+	if (IS_ERR(phydev)) {
+		netdev_err(dev, "Could not attach to PHY\n");
+		return PTR_ERR(phydev);
+	}
+
+	phydev->supported &= (SUPPORTED_10baseT_Half
+			      | SUPPORTED_10baseT_Full
+			      | SUPPORTED_100baseT_Half
+			      | SUPPORTED_100baseT_Full
+			      | SUPPORTED_Autoneg
+			      | SUPPORTED_MII
+			      | SUPPORTED_TP);
+
+	phydev->advertising = phydev->supported;
+	priv->phydev = phydev;
+	pr_info("%s: attached PHY [%s] (phy_addr=%s, irq=%d)\n",
+	       dev->name, phydev->drv->name,
+	       dev_name(&phydev->dev), phydev->irq);
+
+	return 0;
+}
+
+static int
+ltq_etop_mdio_init(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	int i;
+	int err;
+
+	priv->mii_bus = mdiobus_alloc();
+	if (!priv->mii_bus) {
+		netdev_err(dev, "failed to allocate mii bus\n");
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	priv->mii_bus->priv = dev;
+	priv->mii_bus->read = ltq_etop_mdio_rd;
+	priv->mii_bus->write = ltq_etop_mdio_wr;
+	priv->mii_bus->name = "ltq_mii";
+	snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%x", 0);
+	priv->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL);
+	if (!priv->mii_bus->irq) {
+		err = -ENOMEM;
+		goto err_out_free_mdiobus;
+	}
+
+	for (i = 0; i < PHY_MAX_ADDR; ++i)
+		priv->mii_bus->irq[i] = PHY_POLL;
+
+	if (mdiobus_register(priv->mii_bus)) {
+		err = -ENXIO;
+		goto err_out_free_mdio_irq;
+	}
+
+	if (ltq_etop_mdio_probe(dev)) {
+		err = -ENXIO;
+		goto err_out_unregister_bus;
+	}
+	return 0;
+
+err_out_unregister_bus:
+	mdiobus_unregister(priv->mii_bus);
+err_out_free_mdio_irq:
+	kfree(priv->mii_bus->irq);
+err_out_free_mdiobus:
+	mdiobus_free(priv->mii_bus);
+err_out:
+	return err;
+}
+
+static void
+ltq_etop_mdio_cleanup(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+
+	phy_disconnect(priv->phydev);
+	mdiobus_unregister(priv->mii_bus);
+	kfree(priv->mii_bus->irq);
+	mdiobus_free(priv->mii_bus);
+}
+
+static int
+ltq_etop_open(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	int i;
+
+	for (i = 0; i < MAX_DMA_CHAN; i++) {
+		struct ltq_etop_chan *ch = &priv->ch[i];
+
+		if (!IS_TX(i) && (!IS_RX(i)))
+			continue;
+		ltq_dma_open(&ch->dma);
+		napi_enable(&ch->napi);
+	}
+	phy_start(priv->phydev);
+	netif_tx_start_all_queues(dev);
+	return 0;
+}
+
+static int
+ltq_etop_stop(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	int i;
+
+	netif_tx_stop_all_queues(dev);
+	phy_stop(priv->phydev);
+	for (i = 0; i < MAX_DMA_CHAN; i++) {
+		struct ltq_etop_chan *ch = &priv->ch[i];
+
+		if (!IS_RX(i) && !IS_TX(i))
+			continue;
+		napi_disable(&ch->napi);
+		ltq_dma_close(&ch->dma);
+	}
+	return 0;
+}
+
+static int
+ltq_etop_tx(struct sk_buff *skb, struct net_device *dev)
+{
+	int queue = skb_get_queue_mapping(skb);
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, queue);
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	struct ltq_etop_chan *ch = &priv->ch[(queue << 1) | 1];
+	struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
+	int len;
+	unsigned long flags;
+	u32 byte_offset;
+
+	len = skb->len < ETH_ZLEN ? ETH_ZLEN : skb->len;
+
+	if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) || ch->skb[ch->dma.desc]) {
+		dev_kfree_skb_any(skb);
+		netdev_err(dev, "tx ring full\n");
+		netif_tx_stop_queue(txq);
+		return NETDEV_TX_BUSY;
+	}
+
+	/* dma needs to start on a 16 byte aligned address */
+	byte_offset = CPHYSADDR(skb->data) % 16;
+	ch->skb[ch->dma.desc] = skb;
+
+	dev->trans_start = jiffies;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	desc->addr = ((unsigned int) dma_map_single(NULL, skb->data, len,
+						DMA_TO_DEVICE)) - byte_offset;
+	wmb();
+	desc->ctl = LTQ_DMA_OWN | LTQ_DMA_SOP | LTQ_DMA_EOP |
+		LTQ_DMA_TX_OFFSET(byte_offset) | (len & LTQ_DMA_SIZE_MASK);
+	ch->dma.desc++;
+	ch->dma.desc %= LTQ_DESC_NUM;
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	if (ch->dma.desc_base[ch->dma.desc].ctl & LTQ_DMA_OWN)
+		netif_tx_stop_queue(txq);
+
+	return NETDEV_TX_OK;
+}
+
+static int
+ltq_etop_change_mtu(struct net_device *dev, int new_mtu)
+{
+	int ret = eth_change_mtu(dev, new_mtu);
+
+	if (!ret) {
+		struct ltq_etop_priv *priv = netdev_priv(dev);
+		unsigned long flags;
+
+		spin_lock_irqsave(&priv->lock, flags);
+		ltq_etop_w32((ETOP_PLEN_UNDER << 16) | new_mtu,
+			LTQ_ETOP_IGPLEN);
+		spin_unlock_irqrestore(&priv->lock, flags);
+	}
+	return ret;
+}
+
+static int
+ltq_etop_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+
+	/* TODO: mii-toll reports "No MII transceiver present!." ?!*/
+	return phy_mii_ioctl(priv->phydev, rq, cmd);
+}
+
+static int
+ltq_etop_set_mac_address(struct net_device *dev, void *p)
+{
+	int ret = eth_mac_addr(dev, p);
+
+	if (!ret) {
+		struct ltq_etop_priv *priv = netdev_priv(dev);
+		unsigned long flags;
+
+		/* store the mac for the unicast filter */
+		spin_lock_irqsave(&priv->lock, flags);
+		ltq_etop_w32(*((u32 *)dev->dev_addr), LTQ_ETOP_MAC_DA0);
+		ltq_etop_w32(*((u16 *)&dev->dev_addr[4]) << 16,
+			LTQ_ETOP_MAC_DA1);
+		spin_unlock_irqrestore(&priv->lock, flags);
+	}
+	return ret;
+}
+
+static void
+ltq_etop_set_multicast_list(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	unsigned long flags;
+
+	/* ensure that the unicast filter is not enabled in promiscious mode */
+	spin_lock_irqsave(&priv->lock, flags);
+	if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI))
+		ltq_etop_w32_mask(ETOP_FTCU, 0, LTQ_ETOP_ENETS0);
+	else
+		ltq_etop_w32_mask(0, ETOP_FTCU, LTQ_ETOP_ENETS0);
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static u16
+ltq_etop_select_queue(struct net_device *dev, struct sk_buff *skb)
+{
+	/* we are currently only using the first queue */
+	return 0;
+}
+
+static int
+ltq_etop_init(struct net_device *dev)
+{
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	struct sockaddr mac;
+	int err;
+
+	ether_setup(dev);
+	dev->watchdog_timeo = 10 * HZ;
+	err = ltq_etop_hw_init(dev);
+	if (err)
+		goto err_hw;
+	ltq_etop_change_mtu(dev, 1500);
+
+	memcpy(&mac, &priv->pldata->mac, sizeof(struct sockaddr));
+	if (!is_valid_ether_addr(mac.sa_data)) {
+		pr_warn("etop: invalid MAC, using random\n");
+		random_ether_addr(mac.sa_data);
+	}
+
+	err = ltq_etop_set_mac_address(dev, &mac);
+	if (err)
+		goto err_netdev;
+	ltq_etop_set_multicast_list(dev);
+	err = ltq_etop_mdio_init(dev);
+	if (err)
+		goto err_netdev;
+	return 0;
+
+err_netdev:
+	unregister_netdev(dev);
+	free_netdev(dev);
+err_hw:
+	ltq_etop_hw_exit(dev);
+	return err;
+}
+
+static void
+ltq_etop_tx_timeout(struct net_device *dev)
+{
+	int err;
+
+	ltq_etop_hw_exit(dev);
+	err = ltq_etop_hw_init(dev);
+	if (err)
+		goto err_hw;
+	dev->trans_start = jiffies;
+	netif_wake_queue(dev);
+	return;
+
+err_hw:
+	ltq_etop_hw_exit(dev);
+	netdev_err(dev, "failed to restart etop after TX timeout\n");
+}
+
+static const struct net_device_ops ltq_eth_netdev_ops = {
+	.ndo_open = ltq_etop_open,
+	.ndo_stop = ltq_etop_stop,
+	.ndo_start_xmit = ltq_etop_tx,
+	.ndo_change_mtu = ltq_etop_change_mtu,
+	.ndo_do_ioctl = ltq_etop_ioctl,
+	.ndo_set_mac_address = ltq_etop_set_mac_address,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_set_multicast_list = ltq_etop_set_multicast_list,
+	.ndo_select_queue = ltq_etop_select_queue,
+	.ndo_init = ltq_etop_init,
+	.ndo_tx_timeout = ltq_etop_tx_timeout,
+};
+
+static int __init
+ltq_etop_probe(struct platform_device *pdev)
+{
+	struct net_device *dev;
+	struct ltq_etop_priv *priv;
+	struct resource *res;
+	int err;
+	int i;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "failed to get etop resource\n");
+		err = -ENOENT;
+		goto err_out;
+	}
+
+	res = devm_request_mem_region(&pdev->dev, res->start,
+		resource_size(res), dev_name(&pdev->dev));
+	if (!res) {
+		dev_err(&pdev->dev, "failed to request etop resource\n");
+		err = -EBUSY;
+		goto err_out;
+	}
+
+	ltq_etop_membase = devm_ioremap_nocache(&pdev->dev,
+		res->start, resource_size(res));
+	if (!ltq_etop_membase) {
+		dev_err(&pdev->dev, "failed to remap etop engine %d\n",
+			pdev->id);
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	dev = alloc_etherdev_mq(sizeof(struct ltq_etop_priv), 4);
+	strcpy(dev->name, "eth%d");
+	dev->netdev_ops = &ltq_eth_netdev_ops;
+	dev->ethtool_ops = &ltq_etop_ethtool_ops;
+	priv = netdev_priv(dev);
+	priv->res = res;
+	priv->pldata = dev_get_platdata(&pdev->dev);
+	priv->netdev = dev;
+	spin_lock_init(&priv->lock);
+
+	for (i = 0; i < MAX_DMA_CHAN; i++) {
+		if (IS_TX(i))
+			netif_napi_add(dev, &priv->ch[i].napi,
+				ltq_etop_poll_tx, 8);
+		else if (IS_RX(i))
+			netif_napi_add(dev, &priv->ch[i].napi,
+				ltq_etop_poll_rx, 32);
+		priv->ch[i].netdev = dev;
+	}
+
+	err = register_netdev(dev);
+	if (err)
+		goto err_free;
+
+	platform_set_drvdata(pdev, dev);
+	return 0;
+
+err_free:
+	kfree(dev);
+err_out:
+	return err;
+}
+
+static int __devexit
+ltq_etop_remove(struct platform_device *pdev)
+{
+	struct net_device *dev = platform_get_drvdata(pdev);
+
+	if (dev) {
+		netif_tx_stop_all_queues(dev);
+		ltq_etop_hw_exit(dev);
+		ltq_etop_mdio_cleanup(dev);
+		unregister_netdev(dev);
+	}
+	return 0;
+}
+
+static struct platform_driver ltq_mii_driver = {
+	.remove = __devexit_p(ltq_etop_remove),
+	.driver = {
+		.name = "ltq_etop",
+		.owner = THIS_MODULE,
+	},
+};
+
+int __init
+init_ltq_etop(void)
+{
+	int ret = platform_driver_probe(&ltq_mii_driver, ltq_etop_probe);
+
+	if (ret)
+		pr_err("ltq_etop: Error registering platfom driver!");
+	return ret;
+}
+
+static void __exit
+exit_ltq_etop(void)
+{
+	platform_driver_unregister(&ltq_mii_driver);
+}
+
+module_init(init_ltq_etop);
+module_exit(exit_ltq_etop);
+
+MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
+MODULE_DESCRIPTION("Lantiq SoC ETOP");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From f1f0ceaada9d040a41023017c87abb1d651b44af Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Fri, 6 May 2011 00:10:02 +0200
Subject: MIPS: Lantiq: Add etop board support

Register the etop platform device inside the machtype specific init code.

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Ralph Hempel <ralph.hempel@lantiq.com>
Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2356/
Patchwork: https://patchwork.linux-mips.org/patch/2370/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/lantiq/xway/mach-easy50712.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/lantiq/xway/mach-easy50712.c b/arch/mips/lantiq/xway/mach-easy50712.c
index e5e7e09b7c14..ea5027b3239d 100644
--- a/arch/mips/lantiq/xway/mach-easy50712.c
+++ b/arch/mips/lantiq/xway/mach-easy50712.c
@@ -12,6 +12,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/input.h>
+#include <linux/phy.h>
 
 #include <lantiq_soc.h>
 #include <irq.h>
@@ -55,11 +56,16 @@ static struct ltq_pci_data ltq_pci_data = {
 	},
 };
 
+static struct ltq_eth_data ltq_eth_data = {
+	.mii_mode = PHY_INTERFACE_MODE_MII,
+};
+
 static void __init easy50712_init(void)
 {
 	ltq_register_gpio_stp();
 	ltq_register_nor(&easy50712_flash_data);
 	ltq_register_pci(&ltq_pci_data);
+	ltq_register_etop(&ltq_eth_data);
 }
 
 MIPS_MACHINE(LTQ_MACH_EASY50712,
-- 
cgit v1.2.3


From 4f0ad950880a33df792b1e63649e29f8784b0163 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Thu, 19 May 2011 09:21:28 +0100
Subject: MIPS: IP27: Remove pointless switch statement.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/sgi-ip27/ip27-timer.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index a152538d3c97..3f810c9cbf83 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -66,18 +66,7 @@ static int rt_next_event(unsigned long delta, struct clock_event_device *evt)
 static void rt_set_mode(enum clock_event_mode mode,
 		struct clock_event_device *evt)
 {
-	switch (mode) {
-	case CLOCK_EVT_MODE_ONESHOT:
-		/* The only mode supported */
-		break;
-
-	case CLOCK_EVT_MODE_PERIODIC:
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-	case CLOCK_EVT_MODE_RESUME:
-		/* Nothing to do  */
-		break;
-	}
+	/* Nothing to do ...  */
 }
 
 int rt_timer_irq;
-- 
cgit v1.2.3


From c19c20ac6338435469a2c222ef5dc55e0469a6dc Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Thu, 19 May 2011 09:21:28 +0100
Subject: MIPS: Use single define for pending work on syscall exit

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/thread_info.h | 3 +++
 arch/mips/kernel/entry.S            | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index d71160de4d10..97f8bf6639e7 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -149,6 +149,9 @@ register struct thread_info *__current_thread_info __asm__("$28");
 #define _TIF_FPUBOUND		(1<<TIF_FPUBOUND)
 #define _TIF_LOAD_WATCH		(1<<TIF_LOAD_WATCH)
 
+/* work to do in syscall_trace_leave() */
+#define _TIF_WORK_SYSCALL_EXIT	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT)
+
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK		(0x0000ffef &				\
 					~(_TIF_SECCOMP | _TIF_SYSCALL_AUDIT))
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index ffa331029e08..a917e34b792b 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -167,7 +167,7 @@ work_notifysig:				# deal with pending signals and
 FEXPORT(syscall_exit_work_partial)
 	SAVE_STATIC
 syscall_exit_work:
-	li	t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+	li	t0, _TIF_WORK_SYSCALL_EXIT
 	and	t0, a2			# a2 is preloaded with TI_FLAGS
 	beqz	t0, work_pending	# trace bit set?
 	local_irq_enable		# could let do_syscall_trace()
-- 
cgit v1.2.3


From 8b659a393171aed3dafa1d7455ac9eec1f3ed315 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Thu, 19 May 2011 09:21:29 +0100
Subject: MIPS: Split do_syscall_trace into two functions.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/ptrace.h |  3 ++-
 arch/mips/kernel/entry.S       |  5 ++---
 arch/mips/kernel/ptrace.c      | 43 ++++++++++++++++++++++++++++++++++--------
 arch/mips/kernel/scall32-o32.S |  3 +--
 arch/mips/kernel/scall64-64.S  |  3 +--
 arch/mips/kernel/scall64-n32.S |  3 +--
 arch/mips/kernel/scall64-o32.S |  3 +--
 7 files changed, 43 insertions(+), 20 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index 9f1b8dba2c81..de39b1f343ea 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -141,7 +141,8 @@ extern int ptrace_set_watch_regs(struct task_struct *child,
 #define instruction_pointer(regs) ((regs)->cp0_epc)
 #define profile_pc(regs) instruction_pointer(regs)
 
-extern asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit);
+extern asmlinkage void syscall_trace_enter(struct pt_regs *regs);
+extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
 
 extern NORET_TYPE void die(const char *, struct pt_regs *) ATTRIB_NORET;
 
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index a917e34b792b..37acfa036d44 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -170,11 +170,10 @@ syscall_exit_work:
 	li	t0, _TIF_WORK_SYSCALL_EXIT
 	and	t0, a2			# a2 is preloaded with TI_FLAGS
 	beqz	t0, work_pending	# trace bit set?
-	local_irq_enable		# could let do_syscall_trace()
+	local_irq_enable		# could let syscall_trace_leave()
 					# call schedule() instead
 	move	a0, sp
-	li	a1, 1
-	jal	do_syscall_trace
+	jal	syscall_trace_leave
 	b	resume_userspace
 
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_MIPS_MT)
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 584e6b55c865..4e6ea1ffad46 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -533,15 +533,10 @@ static inline int audit_arch(void)
  * Notification of system call entry/exit
  * - triggered by current->work.syscall_trace
  */
-asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
+asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 {
 	/* do the secure computing check first */
-	if (!entryexit)
-		secure_computing(regs->regs[2]);
-
-	if (unlikely(current->audit_context) && entryexit)
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]),
-		                   -regs->regs[2]);
+	secure_computing(regs->regs[2]);
 
 	if (!(current->ptrace & PT_PTRACED))
 		goto out;
@@ -565,8 +560,40 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
 	}
 
 out:
-	if (unlikely(current->audit_context) && !entryexit)
+	if (unlikely(current->audit_context))
 		audit_syscall_entry(audit_arch(), regs->regs[2],
 				    regs->regs[4], regs->regs[5],
 				    regs->regs[6], regs->regs[7]);
 }
+
+/*
+ * Notification of system call entry/exit
+ * - triggered by current->work.syscall_trace
+ */
+asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+{
+	if (unlikely(current->audit_context))
+		audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]),
+		                   -regs->regs[2]);
+
+	if (!(current->ptrace & PT_PTRACED))
+		return;
+
+	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+		return;
+
+	/* The 0x80 provides a way for the tracing parent to distinguish
+	   between a syscall stop and SIGTRAP delivery */
+	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ?
+	                         0x80 : 0));
+
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use.  strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP.  -brl
+	 */
+	if (current->exit_code) {
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 7f1377eb22d3..7a8e1dd7f6f2 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -88,8 +88,7 @@ syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	li	a1, 0
-	jal	do_syscall_trace
+	jal	syscall_trace_enter
 
 	move	t0, s0
 	RESTORE_STATIC
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 7c0ef7f128bf..2d31c83224f9 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -91,8 +91,7 @@ syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	li	a1, 0
-	jal	do_syscall_trace
+	jal	syscall_trace_enter
 
 	move	t0, s0
 	RESTORE_STATIC
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index de6c5563beab..38a0503b9a4a 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -89,8 +89,7 @@ n32_syscall_trace_entry:
 	SAVE_STATIC
 	move	s0, t2
 	move	a0, sp
-	li	a1, 0
-	jal	do_syscall_trace
+	jal	syscall_trace_enter
 
 	move	t0, s0
 	RESTORE_STATIC
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index b0541dda8830..91ea5e4041dd 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -123,8 +123,7 @@ trace_a_syscall:
 
 	move	s0, t2			# Save syscall pointer
 	move	a0, sp
-	li	a1, 0
-	jal	do_syscall_trace
+	jal	syscall_trace_enter
 
 	move	t0, s0
 	RESTORE_STATIC
-- 
cgit v1.2.3


From 0591128066bdfe07e0ef0ab7f877f794d8ba071d Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Sun, 8 May 2011 10:42:12 +0200
Subject: MIPS: DB1200: Set Config[OD] for improved stability.

Setting Config[OD] gets rid of a _LOT_ of spurious CPLD interrupts,
but also decreases overall performance a bit.

Signed-off-by: Manuel Lauss <manuel.lauss@googlemail.com>
To: Linux-MIPS <linux-mips@linux-mips.org>
Cc: Florian Fainelli <florian@openwrt.org>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Patchwork: https://patchwork.linux-mips.org/patch/2347/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/alchemy/common/setup.c           | 4 ++--
 arch/mips/alchemy/devboards/db1200/setup.c | 7 +++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c
index 561e5da2658b..1b887c868417 100644
--- a/arch/mips/alchemy/common/setup.c
+++ b/arch/mips/alchemy/common/setup.c
@@ -52,8 +52,6 @@ void __init plat_mem_setup(void)
 	/* this is faster than wasting cycles trying to approximate it */
 	preset_lpj = (est_freq >> 1) / HZ;
 
-	board_setup();  /* board specific setup */
-
 	if (au1xxx_cpu_needs_config_od())
 		/* Various early Au1xx0 errata corrected by this */
 		set_c0_config(1 << 19); /* Set Config[OD] */
@@ -61,6 +59,8 @@ void __init plat_mem_setup(void)
 		/* Clear to obtain best system bus performance */
 		clear_c0_config(1 << 19); /* Clear Config[OD] */
 
+	board_setup();  /* board specific setup */
+
 	/* IO/MEM resources. */
 	set_io_port_base(0);
 	ioport_resource.start = IOPORT_RESOURCE_START;
diff --git a/arch/mips/alchemy/devboards/db1200/setup.c b/arch/mips/alchemy/devboards/db1200/setup.c
index 4a8980027ecf..1dac4f27d334 100644
--- a/arch/mips/alchemy/devboards/db1200/setup.c
+++ b/arch/mips/alchemy/devboards/db1200/setup.c
@@ -23,6 +23,13 @@ void __init board_setup(void)
 	unsigned long freq0, clksrc, div, pfc;
 	unsigned short whoami;
 
+	/* Set Config[OD] (disable overlapping bus transaction):
+	 * This gets rid of a _lot_ of spurious interrupts (especially
+	 * wrt. IDE); but incurs ~10% performance hit in some
+	 * cpu-bound applications.
+	 */
+	set_c0_config(1 << 19);
+
 	bcsr_init(DB1200_BCSR_PHYS_ADDR,
 		  DB1200_BCSR_PHYS_ADDR + DB1200_BCSR_HEXLED_OFS);
 
-- 
cgit v1.2.3


From c1e58a3129bc327f7e0eb06fd4fe5ebf2af5d8ef Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Sun, 8 May 2011 10:42:13 +0200
Subject: MIPS: Alchemy: update inlinable GPIO API

This fixes a build failure with gpio_keys and CONFIG_GPIOLIB=n (mtx1):
  CC      drivers/input/keyboard/gpio_keys.o
gpio_keys.c: In function 'gpio_keys_report_event':
gpio_keys.c:325:2: error: implicit declaration of function 'gpio_get_value_cansleep'
gpio_keys.c: In function 'gpio_keys_setup_key':
gpio_keys.c:390:3: error: implicit declaration of function 'gpio_set_debounce'

Also add stubs for the other new functions.

Signed-off-by: Manuel Lauss <manuel.lauss@googlemail.com>
To: Linux-MIPS <linux-mips@linux-mips.org>
Cc: Florian Fainelli <florian@openwrt.org>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Patchwork: https://patchwork.linux-mips.org/patch/2346/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mach-au1x00/gpio-au1000.h | 51 +++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
index 62d2f136d941..8f8c1c55593a 100644
--- a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
@@ -24,6 +24,7 @@
 
 #define MAKE_IRQ(intc, off)	(AU1000_INTC##intc##_INT_BASE + (off))
 
+struct gpio;
 
 static inline int au1000_gpio1_to_irq(int gpio)
 {
@@ -556,6 +557,16 @@ static inline void gpio_set_value(int gpio, int v)
 	alchemy_gpio_set_value(gpio, v);
 }
 
+static inline int gpio_get_value_cansleep(unsigned gpio)
+{
+	return gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value_cansleep(unsigned gpio, int value)
+{
+	gpio_set_value(gpio, value);
+}
+
 static inline int gpio_is_valid(int gpio)
 {
 	return alchemy_gpio_is_valid(gpio);
@@ -581,10 +592,50 @@ static inline int gpio_request(unsigned gpio, const char *label)
 	return 0;
 }
 
+static inline int gpio_request_one(unsigned gpio,
+					unsigned long flags, const char *label)
+{
+	return 0;
+}
+
+static inline int gpio_request_array(struct gpio *array, size_t num)
+{
+	return 0;
+}
+
 static inline void gpio_free(unsigned gpio)
 {
 }
 
+static inline void gpio_free_array(struct gpio *array, size_t num)
+{
+}
+
+static inline int gpio_set_debounce(unsigned gpio, unsigned debounce)
+{
+	return -ENOSYS;
+}
+
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+	return -ENOSYS;
+}
+
+static inline int gpio_export_link(struct device *dev, const char *name,
+				   unsigned gpio)
+{
+	return -ENOSYS;
+}
+
+static inline int gpio_sysfs_set_active_low(unsigned gpio, int value)
+{
+	return -ENOSYS;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+}
+
 #endif	/* !CONFIG_ALCHEMY_GPIO_INDIRECT */
 
 
-- 
cgit v1.2.3


From dca7587185b3a499a09a9e2755316eee31c49c7f Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Sun, 8 May 2011 10:42:14 +0200
Subject: MIPS: Alchemy: irq code and constant cleanup

replace au_readl/au_writel with __raw_readl/__raw_writel,
and clean up IC-related stuff from the headers.

Signed-off-by: Manuel Lauss <manuel.lauss@googlemail.com>
To: Linux-MIPS <linux-mips@linux-mips.org>
Cc: Florian Fainelli <florian@openwrt.org>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Patchwork: https://patchwork.linux-mips.org/patch/2354/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/alchemy/common/irq.c             | 250 ++++++++++++++++-------------
 arch/mips/include/asm/mach-au1x00/au1000.h | 121 +-------------
 2 files changed, 140 insertions(+), 231 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/common/irq.c b/arch/mips/alchemy/common/irq.c
index 55dd7c888517..b72e12829480 100644
--- a/arch/mips/alchemy/common/irq.c
+++ b/arch/mips/alchemy/common/irq.c
@@ -39,6 +39,36 @@
 #include <asm/mach-pb1x00/pb1000.h>
 #endif
 
+/* Interrupt Controller register offsets */
+#define IC_CFG0RD	0x40
+#define IC_CFG0SET	0x40
+#define IC_CFG0CLR	0x44
+#define IC_CFG1RD	0x48
+#define IC_CFG1SET	0x48
+#define IC_CFG1CLR	0x4C
+#define IC_CFG2RD	0x50
+#define IC_CFG2SET	0x50
+#define IC_CFG2CLR	0x54
+#define IC_REQ0INT	0x54
+#define IC_SRCRD	0x58
+#define IC_SRCSET	0x58
+#define IC_SRCCLR	0x5C
+#define IC_REQ1INT	0x5C
+#define IC_ASSIGNRD	0x60
+#define IC_ASSIGNSET	0x60
+#define IC_ASSIGNCLR	0x64
+#define IC_WAKERD	0x68
+#define IC_WAKESET	0x68
+#define IC_WAKECLR	0x6C
+#define IC_MASKRD	0x70
+#define IC_MASKSET	0x70
+#define IC_MASKCLR	0x74
+#define IC_RISINGRD	0x78
+#define IC_RISINGCLR	0x78
+#define IC_FALLINGRD	0x7C
+#define IC_FALLINGCLR	0x7C
+#define IC_TESTBIT	0x80
+
 static int au1x_ic_settype(struct irq_data *d, unsigned int flow_type);
 
 /* NOTE on interrupt priorities: The original writers of this code said:
@@ -221,89 +251,101 @@ struct au1xxx_irqmap au1200_irqmap[] __initdata = {
 static void au1x_ic0_unmask(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
-	au_writel(1 << bit, IC0_MASKSET);
-	au_writel(1 << bit, IC0_WAKESET);
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
+
+	__raw_writel(1 << bit, base + IC_MASKSET);
+	__raw_writel(1 << bit, base + IC_WAKESET);
+	wmb();
 }
 
 static void au1x_ic1_unmask(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
-	au_writel(1 << bit, IC1_MASKSET);
-	au_writel(1 << bit, IC1_WAKESET);
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
+
+	__raw_writel(1 << bit, base + IC_MASKSET);
+	__raw_writel(1 << bit, base + IC_WAKESET);
 
 /* very hacky. does the pb1000 cpld auto-disable this int?
  * nowhere in the current kernel sources is it disabled.	--mlau
  */
 #if defined(CONFIG_MIPS_PB1000)
 	if (d->irq == AU1000_GPIO15_INT)
-		au_writel(0x4000, PB1000_MDR); /* enable int */
+		__raw_writel(0x4000, (void __iomem *)PB1000_MDR); /* enable int */
 #endif
-	au_sync();
+	wmb();
 }
 
 static void au1x_ic0_mask(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
-	au_writel(1 << bit, IC0_MASKCLR);
-	au_writel(1 << bit, IC0_WAKECLR);
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
+
+	__raw_writel(1 << bit, base + IC_MASKCLR);
+	__raw_writel(1 << bit, base + IC_WAKECLR);
+	wmb();
 }
 
 static void au1x_ic1_mask(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
-	au_writel(1 << bit, IC1_MASKCLR);
-	au_writel(1 << bit, IC1_WAKECLR);
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
+
+	__raw_writel(1 << bit, base + IC_MASKCLR);
+	__raw_writel(1 << bit, base + IC_WAKECLR);
+	wmb();
 }
 
 static void au1x_ic0_ack(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
 
 	/*
 	 * This may assume that we don't get interrupts from
 	 * both edges at once, or if we do, that we don't care.
 	 */
-	au_writel(1 << bit, IC0_FALLINGCLR);
-	au_writel(1 << bit, IC0_RISINGCLR);
-	au_sync();
+	__raw_writel(1 << bit, base + IC_FALLINGCLR);
+	__raw_writel(1 << bit, base + IC_RISINGCLR);
+	wmb();
 }
 
 static void au1x_ic1_ack(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
 
 	/*
 	 * This may assume that we don't get interrupts from
 	 * both edges at once, or if we do, that we don't care.
 	 */
-	au_writel(1 << bit, IC1_FALLINGCLR);
-	au_writel(1 << bit, IC1_RISINGCLR);
-	au_sync();
+	__raw_writel(1 << bit, base + IC_FALLINGCLR);
+	__raw_writel(1 << bit, base + IC_RISINGCLR);
+	wmb();
 }
 
 static void au1x_ic0_maskack(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
 
-	au_writel(1 << bit, IC0_WAKECLR);
-	au_writel(1 << bit, IC0_MASKCLR);
-	au_writel(1 << bit, IC0_RISINGCLR);
-	au_writel(1 << bit, IC0_FALLINGCLR);
-	au_sync();
+	__raw_writel(1 << bit, base + IC_WAKECLR);
+	__raw_writel(1 << bit, base + IC_MASKCLR);
+	__raw_writel(1 << bit, base + IC_RISINGCLR);
+	__raw_writel(1 << bit, base + IC_FALLINGCLR);
+	wmb();
 }
 
 static void au1x_ic1_maskack(struct irq_data *d)
 {
 	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
 
-	au_writel(1 << bit, IC1_WAKECLR);
-	au_writel(1 << bit, IC1_MASKCLR);
-	au_writel(1 << bit, IC1_RISINGCLR);
-	au_writel(1 << bit, IC1_FALLINGCLR);
-	au_sync();
+	__raw_writel(1 << bit, base + IC_WAKECLR);
+	__raw_writel(1 << bit, base + IC_MASKCLR);
+	__raw_writel(1 << bit, base + IC_RISINGCLR);
+	__raw_writel(1 << bit, base + IC_FALLINGCLR);
+	wmb();
 }
 
 static int au1x_ic1_setwake(struct irq_data *d, unsigned int on)
@@ -318,13 +360,13 @@ static int au1x_ic1_setwake(struct irq_data *d, unsigned int on)
 		return -EINVAL;
 
 	local_irq_save(flags);
-	wakemsk = au_readl(SYS_WAKEMSK);
+	wakemsk = __raw_readl((void __iomem *)SYS_WAKEMSK);
 	if (on)
 		wakemsk |= 1 << bit;
 	else
 		wakemsk &= ~(1 << bit);
-	au_writel(wakemsk, SYS_WAKEMSK);
-	au_sync();
+	__raw_writel(wakemsk, (void __iomem *)SYS_WAKEMSK);
+	wmb();
 	local_irq_restore(flags);
 
 	return 0;
@@ -356,81 +398,74 @@ static struct irq_chip au1x_ic1_chip = {
 static int au1x_ic_settype(struct irq_data *d, unsigned int flow_type)
 {
 	struct irq_chip *chip;
-	unsigned long icr[6];
-	unsigned int bit, ic, irq = d->irq;
+	unsigned int bit, irq = d->irq;
 	irq_flow_handler_t handler = NULL;
 	unsigned char *name = NULL;
+	void __iomem *base;
 	int ret;
 
 	if (irq >= AU1000_INTC1_INT_BASE) {
 		bit = irq - AU1000_INTC1_INT_BASE;
 		chip = &au1x_ic1_chip;
-		ic = 1;
+		base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
 	} else {
 		bit = irq - AU1000_INTC0_INT_BASE;
 		chip = &au1x_ic0_chip;
-		ic = 0;
+		base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
 	}
 
 	if (bit > 31)
 		return -EINVAL;
 
-	icr[0] = ic ? IC1_CFG0SET : IC0_CFG0SET;
-	icr[1] = ic ? IC1_CFG1SET : IC0_CFG1SET;
-	icr[2] = ic ? IC1_CFG2SET : IC0_CFG2SET;
-	icr[3] = ic ? IC1_CFG0CLR : IC0_CFG0CLR;
-	icr[4] = ic ? IC1_CFG1CLR : IC0_CFG1CLR;
-	icr[5] = ic ? IC1_CFG2CLR : IC0_CFG2CLR;
-
 	ret = 0;
 
 	switch (flow_type) {	/* cfgregs 2:1:0 */
 	case IRQ_TYPE_EDGE_RISING:	/* 0:0:1 */
-		au_writel(1 << bit, icr[5]);
-		au_writel(1 << bit, icr[4]);
-		au_writel(1 << bit, icr[0]);
+		__raw_writel(1 << bit, base + IC_CFG2CLR);
+		__raw_writel(1 << bit, base + IC_CFG1CLR);
+		__raw_writel(1 << bit, base + IC_CFG0SET);
 		handler = handle_edge_irq;
 		name = "riseedge";
 		break;
 	case IRQ_TYPE_EDGE_FALLING:	/* 0:1:0 */
-		au_writel(1 << bit, icr[5]);
-		au_writel(1 << bit, icr[1]);
-		au_writel(1 << bit, icr[3]);
+		__raw_writel(1 << bit, base + IC_CFG2CLR);
+		__raw_writel(1 << bit, base + IC_CFG1SET);
+		__raw_writel(1 << bit, base + IC_CFG0CLR);
 		handler = handle_edge_irq;
 		name = "falledge";
 		break;
 	case IRQ_TYPE_EDGE_BOTH:	/* 0:1:1 */
-		au_writel(1 << bit, icr[5]);
-		au_writel(1 << bit, icr[1]);
-		au_writel(1 << bit, icr[0]);
+		__raw_writel(1 << bit, base + IC_CFG2CLR);
+		__raw_writel(1 << bit, base + IC_CFG1SET);
+		__raw_writel(1 << bit, base + IC_CFG0SET);
 		handler = handle_edge_irq;
 		name = "bothedge";
 		break;
 	case IRQ_TYPE_LEVEL_HIGH:	/* 1:0:1 */
-		au_writel(1 << bit, icr[2]);
-		au_writel(1 << bit, icr[4]);
-		au_writel(1 << bit, icr[0]);
+		__raw_writel(1 << bit, base + IC_CFG2SET);
+		__raw_writel(1 << bit, base + IC_CFG1CLR);
+		__raw_writel(1 << bit, base + IC_CFG0SET);
 		handler = handle_level_irq;
 		name = "hilevel";
 		break;
 	case IRQ_TYPE_LEVEL_LOW:	/* 1:1:0 */
-		au_writel(1 << bit, icr[2]);
-		au_writel(1 << bit, icr[1]);
-		au_writel(1 << bit, icr[3]);
+		__raw_writel(1 << bit, base + IC_CFG2SET);
+		__raw_writel(1 << bit, base + IC_CFG1SET);
+		__raw_writel(1 << bit, base + IC_CFG0CLR);
 		handler = handle_level_irq;
 		name = "lowlevel";
 		break;
 	case IRQ_TYPE_NONE:		/* 0:0:0 */
-		au_writel(1 << bit, icr[5]);
-		au_writel(1 << bit, icr[4]);
-		au_writel(1 << bit, icr[3]);
+		__raw_writel(1 << bit, base + IC_CFG2CLR);
+		__raw_writel(1 << bit, base + IC_CFG1CLR);
+		__raw_writel(1 << bit, base + IC_CFG0CLR);
 		break;
 	default:
 		ret = -EINVAL;
 	}
 	__irq_set_chip_handler_name_locked(d->irq, chip, handler, name);
 
-	au_sync();
+	wmb();
 
 	return ret;
 }
@@ -444,21 +479,21 @@ asmlinkage void plat_irq_dispatch(void)
 		off = MIPS_CPU_IRQ_BASE + 7;
 		goto handle;
 	} else if (pending & CAUSEF_IP2) {
-		s = IC0_REQ0INT;
+		s = KSEG1ADDR(AU1000_IC0_PHYS_ADDR) + IC_REQ0INT;
 		off = AU1000_INTC0_INT_BASE;
 	} else if (pending & CAUSEF_IP3) {
-		s = IC0_REQ1INT;
+		s = KSEG1ADDR(AU1000_IC0_PHYS_ADDR) + IC_REQ1INT;
 		off = AU1000_INTC0_INT_BASE;
 	} else if (pending & CAUSEF_IP4) {
-		s = IC1_REQ0INT;
+		s = KSEG1ADDR(AU1000_IC1_PHYS_ADDR) + IC_REQ0INT;
 		off = AU1000_INTC1_INT_BASE;
 	} else if (pending & CAUSEF_IP5) {
-		s = IC1_REQ1INT;
+		s = KSEG1ADDR(AU1000_IC1_PHYS_ADDR) + IC_REQ1INT;
 		off = AU1000_INTC1_INT_BASE;
 	} else
 		goto spurious;
 
-	s = au_readl(s);
+	s = __raw_readl((void __iomem *)s);
 	if (unlikely(!s)) {
 spurious:
 		spurious_interrupt();
@@ -469,48 +504,42 @@ handle:
 	do_IRQ(off);
 }
 
+
+static inline void ic_init(void __iomem *base)
+{
+	/* initialize interrupt controller to a safe state */
+	__raw_writel(0xffffffff, base + IC_CFG0CLR);
+	__raw_writel(0xffffffff, base + IC_CFG1CLR);
+	__raw_writel(0xffffffff, base + IC_CFG2CLR);
+	__raw_writel(0xffffffff, base + IC_MASKCLR);
+	__raw_writel(0xffffffff, base + IC_ASSIGNCLR);
+	__raw_writel(0xffffffff, base + IC_WAKECLR);
+	__raw_writel(0xffffffff, base + IC_SRCSET);
+	__raw_writel(0xffffffff, base + IC_FALLINGCLR);
+	__raw_writel(0xffffffff, base + IC_RISINGCLR);
+	__raw_writel(0x00000000, base + IC_TESTBIT);
+	wmb();
+}
+
 static void __init au1000_init_irq(struct au1xxx_irqmap *map)
 {
 	unsigned int bit, irq_nr;
-	int i;
-
-	/*
-	 * Initialize interrupt controllers to a safe state.
-	 */
-	au_writel(0xffffffff, IC0_CFG0CLR);
-	au_writel(0xffffffff, IC0_CFG1CLR);
-	au_writel(0xffffffff, IC0_CFG2CLR);
-	au_writel(0xffffffff, IC0_MASKCLR);
-	au_writel(0xffffffff, IC0_ASSIGNCLR);
-	au_writel(0xffffffff, IC0_WAKECLR);
-	au_writel(0xffffffff, IC0_SRCSET);
-	au_writel(0xffffffff, IC0_FALLINGCLR);
-	au_writel(0xffffffff, IC0_RISINGCLR);
-	au_writel(0x00000000, IC0_TESTBIT);
-
-	au_writel(0xffffffff, IC1_CFG0CLR);
-	au_writel(0xffffffff, IC1_CFG1CLR);
-	au_writel(0xffffffff, IC1_CFG2CLR);
-	au_writel(0xffffffff, IC1_MASKCLR);
-	au_writel(0xffffffff, IC1_ASSIGNCLR);
-	au_writel(0xffffffff, IC1_WAKECLR);
-	au_writel(0xffffffff, IC1_SRCSET);
-	au_writel(0xffffffff, IC1_FALLINGCLR);
-	au_writel(0xffffffff, IC1_RISINGCLR);
-	au_writel(0x00000000, IC1_TESTBIT);
+	void __iomem *base;
 
+	ic_init((void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR));
+	ic_init((void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR));
 	mips_cpu_irq_init();
 
 	/* register all 64 possible IC0+IC1 irq sources as type "none".
 	 * Use set_irq_type() to set edge/level behaviour at runtime.
 	 */
-	for (i = AU1000_INTC0_INT_BASE;
-	     (i < AU1000_INTC0_INT_BASE + 32); i++)
-		au1x_ic_settype(irq_get_irq_data(i), IRQ_TYPE_NONE);
+	for (irq_nr = AU1000_INTC0_INT_BASE;
+	     (irq_nr < AU1000_INTC0_INT_BASE + 32); irq_nr++)
+		au1x_ic_settype(irq_get_irq_data(irq_nr), IRQ_TYPE_NONE);
 
-	for (i = AU1000_INTC1_INT_BASE;
-	     (i < AU1000_INTC1_INT_BASE + 32); i++)
-		au1x_ic_settype(irq_get_irq_data(i), IRQ_TYPE_NONE);
+	for (irq_nr = AU1000_INTC1_INT_BASE;
+	     (irq_nr < AU1000_INTC1_INT_BASE + 32); irq_nr++)
+		au1x_ic_settype(irq_get_irq_data(irq_nr), IRQ_TYPE_NONE);
 
 	/*
 	 * Initialize IC0, which is fixed per processor.
@@ -520,13 +549,13 @@ static void __init au1000_init_irq(struct au1xxx_irqmap *map)
 
 		if (irq_nr >= AU1000_INTC1_INT_BASE) {
 			bit = irq_nr - AU1000_INTC1_INT_BASE;
-			if (map->im_request)
-				au_writel(1 << bit, IC1_ASSIGNSET);
+			base = (void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR);
 		} else {
 			bit = irq_nr - AU1000_INTC0_INT_BASE;
-			if (map->im_request)
-				au_writel(1 << bit, IC0_ASSIGNSET);
+			base = (void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR);
 		}
+		if (map->im_request)
+			__raw_writel(1 << bit, base + IC_ASSIGNSET);
 
 		au1x_ic_settype(irq_get_irq_data(irq_nr), map->im_type);
 		++map;
@@ -583,17 +612,8 @@ static int alchemy_ic_resume(struct sys_device *dev)
 	struct alchemy_ic_sysdev *icdev =
 			container_of(dev, struct alchemy_ic_sysdev, sysdev);
 
-	__raw_writel(0xffffffff, icdev->base + IC_MASKCLR);
-	__raw_writel(0xffffffff, icdev->base + IC_CFG0CLR);
-	__raw_writel(0xffffffff, icdev->base + IC_CFG1CLR);
-	__raw_writel(0xffffffff, icdev->base + IC_CFG2CLR);
-	__raw_writel(0xffffffff, icdev->base + IC_SRCCLR);
-	__raw_writel(0xffffffff, icdev->base + IC_ASSIGNCLR);
-	__raw_writel(0xffffffff, icdev->base + IC_WAKECLR);
-	__raw_writel(0xffffffff, icdev->base + IC_RISINGCLR);
-	__raw_writel(0xffffffff, icdev->base + IC_FALLINGCLR);
-	__raw_writel(0x00000000, icdev->base + IC_TESTBIT);
-	wmb();
+	ic_init(icdev->base);
+
 	__raw_writel(icdev->pmdata[0], icdev->base + IC_CFG0SET);
 	__raw_writel(icdev->pmdata[1], icdev->base + IC_CFG1SET);
 	__raw_writel(icdev->pmdata[2], icdev->base + IC_CFG2SET);
@@ -617,7 +637,7 @@ static struct sysdev_class alchemy_ic_sysdev_class = {
 static int __init alchemy_ic_sysdev_init(void)
 {
 	struct alchemy_ic_sysdev *icdev;
-	unsigned long icbase[2] = { IC0_PHYS_ADDR, IC1_PHYS_ADDR };
+	unsigned long icbase[2] = { AU1000_IC0_PHYS_ADDR, AU1000_IC1_PHYS_ADDR };
 	int err, i;
 
 	err = sysdev_class_register(&alchemy_ic_sysdev_class);
diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h
index a6976619160a..66cfcdc75e4f 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000.h
@@ -630,8 +630,13 @@ enum soc_au1200_ints {
 
 /*
  * Physical base addresses for integrated peripherals
+ * 0..au1000 1..au1500 2..au1100 3..au1550 4..au1200
  */
 
+#define AU1000_IC0_PHYS_ADDR		0x10400000 /* 01234 */
+#define AU1000_IC1_PHYS_ADDR		0x11800000 /* 01234 */
+
+
 #ifdef CONFIG_SOC_AU1000
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
@@ -643,8 +648,6 @@ enum soc_au1200_ints {
 #define	DMA5_PHYS_ADDR		0x14002500
 #define	DMA6_PHYS_ADDR		0x14002600
 #define	DMA7_PHYS_ADDR		0x14002700
-#define	IC0_PHYS_ADDR		0x10400000
-#define	IC1_PHYS_ADDR		0x11800000
 #define	AC97_PHYS_ADDR		0x10000000
 #define	USBH_PHYS_ADDR		0x10100000
 #define	USBD_PHYS_ADDR		0x10200000
@@ -680,8 +683,6 @@ enum soc_au1200_ints {
 #define	DMA5_PHYS_ADDR		0x14002500
 #define	DMA6_PHYS_ADDR		0x14002600
 #define	DMA7_PHYS_ADDR		0x14002700
-#define	IC0_PHYS_ADDR		0x10400000
-#define	IC1_PHYS_ADDR		0x11800000
 #define	AC97_PHYS_ADDR		0x10000000
 #define	USBH_PHYS_ADDR		0x10100000
 #define	USBD_PHYS_ADDR		0x10200000
@@ -718,10 +719,8 @@ enum soc_au1200_ints {
 #define	DMA5_PHYS_ADDR		0x14002500
 #define	DMA6_PHYS_ADDR		0x14002600
 #define	DMA7_PHYS_ADDR		0x14002700
-#define	IC0_PHYS_ADDR		0x10400000
 #define SD0_PHYS_ADDR		0x10600000
 #define SD1_PHYS_ADDR		0x10680000
-#define	IC1_PHYS_ADDR		0x11800000
 #define	AC97_PHYS_ADDR		0x10000000
 #define	USBH_PHYS_ADDR		0x10100000
 #define	USBD_PHYS_ADDR		0x10200000
@@ -749,8 +748,6 @@ enum soc_au1200_ints {
 #ifdef CONFIG_SOC_AU1550
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
-#define	IC0_PHYS_ADDR		0x10400000
-#define	IC1_PHYS_ADDR		0x11800000
 #define	USBH_PHYS_ADDR		0x14020000
 #define	USBD_PHYS_ADDR		0x10200000
 #define PCI_PHYS_ADDR		0x14005000
@@ -786,8 +783,6 @@ enum soc_au1200_ints {
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
 #define AES_PHYS_ADDR		0x10300000
 #define CIM_PHYS_ADDR		0x14004000
-#define	IC0_PHYS_ADDR		0x10400000
-#define	IC1_PHYS_ADDR		0x11800000
 #define USBM_PHYS_ADDR		0x14020000
 #define	USBH_PHYS_ADDR		0x14020100
 #define	UART0_PHYS_ADDR		0x11100000
@@ -835,112 +830,6 @@ enum soc_au1200_ints {
 #endif
 
 
-/* Interrupt Controller register offsets */
-#define IC_CFG0RD		0x40
-#define IC_CFG0SET		0x40
-#define IC_CFG0CLR		0x44
-#define IC_CFG1RD		0x48
-#define IC_CFG1SET		0x48
-#define IC_CFG1CLR		0x4C
-#define IC_CFG2RD		0x50
-#define IC_CFG2SET		0x50
-#define IC_CFG2CLR		0x54
-#define IC_REQ0INT		0x54
-#define IC_SRCRD		0x58
-#define IC_SRCSET		0x58
-#define IC_SRCCLR		0x5C
-#define IC_REQ1INT		0x5C
-#define IC_ASSIGNRD		0x60
-#define IC_ASSIGNSET		0x60
-#define IC_ASSIGNCLR		0x64
-#define IC_WAKERD		0x68
-#define IC_WAKESET		0x68
-#define IC_WAKECLR		0x6C
-#define IC_MASKRD		0x70
-#define IC_MASKSET		0x70
-#define IC_MASKCLR		0x74
-#define IC_RISINGRD		0x78
-#define IC_RISINGCLR		0x78
-#define IC_FALLINGRD		0x7C
-#define IC_FALLINGCLR		0x7C
-#define IC_TESTBIT		0x80
-
-
-/* Interrupt Controller 0 */
-#define IC0_CFG0RD		0xB0400040
-#define IC0_CFG0SET		0xB0400040
-#define IC0_CFG0CLR		0xB0400044
-
-#define IC0_CFG1RD		0xB0400048
-#define IC0_CFG1SET		0xB0400048
-#define IC0_CFG1CLR		0xB040004C
-
-#define IC0_CFG2RD		0xB0400050
-#define IC0_CFG2SET		0xB0400050
-#define IC0_CFG2CLR		0xB0400054
-
-#define IC0_REQ0INT		0xB0400054
-#define IC0_SRCRD		0xB0400058
-#define IC0_SRCSET		0xB0400058
-#define IC0_SRCCLR		0xB040005C
-#define IC0_REQ1INT		0xB040005C
-
-#define IC0_ASSIGNRD		0xB0400060
-#define IC0_ASSIGNSET		0xB0400060
-#define IC0_ASSIGNCLR		0xB0400064
-
-#define IC0_WAKERD		0xB0400068
-#define IC0_WAKESET		0xB0400068
-#define IC0_WAKECLR		0xB040006C
-
-#define IC0_MASKRD		0xB0400070
-#define IC0_MASKSET		0xB0400070
-#define IC0_MASKCLR		0xB0400074
-
-#define IC0_RISINGRD		0xB0400078
-#define IC0_RISINGCLR		0xB0400078
-#define IC0_FALLINGRD		0xB040007C
-#define IC0_FALLINGCLR		0xB040007C
-
-#define IC0_TESTBIT		0xB0400080
-
-/* Interrupt Controller 1 */
-#define IC1_CFG0RD		0xB1800040
-#define IC1_CFG0SET		0xB1800040
-#define IC1_CFG0CLR		0xB1800044
-
-#define IC1_CFG1RD		0xB1800048
-#define IC1_CFG1SET		0xB1800048
-#define IC1_CFG1CLR		0xB180004C
-
-#define IC1_CFG2RD		0xB1800050
-#define IC1_CFG2SET		0xB1800050
-#define IC1_CFG2CLR		0xB1800054
-
-#define IC1_REQ0INT		0xB1800054
-#define IC1_SRCRD		0xB1800058
-#define IC1_SRCSET		0xB1800058
-#define IC1_SRCCLR		0xB180005C
-#define IC1_REQ1INT		0xB180005C
-
-#define IC1_ASSIGNRD            0xB1800060
-#define IC1_ASSIGNSET           0xB1800060
-#define IC1_ASSIGNCLR           0xB1800064
-
-#define IC1_WAKERD		0xB1800068
-#define IC1_WAKESET		0xB1800068
-#define IC1_WAKECLR		0xB180006C
-
-#define IC1_MASKRD		0xB1800070
-#define IC1_MASKSET		0xB1800070
-#define IC1_MASKCLR		0xB1800074
-
-#define IC1_RISINGRD		0xB1800078
-#define IC1_RISINGCLR		0xB1800078
-#define IC1_FALLINGRD		0xB180007C
-#define IC1_FALLINGCLR		0xB180007C
-
-#define IC1_TESTBIT		0xB1800080
 
 
 /* Au1000 */
-- 
cgit v1.2.3


From 4b5c82b5e57ac6cb919e7e74984e28b312bdf10c Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Sun, 8 May 2011 10:42:15 +0200
Subject: MIPS: Alchemy: Convert irq.c to syscore_ops.

Convert the PM sysdev to use syscore_ops instead.

Signed-off-by: Manuel Lauss <manuel.lauss@googlemail.com>
To: Linux-MIPS <linux-mips@linux-mips.org>
Cc: Florian Fainelli <florian@openwrt.org>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Patchwork: https://patchwork.linux-mips.org/patch/2350/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/alchemy/common/irq.c | 101 +++++++++++++++++------------------------
 1 file changed, 41 insertions(+), 60 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/common/irq.c b/arch/mips/alchemy/common/irq.c
index b72e12829480..8b60ba0675e2 100644
--- a/arch/mips/alchemy/common/irq.c
+++ b/arch/mips/alchemy/common/irq.c
@@ -30,7 +30,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/slab.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/irq_cpu.h>
 #include <asm/mipsregs.h>
@@ -585,81 +585,62 @@ void __init arch_init_irq(void)
 	}
 }
 
-struct alchemy_ic_sysdev {
-	struct sys_device sysdev;
-	void __iomem *base;
-	unsigned long pmdata[7];
-};
-
-static int alchemy_ic_suspend(struct sys_device *dev, pm_message_t state)
-{
-	struct alchemy_ic_sysdev *icdev =
-			container_of(dev, struct alchemy_ic_sysdev, sysdev);
 
-	icdev->pmdata[0] = __raw_readl(icdev->base + IC_CFG0RD);
-	icdev->pmdata[1] = __raw_readl(icdev->base + IC_CFG1RD);
-	icdev->pmdata[2] = __raw_readl(icdev->base + IC_CFG2RD);
-	icdev->pmdata[3] = __raw_readl(icdev->base + IC_SRCRD);
-	icdev->pmdata[4] = __raw_readl(icdev->base + IC_ASSIGNRD);
-	icdev->pmdata[5] = __raw_readl(icdev->base + IC_WAKERD);
-	icdev->pmdata[6] = __raw_readl(icdev->base + IC_MASKRD);
+static unsigned long alchemy_ic_pmdata[7 * 2];
 
-	return 0;
+static inline void alchemy_ic_suspend_one(void __iomem *base, unsigned long *d)
+{
+	d[0] = __raw_readl(base + IC_CFG0RD);
+	d[1] = __raw_readl(base + IC_CFG1RD);
+	d[2] = __raw_readl(base + IC_CFG2RD);
+	d[3] = __raw_readl(base + IC_SRCRD);
+	d[4] = __raw_readl(base + IC_ASSIGNRD);
+	d[5] = __raw_readl(base + IC_WAKERD);
+	d[6] = __raw_readl(base + IC_MASKRD);
+	ic_init(base);		/* shut it up too while at it */
 }
 
-static int alchemy_ic_resume(struct sys_device *dev)
+static inline void alchemy_ic_resume_one(void __iomem *base, unsigned long *d)
 {
-	struct alchemy_ic_sysdev *icdev =
-			container_of(dev, struct alchemy_ic_sysdev, sysdev);
-
-	ic_init(icdev->base);
-
-	__raw_writel(icdev->pmdata[0], icdev->base + IC_CFG0SET);
-	__raw_writel(icdev->pmdata[1], icdev->base + IC_CFG1SET);
-	__raw_writel(icdev->pmdata[2], icdev->base + IC_CFG2SET);
-	__raw_writel(icdev->pmdata[3], icdev->base + IC_SRCSET);
-	__raw_writel(icdev->pmdata[4], icdev->base + IC_ASSIGNSET);
-	__raw_writel(icdev->pmdata[5], icdev->base + IC_WAKESET);
+	ic_init(base);
+
+	__raw_writel(d[0], base + IC_CFG0SET);
+	__raw_writel(d[1], base + IC_CFG1SET);
+	__raw_writel(d[2], base + IC_CFG2SET);
+	__raw_writel(d[3], base + IC_SRCSET);
+	__raw_writel(d[4], base + IC_ASSIGNSET);
+	__raw_writel(d[5], base + IC_WAKESET);
 	wmb();
 
-	__raw_writel(icdev->pmdata[6], icdev->base + IC_MASKSET);
+	__raw_writel(d[6], base + IC_MASKSET);
 	wmb();
+}
 
+static int alchemy_ic_suspend(void)
+{
+	alchemy_ic_suspend_one((void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR),
+			       alchemy_ic_pmdata);
+	alchemy_ic_suspend_one((void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR),
+			       &alchemy_ic_pmdata[7]);
 	return 0;
 }
 
-static struct sysdev_class alchemy_ic_sysdev_class = {
-	.name		= "ic",
+static void alchemy_ic_resume(void)
+{
+	alchemy_ic_resume_one((void __iomem *)KSEG1ADDR(AU1000_IC1_PHYS_ADDR),
+			      &alchemy_ic_pmdata[7]);
+	alchemy_ic_resume_one((void __iomem *)KSEG1ADDR(AU1000_IC0_PHYS_ADDR),
+			      alchemy_ic_pmdata);
+}
+
+static struct syscore_ops alchemy_ic_syscore_ops = {
 	.suspend	= alchemy_ic_suspend,
 	.resume		= alchemy_ic_resume,
 };
 
-static int __init alchemy_ic_sysdev_init(void)
+static int __init alchemy_ic_pm_init(void)
 {
-	struct alchemy_ic_sysdev *icdev;
-	unsigned long icbase[2] = { AU1000_IC0_PHYS_ADDR, AU1000_IC1_PHYS_ADDR };
-	int err, i;
-
-	err = sysdev_class_register(&alchemy_ic_sysdev_class);
-	if (err)
-		return err;
-
-	for (i = 0; i < 2; i++) {
-		icdev = kzalloc(sizeof(struct alchemy_ic_sysdev), GFP_KERNEL);
-		if (!icdev)
-			return -ENOMEM;
-
-		icdev->base = ioremap(icbase[i], 0x1000);
-
-		icdev->sysdev.id = i;
-		icdev->sysdev.cls = &alchemy_ic_sysdev_class;
-		err = sysdev_register(&icdev->sysdev);
-		if (err) {
-			kfree(icdev);
-			return err;
-		}
-	}
-
+	register_syscore_ops(&alchemy_ic_syscore_ops);
 	return 0;
 }
-device_initcall(alchemy_ic_sysdev_init);
+device_initcall(alchemy_ic_pm_init);
-- 
cgit v1.2.3


From adcb86279f1e4d7a1a9f267b49441aecf4a5110a Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Sun, 8 May 2011 10:42:16 +0200
Subject: MIPS: Alchemy: Convert dbdma.c to syscore_ops

Convert the PM sysdev to syscore_ops and clean up the ddma addresses a bit.

Signed-off-by: Manuel Lauss <manuel.lauss@googlemail.com>
To: Linux-MIPS <linux-mips@linux-mips.org>
Cc: Florian Fainelli <florian@openwrt.org>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Patchwork: https://patchwork.linux-mips.org/patch/2351/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/alchemy/common/dbdma.c                 | 123 +++++++++--------------
 arch/mips/include/asm/mach-au1x00/au1000.h       |   4 +-
 arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h |   8 --
 3 files changed, 47 insertions(+), 88 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/common/dbdma.c b/arch/mips/alchemy/common/dbdma.c
index ca0506a8585a..3a5abb54d505 100644
--- a/arch/mips/alchemy/common/dbdma.c
+++ b/arch/mips/alchemy/common/dbdma.c
@@ -36,7 +36,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1xxx_dbdma.h>
 
@@ -58,7 +58,8 @@ static DEFINE_SPINLOCK(au1xxx_dbdma_spin_lock);
 /* I couldn't find a macro that did this... */
 #define ALIGN_ADDR(x, a)	((((u32)(x)) + (a-1)) & ~(a-1))
 
-static dbdma_global_t *dbdma_gptr = (dbdma_global_t *)DDMA_GLOBAL_BASE;
+static dbdma_global_t *dbdma_gptr =
+			(dbdma_global_t *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
 static int dbdma_initialized;
 
 static dbdev_tab_t dbdev_tab[] = {
@@ -299,7 +300,7 @@ u32 au1xxx_dbdma_chan_alloc(u32 srcid, u32 destid,
 	if (ctp != NULL) {
 		memset(ctp, 0, sizeof(chan_tab_t));
 		ctp->chan_index = chan = i;
-		dcp = DDMA_CHANNEL_BASE;
+		dcp = KSEG1ADDR(AU1550_DBDMA_PHYS_ADDR);
 		dcp += (0x0100 * chan);
 		ctp->chan_ptr = (au1x_dma_chan_t *)dcp;
 		cp = (au1x_dma_chan_t *)dcp;
@@ -958,105 +959,75 @@ u32 au1xxx_dbdma_put_dscr(u32 chanid, au1x_ddma_desc_t *dscr)
 }
 
 
-struct alchemy_dbdma_sysdev {
-	struct sys_device sysdev;
-	u32 pm_regs[NUM_DBDMA_CHANS + 1][6];
-};
+static unsigned long alchemy_dbdma_pm_data[NUM_DBDMA_CHANS + 1][6];
 
-static int alchemy_dbdma_suspend(struct sys_device *dev,
-				 pm_message_t state)
+static int alchemy_dbdma_suspend(void)
 {
-	struct alchemy_dbdma_sysdev *sdev =
-		container_of(dev, struct alchemy_dbdma_sysdev, sysdev);
 	int i;
-	u32 addr;
+	void __iomem *addr;
 
-	addr = DDMA_GLOBAL_BASE;
-	sdev->pm_regs[0][0] = au_readl(addr + 0x00);
-	sdev->pm_regs[0][1] = au_readl(addr + 0x04);
-	sdev->pm_regs[0][2] = au_readl(addr + 0x08);
-	sdev->pm_regs[0][3] = au_readl(addr + 0x0c);
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
+	alchemy_dbdma_pm_data[0][0] = __raw_readl(addr + 0x00);
+	alchemy_dbdma_pm_data[0][1] = __raw_readl(addr + 0x04);
+	alchemy_dbdma_pm_data[0][2] = __raw_readl(addr + 0x08);
+	alchemy_dbdma_pm_data[0][3] = __raw_readl(addr + 0x0c);
 
 	/* save channel configurations */
-	for (i = 1, addr = DDMA_CHANNEL_BASE; i <= NUM_DBDMA_CHANS; i++) {
-		sdev->pm_regs[i][0] = au_readl(addr + 0x00);
-		sdev->pm_regs[i][1] = au_readl(addr + 0x04);
-		sdev->pm_regs[i][2] = au_readl(addr + 0x08);
-		sdev->pm_regs[i][3] = au_readl(addr + 0x0c);
-		sdev->pm_regs[i][4] = au_readl(addr + 0x10);
-		sdev->pm_regs[i][5] = au_readl(addr + 0x14);
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_PHYS_ADDR);
+	for (i = 1; i <= NUM_DBDMA_CHANS; i++) {
+		alchemy_dbdma_pm_data[i][0] = __raw_readl(addr + 0x00);
+		alchemy_dbdma_pm_data[i][1] = __raw_readl(addr + 0x04);
+		alchemy_dbdma_pm_data[i][2] = __raw_readl(addr + 0x08);
+		alchemy_dbdma_pm_data[i][3] = __raw_readl(addr + 0x0c);
+		alchemy_dbdma_pm_data[i][4] = __raw_readl(addr + 0x10);
+		alchemy_dbdma_pm_data[i][5] = __raw_readl(addr + 0x14);
 
 		/* halt channel */
-		au_writel(sdev->pm_regs[i][0] & ~1, addr + 0x00);
-		au_sync();
-		while (!(au_readl(addr + 0x14) & 1))
-			au_sync();
+		__raw_writel(alchemy_dbdma_pm_data[i][0] & ~1, addr + 0x00);
+		wmb();
+		while (!(__raw_readl(addr + 0x14) & 1))
+			wmb();
 
 		addr += 0x100;	/* next channel base */
 	}
 	/* disable channel interrupts */
-	au_writel(0, DDMA_GLOBAL_BASE + 0x0c);
-	au_sync();
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
+	__raw_writel(0, addr + 0x0c);
+	wmb();
 
 	return 0;
 }
 
-static int alchemy_dbdma_resume(struct sys_device *dev)
+static void alchemy_dbdma_resume(void)
 {
-	struct alchemy_dbdma_sysdev *sdev =
-		container_of(dev, struct alchemy_dbdma_sysdev, sysdev);
 	int i;
-	u32 addr;
+	void __iomem *addr;
 
-	addr = DDMA_GLOBAL_BASE;
-	au_writel(sdev->pm_regs[0][0], addr + 0x00);
-	au_writel(sdev->pm_regs[0][1], addr + 0x04);
-	au_writel(sdev->pm_regs[0][2], addr + 0x08);
-	au_writel(sdev->pm_regs[0][3], addr + 0x0c);
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_CONF_PHYS_ADDR);
+	__raw_writel(alchemy_dbdma_pm_data[0][0], addr + 0x00);
+	__raw_writel(alchemy_dbdma_pm_data[0][1], addr + 0x04);
+	__raw_writel(alchemy_dbdma_pm_data[0][2], addr + 0x08);
+	__raw_writel(alchemy_dbdma_pm_data[0][3], addr + 0x0c);
 
 	/* restore channel configurations */
-	for (i = 1, addr = DDMA_CHANNEL_BASE; i <= NUM_DBDMA_CHANS; i++) {
-		au_writel(sdev->pm_regs[i][0], addr + 0x00);
-		au_writel(sdev->pm_regs[i][1], addr + 0x04);
-		au_writel(sdev->pm_regs[i][2], addr + 0x08);
-		au_writel(sdev->pm_regs[i][3], addr + 0x0c);
-		au_writel(sdev->pm_regs[i][4], addr + 0x10);
-		au_writel(sdev->pm_regs[i][5], addr + 0x14);
-		au_sync();
+	addr = (void __iomem *)KSEG1ADDR(AU1550_DBDMA_PHYS_ADDR);
+	for (i = 1; i <= NUM_DBDMA_CHANS; i++) {
+		__raw_writel(alchemy_dbdma_pm_data[i][0], addr + 0x00);
+		__raw_writel(alchemy_dbdma_pm_data[i][1], addr + 0x04);
+		__raw_writel(alchemy_dbdma_pm_data[i][2], addr + 0x08);
+		__raw_writel(alchemy_dbdma_pm_data[i][3], addr + 0x0c);
+		__raw_writel(alchemy_dbdma_pm_data[i][4], addr + 0x10);
+		__raw_writel(alchemy_dbdma_pm_data[i][5], addr + 0x14);
+		wmb();
 		addr += 0x100;	/* next channel base */
 	}
-
-	return 0;
 }
 
-static struct sysdev_class alchemy_dbdma_sysdev_class = {
-	.name		= "dbdma",
+static struct syscore_ops alchemy_dbdma_syscore_ops = {
 	.suspend	= alchemy_dbdma_suspend,
 	.resume		= alchemy_dbdma_resume,
 };
 
-static int __init alchemy_dbdma_sysdev_init(void)
-{
-	struct alchemy_dbdma_sysdev *sdev;
-	int ret;
-
-	ret = sysdev_class_register(&alchemy_dbdma_sysdev_class);
-	if (ret)
-		return ret;
-
-	sdev = kzalloc(sizeof(struct alchemy_dbdma_sysdev), GFP_KERNEL);
-	if (!sdev)
-		return -ENOMEM;
-
-	sdev->sysdev.id = -1;
-	sdev->sysdev.cls = &alchemy_dbdma_sysdev_class;
-	ret = sysdev_register(&sdev->sysdev);
-	if (ret)
-		kfree(sdev);
-
-	return ret;
-}
-
 static int __init au1xxx_dbdma_init(void)
 {
 	int irq_nr, ret;
@@ -1084,11 +1055,7 @@ static int __init au1xxx_dbdma_init(void)
 	else {
 		dbdma_initialized = 1;
 		printk(KERN_INFO "Alchemy DBDMA initialized\n");
-		ret = alchemy_dbdma_sysdev_init();
-		if (ret) {
-			printk(KERN_ERR "DBDMA PM init failed\n");
-			ret = 0;
-		}
+		register_syscore_ops(&alchemy_dbdma_syscore_ops);
 	}
 
 	return ret;
diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h
index 66cfcdc75e4f..eb8f1034e1ef 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000.h
@@ -635,6 +635,8 @@ enum soc_au1200_ints {
 
 #define AU1000_IC0_PHYS_ADDR		0x10400000 /* 01234 */
 #define AU1000_IC1_PHYS_ADDR		0x11800000 /* 01234 */
+#define AU1550_DBDMA_PHYS_ADDR		0x14002000 /* 34 */
+#define AU1550_DBDMA_CONF_PHYS_ADDR	0x14003000 /* 34 */
 
 
 #ifdef CONFIG_SOC_AU1000
@@ -761,7 +763,6 @@ enum soc_au1200_ints {
 #define	UART3_PHYS_ADDR		0x11400000
 #define GPIO2_PHYS_ADDR		0x11700000
 #define	SYS_PHYS_ADDR		0x11900000
-#define	DDMA_PHYS_ADDR		0x14002000
 #define PE_PHYS_ADDR		0x14008000
 #define PSC0_PHYS_ADDR		0x11A00000
 #define PSC1_PHYS_ADDR		0x11B00000
@@ -789,7 +790,6 @@ enum soc_au1200_ints {
 #define	UART1_PHYS_ADDR		0x11200000
 #define GPIO2_PHYS_ADDR		0x11700000
 #define	SYS_PHYS_ADDR		0x11900000
-#define	DDMA_PHYS_ADDR		0x14002000
 #define PSC0_PHYS_ADDR	 	0x11A00000
 #define PSC1_PHYS_ADDR	 	0x11B00000
 #define SD0_PHYS_ADDR		0x10600000
diff --git a/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h b/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h
index c8a553a36ba4..2fdacfe85e23 100644
--- a/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h
+++ b/arch/mips/include/asm/mach-au1x00/au1xxx_dbdma.h
@@ -37,14 +37,6 @@
 
 #ifndef _LANGUAGE_ASSEMBLY
 
-/*
- * The DMA base addresses.
- * The channels are every 256 bytes (0x0100) from the channel 0 base.
- * Interrupt status/enable is bits 15:0 for channels 15 to zero.
- */
-#define DDMA_GLOBAL_BASE	0xb4003000
-#define DDMA_CHANNEL_BASE	0xb4002000
-
 typedef volatile struct dbdma_global {
 	u32	ddma_config;
 	u32	ddma_intstat;
-- 
cgit v1.2.3


From 80130204b43ce9c3b50924e4c2d44e9f2881f8c3 Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Sun, 8 May 2011 10:42:17 +0200
Subject: MIPS: Alchemy: Rewrite UART setup and constants.

Detect CPU type at runtime and setup uarts accordingly; also clean up the
uart base address mess in the process as far as possible.

Signed-off-by: Manuel Lauss <manuel.lauss@googlemail.com>
To: Linux-MIPS <linux-mips@linux-mips.org>
Cc: Florian Fainelli <florian@openwrt.org>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Patchwork: https://patchwork.linux-mips.org/patch/2352/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org
---
 arch/mips/alchemy/common/platform.c        | 93 ++++++++++++++++--------------
 arch/mips/alchemy/devboards/prom.c         |  2 +-
 arch/mips/alchemy/gpr/board_setup.c        | 14 ++---
 arch/mips/alchemy/gpr/init.c               |  2 +-
 arch/mips/alchemy/mtx-1/init.c             |  2 +-
 arch/mips/alchemy/xxs1500/board_setup.c    | 11 ++--
 arch/mips/alchemy/xxs1500/init.c           |  2 +-
 arch/mips/boot/compressed/uart-alchemy.c   |  2 +-
 arch/mips/include/asm/mach-au1x00/au1000.h | 57 +++++++++++++-----
 9 files changed, 107 insertions(+), 78 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c
index 9e7814db3d03..36489fb184a2 100644
--- a/arch/mips/alchemy/common/platform.c
+++ b/arch/mips/alchemy/common/platform.c
@@ -30,21 +30,12 @@ static void alchemy_8250_pm(struct uart_port *port, unsigned int state,
 #ifdef CONFIG_SERIAL_8250
 	switch (state) {
 	case 0:
-		if ((__raw_readl(port->membase + UART_MOD_CNTRL) & 3) != 3) {
-			/* power-on sequence as suggested in the databooks */
-			__raw_writel(0, port->membase + UART_MOD_CNTRL);
-			wmb();
-			__raw_writel(1, port->membase + UART_MOD_CNTRL);
-			wmb();
-		}
-		__raw_writel(3, port->membase + UART_MOD_CNTRL); /* full on */
-		wmb();
+		alchemy_uart_enable(CPHYSADDR(port->membase));
 		serial8250_do_pm(port, state, old_state);
 		break;
 	case 3:		/* power off */
 		serial8250_do_pm(port, state, old_state);
-		__raw_writel(0, port->membase + UART_MOD_CNTRL);
-		wmb();
+		alchemy_uart_disable(CPHYSADDR(port->membase));
 		break;
 	default:
 		serial8250_do_pm(port, state, old_state);
@@ -65,38 +56,60 @@ static void alchemy_8250_pm(struct uart_port *port, unsigned int state,
 		.pm		= alchemy_8250_pm,		\
 	}
 
-static struct plat_serial8250_port au1x00_uart_data[] = {
-#if defined(CONFIG_SOC_AU1000)
-	PORT(UART0_PHYS_ADDR, AU1000_UART0_INT),
-	PORT(UART1_PHYS_ADDR, AU1000_UART1_INT),
-	PORT(UART2_PHYS_ADDR, AU1000_UART2_INT),
-	PORT(UART3_PHYS_ADDR, AU1000_UART3_INT),
-#elif defined(CONFIG_SOC_AU1500)
-	PORT(UART0_PHYS_ADDR, AU1500_UART0_INT),
-	PORT(UART3_PHYS_ADDR, AU1500_UART3_INT),
-#elif defined(CONFIG_SOC_AU1100)
-	PORT(UART0_PHYS_ADDR, AU1100_UART0_INT),
-	PORT(UART1_PHYS_ADDR, AU1100_UART1_INT),
-	PORT(UART3_PHYS_ADDR, AU1100_UART3_INT),
-#elif defined(CONFIG_SOC_AU1550)
-	PORT(UART0_PHYS_ADDR, AU1550_UART0_INT),
-	PORT(UART1_PHYS_ADDR, AU1550_UART1_INT),
-	PORT(UART3_PHYS_ADDR, AU1550_UART3_INT),
-#elif defined(CONFIG_SOC_AU1200)
-	PORT(UART0_PHYS_ADDR, AU1200_UART0_INT),
-	PORT(UART1_PHYS_ADDR, AU1200_UART1_INT),
-#endif
-	{ },
+static struct plat_serial8250_port au1x00_uart_data[][4] __initdata = {
+	[ALCHEMY_CPU_AU1000] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1000_UART0_INT),
+		PORT(AU1000_UART1_PHYS_ADDR, AU1000_UART1_INT),
+		PORT(AU1000_UART2_PHYS_ADDR, AU1000_UART2_INT),
+		PORT(AU1000_UART3_PHYS_ADDR, AU1000_UART3_INT),
+	},
+	[ALCHEMY_CPU_AU1500] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1500_UART0_INT),
+		PORT(AU1000_UART3_PHYS_ADDR, AU1500_UART3_INT),
+	},
+	[ALCHEMY_CPU_AU1100] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1100_UART0_INT),
+		PORT(AU1000_UART1_PHYS_ADDR, AU1100_UART1_INT),
+		PORT(AU1000_UART3_PHYS_ADDR, AU1100_UART3_INT),
+	},
+	[ALCHEMY_CPU_AU1550] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1550_UART0_INT),
+		PORT(AU1000_UART1_PHYS_ADDR, AU1550_UART1_INT),
+		PORT(AU1000_UART3_PHYS_ADDR, AU1550_UART3_INT),
+	},
+	[ALCHEMY_CPU_AU1200] = {
+		PORT(AU1000_UART0_PHYS_ADDR, AU1200_UART0_INT),
+		PORT(AU1000_UART1_PHYS_ADDR, AU1200_UART1_INT),
+	},
 };
 
 static struct platform_device au1xx0_uart_device = {
 	.name			= "serial8250",
 	.id			= PLAT8250_DEV_AU1X00,
-	.dev			= {
-		.platform_data	= au1x00_uart_data,
-	},
 };
 
+static void __init alchemy_setup_uarts(int ctype)
+{
+	unsigned int uartclk = get_au1x00_uart_baud_base() * 16;
+	int s = sizeof(struct plat_serial8250_port);
+	int c = alchemy_get_uarts(ctype);
+	struct plat_serial8250_port *ports;
+
+	ports = kzalloc(s * (c + 1), GFP_KERNEL);
+	if (!ports) {
+		printk(KERN_INFO "Alchemy: no memory for UART data\n");
+		return;
+	}
+	memcpy(ports, au1x00_uart_data[ctype], s * c);
+	au1xx0_uart_device.dev.platform_data = ports;
+
+	/* Fill up uartclk. */
+	for (s = 0; s < c; s++)
+		ports[s].uartclk = uartclk;
+	if (platform_device_register(&au1xx0_uart_device))
+		printk(KERN_INFO "Alchemy: failed to register UARTs\n");
+}
+
 /* OHCI (USB full speed host controller) */
 static struct resource au1xxx_usb_ohci_resources[] = {
 	[0] = {
@@ -442,7 +455,6 @@ void __init au1xxx_override_eth_cfg(unsigned int port,
 }
 
 static struct platform_device *au1xxx_platform_devices[] __initdata = {
-	&au1xx0_uart_device,
 	&au1xxx_usb_ohci_device,
 #ifdef CONFIG_FB_AU1100
 	&au1100_lcd_device,
@@ -465,13 +477,10 @@ static struct platform_device *au1xxx_platform_devices[] __initdata = {
 
 static int __init au1xxx_platform_init(void)
 {
-	unsigned int uartclk = get_au1x00_uart_baud_base() * 16;
-	int err, i;
+	int err, i, ctype = alchemy_get_cputype();
 	unsigned char ethaddr[6];
 
-	/* Fill up uartclk. */
-	for (i = 0; au1x00_uart_data[i].flags; i++)
-		au1x00_uart_data[i].uartclk = uartclk;
+	alchemy_setup_uarts(ctype);
 
 	/* use firmware-provided mac addr if available and necessary */
 	i = prom_get_ethernet_addr(ethaddr);
diff --git a/arch/mips/alchemy/devboards/prom.c b/arch/mips/alchemy/devboards/prom.c
index baeb21385058..e5306b56da6d 100644
--- a/arch/mips/alchemy/devboards/prom.c
+++ b/arch/mips/alchemy/devboards/prom.c
@@ -62,5 +62,5 @@ void __init prom_init(void)
 
 void prom_putchar(unsigned char c)
 {
-    alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/alchemy/gpr/board_setup.c b/arch/mips/alchemy/gpr/board_setup.c
index ad2e3f137933..5f8f0691ed2d 100644
--- a/arch/mips/alchemy/gpr/board_setup.c
+++ b/arch/mips/alchemy/gpr/board_setup.c
@@ -36,9 +36,6 @@
 
 #include <prom.h>
 
-#define UART1_ADDR	KSEG1ADDR(UART1_PHYS_ADDR)
-#define UART3_ADDR	KSEG1ADDR(UART3_PHYS_ADDR)
-
 char irq_tab_alchemy[][5] __initdata = {
 	[0] = { -1, AU1500_PCI_INTA, AU1500_PCI_INTB, 0xff, 0xff },
 };
@@ -67,18 +64,15 @@ static void gpr_power_off(void)
 
 void __init board_setup(void)
 {
-	printk(KERN_INFO "Tarpeze ITS GPR board\n");
+	printk(KERN_INFO "Trapeze ITS GPR board\n");
 
 	pm_power_off = gpr_power_off;
 	_machine_halt = gpr_power_off;
 	_machine_restart = gpr_reset;
 
-	/* Enable UART3 */
-	au_writel(0x1, UART3_ADDR + UART_MOD_CNTRL);/* clock enable (CE) */
-	au_writel(0x3, UART3_ADDR + UART_MOD_CNTRL); /* CE and "enable" */
-	/* Enable UART1 */
-	au_writel(0x1, UART1_ADDR + UART_MOD_CNTRL); /* clock enable (CE) */
-	au_writel(0x3, UART1_ADDR + UART_MOD_CNTRL); /* CE and "enable" */
+	/* Enable UART1/3 */
+	alchemy_uart_enable(AU1000_UART3_PHYS_ADDR);
+	alchemy_uart_enable(AU1000_UART1_PHYS_ADDR);
 
 	/* Take away Reset of UMTS-card */
 	alchemy_gpio_direction_output(215, 1);
diff --git a/arch/mips/alchemy/gpr/init.c b/arch/mips/alchemy/gpr/init.c
index f044f4c541d7..229aafae680c 100644
--- a/arch/mips/alchemy/gpr/init.c
+++ b/arch/mips/alchemy/gpr/init.c
@@ -59,5 +59,5 @@ void __init prom_init(void)
 
 void prom_putchar(unsigned char c)
 {
-	alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/alchemy/mtx-1/init.c b/arch/mips/alchemy/mtx-1/init.c
index f8d25575fa05..2e81cc7f3422 100644
--- a/arch/mips/alchemy/mtx-1/init.c
+++ b/arch/mips/alchemy/mtx-1/init.c
@@ -62,5 +62,5 @@ void __init prom_init(void)
 
 void prom_putchar(unsigned char c)
 {
-	alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/alchemy/xxs1500/board_setup.c b/arch/mips/alchemy/xxs1500/board_setup.c
index febfb0fb0896..81e57fad07ab 100644
--- a/arch/mips/alchemy/xxs1500/board_setup.c
+++ b/arch/mips/alchemy/xxs1500/board_setup.c
@@ -66,13 +66,10 @@ void __init board_setup(void)
 	au_writel(pin_func, SYS_PINFUNC);
 
 	/* Enable UART */
-	au_writel(0x01, UART3_ADDR + UART_MOD_CNTRL); /* clock enable (CE) */
-	mdelay(10);
-	au_writel(0x03, UART3_ADDR + UART_MOD_CNTRL); /* CE and "enable" */
-	mdelay(10);
-
-	/* Enable DTR = USB power up */
-	au_writel(0x01, UART3_ADDR + UART_MCR); /* UART_MCR_DTR is 0x01??? */
+	alchemy_uart_enable(AU1000_UART3_PHYS_ADDR);
+	/* Enable DTR (MCR bit 0) = USB power up */
+	__raw_writel(1, (void __iomem *)KSEG1ADDR(AU1000_UART3_PHYS_ADDR + 0x18));
+	wmb();
 
 #ifdef CONFIG_PCI
 #if defined(__MIPSEB__)
diff --git a/arch/mips/alchemy/xxs1500/init.c b/arch/mips/alchemy/xxs1500/init.c
index 34a90a4bb6f4..0ee02cfa989d 100644
--- a/arch/mips/alchemy/xxs1500/init.c
+++ b/arch/mips/alchemy/xxs1500/init.c
@@ -59,5 +59,5 @@ void __init prom_init(void)
 
 void prom_putchar(unsigned char c)
 {
-	alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/boot/compressed/uart-alchemy.c b/arch/mips/boot/compressed/uart-alchemy.c
index 1bff22fa089b..eb063e6dead9 100644
--- a/arch/mips/boot/compressed/uart-alchemy.c
+++ b/arch/mips/boot/compressed/uart-alchemy.c
@@ -3,5 +3,5 @@
 void putc(char c)
 {
 	/* all current (Jan. 2010) in-kernel boards */
-	alchemy_uart_putchar(UART0_PHYS_ADDR, c);
+	alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
 }
diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h
index eb8f1034e1ef..c4ffb209edf2 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000.h
@@ -161,6 +161,45 @@ static inline int alchemy_get_cputype(void)
 	return ALCHEMY_CPU_UNKNOWN;
 }
 
+/* return number of uarts on a given cputype */
+static inline int alchemy_get_uarts(int type)
+{
+	switch (type) {
+	case ALCHEMY_CPU_AU1000:
+		return 4;
+	case ALCHEMY_CPU_AU1500:
+	case ALCHEMY_CPU_AU1200:
+		return 2;
+	case ALCHEMY_CPU_AU1100:
+	case ALCHEMY_CPU_AU1550:
+		return 3;
+	}
+	return 0;
+}
+
+/* enable an UART block if it isn't already */
+static inline void alchemy_uart_enable(u32 uart_phys)
+{
+	void __iomem *addr = (void __iomem *)KSEG1ADDR(uart_phys);
+
+	/* reset, enable clock, deassert reset */
+	if ((__raw_readl(addr + 0x100) & 3) != 3) {
+		__raw_writel(0, addr + 0x100);
+		wmb();
+		__raw_writel(1, addr + 0x100);
+		wmb();
+	}
+	__raw_writel(3, addr + 0x100);
+	wmb();
+}
+
+static inline void alchemy_uart_disable(u32 uart_phys)
+{
+	void __iomem *addr = (void __iomem *)KSEG1ADDR(uart_phys);
+	__raw_writel(0, addr + 0x100);	/* UART_MOD_CNTRL */
+	wmb();
+}
+
 static inline void alchemy_uart_putchar(u32 uart_phys, u8 c)
 {
 	void __iomem *base = (void __iomem *)KSEG1ADDR(uart_phys);
@@ -634,6 +673,10 @@ enum soc_au1200_ints {
  */
 
 #define AU1000_IC0_PHYS_ADDR		0x10400000 /* 01234 */
+#define AU1000_UART0_PHYS_ADDR		0x11100000 /* 01234 */
+#define AU1000_UART1_PHYS_ADDR		0x11200000 /* 0234 */
+#define AU1000_UART2_PHYS_ADDR		0x11300000 /* 0 */
+#define AU1000_UART3_PHYS_ADDR		0x11400000 /* 0123 */
 #define AU1000_IC1_PHYS_ADDR		0x11800000 /* 01234 */
 #define AU1550_DBDMA_PHYS_ADDR		0x14002000 /* 34 */
 #define AU1550_DBDMA_CONF_PHYS_ADDR	0x14003000 /* 34 */
@@ -660,10 +703,6 @@ enum soc_au1200_ints {
 #define	MACDMA0_PHYS_ADDR	0x14004000
 #define	MACDMA1_PHYS_ADDR	0x14004200
 #define	I2S_PHYS_ADDR		0x11000000
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART1_PHYS_ADDR		0x11200000
-#define	UART2_PHYS_ADDR		0x11300000
-#define	UART3_PHYS_ADDR		0x11400000
 #define	SSI0_PHYS_ADDR		0x11600000
 #define	SSI1_PHYS_ADDR		0x11680000
 #define	SYS_PHYS_ADDR		0x11900000
@@ -695,8 +734,6 @@ enum soc_au1200_ints {
 #define	MACDMA0_PHYS_ADDR	0x14004000
 #define	MACDMA1_PHYS_ADDR	0x14004200
 #define	I2S_PHYS_ADDR		0x11000000
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART3_PHYS_ADDR		0x11400000
 #define GPIO2_PHYS_ADDR		0x11700000
 #define	SYS_PHYS_ADDR		0x11900000
 #define PCI_MEM_PHYS_ADDR	0x400000000ULL
@@ -732,9 +769,6 @@ enum soc_au1200_ints {
 #define	MACDMA0_PHYS_ADDR	0x14004000
 #define	MACDMA1_PHYS_ADDR	0x14004200
 #define	I2S_PHYS_ADDR		0x11000000
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART1_PHYS_ADDR		0x11200000
-#define	UART3_PHYS_ADDR		0x11400000
 #define	SSI0_PHYS_ADDR		0x11600000
 #define	SSI1_PHYS_ADDR		0x11680000
 #define GPIO2_PHYS_ADDR		0x11700000
@@ -758,9 +792,6 @@ enum soc_au1200_ints {
 #define	MACEN_PHYS_ADDR		0x10520000
 #define	MACDMA0_PHYS_ADDR	0x14004000
 #define	MACDMA1_PHYS_ADDR	0x14004200
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART1_PHYS_ADDR		0x11200000
-#define	UART3_PHYS_ADDR		0x11400000
 #define GPIO2_PHYS_ADDR		0x11700000
 #define	SYS_PHYS_ADDR		0x11900000
 #define PE_PHYS_ADDR		0x14008000
@@ -786,8 +817,6 @@ enum soc_au1200_ints {
 #define CIM_PHYS_ADDR		0x14004000
 #define USBM_PHYS_ADDR		0x14020000
 #define	USBH_PHYS_ADDR		0x14020100
-#define	UART0_PHYS_ADDR		0x11100000
-#define	UART1_PHYS_ADDR		0x11200000
 #define GPIO2_PHYS_ADDR		0x11700000
 #define	SYS_PHYS_ADDR		0x11900000
 #define PSC0_PHYS_ADDR	 	0x11A00000
-- 
cgit v1.2.3


From 40d8bc281711d188f35f035f28d94b111b735484 Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Sun, 8 May 2011 10:42:18 +0200
Subject: MIPS: Alchemy: Rewrite ethernet platform setup

Rewrite ethernet setup to use runtime cpu detection, and also clean up
the ethernet base address mess as far as possible.

Signed-off-by: Manuel Lauss <manuel.lauss@googlemail.com>
To: Linux-MIPS <linux-mips@linux-mips.org>
Cc: Florian Fainelli <florian@openwrt.org>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Patchwork: https://patchwork.linux-mips.org/patch/2353/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org
---
 arch/mips/alchemy/common/platform.c        | 151 +++++++++++++++++++----------
 arch/mips/include/asm/mach-au1x00/au1000.h |  63 +++++-------
 2 files changed, 123 insertions(+), 91 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c
index 36489fb184a2..541fff24abe6 100644
--- a/arch/mips/alchemy/common/platform.c
+++ b/arch/mips/alchemy/common/platform.c
@@ -13,9 +13,10 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/etherdevice.h>
+#include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/serial_8250.h>
-#include <linux/init.h>
+#include <linux/slab.h>
 
 #include <asm/mach-au1x00/au1xxx.h>
 #include <asm/mach-au1x00/au1xxx_dbdma.h>
@@ -372,15 +373,16 @@ static struct platform_device pbdb_smbus_device = {
 #endif
 
 /* Macro to help defining the Ethernet MAC resources */
+#define MAC_RES_COUNT	3	/* MAC regs base, MAC enable reg, MAC INT */
 #define MAC_RES(_base, _enable, _irq)			\
 	{						\
-		.start	= CPHYSADDR(_base),		\
-		.end	= CPHYSADDR(_base + 0xffff),	\
+		.start	= _base,			\
+		.end	= _base + 0xffff,		\
 		.flags	= IORESOURCE_MEM,		\
 	},						\
 	{						\
-		.start	= CPHYSADDR(_enable),		\
-		.end	= CPHYSADDR(_enable + 0x3),	\
+		.start	= _enable,			\
+		.end	= _enable + 0x3,		\
 		.flags	= IORESOURCE_MEM,		\
 	},						\
 	{						\
@@ -389,19 +391,29 @@ static struct platform_device pbdb_smbus_device = {
 		.flags	= IORESOURCE_IRQ		\
 	}
 
-static struct resource au1xxx_eth0_resources[] = {
-#if defined(CONFIG_SOC_AU1000)
-	MAC_RES(AU1000_ETH0_BASE, AU1000_MAC0_ENABLE, AU1000_MAC0_DMA_INT),
-#elif defined(CONFIG_SOC_AU1100)
-	MAC_RES(AU1100_ETH0_BASE, AU1100_MAC0_ENABLE, AU1100_MAC0_DMA_INT),
-#elif defined(CONFIG_SOC_AU1550)
-	MAC_RES(AU1550_ETH0_BASE, AU1550_MAC0_ENABLE, AU1550_MAC0_DMA_INT),
-#elif defined(CONFIG_SOC_AU1500)
-	MAC_RES(AU1500_ETH0_BASE, AU1500_MAC0_ENABLE, AU1500_MAC0_DMA_INT),
-#endif
+static struct resource au1xxx_eth0_resources[][MAC_RES_COUNT] __initdata = {
+	[ALCHEMY_CPU_AU1000] = {
+		MAC_RES(AU1000_MAC0_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR,
+			AU1000_MAC0_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1500] = {
+		MAC_RES(AU1500_MAC0_PHYS_ADDR,
+			AU1500_MACEN_PHYS_ADDR,
+			AU1500_MAC0_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1100] = {
+		MAC_RES(AU1000_MAC0_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR,
+			AU1100_MAC0_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1550] = {
+		MAC_RES(AU1000_MAC0_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR,
+			AU1550_MAC0_DMA_INT)
+	},
 };
 
-
 static struct au1000_eth_platform_data au1xxx_eth0_platform_data = {
 	.phy1_search_mac0 = 1,
 };
@@ -409,20 +421,26 @@ static struct au1000_eth_platform_data au1xxx_eth0_platform_data = {
 static struct platform_device au1xxx_eth0_device = {
 	.name		= "au1000-eth",
 	.id		= 0,
-	.num_resources	= ARRAY_SIZE(au1xxx_eth0_resources),
-	.resource	= au1xxx_eth0_resources,
+	.num_resources	= MAC_RES_COUNT,
 	.dev.platform_data = &au1xxx_eth0_platform_data,
 };
 
-#ifndef CONFIG_SOC_AU1100
-static struct resource au1xxx_eth1_resources[] = {
-#if defined(CONFIG_SOC_AU1000)
-	MAC_RES(AU1000_ETH1_BASE, AU1000_MAC1_ENABLE, AU1000_MAC1_DMA_INT),
-#elif defined(CONFIG_SOC_AU1550)
-	MAC_RES(AU1550_ETH1_BASE, AU1550_MAC1_ENABLE, AU1550_MAC1_DMA_INT),
-#elif defined(CONFIG_SOC_AU1500)
-	MAC_RES(AU1500_ETH1_BASE, AU1500_MAC1_ENABLE, AU1500_MAC1_DMA_INT),
-#endif
+static struct resource au1xxx_eth1_resources[][MAC_RES_COUNT] __initdata = {
+	[ALCHEMY_CPU_AU1000] = {
+		MAC_RES(AU1000_MAC1_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR + 4,
+			AU1000_MAC1_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1500] = {
+		MAC_RES(AU1500_MAC1_PHYS_ADDR,
+			AU1500_MACEN_PHYS_ADDR + 4,
+			AU1500_MAC1_DMA_INT)
+	},
+	[ALCHEMY_CPU_AU1550] = {
+		MAC_RES(AU1000_MAC1_PHYS_ADDR,
+			AU1000_MACEN_PHYS_ADDR + 4,
+			AU1550_MAC1_DMA_INT)
+	},
 };
 
 static struct au1000_eth_platform_data au1xxx_eth1_platform_data = {
@@ -432,11 +450,9 @@ static struct au1000_eth_platform_data au1xxx_eth1_platform_data = {
 static struct platform_device au1xxx_eth1_device = {
 	.name		= "au1000-eth",
 	.id		= 1,
-	.num_resources	= ARRAY_SIZE(au1xxx_eth1_resources),
-	.resource	= au1xxx_eth1_resources,
+	.num_resources	= MAC_RES_COUNT,
 	.dev.platform_data = &au1xxx_eth1_platform_data,
 };
-#endif
 
 void __init au1xxx_override_eth_cfg(unsigned int port,
 			struct au1000_eth_platform_data *eth_data)
@@ -447,11 +463,62 @@ void __init au1xxx_override_eth_cfg(unsigned int port,
 	if (port == 0)
 		memcpy(&au1xxx_eth0_platform_data, eth_data,
 			sizeof(struct au1000_eth_platform_data));
-#ifndef CONFIG_SOC_AU1100
 	else
 		memcpy(&au1xxx_eth1_platform_data, eth_data,
 			sizeof(struct au1000_eth_platform_data));
-#endif
+}
+
+static void __init alchemy_setup_macs(int ctype)
+{
+	int ret, i;
+	unsigned char ethaddr[6];
+	struct resource *macres;
+
+	/* Handle 1st MAC */
+	if (alchemy_get_macs(ctype) < 1)
+		return;
+
+	macres = kmalloc(sizeof(struct resource) * MAC_RES_COUNT, GFP_KERNEL);
+	if (!macres) {
+		printk(KERN_INFO "Alchemy: no memory for MAC0 resources\n");
+		return;
+	}
+	memcpy(macres, au1xxx_eth0_resources[ctype],
+	       sizeof(struct resource) * MAC_RES_COUNT);
+	au1xxx_eth0_device.resource = macres;
+
+	i = prom_get_ethernet_addr(ethaddr);
+	if (!i && !is_valid_ether_addr(au1xxx_eth0_platform_data.mac))
+		memcpy(au1xxx_eth0_platform_data.mac, ethaddr, 6);
+
+	ret = platform_device_register(&au1xxx_eth0_device);
+	if (!ret)
+		printk(KERN_INFO "Alchemy: failed to register MAC0\n");
+
+
+	/* Handle 2nd MAC */
+	if (alchemy_get_macs(ctype) < 2)
+		return;
+
+	macres = kmalloc(sizeof(struct resource) * MAC_RES_COUNT, GFP_KERNEL);
+	if (!macres) {
+		printk(KERN_INFO "Alchemy: no memory for MAC1 resources\n");
+		return;
+	}
+	memcpy(macres, au1xxx_eth1_resources[ctype],
+	       sizeof(struct resource) * MAC_RES_COUNT);
+	au1xxx_eth1_device.resource = macres;
+
+	ethaddr[5] += 1;	/* next addr for 2nd MAC */
+	if (!i && !is_valid_ether_addr(au1xxx_eth1_platform_data.mac))
+		memcpy(au1xxx_eth1_platform_data.mac, ethaddr, 6);
+
+	/* Register second MAC if enabled in pinfunc */
+	if (!(au_readl(SYS_PINFUNC) & (u32)SYS_PF_NI2)) {
+		ret = platform_device_register(&au1xxx_eth1_device);
+		if (ret)
+			printk(KERN_INFO "Alchemy: failed to register MAC1\n");
+	}
 }
 
 static struct platform_device *au1xxx_platform_devices[] __initdata = {
@@ -472,33 +539,17 @@ static struct platform_device *au1xxx_platform_devices[] __initdata = {
 #ifdef SMBUS_PSC_BASE
 	&pbdb_smbus_device,
 #endif
-	&au1xxx_eth0_device,
 };
 
 static int __init au1xxx_platform_init(void)
 {
-	int err, i, ctype = alchemy_get_cputype();
-	unsigned char ethaddr[6];
+	int err, ctype = alchemy_get_cputype();
 
 	alchemy_setup_uarts(ctype);
-
-	/* use firmware-provided mac addr if available and necessary */
-	i = prom_get_ethernet_addr(ethaddr);
-	if (!i && !is_valid_ether_addr(au1xxx_eth0_platform_data.mac))
-		memcpy(au1xxx_eth0_platform_data.mac, ethaddr, 6);
+	alchemy_setup_macs(ctype);
 
 	err = platform_add_devices(au1xxx_platform_devices,
 				   ARRAY_SIZE(au1xxx_platform_devices));
-#ifndef CONFIG_SOC_AU1100
-	ethaddr[5] += 1;	/* next addr for 2nd MAC */
-	if (!i && !is_valid_ether_addr(au1xxx_eth1_platform_data.mac))
-		memcpy(au1xxx_eth1_platform_data.mac, ethaddr, 6);
-
-	/* Register second MAC if enabled in pinfunc */
-	if (!err && !(au_readl(SYS_PINFUNC) & (u32)SYS_PF_NI2))
-		err = platform_device_register(&au1xxx_eth1_device);
-#endif
-
 	return err;
 }
 
diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h
index c4ffb209edf2..415d287c272b 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000.h
@@ -219,6 +219,20 @@ static inline void alchemy_uart_putchar(u32 uart_phys, u8 c)
 	wmb();
 }
 
+/* return number of ethernet MACs on a given cputype */
+static inline int alchemy_get_macs(int type)
+{
+	switch (type) {
+	case ALCHEMY_CPU_AU1000:
+	case ALCHEMY_CPU_AU1500:
+	case ALCHEMY_CPU_AU1550:
+		return 2;
+	case ALCHEMY_CPU_AU1100:
+		return 1;
+	}
+	return 0;
+}
+
 /* arch/mips/au1000/common/clocks.c */
 extern void set_au1x00_speed(unsigned int new_freq);
 extern unsigned int get_au1x00_speed(void);
@@ -673,6 +687,12 @@ enum soc_au1200_ints {
  */
 
 #define AU1000_IC0_PHYS_ADDR		0x10400000 /* 01234 */
+#define AU1000_MAC0_PHYS_ADDR		0x10500000 /* 023 */
+#define AU1000_MAC1_PHYS_ADDR		0x10510000 /* 023 */
+#define AU1000_MACEN_PHYS_ADDR		0x10520000 /* 023 */
+#define AU1500_MAC0_PHYS_ADDR		0x11500000 /* 1 */
+#define AU1500_MAC1_PHYS_ADDR		0x11510000 /* 1 */
+#define AU1500_MACEN_PHYS_ADDR		0x11520000 /* 1 */
 #define AU1000_UART0_PHYS_ADDR		0x11100000 /* 01234 */
 #define AU1000_UART1_PHYS_ADDR		0x11200000 /* 0234 */
 #define AU1000_UART2_PHYS_ADDR		0x11300000 /* 0 */
@@ -680,6 +700,8 @@ enum soc_au1200_ints {
 #define AU1000_IC1_PHYS_ADDR		0x11800000 /* 01234 */
 #define AU1550_DBDMA_PHYS_ADDR		0x14002000 /* 34 */
 #define AU1550_DBDMA_CONF_PHYS_ADDR	0x14003000 /* 34 */
+#define AU1000_MACDMA0_PHYS_ADDR	0x14004000 /* 0123 */
+#define AU1000_MACDMA1_PHYS_ADDR	0x14004200 /* 0123 */
 
 
 #ifdef CONFIG_SOC_AU1000
@@ -697,11 +719,6 @@ enum soc_au1200_ints {
 #define	USBH_PHYS_ADDR		0x10100000
 #define	USBD_PHYS_ADDR		0x10200000
 #define	IRDA_PHYS_ADDR		0x10300000
-#define	MAC0_PHYS_ADDR		0x10500000
-#define	MAC1_PHYS_ADDR		0x10510000
-#define	MACEN_PHYS_ADDR		0x10520000
-#define	MACDMA0_PHYS_ADDR	0x14004000
-#define	MACDMA1_PHYS_ADDR	0x14004200
 #define	I2S_PHYS_ADDR		0x11000000
 #define	SSI0_PHYS_ADDR		0x11600000
 #define	SSI1_PHYS_ADDR		0x11680000
@@ -728,11 +745,6 @@ enum soc_au1200_ints {
 #define	USBH_PHYS_ADDR		0x10100000
 #define	USBD_PHYS_ADDR		0x10200000
 #define PCI_PHYS_ADDR		0x14005000
-#define	MAC0_PHYS_ADDR		0x11500000
-#define	MAC1_PHYS_ADDR		0x11510000
-#define	MACEN_PHYS_ADDR		0x11520000
-#define	MACDMA0_PHYS_ADDR	0x14004000
-#define	MACDMA1_PHYS_ADDR	0x14004200
 #define	I2S_PHYS_ADDR		0x11000000
 #define GPIO2_PHYS_ADDR		0x11700000
 #define	SYS_PHYS_ADDR		0x11900000
@@ -764,10 +776,6 @@ enum soc_au1200_ints {
 #define	USBH_PHYS_ADDR		0x10100000
 #define	USBD_PHYS_ADDR		0x10200000
 #define	IRDA_PHYS_ADDR		0x10300000
-#define	MAC0_PHYS_ADDR		0x10500000
-#define	MACEN_PHYS_ADDR		0x10520000
-#define	MACDMA0_PHYS_ADDR	0x14004000
-#define	MACDMA1_PHYS_ADDR	0x14004200
 #define	I2S_PHYS_ADDR		0x11000000
 #define	SSI0_PHYS_ADDR		0x11600000
 #define	SSI1_PHYS_ADDR		0x11680000
@@ -787,11 +795,6 @@ enum soc_au1200_ints {
 #define	USBH_PHYS_ADDR		0x14020000
 #define	USBD_PHYS_ADDR		0x10200000
 #define PCI_PHYS_ADDR		0x14005000
-#define	MAC0_PHYS_ADDR		0x10500000
-#define	MAC1_PHYS_ADDR		0x10510000
-#define	MACEN_PHYS_ADDR		0x10520000
-#define	MACDMA0_PHYS_ADDR	0x14004000
-#define	MACDMA1_PHYS_ADDR	0x14004200
 #define GPIO2_PHYS_ADDR		0x11700000
 #define	SYS_PHYS_ADDR		0x11900000
 #define PE_PHYS_ADDR		0x14008000
@@ -870,12 +873,6 @@ enum soc_au1200_ints {
 #define USB_OHCI_BASE		0x10100000	/* phys addr for ioremap */
 #define USB_HOST_CONFIG 	0xB017FFFC
 #define FOR_PLATFORM_C_USB_HOST_INT AU1000_USB_HOST_INT
-
-#define AU1000_ETH0_BASE	0xB0500000
-#define AU1000_ETH1_BASE	0xB0510000
-#define AU1000_MAC0_ENABLE	0xB0520000
-#define AU1000_MAC1_ENABLE	0xB0520004
-#define NUM_ETH_INTERFACES 2
 #endif /* CONFIG_SOC_AU1000 */
 
 /* Au1500 */
@@ -887,12 +884,6 @@ enum soc_au1200_ints {
 #define USB_OHCI_BASE		0x10100000	/* phys addr for ioremap */
 #define USB_HOST_CONFIG 	0xB017fffc
 #define FOR_PLATFORM_C_USB_HOST_INT AU1500_USB_HOST_INT
-
-#define AU1500_ETH0_BASE	0xB1500000
-#define AU1500_ETH1_BASE	0xB1510000
-#define AU1500_MAC0_ENABLE	0xB1520000
-#define AU1500_MAC1_ENABLE	0xB1520004
-#define NUM_ETH_INTERFACES 2
 #endif /* CONFIG_SOC_AU1500 */
 
 /* Au1100 */
@@ -904,10 +895,6 @@ enum soc_au1200_ints {
 #define USB_OHCI_BASE		0x10100000	/* phys addr for ioremap */
 #define USB_HOST_CONFIG 	0xB017FFFC
 #define FOR_PLATFORM_C_USB_HOST_INT AU1100_USB_HOST_INT
-
-#define AU1100_ETH0_BASE	0xB0500000
-#define AU1100_MAC0_ENABLE	0xB0520000
-#define NUM_ETH_INTERFACES 1
 #endif /* CONFIG_SOC_AU1100 */
 
 #ifdef CONFIG_SOC_AU1550
@@ -917,12 +904,6 @@ enum soc_au1200_ints {
 #define USB_OHCI_LEN		0x00060000
 #define USB_HOST_CONFIG 	0xB4027ffc
 #define FOR_PLATFORM_C_USB_HOST_INT AU1550_USB_HOST_INT
-
-#define AU1550_ETH0_BASE	0xB0500000
-#define AU1550_ETH1_BASE	0xB0510000
-#define AU1550_MAC0_ENABLE	0xB0520000
-#define AU1550_MAC1_ENABLE	0xB0520004
-#define NUM_ETH_INTERFACES 2
 #endif /* CONFIG_SOC_AU1550 */
 
 
-- 
cgit v1.2.3


From 5d4ddcb4279672e69136e746d6de6f01b501b853 Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Sun, 8 May 2011 10:42:19 +0200
Subject: MIPS: Alchemy: Cleanup DMA addresses

According to the databooks, the Au1000 DMA engine must be programmed with
the physical FIFO addresses.  This patch does that; furthermore this
opened the possibility to get rid of a lot of now unnecessary address
defines.

Signed-off-by: Manuel Lauss <manuel.lauss@googlemail.com>
To: Linux-MIPS <linux-mips@linux-mips.org>
Cc: Florian Fainelli <florian@openwrt.org>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Patchwork: https://patchwork.linux-mips.org/patch/2348/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org
---
 arch/mips/alchemy/common/dma.c                 | 46 ++++++++++---------
 arch/mips/alchemy/common/platform.c            |  8 ++--
 arch/mips/include/asm/mach-au1x00/au1000.h     | 62 +++-----------------------
 arch/mips/include/asm/mach-au1x00/au1000_dma.h |  4 --
 4 files changed, 35 insertions(+), 85 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/common/dma.c b/arch/mips/alchemy/common/dma.c
index d5278877891d..347980e79a89 100644
--- a/arch/mips/alchemy/common/dma.c
+++ b/arch/mips/alchemy/common/dma.c
@@ -58,6 +58,9 @@
  * returned from request_dma.
  */
 
+/* DMA Channel register block spacing */
+#define DMA_CHANNEL_LEN		0x00000100
+
 DEFINE_SPINLOCK(au1000_dma_spin_lock);
 
 struct dma_chan au1000_dma_table[NUM_AU1000_DMA_CHANNELS] = {
@@ -77,22 +80,23 @@ static const struct dma_dev {
 	unsigned int fifo_addr;
 	unsigned int dma_mode;
 } dma_dev_table[DMA_NUM_DEV] = {
-	{UART0_ADDR + UART_TX, 0},
-	{UART0_ADDR + UART_RX, 0},
-	{0, 0},
-	{0, 0},
-	{AC97C_DATA, DMA_DW16 },          /* coherent */
-	{AC97C_DATA, DMA_DR | DMA_DW16 }, /* coherent */
-	{UART3_ADDR + UART_TX, DMA_DW8 | DMA_NC},
-	{UART3_ADDR + UART_RX, DMA_DR | DMA_DW8 | DMA_NC},
-	{USBD_EP0RD, DMA_DR | DMA_DW8 | DMA_NC},
-	{USBD_EP0WR, DMA_DW8 | DMA_NC},
-	{USBD_EP2WR, DMA_DW8 | DMA_NC},
-	{USBD_EP3WR, DMA_DW8 | DMA_NC},
-	{USBD_EP4RD, DMA_DR | DMA_DW8 | DMA_NC},
-	{USBD_EP5RD, DMA_DR | DMA_DW8 | DMA_NC},
-	{I2S_DATA, DMA_DW32 | DMA_NC},
-	{I2S_DATA, DMA_DR | DMA_DW32 | DMA_NC}
+	{ AU1000_UART0_PHYS_ADDR + 0x04, DMA_DW8 },		/* UART0_TX */
+	{ AU1000_UART0_PHYS_ADDR + 0x00, DMA_DW8 | DMA_DR },	/* UART0_RX */
+	{ 0, 0 },	/* DMA_REQ0 */
+	{ 0, 0 },	/* DMA_REQ1 */
+	{ AU1000_AC97_PHYS_ADDR + 0x08, DMA_DW16 },		/* AC97 TX c */
+	{ AU1000_AC97_PHYS_ADDR + 0x08, DMA_DW16 | DMA_DR },	/* AC97 RX c */
+	{ AU1000_UART3_PHYS_ADDR + 0x04, DMA_DW8 | DMA_NC },	/* UART3_TX */
+	{ AU1000_UART3_PHYS_ADDR + 0x00, DMA_DW8 | DMA_NC | DMA_DR }, /* UART3_RX */
+	{ AU1000_USBD_PHYS_ADDR + 0x00, DMA_DW8 | DMA_NC | DMA_DR }, /* EP0RD */
+	{ AU1000_USBD_PHYS_ADDR + 0x04, DMA_DW8 | DMA_NC }, /* EP0WR */
+	{ AU1000_USBD_PHYS_ADDR + 0x08, DMA_DW8 | DMA_NC }, /* EP2WR */
+	{ AU1000_USBD_PHYS_ADDR + 0x0c, DMA_DW8 | DMA_NC }, /* EP3WR */
+	{ AU1000_USBD_PHYS_ADDR + 0x10, DMA_DW8 | DMA_NC | DMA_DR }, /* EP4RD */
+	{ AU1000_USBD_PHYS_ADDR + 0x14, DMA_DW8 | DMA_NC | DMA_DR }, /* EP5RD */
+	/* on Au1500, these 2 are DMA_REQ2/3 (GPIO208/209) instead! */
+	{ AU1000_I2S_PHYS_ADDR + 0x00, DMA_DW32 | DMA_NC},	/* I2S TX */
+	{ AU1000_I2S_PHYS_ADDR + 0x00, DMA_DW32 | DMA_NC | DMA_DR}, /* I2S RX */
 };
 
 int au1000_dma_read_proc(char *buf, char **start, off_t fpos,
@@ -123,10 +127,10 @@ int au1000_dma_read_proc(char *buf, char **start, off_t fpos,
 
 /* Device FIFO addresses and default DMA modes - 2nd bank */
 static const struct dma_dev dma_dev_table_bank2[DMA_NUM_DEV_BANK2] = {
-	{ SD0_XMIT_FIFO, DMA_DS | DMA_DW8 },		/* coherent */
-	{ SD0_RECV_FIFO, DMA_DS | DMA_DR | DMA_DW8 },	/* coherent */
-	{ SD1_XMIT_FIFO, DMA_DS | DMA_DW8 },		/* coherent */
-	{ SD1_RECV_FIFO, DMA_DS | DMA_DR | DMA_DW8 }	/* coherent */
+	{ AU1100_SD0_PHYS_ADDR + 0x00, DMA_DS | DMA_DW8 },		/* coherent */
+	{ AU1100_SD0_PHYS_ADDR + 0x04, DMA_DS | DMA_DW8 | DMA_DR },	/* coherent */
+	{ AU1100_SD1_PHYS_ADDR + 0x00, DMA_DS | DMA_DW8 },		/* coherent */
+	{ AU1100_SD1_PHYS_ADDR + 0x04, DMA_DS | DMA_DW8 | DMA_DR }	/* coherent */
 };
 
 void dump_au1000_dma_channel(unsigned int dmanr)
@@ -202,7 +206,7 @@ int request_au1000_dma(int dev_id, const char *dev_str,
 	}
 
 	/* fill it in */
-	chan->io = DMA_CHANNEL_BASE + i * DMA_CHANNEL_LEN;
+	chan->io = KSEG1ADDR(AU1000_DMA_PHYS_ADDR) + i * DMA_CHANNEL_LEN;
 	chan->dev_id = dev_id;
 	chan->dev_str = dev_str;
 	chan->fifo_addr = dev->fifo_addr;
diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c
index 541fff24abe6..3b2c18b14341 100644
--- a/arch/mips/alchemy/common/platform.c
+++ b/arch/mips/alchemy/common/platform.c
@@ -283,8 +283,8 @@ extern struct au1xmmc_platform_data au1xmmc_platdata[2];
 
 static struct resource au1200_mmc0_resources[] = {
 	[0] = {
-		.start          = SD0_PHYS_ADDR,
-		.end            = SD0_PHYS_ADDR + 0x7ffff,
+		.start          = AU1100_SD0_PHYS_ADDR,
+		.end            = AU1100_SD0_PHYS_ADDR + 0xfff,
 		.flags          = IORESOURCE_MEM,
 	},
 	[1] = {
@@ -319,8 +319,8 @@ static struct platform_device au1200_mmc0_device = {
 #ifndef CONFIG_MIPS_DB1200
 static struct resource au1200_mmc1_resources[] = {
 	[0] = {
-		.start          = SD1_PHYS_ADDR,
-		.end            = SD1_PHYS_ADDR + 0x7ffff,
+		.start          = AU1100_SD1_PHYS_ADDR,
+		.end            = AU1100_SD1_PHYS_ADDR + 0xfff,
 		.flags          = IORESOURCE_MEM,
 	},
 	[1] = {
diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h
index 415d287c272b..2dfff4f26512 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000.h
@@ -686,10 +686,15 @@ enum soc_au1200_ints {
  * 0..au1000 1..au1500 2..au1100 3..au1550 4..au1200
  */
 
+#define AU1000_AC97_PHYS_ADDR		0x10000000 /* 012 */
+#define AU1000_USBD_PHYS_ADDR		0x10200000 /* 0123 */
 #define AU1000_IC0_PHYS_ADDR		0x10400000 /* 01234 */
 #define AU1000_MAC0_PHYS_ADDR		0x10500000 /* 023 */
 #define AU1000_MAC1_PHYS_ADDR		0x10510000 /* 023 */
 #define AU1000_MACEN_PHYS_ADDR		0x10520000 /* 023 */
+#define AU1100_SD0_PHYS_ADDR		0x10600000 /* 24 */
+#define AU1100_SD1_PHYS_ADDR		0x10680000 /* 24 */
+#define AU1000_I2S_PHYS_ADDR		0x11000000 /* 02 */
 #define AU1500_MAC0_PHYS_ADDR		0x11500000 /* 1 */
 #define AU1500_MAC1_PHYS_ADDR		0x11510000 /* 1 */
 #define AU1500_MACEN_PHYS_ADDR		0x11520000 /* 1 */
@@ -698,6 +703,7 @@ enum soc_au1200_ints {
 #define AU1000_UART2_PHYS_ADDR		0x11300000 /* 0 */
 #define AU1000_UART3_PHYS_ADDR		0x11400000 /* 0123 */
 #define AU1000_IC1_PHYS_ADDR		0x11800000 /* 01234 */
+#define AU1000_DMA_PHYS_ADDR		0x14002000 /* 012 */
 #define AU1550_DBDMA_PHYS_ADDR		0x14002000 /* 34 */
 #define AU1550_DBDMA_CONF_PHYS_ADDR	0x14003000 /* 34 */
 #define AU1000_MACDMA0_PHYS_ADDR	0x14004000 /* 0123 */
@@ -707,19 +713,8 @@ enum soc_au1200_ints {
 #ifdef CONFIG_SOC_AU1000
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
-#define	DMA0_PHYS_ADDR		0x14002000
-#define	DMA1_PHYS_ADDR		0x14002100
-#define	DMA2_PHYS_ADDR		0x14002200
-#define	DMA3_PHYS_ADDR		0x14002300
-#define	DMA4_PHYS_ADDR		0x14002400
-#define	DMA5_PHYS_ADDR		0x14002500
-#define	DMA6_PHYS_ADDR		0x14002600
-#define	DMA7_PHYS_ADDR		0x14002700
-#define	AC97_PHYS_ADDR		0x10000000
 #define	USBH_PHYS_ADDR		0x10100000
-#define	USBD_PHYS_ADDR		0x10200000
 #define	IRDA_PHYS_ADDR		0x10300000
-#define	I2S_PHYS_ADDR		0x11000000
 #define	SSI0_PHYS_ADDR		0x11600000
 #define	SSI1_PHYS_ADDR		0x11680000
 #define	SYS_PHYS_ADDR		0x11900000
@@ -733,19 +728,8 @@ enum soc_au1200_ints {
 #ifdef CONFIG_SOC_AU1500
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
-#define	DMA0_PHYS_ADDR		0x14002000
-#define	DMA1_PHYS_ADDR		0x14002100
-#define	DMA2_PHYS_ADDR		0x14002200
-#define	DMA3_PHYS_ADDR		0x14002300
-#define	DMA4_PHYS_ADDR		0x14002400
-#define	DMA5_PHYS_ADDR		0x14002500
-#define	DMA6_PHYS_ADDR		0x14002600
-#define	DMA7_PHYS_ADDR		0x14002700
-#define	AC97_PHYS_ADDR		0x10000000
 #define	USBH_PHYS_ADDR		0x10100000
-#define	USBD_PHYS_ADDR		0x10200000
 #define PCI_PHYS_ADDR		0x14005000
-#define	I2S_PHYS_ADDR		0x11000000
 #define GPIO2_PHYS_ADDR		0x11700000
 #define	SYS_PHYS_ADDR		0x11900000
 #define PCI_MEM_PHYS_ADDR	0x400000000ULL
@@ -762,21 +746,8 @@ enum soc_au1200_ints {
 #ifdef CONFIG_SOC_AU1100
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
-#define	DMA0_PHYS_ADDR		0x14002000
-#define	DMA1_PHYS_ADDR		0x14002100
-#define	DMA2_PHYS_ADDR		0x14002200
-#define	DMA3_PHYS_ADDR		0x14002300
-#define	DMA4_PHYS_ADDR		0x14002400
-#define	DMA5_PHYS_ADDR		0x14002500
-#define	DMA6_PHYS_ADDR		0x14002600
-#define	DMA7_PHYS_ADDR		0x14002700
-#define SD0_PHYS_ADDR		0x10600000
-#define SD1_PHYS_ADDR		0x10680000
-#define	AC97_PHYS_ADDR		0x10000000
 #define	USBH_PHYS_ADDR		0x10100000
-#define	USBD_PHYS_ADDR		0x10200000
 #define	IRDA_PHYS_ADDR		0x10300000
-#define	I2S_PHYS_ADDR		0x11000000
 #define	SSI0_PHYS_ADDR		0x11600000
 #define	SSI1_PHYS_ADDR		0x11680000
 #define GPIO2_PHYS_ADDR		0x11700000
@@ -793,7 +764,6 @@ enum soc_au1200_ints {
 #define	MEM_PHYS_ADDR		0x14000000
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
 #define	USBH_PHYS_ADDR		0x14020000
-#define	USBD_PHYS_ADDR		0x10200000
 #define PCI_PHYS_ADDR		0x14005000
 #define GPIO2_PHYS_ADDR		0x11700000
 #define	SYS_PHYS_ADDR		0x11900000
@@ -824,8 +794,6 @@ enum soc_au1200_ints {
 #define	SYS_PHYS_ADDR		0x11900000
 #define PSC0_PHYS_ADDR	 	0x11A00000
 #define PSC1_PHYS_ADDR	 	0x11B00000
-#define SD0_PHYS_ADDR		0x10600000
-#define SD1_PHYS_ADDR		0x10680000
 #define LCD_PHYS_ADDR		0x15000000
 #define SWCNT_PHYS_ADDR		0x1110010C
 #define MAEFE_PHYS_ADDR		0x14012000
@@ -867,9 +835,6 @@ enum soc_au1200_ints {
 /* Au1000 */
 #ifdef CONFIG_SOC_AU1000
 
-#define UART0_ADDR		0xB1100000
-#define UART3_ADDR		0xB1400000
-
 #define USB_OHCI_BASE		0x10100000	/* phys addr for ioremap */
 #define USB_HOST_CONFIG 	0xB017FFFC
 #define FOR_PLATFORM_C_USB_HOST_INT AU1000_USB_HOST_INT
@@ -878,9 +843,6 @@ enum soc_au1200_ints {
 /* Au1500 */
 #ifdef CONFIG_SOC_AU1500
 
-#define UART0_ADDR		0xB1100000
-#define UART3_ADDR		0xB1400000
-
 #define USB_OHCI_BASE		0x10100000	/* phys addr for ioremap */
 #define USB_HOST_CONFIG 	0xB017fffc
 #define FOR_PLATFORM_C_USB_HOST_INT AU1500_USB_HOST_INT
@@ -889,16 +851,12 @@ enum soc_au1200_ints {
 /* Au1100 */
 #ifdef CONFIG_SOC_AU1100
 
-#define UART0_ADDR		0xB1100000
-#define UART3_ADDR		0xB1400000
-
 #define USB_OHCI_BASE		0x10100000	/* phys addr for ioremap */
 #define USB_HOST_CONFIG 	0xB017FFFC
 #define FOR_PLATFORM_C_USB_HOST_INT AU1100_USB_HOST_INT
 #endif /* CONFIG_SOC_AU1100 */
 
 #ifdef CONFIG_SOC_AU1550
-#define UART0_ADDR		0xB1100000
 
 #define USB_OHCI_BASE		0x14020000	/* phys addr for ioremap */
 #define USB_OHCI_LEN		0x00060000
@@ -909,8 +867,6 @@ enum soc_au1200_ints {
 
 #ifdef CONFIG_SOC_AU1200
 
-#define UART0_ADDR		0xB1100000
-
 #define USB_UOC_BASE		0x14020020
 #define USB_UOC_LEN		0x20
 #define USB_OHCI_BASE		0x14020100
@@ -1534,12 +1490,6 @@ enum soc_au1200_ints {
 #  define AC97C_RS		(1 << 1)
 #  define AC97C_CE		(1 << 0)
 
-/* Secure Digital (SD) Controller */
-#define SD0_XMIT_FIFO	0xB0600000
-#define SD0_RECV_FIFO	0xB0600004
-#define SD1_XMIT_FIFO	0xB0680000
-#define SD1_RECV_FIFO	0xB0680004
-
 #if defined(CONFIG_SOC_AU1500) || defined(CONFIG_SOC_AU1550)
 /* Au1500 PCI Controller */
 #define Au1500_CFG_BASE 	0xB4005000	/* virtual, KSEG1 addr */
diff --git a/arch/mips/include/asm/mach-au1x00/au1000_dma.h b/arch/mips/include/asm/mach-au1x00/au1000_dma.h
index c333b4e1cd44..59f5b55b2200 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000_dma.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000_dma.h
@@ -37,10 +37,6 @@
 
 #define NUM_AU1000_DMA_CHANNELS	8
 
-/* DMA Channel Base Addresses */
-#define DMA_CHANNEL_BASE	0xB4002000
-#define DMA_CHANNEL_LEN		0x00000100
-
 /* DMA Channel Register Offsets */
 #define DMA_MODE_SET		0x00000000
 #define DMA_MODE_READ		DMA_MODE_SET
-- 
cgit v1.2.3


From b7f720d68c0042cc8ce496e31a61df79a77f1b48 Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@googlemail.com>
Date: Sun, 8 May 2011 10:42:20 +0200
Subject: MIPS: Alchemy: Clean up GPIO registers and accessors

remove au_readl/au_writel, remove the predefined GPIO1/2 KSEG1 register
addresses and fix the fallout in all boards and drivers.

This also fixes a bug in the mtx-1_wdt driver which was introduced by
commit 6ea8115bb6f359df4f45152f2b40e1d4d1891392
("Convert mtx1 wdt to be a platform device and use generic GPIO API")
before this patch mtx-1_wdt only modified GPIO215, the patch then
used the gpio resource information as bit index into the GPIO2 register
but the conversion to the GPIO API didn't realize that.
With this patch the drivers original behaviour is restored and GPIO15
is left alone.

Signed-off-by: Manuel Lauss <manuel.lauss@googlemail.com>
Cc: Florian Fainelli <florian@openwrt.org>
To: Linux-MIPS <linux-mips@linux-mips.org>
Cc: linux-watchdog@vger.kernel.org
Cc: Wim Van Sebroeck <wim@iguana.be>
Patchwork: https://patchwork.linux-mips.org/patch/2381/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org
---
 arch/mips/alchemy/devboards/pb1000/board_setup.c |  2 +-
 arch/mips/alchemy/devboards/pb1500/board_setup.c |  2 +-
 arch/mips/alchemy/mtx-1/board_setup.c            |  2 +-
 arch/mips/alchemy/mtx-1/platform.c               |  4 +-
 arch/mips/include/asm/mach-au1x00/au1000.h       | 27 +--------
 arch/mips/include/asm/mach-au1x00/gpio-au1000.h  | 71 ++++++++++++++++--------
 drivers/mtd/nand/au1550nd.c                      |  3 +-
 drivers/watchdog/mtx-1_wdt.c                     | 21 ++++---
 8 files changed, 70 insertions(+), 62 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/devboards/pb1000/board_setup.c b/arch/mips/alchemy/devboards/pb1000/board_setup.c
index 2d85c4b5be09..e64fdcbf75d0 100644
--- a/arch/mips/alchemy/devboards/pb1000/board_setup.c
+++ b/arch/mips/alchemy/devboards/pb1000/board_setup.c
@@ -65,7 +65,7 @@ void __init board_setup(void)
 
 	/* Set AUX clock to 12 MHz * 8 = 96 MHz */
 	au_writel(8, SYS_AUXPLL);
-	au_writel(0, SYS_PINSTATERD);
+	alchemy_gpio1_input_enable();
 	udelay(100);
 
 #if defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE)
diff --git a/arch/mips/alchemy/devboards/pb1500/board_setup.c b/arch/mips/alchemy/devboards/pb1500/board_setup.c
index 83f46215eb0c..3b4fa3206969 100644
--- a/arch/mips/alchemy/devboards/pb1500/board_setup.c
+++ b/arch/mips/alchemy/devboards/pb1500/board_setup.c
@@ -56,7 +56,7 @@ void __init board_setup(void)
 	sys_clksrc = sys_freqctrl = pin_func = 0;
 	/* Set AUX clock to 12 MHz * 8 = 96 MHz */
 	au_writel(8, SYS_AUXPLL);
-	au_writel(0, SYS_PINSTATERD);
+	alchemy_gpio1_input_enable();
 	udelay(100);
 
 	/* GPIO201 is input for PCMCIA card detect */
diff --git a/arch/mips/alchemy/mtx-1/board_setup.c b/arch/mips/alchemy/mtx-1/board_setup.c
index cf436ab679ae..3ae984cf98cf 100644
--- a/arch/mips/alchemy/mtx-1/board_setup.c
+++ b/arch/mips/alchemy/mtx-1/board_setup.c
@@ -87,7 +87,7 @@ void __init board_setup(void)
 	au_writel(SYS_PF_NI2, SYS_PINFUNC);
 
 	/* Initialize GPIO */
-	au_writel(0xFFFFFFFF, SYS_TRIOUTCLR);
+	au_writel(~0, KSEG1ADDR(AU1000_SYS_PHYS_ADDR) + SYS_TRIOUTCLR);
 	alchemy_gpio_direction_output(0, 0);	/* Disable M66EN (PCI 66MHz) */
 	alchemy_gpio_direction_output(3, 1);	/* Disable PCI CLKRUN# */
 	alchemy_gpio_direction_output(1, 1);	/* Enable EXT_IO3 */
diff --git a/arch/mips/alchemy/mtx-1/platform.c b/arch/mips/alchemy/mtx-1/platform.c
index 956f946218c5..55628e390fd7 100644
--- a/arch/mips/alchemy/mtx-1/platform.c
+++ b/arch/mips/alchemy/mtx-1/platform.c
@@ -53,8 +53,8 @@ static struct platform_device mtx1_button = {
 
 static struct resource mtx1_wdt_res[] = {
 	[0] = {
-		.start	= 15,
-		.end	= 15,
+		.start	= 215,
+		.end	= 215,
 		.name	= "mtx1-wdt-gpio",
 		.flags	= IORESOURCE_IRQ,
 	}
diff --git a/arch/mips/include/asm/mach-au1x00/au1000.h b/arch/mips/include/asm/mach-au1x00/au1000.h
index 2dfff4f26512..f260ebed713b 100644
--- a/arch/mips/include/asm/mach-au1x00/au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/au1000.h
@@ -702,7 +702,9 @@ enum soc_au1200_ints {
 #define AU1000_UART1_PHYS_ADDR		0x11200000 /* 0234 */
 #define AU1000_UART2_PHYS_ADDR		0x11300000 /* 0 */
 #define AU1000_UART3_PHYS_ADDR		0x11400000 /* 0123 */
+#define AU1500_GPIO2_PHYS_ADDR		0x11700000 /* 1234 */
 #define AU1000_IC1_PHYS_ADDR		0x11800000 /* 01234 */
+#define AU1000_SYS_PHYS_ADDR		0x11900000 /* 01234 */
 #define AU1000_DMA_PHYS_ADDR		0x14002000 /* 012 */
 #define AU1550_DBDMA_PHYS_ADDR		0x14002000 /* 34 */
 #define AU1550_DBDMA_CONF_PHYS_ADDR	0x14003000 /* 34 */
@@ -717,7 +719,6 @@ enum soc_au1200_ints {
 #define	IRDA_PHYS_ADDR		0x10300000
 #define	SSI0_PHYS_ADDR		0x11600000
 #define	SSI1_PHYS_ADDR		0x11680000
-#define	SYS_PHYS_ADDR		0x11900000
 #define PCMCIA_IO_PHYS_ADDR	0xF00000000ULL
 #define PCMCIA_ATTR_PHYS_ADDR	0xF40000000ULL
 #define PCMCIA_MEM_PHYS_ADDR	0xF80000000ULL
@@ -730,8 +731,6 @@ enum soc_au1200_ints {
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
 #define	USBH_PHYS_ADDR		0x10100000
 #define PCI_PHYS_ADDR		0x14005000
-#define GPIO2_PHYS_ADDR		0x11700000
-#define	SYS_PHYS_ADDR		0x11900000
 #define PCI_MEM_PHYS_ADDR	0x400000000ULL
 #define PCI_IO_PHYS_ADDR	0x500000000ULL
 #define PCI_CONFIG0_PHYS_ADDR	0x600000000ULL
@@ -750,8 +749,6 @@ enum soc_au1200_ints {
 #define	IRDA_PHYS_ADDR		0x10300000
 #define	SSI0_PHYS_ADDR		0x11600000
 #define	SSI1_PHYS_ADDR		0x11680000
-#define GPIO2_PHYS_ADDR		0x11700000
-#define	SYS_PHYS_ADDR		0x11900000
 #define LCD_PHYS_ADDR		0x15000000
 #define PCMCIA_IO_PHYS_ADDR	0xF00000000ULL
 #define PCMCIA_ATTR_PHYS_ADDR	0xF40000000ULL
@@ -765,8 +762,6 @@ enum soc_au1200_ints {
 #define	STATIC_MEM_PHYS_ADDR	0x14001000
 #define	USBH_PHYS_ADDR		0x14020000
 #define PCI_PHYS_ADDR		0x14005000
-#define GPIO2_PHYS_ADDR		0x11700000
-#define	SYS_PHYS_ADDR		0x11900000
 #define PE_PHYS_ADDR		0x14008000
 #define PSC0_PHYS_ADDR		0x11A00000
 #define PSC1_PHYS_ADDR		0x11B00000
@@ -790,8 +785,6 @@ enum soc_au1200_ints {
 #define CIM_PHYS_ADDR		0x14004000
 #define USBM_PHYS_ADDR		0x14020000
 #define	USBH_PHYS_ADDR		0x14020100
-#define GPIO2_PHYS_ADDR		0x11700000
-#define	SYS_PHYS_ADDR		0x11900000
 #define PSC0_PHYS_ADDR	 	0x11A00000
 #define PSC1_PHYS_ADDR	 	0x11B00000
 #define LCD_PHYS_ADDR		0x15000000
@@ -1359,22 +1352,6 @@ enum soc_au1200_ints {
 #define SYS_PINFUNC_S1B 	(1 << 2)
 #endif
 
-#define SYS_TRIOUTRD		0xB1900100
-#define SYS_TRIOUTCLR		0xB1900100
-#define SYS_OUTPUTRD		0xB1900108
-#define SYS_OUTPUTSET		0xB1900108
-#define SYS_OUTPUTCLR		0xB190010C
-#define SYS_PINSTATERD		0xB1900110
-#define SYS_PININPUTEN		0xB1900110
-
-/* GPIO2, Au1500, Au1550 only */
-#define GPIO2_BASE		0xB1700000
-#define GPIO2_DIR		(GPIO2_BASE + 0)
-#define GPIO2_OUTPUT		(GPIO2_BASE + 8)
-#define GPIO2_PINSTATE		(GPIO2_BASE + 0xC)
-#define GPIO2_INTENABLE 	(GPIO2_BASE + 0x10)
-#define GPIO2_ENABLE		(GPIO2_BASE + 0x14)
-
 /* Power Management */
 #define SYS_SCRATCH0		0xB1900018
 #define SYS_SCRATCH1		0xB190001C
diff --git a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
index 8f8c1c55593a..1f41a522906d 100644
--- a/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
+++ b/arch/mips/include/asm/mach-au1x00/gpio-au1000.h
@@ -24,6 +24,22 @@
 
 #define MAKE_IRQ(intc, off)	(AU1000_INTC##intc##_INT_BASE + (off))
 
+/* GPIO1 registers within SYS_ area */
+#define SYS_TRIOUTRD		0x100
+#define SYS_TRIOUTCLR		0x100
+#define SYS_OUTPUTRD		0x108
+#define SYS_OUTPUTSET		0x108
+#define SYS_OUTPUTCLR		0x10C
+#define SYS_PINSTATERD		0x110
+#define SYS_PININPUTEN		0x110
+
+/* register offsets within GPIO2 block */
+#define GPIO2_DIR		0x00
+#define GPIO2_OUTPUT		0x08
+#define GPIO2_PINSTATE		0x0C
+#define GPIO2_INTENABLE		0x10
+#define GPIO2_ENABLE		0x14
+
 struct gpio;
 
 static inline int au1000_gpio1_to_irq(int gpio)
@@ -201,23 +217,26 @@ static inline int au1200_irq_to_gpio(int irq)
  */
 static inline void alchemy_gpio1_set_value(int gpio, int v)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
 	unsigned long mask = 1 << (gpio - ALCHEMY_GPIO1_BASE);
 	unsigned long r = v ? SYS_OUTPUTSET : SYS_OUTPUTCLR;
-	au_writel(mask, r);
-	au_sync();
+	__raw_writel(mask, base + r);
+	wmb();
 }
 
 static inline int alchemy_gpio1_get_value(int gpio)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
 	unsigned long mask = 1 << (gpio - ALCHEMY_GPIO1_BASE);
-	return au_readl(SYS_PINSTATERD) & mask;
+	return __raw_readl(base + SYS_PINSTATERD) & mask;
 }
 
 static inline int alchemy_gpio1_direction_input(int gpio)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
 	unsigned long mask = 1 << (gpio - ALCHEMY_GPIO1_BASE);
-	au_writel(mask, SYS_TRIOUTCLR);
-	au_sync();
+	__raw_writel(mask, base + SYS_TRIOUTCLR);
+	wmb();
 	return 0;
 }
 
@@ -258,27 +277,31 @@ static inline int alchemy_gpio1_to_irq(int gpio)
  */
 static inline void __alchemy_gpio2_mod_dir(int gpio, int to_out)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
 	unsigned long mask = 1 << (gpio - ALCHEMY_GPIO2_BASE);
-	unsigned long d = au_readl(GPIO2_DIR);
+	unsigned long d = __raw_readl(base + GPIO2_DIR);
+
 	if (to_out)
 		d |= mask;
 	else
 		d &= ~mask;
-	au_writel(d, GPIO2_DIR);
-	au_sync();
+	__raw_writel(d, base + GPIO2_DIR);
+	wmb();
 }
 
 static inline void alchemy_gpio2_set_value(int gpio, int v)
 {
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
 	unsigned long mask;
 	mask = ((v) ? 0x00010001 : 0x00010000) << (gpio - ALCHEMY_GPIO2_BASE);
-	au_writel(mask, GPIO2_OUTPUT);
-	au_sync();
+	__raw_writel(mask, base + GPIO2_OUTPUT);
+	wmb();
 }
 
 static inline int alchemy_gpio2_get_value(int gpio)
 {
-	return au_readl(GPIO2_PINSTATE) & (1 << (gpio - ALCHEMY_GPIO2_BASE));
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
+	return __raw_readl(base + GPIO2_PINSTATE) & (1 << (gpio - ALCHEMY_GPIO2_BASE));
 }
 
 static inline int alchemy_gpio2_direction_input(int gpio)
@@ -330,21 +353,23 @@ static inline int alchemy_gpio2_to_irq(int gpio)
  */
 static inline void alchemy_gpio1_input_enable(void)
 {
-	au_writel(0, SYS_PININPUTEN);	/* the write op is key */
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1000_SYS_PHYS_ADDR);
+	__raw_writel(0, base + SYS_PININPUTEN);	/* the write op is key */
+	wmb();
 }
 
 /* GPIO2 shared interrupts and control */
 
 static inline void __alchemy_gpio2_mod_int(int gpio2, int en)
 {
-	unsigned long r = au_readl(GPIO2_INTENABLE);
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
+	unsigned long r = __raw_readl(base + GPIO2_INTENABLE);
 	if (en)
 		r |= 1 << gpio2;
 	else
 		r &= ~(1 << gpio2);
-	au_writel(r, GPIO2_INTENABLE);
-	au_sync();
+	__raw_writel(r, base + GPIO2_INTENABLE);
+	wmb();
 }
 
 /**
@@ -419,10 +444,11 @@ static inline void alchemy_gpio2_disable_int(int gpio2)
  */
 static inline void alchemy_gpio2_enable(void)
 {
-	au_writel(3, GPIO2_ENABLE);	/* reset, clock enabled */
-	au_sync();
-	au_writel(1, GPIO2_ENABLE);	/* clock enabled */
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
+	__raw_writel(3, base + GPIO2_ENABLE);	/* reset, clock enabled */
+	wmb();
+	__raw_writel(1, base + GPIO2_ENABLE);	/* clock enabled */
+	wmb();
 }
 
 /**
@@ -432,8 +458,9 @@ static inline void alchemy_gpio2_enable(void)
  */
 static inline void alchemy_gpio2_disable(void)
 {
-	au_writel(2, GPIO2_ENABLE);	/* reset, clock disabled */
-	au_sync();
+	void __iomem *base = (void __iomem *)KSEG1ADDR(AU1500_GPIO2_PHYS_ADDR);
+	__raw_writel(2, base + GPIO2_ENABLE);	/* reset, clock disabled */
+	wmb();
 }
 
 /**********************************************************************/
diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c
index 3ffe05db4923..5d513b54a7d7 100644
--- a/drivers/mtd/nand/au1550nd.c
+++ b/drivers/mtd/nand/au1550nd.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/slab.h>
+#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -470,7 +471,7 @@ static int __init au1xxx_nand_init(void)
 
 #ifdef CONFIG_MIPS_PB1550
 	/* set gpio206 high */
-	au_writel(au_readl(GPIO2_DIR) & ~(1 << 6), GPIO2_DIR);
+	gpio_direction_input(206);
 
 	boot_swapboot = (au_readl(MEM_STSTAT) & (0x7 << 1)) | ((bcsr_read(BCSR_STATUS) >> 6) & 0x1);
 
diff --git a/drivers/watchdog/mtx-1_wdt.c b/drivers/watchdog/mtx-1_wdt.c
index 5ec5ac1f7878..1479dc4d6129 100644
--- a/drivers/watchdog/mtx-1_wdt.c
+++ b/drivers/watchdog/mtx-1_wdt.c
@@ -66,6 +66,7 @@ static struct {
 	int default_ticks;
 	unsigned long inuse;
 	unsigned gpio;
+	int gstate;
 } mtx1_wdt_device;
 
 static void mtx1_wdt_trigger(unsigned long unused)
@@ -75,13 +76,13 @@ static void mtx1_wdt_trigger(unsigned long unused)
 	spin_lock(&mtx1_wdt_device.lock);
 	if (mtx1_wdt_device.running)
 		ticks--;
-	/*
-	 * toggle GPIO2_15
-	 */
-	tmp = au_readl(GPIO2_DIR);
-	tmp = (tmp & ~(1 << mtx1_wdt_device.gpio)) |
-	      ((~tmp) & (1 << mtx1_wdt_device.gpio));
-	au_writel(tmp, GPIO2_DIR);
+
+	/* toggle wdt gpio */
+	mtx1_wdt_device.gstate = ~mtx1_wdt_device.gstate;
+	if (mtx1_wdt_device.gstate)
+		gpio_direction_output(mtx1_wdt_device.gpio, 1);
+	else
+		gpio_direction_input(mtx1_wdt_device.gpio);
 
 	if (mtx1_wdt_device.queue && ticks)
 		mod_timer(&mtx1_wdt_device.timer, jiffies + MTX1_WDT_INTERVAL);
@@ -103,7 +104,8 @@ static void mtx1_wdt_start(void)
 	spin_lock_irqsave(&mtx1_wdt_device.lock, flags);
 	if (!mtx1_wdt_device.queue) {
 		mtx1_wdt_device.queue = 1;
-		gpio_set_value(mtx1_wdt_device.gpio, 1);
+		mtx1_wdt_device.gstate = 1;
+		gpio_direction_output(mtx1_wdt_device.gpio, 1);
 		mod_timer(&mtx1_wdt_device.timer, jiffies + MTX1_WDT_INTERVAL);
 	}
 	mtx1_wdt_device.running++;
@@ -117,7 +119,8 @@ static int mtx1_wdt_stop(void)
 	spin_lock_irqsave(&mtx1_wdt_device.lock, flags);
 	if (mtx1_wdt_device.queue) {
 		mtx1_wdt_device.queue = 0;
-		gpio_set_value(mtx1_wdt_device.gpio, 0);
+		mtx1_wdt_device.gstate = 0;
+		gpio_direction_output(mtx1_wdt_device.gpio, 0);
 	}
 	ticks = mtx1_wdt_device.default_ticks;
 	spin_unlock_irqrestore(&mtx1_wdt_device.lock, flags);
-- 
cgit v1.2.3


From b3ae52b6b0335eba547221aad2cb3c50902e3d2d Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Tue, 10 May 2011 23:31:30 +0200
Subject: SSB: Change fallback sprom to callback mechanism.

Some embedded devices like the Netgear WNDR3300 have two SSB based cards
without an own sprom on the pci bus. We have to provide two different
fallback sproms for these and this was not possible with the old solution.
In the bcm47xx architecture the sprom data is stored in the nvram in the
main flash storage. The architecture code will be able to fill the sprom
with the stored data based on the bus where the device was found.

The bcm63xx code should do the same thing as before, just using the new
API.

Acked-by: Michael Buesch <mb@bu3sch.de>
Cc: netdev@vger.kernel.org
Cc: linux-wireless@vger.kernel.org
Cc: Florian Fainelli <florian@openwrt.org>
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2362/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/bcm63xx/boards/board_bcm963xx.c | 16 ++++++++++--
 drivers/ssb/pci.c                         | 16 ++++++++----
 drivers/ssb/sprom.c                       | 43 ++++++++++++++++++-------------
 drivers/ssb/ssb_private.h                 |  3 ++-
 include/linux/ssb/ssb.h                   |  4 ++-
 5 files changed, 55 insertions(+), 27 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c
index 8dba8cfb752f..40b223b603be 100644
--- a/arch/mips/bcm63xx/boards/board_bcm963xx.c
+++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c
@@ -643,6 +643,17 @@ static struct ssb_sprom bcm63xx_sprom = {
 	.boardflags_lo		= 0x2848,
 	.boardflags_hi		= 0x0000,
 };
+
+int bcm63xx_get_fallback_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
+{
+	if (bus->bustype == SSB_BUSTYPE_PCI) {
+		memcpy(out, &bcm63xx_sprom, sizeof(struct ssb_sprom));
+		return 0;
+	} else {
+		printk(KERN_ERR PFX "unable to fill SPROM for given bustype.\n");
+		return -EINVAL;
+	}
+}
 #endif
 
 /*
@@ -793,8 +804,9 @@ void __init board_prom_init(void)
 	if (!board_get_mac_address(bcm63xx_sprom.il0mac)) {
 		memcpy(bcm63xx_sprom.et0mac, bcm63xx_sprom.il0mac, ETH_ALEN);
 		memcpy(bcm63xx_sprom.et1mac, bcm63xx_sprom.il0mac, ETH_ALEN);
-		if (ssb_arch_set_fallback_sprom(&bcm63xx_sprom) < 0)
-			printk(KERN_ERR "failed to register fallback SPROM\n");
+		if (ssb_arch_register_fallback_sprom(
+				&bcm63xx_get_fallback_sprom) < 0)
+			printk(KERN_ERR PFX "failed to register fallback SPROM\n");
 	}
 #endif
 }
diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index 6f34963b3c64..7ad48585c5e6 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -662,7 +662,6 @@ static int sprom_extract(struct ssb_bus *bus, struct ssb_sprom *out,
 static int ssb_pci_sprom_get(struct ssb_bus *bus,
 			     struct ssb_sprom *sprom)
 {
-	const struct ssb_sprom *fallback;
 	int err;
 	u16 *buf;
 
@@ -707,10 +706,17 @@ static int ssb_pci_sprom_get(struct ssb_bus *bus,
 		if (err) {
 			/* All CRC attempts failed.
 			 * Maybe there is no SPROM on the device?
-			 * If we have a fallback, use that. */
-			fallback = ssb_get_fallback_sprom();
-			if (fallback) {
-				memcpy(sprom, fallback, sizeof(*sprom));
+			 * Now we ask the arch code if there is some sprom
+			 * available for this device in some other storage */
+			err = ssb_fill_sprom_with_fallback(bus, sprom);
+			if (err) {
+				ssb_printk(KERN_WARNING PFX "WARNING: Using"
+					   " fallback SPROM failed (err %d)\n",
+					   err);
+			} else {
+				ssb_dprintk(KERN_DEBUG PFX "Using SPROM"
+					    " revision %d provided by"
+					    " platform.\n", sprom->revision);
 				err = 0;
 				goto out_free;
 			}
diff --git a/drivers/ssb/sprom.c b/drivers/ssb/sprom.c
index 5f34d7a3e3a5..45ff0e3a3828 100644
--- a/drivers/ssb/sprom.c
+++ b/drivers/ssb/sprom.c
@@ -17,7 +17,7 @@
 #include <linux/slab.h>
 
 
-static const struct ssb_sprom *fallback_sprom;
+static int(*get_fallback_sprom)(struct ssb_bus *dev, struct ssb_sprom *out);
 
 
 static int sprom2hex(const u16 *sprom, char *buf, size_t buf_len,
@@ -145,36 +145,43 @@ out:
 }
 
 /**
- * ssb_arch_set_fallback_sprom - Set a fallback SPROM for use if no SPROM is found.
+ * ssb_arch_register_fallback_sprom - Registers a method providing a
+ * fallback SPROM if no SPROM is found.
  *
- * @sprom: The SPROM data structure to register.
+ * @sprom_callback: The callback function.
  *
- * With this function the architecture implementation may register a fallback
- * SPROM data structure. The fallback is only used for PCI based SSB devices,
- * where no valid SPROM can be found in the shadow registers.
+ * With this function the architecture implementation may register a
+ * callback handler which fills the SPROM data structure. The fallback is
+ * only used for PCI based SSB devices, where no valid SPROM can be found
+ * in the shadow registers.
  *
- * This function is useful for weird architectures that have a half-assed SSB device
- * hardwired to their PCI bus.
+ * This function is useful for weird architectures that have a half-assed
+ * SSB device hardwired to their PCI bus.
  *
- * Note that it does only work with PCI attached SSB devices. PCMCIA devices currently
- * don't use this fallback.
- * Architectures must provide the SPROM for native SSB devices anyway,
- * so the fallback also isn't used for native devices.
+ * Note that it does only work with PCI attached SSB devices. PCMCIA
+ * devices currently don't use this fallback.
+ * Architectures must provide the SPROM for native SSB devices anyway, so
+ * the fallback also isn't used for native devices.
  *
- * This function is available for architecture code, only. So it is not exported.
+ * This function is available for architecture code, only. So it is not
+ * exported.
  */
-int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom)
+int ssb_arch_register_fallback_sprom(int (*sprom_callback)(struct ssb_bus *bus,
+				     struct ssb_sprom *out))
 {
-	if (fallback_sprom)
+	if (get_fallback_sprom)
 		return -EEXIST;
-	fallback_sprom = sprom;
+	get_fallback_sprom = sprom_callback;
 
 	return 0;
 }
 
-const struct ssb_sprom *ssb_get_fallback_sprom(void)
+int ssb_fill_sprom_with_fallback(struct ssb_bus *bus, struct ssb_sprom *out)
 {
-	return fallback_sprom;
+	if (!get_fallback_sprom)
+		return -ENOENT;
+
+	return get_fallback_sprom(bus, out);
 }
 
 /* http://bcm-v4.sipsolutions.net/802.11/IsSpromAvailable */
diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h
index 0331139a726f..77653014db0b 100644
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -171,7 +171,8 @@ ssize_t ssb_attr_sprom_store(struct ssb_bus *bus,
 			     const char *buf, size_t count,
 			     int (*sprom_check_crc)(const u16 *sprom, size_t size),
 			     int (*sprom_write)(struct ssb_bus *bus, const u16 *sprom));
-extern const struct ssb_sprom *ssb_get_fallback_sprom(void);
+extern int ssb_fill_sprom_with_fallback(struct ssb_bus *bus,
+					struct ssb_sprom *out);
 
 
 /* core.c */
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 9659eff52ca2..045f72ab5dfd 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -404,7 +404,9 @@ extern bool ssb_is_sprom_available(struct ssb_bus *bus);
 
 /* Set a fallback SPROM.
  * See kdoc at the function definition for complete documentation. */
-extern int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom);
+extern int ssb_arch_register_fallback_sprom(
+		int (*sprom_callback)(struct ssb_bus *bus,
+		struct ssb_sprom *out));
 
 /* Suspend a SSB bus.
  * Call this from the parent bus suspend routine. */
-- 
cgit v1.2.3


From a7c62f8564357532872e106f0fa383728cf886cc Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Tue, 10 May 2011 23:31:31 +0200
Subject: MIPS: BCM47xx: Extend bcm47xx_fill_sprom with prefix.

When an other SSB based device without an own SPROM is attached, using the
PCI bus to the main SSB based device, the data normally found in the SPROM
will be stored in the NVRAM on modern devices. The keys, to load the data
from the NVRAM, are all using some sort of prefix like pci/1/1/, pci/1/3/
or sb/1/ before the actual key. This patch extends bcm47xx_fill_sprom() to
make it possible to read out these values when some prefix was used.

The keys for the SPROM data used on the main chip does not have a prefix.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2363/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/bcm47xx/setup.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index c95f90bf734c..bbfcf9bd38db 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -57,10 +57,23 @@ static void bcm47xx_machine_halt(void)
 }
 
 #define READ_FROM_NVRAM(_outvar, name, buf) \
-	if (nvram_getenv(name, buf, sizeof(buf)) >= 0)\
+	if (nvram_getprefix(prefix, name, buf, sizeof(buf)) >= 0)\
 		sprom->_outvar = simple_strtoul(buf, NULL, 0);
 
-static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
+static inline int nvram_getprefix(const char *prefix, char *name,
+				  char *buf, int len)
+{
+	if (prefix) {
+		char key[100];
+
+		snprintf(key, sizeof(key), "%s%s", prefix, name);
+		return nvram_getenv(key, buf, len);
+	}
+
+	return nvram_getenv(name, buf, len);
+}
+
+static void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix)
 {
 	char buf[100];
 	u32 boardflags;
@@ -69,11 +82,11 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
 
 	sprom->revision = 1; /* Fallback: Old hardware does not define this. */
 	READ_FROM_NVRAM(revision, "sromrev", buf);
-	if (nvram_getenv("il0macaddr", buf, sizeof(buf)) >= 0)
+	if (nvram_getprefix(prefix, "il0macaddr", buf, sizeof(buf)) >= 0)
 		nvram_parse_macaddr(buf, sprom->il0mac);
-	if (nvram_getenv("et0macaddr", buf, sizeof(buf)) >= 0)
+	if (nvram_getprefix(prefix, "et0macaddr", buf, sizeof(buf)) >= 0)
 		nvram_parse_macaddr(buf, sprom->et0mac);
-	if (nvram_getenv("et1macaddr", buf, sizeof(buf)) >= 0)
+	if (nvram_getprefix(prefix, "et1macaddr", buf, sizeof(buf)) >= 0)
 		nvram_parse_macaddr(buf, sprom->et1mac);
 	READ_FROM_NVRAM(et0phyaddr, "et0phyaddr", buf);
 	READ_FROM_NVRAM(et1phyaddr, "et1phyaddr", buf);
@@ -125,14 +138,14 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom)
 	READ_FROM_NVRAM(ofdm5gpo, "ofdm5gpo", buf);
 	READ_FROM_NVRAM(ofdm5ghpo, "ofdm5ghpo", buf);
 
-	if (nvram_getenv("boardflags", buf, sizeof(buf)) >= 0) {
+	if (nvram_getprefix(prefix, "boardflags", buf, sizeof(buf)) >= 0) {
 		boardflags = simple_strtoul(buf, NULL, 0);
 		if (boardflags) {
 			sprom->boardflags_lo = (boardflags & 0x0000FFFFU);
 			sprom->boardflags_hi = (boardflags & 0xFFFF0000U) >> 16;
 		}
 	}
-	if (nvram_getenv("boardflags2", buf, sizeof(buf)) >= 0) {
+	if (nvram_getprefix(prefix, "boardflags2", buf, sizeof(buf)) >= 0) {
 		boardflags = simple_strtoul(buf, NULL, 0);
 		if (boardflags) {
 			sprom->boardflags2_lo = (boardflags & 0x0000FFFFU);
@@ -158,7 +171,7 @@ static int bcm47xx_get_invariants(struct ssb_bus *bus,
 	if (nvram_getenv("boardrev", buf, sizeof(buf)) >= 0)
 		iv->boardinfo.rev = (u16)simple_strtoul(buf, NULL, 0);
 
-	bcm47xx_fill_sprom(&iv->sprom);
+	bcm47xx_fill_sprom(&iv->sprom, NULL);
 
 	if (nvram_getenv("cardbus", buf, sizeof(buf)) >= 0)
 		iv->has_cardbus_slot = !!simple_strtoul(buf, NULL, 10);
-- 
cgit v1.2.3


From fe6f3642ac70d21004ddbe7242bd4548c35f1c10 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Tue, 10 May 2011 23:31:32 +0200
Subject: MIPS: BCM47xx: Register SSB fallback sprom callback

We are generating the prefix based on the PCI bus address the device is
on. This is done like Broadcom does it in their code expect that the the
bus number is increased by one. In the SB bus implementation used by
Broadcom the SB bus emulates a PCI bus so the kernel sees one PCI bus
more then in our implementation. We do not handle prefixes like sb/1/
yet as they are only used on the new bus which is not implemented yet.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2364/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/bcm47xx/nvram.c |  3 ++-
 arch/mips/bcm47xx/setup.c | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/bcm47xx/nvram.c b/arch/mips/bcm47xx/nvram.c
index e5b6615731e5..54db815bc86c 100644
--- a/arch/mips/bcm47xx/nvram.c
+++ b/arch/mips/bcm47xx/nvram.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2005 Broadcom Corporation
  * Copyright (C) 2006 Felix Fietkau <nbd@openwrt.org>
+ * Copyright (C) 2010-2011 Hauke Mehrtens <hauke@hauke-m.de>
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -23,7 +24,7 @@
 static char nvram_buf[NVRAM_SPACE];
 
 /* Probe for NVRAM header */
-static void __init early_nvram_init(void)
+static void early_nvram_init(void)
 {
 	struct ssb_mipscore *mcore = &ssb_bcm47xx.mipscore;
 	struct nvram_header *header;
diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index bbfcf9bd38db..258ffcf92754 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -3,6 +3,7 @@
  *  Copyright (C) 2006 Felix Fietkau <nbd@openwrt.org>
  *  Copyright (C) 2006 Michael Buesch <mb@bu3sch.de>
  *  Copyright (C) 2010 Waldemar Brodkorb <wbx@openadk.org>
+ *  Copyright (C) 2010-2011 Hauke Mehrtens <hauke@hauke-m.de>
  *
  *  This program is free software; you can redistribute  it and/or modify it
  *  under  the terms of  the GNU General  Public License as published by the
@@ -154,6 +155,22 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix)
 	}
 }
 
+int bcm47xx_get_sprom(struct ssb_bus *bus, struct ssb_sprom *out)
+{
+	char prefix[10];
+
+	if (bus->bustype == SSB_BUSTYPE_PCI) {
+		snprintf(prefix, sizeof(prefix), "pci/%u/%u/",
+			 bus->host_pci->bus->number + 1,
+			 PCI_SLOT(bus->host_pci->devfn));
+		bcm47xx_fill_sprom(out, prefix);
+		return 0;
+	} else {
+		printk(KERN_WARNING "bcm47xx: unable to fill SPROM for given bustype.\n");
+		return -EINVAL;
+	}
+}
+
 static int bcm47xx_get_invariants(struct ssb_bus *bus,
 				   struct ssb_init_invariants *iv)
 {
@@ -185,6 +202,11 @@ void __init plat_mem_setup(void)
 	char buf[100];
 	struct ssb_mipscore *mcore;
 
+	err = ssb_arch_register_fallback_sprom(&bcm47xx_get_sprom);
+	if (err)
+		printk(KERN_WARNING "bcm47xx: someone else already registered"
+			" a ssb SPROM callback handler (err %d)\n", err);
+
 	err = ssb_bus_ssbbus_register(&ssb_bcm47xx, SSB_ENUM_BASE,
 				      bcm47xx_get_invariants);
 	if (err)
-- 
cgit v1.2.3


From 41790fd51f71f3744a5d142cc5369eebab8817a0 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Tue, 10 May 2011 23:31:33 +0200
Subject: MIPS: BCM47xx: Extend the filling of SPROM from NVRAM

Some members of the struct ssb_sprom where not filled with data available
in the NVRAM. Some attribute names in the NVRAM changed from SPROM version
3 to version 4. This patch was done by analyzing the the pci sprom parser
in the ssb code and some open source parts of the braodcom wireless driver
used on embedded devices.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2365/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/bcm47xx/setup.c | 81 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 66 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index 258ffcf92754..73b529b57433 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -61,6 +61,11 @@ static void bcm47xx_machine_halt(void)
 	if (nvram_getprefix(prefix, name, buf, sizeof(buf)) >= 0)\
 		sprom->_outvar = simple_strtoul(buf, NULL, 0);
 
+#define READ_FROM_NVRAM2(_outvar, name1, name2, buf) \
+	if (nvram_getprefix(prefix, name1, buf, sizeof(buf)) >= 0 || \
+	    nvram_getprefix(prefix, name2, buf, sizeof(buf)) >= 0)\
+		sprom->_outvar = simple_strtoul(buf, NULL, 0);
+
 static inline int nvram_getprefix(const char *prefix, char *name,
 				  char *buf, int len)
 {
@@ -74,6 +79,27 @@ static inline int nvram_getprefix(const char *prefix, char *name,
 	return nvram_getenv(name, buf, len);
 }
 
+static u32 nvram_getu32(const char *name, char *buf, int len)
+{
+	int rv;
+	char key[100];
+	u16 var0, var1;
+
+	snprintf(key, sizeof(key), "%s0", name);
+	rv = nvram_getenv(key, buf, len);
+	/* return 0 here so this looks like unset */
+	if (rv < 0)
+		return 0;
+	var0 = simple_strtoul(buf, NULL, 0);
+
+	snprintf(key, sizeof(key), "%s1", name);
+	rv = nvram_getenv(key, buf, len);
+	if (rv < 0)
+		return 0;
+	var1 = simple_strtoul(buf, NULL, 0);
+	return var1 << 16 | var0;
+}
+
 static void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix)
 {
 	char buf[100];
@@ -83,7 +109,8 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix)
 
 	sprom->revision = 1; /* Fallback: Old hardware does not define this. */
 	READ_FROM_NVRAM(revision, "sromrev", buf);
-	if (nvram_getprefix(prefix, "il0macaddr", buf, sizeof(buf)) >= 0)
+	if (nvram_getprefix(prefix, "il0macaddr", buf, sizeof(buf)) >= 0 ||
+	    nvram_getprefix(prefix, "macaddr", buf, sizeof(buf)) >= 0)
 		nvram_parse_macaddr(buf, sprom->il0mac);
 	if (nvram_getprefix(prefix, "et0macaddr", buf, sizeof(buf)) >= 0)
 		nvram_parse_macaddr(buf, sprom->et0mac);
@@ -109,20 +136,36 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix)
 	READ_FROM_NVRAM(pa1hib0, "pa1hib0", buf);
 	READ_FROM_NVRAM(pa1hib2, "pa1hib1", buf);
 	READ_FROM_NVRAM(pa1hib1, "pa1hib2", buf);
-	READ_FROM_NVRAM(gpio0, "wl0gpio0", buf);
-	READ_FROM_NVRAM(gpio1, "wl0gpio1", buf);
-	READ_FROM_NVRAM(gpio2, "wl0gpio2", buf);
-	READ_FROM_NVRAM(gpio3, "wl0gpio3", buf);
-	READ_FROM_NVRAM(maxpwr_bg, "pa0maxpwr", buf);
-	READ_FROM_NVRAM(maxpwr_al, "pa1lomaxpwr", buf);
-	READ_FROM_NVRAM(maxpwr_a, "pa1maxpwr", buf);
-	READ_FROM_NVRAM(maxpwr_ah, "pa1himaxpwr", buf);
-	READ_FROM_NVRAM(itssi_a, "pa1itssit", buf);
-	READ_FROM_NVRAM(itssi_bg, "pa0itssit", buf);
+	READ_FROM_NVRAM2(gpio0, "ledbh0", "wl0gpio0", buf);
+	READ_FROM_NVRAM2(gpio1, "ledbh1", "wl0gpio1", buf);
+	READ_FROM_NVRAM2(gpio2, "ledbh2", "wl0gpio2", buf);
+	READ_FROM_NVRAM2(gpio3, "ledbh3", "wl0gpio3", buf);
+	READ_FROM_NVRAM2(maxpwr_bg, "maxp2ga0", "pa0maxpwr", buf);
+	READ_FROM_NVRAM2(maxpwr_al, "maxp5gla0", "pa1lomaxpwr", buf);
+	READ_FROM_NVRAM2(maxpwr_a, "maxp5ga0", "pa1maxpwr", buf);
+	READ_FROM_NVRAM2(maxpwr_ah, "maxp5gha0", "pa1himaxpwr", buf);
+	READ_FROM_NVRAM2(itssi_bg, "itt5ga0", "pa0itssit", buf);
+	READ_FROM_NVRAM2(itssi_a, "itt2ga0", "pa1itssit", buf);
 	READ_FROM_NVRAM(tri2g, "tri2g", buf);
 	READ_FROM_NVRAM(tri5gl, "tri5gl", buf);
 	READ_FROM_NVRAM(tri5g, "tri5g", buf);
 	READ_FROM_NVRAM(tri5gh, "tri5gh", buf);
+	READ_FROM_NVRAM(txpid2g[0], "txpid2ga0", buf);
+	READ_FROM_NVRAM(txpid2g[1], "txpid2ga1", buf);
+	READ_FROM_NVRAM(txpid2g[2], "txpid2ga2", buf);
+	READ_FROM_NVRAM(txpid2g[3], "txpid2ga3", buf);
+	READ_FROM_NVRAM(txpid5g[0], "txpid5ga0", buf);
+	READ_FROM_NVRAM(txpid5g[1], "txpid5ga1", buf);
+	READ_FROM_NVRAM(txpid5g[2], "txpid5ga2", buf);
+	READ_FROM_NVRAM(txpid5g[3], "txpid5ga3", buf);
+	READ_FROM_NVRAM(txpid5gl[0], "txpid5gla0", buf);
+	READ_FROM_NVRAM(txpid5gl[1], "txpid5gla1", buf);
+	READ_FROM_NVRAM(txpid5gl[2], "txpid5gla2", buf);
+	READ_FROM_NVRAM(txpid5gl[3], "txpid5gla3", buf);
+	READ_FROM_NVRAM(txpid5gh[0], "txpid5gha0", buf);
+	READ_FROM_NVRAM(txpid5gh[1], "txpid5gha1", buf);
+	READ_FROM_NVRAM(txpid5gh[2], "txpid5gha2", buf);
+	READ_FROM_NVRAM(txpid5gh[3], "txpid5gha3", buf);
 	READ_FROM_NVRAM(rxpo2g, "rxpo2g", buf);
 	READ_FROM_NVRAM(rxpo5g, "rxpo5g", buf);
 	READ_FROM_NVRAM(rssisav2g, "rssisav2g", buf);
@@ -134,10 +177,18 @@ static void bcm47xx_fill_sprom(struct ssb_sprom *sprom, const char *prefix)
 	READ_FROM_NVRAM(rssismf5g, "rssismf5g", buf);
 	READ_FROM_NVRAM(bxa5g, "bxa5g", buf);
 	READ_FROM_NVRAM(cck2gpo, "cck2gpo", buf);
-	READ_FROM_NVRAM(ofdm2gpo, "ofdm2gpo", buf);
-	READ_FROM_NVRAM(ofdm5glpo, "ofdm5glpo", buf);
-	READ_FROM_NVRAM(ofdm5gpo, "ofdm5gpo", buf);
-	READ_FROM_NVRAM(ofdm5ghpo, "ofdm5ghpo", buf);
+
+	sprom->ofdm2gpo = nvram_getu32("ofdm2gpo", buf, sizeof(buf));
+	sprom->ofdm5glpo = nvram_getu32("ofdm5glpo", buf, sizeof(buf));
+	sprom->ofdm5gpo = nvram_getu32("ofdm5gpo", buf, sizeof(buf));
+	sprom->ofdm5ghpo = nvram_getu32("ofdm5ghpo", buf, sizeof(buf));
+
+	READ_FROM_NVRAM(antenna_gain.ghz24.a0, "ag0", buf);
+	READ_FROM_NVRAM(antenna_gain.ghz24.a1, "ag1", buf);
+	READ_FROM_NVRAM(antenna_gain.ghz24.a2, "ag2", buf);
+	READ_FROM_NVRAM(antenna_gain.ghz24.a3, "ag3", buf);
+	memcpy(&sprom->antenna_gain.ghz5, &sprom->antenna_gain.ghz24,
+	       sizeof(sprom->antenna_gain.ghz5));
 
 	if (nvram_getprefix(prefix, "boardflags", buf, sizeof(buf)) >= 0) {
 		boardflags = simple_strtoul(buf, NULL, 0);
-- 
cgit v1.2.3


From 9cbda726bb283d60cd4f34a3a9da8b5b48a46b0f Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Tue, 10 May 2011 23:31:34 +0200
Subject: MIPS: BCM47xx: Fix MAC address parsing.

Some devices like the Netgear WGT634u are using minuses between the blocks
of the MAC address and other devices are using colons to separate them.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2366/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mach-bcm47xx/nvram.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/include/asm/mach-bcm47xx/nvram.h b/arch/mips/include/asm/mach-bcm47xx/nvram.h
index 9759588ba3cf..184d5ecb5f51 100644
--- a/arch/mips/include/asm/mach-bcm47xx/nvram.h
+++ b/arch/mips/include/asm/mach-bcm47xx/nvram.h
@@ -39,8 +39,16 @@ extern int nvram_getenv(char *name, char *val, size_t val_len);
 
 static inline void nvram_parse_macaddr(char *buf, u8 *macaddr)
 {
-	sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0], &macaddr[1],
-	       &macaddr[2], &macaddr[3], &macaddr[4], &macaddr[5]);
+	if (strchr(buf, ':'))
+		sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0],
+			&macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4],
+			&macaddr[5]);
+	else if (strchr(buf, '-'))
+		sscanf(buf, "%hhx-%hhx-%hhx-%hhx-%hhx-%hhx", &macaddr[0],
+			&macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4],
+			&macaddr[5]);
+	else
+		printk(KERN_WARNING "Can not parse mac address: %s\n", buf);
 }
 
 #endif
-- 
cgit v1.2.3


From 6edde0247644db475f68f25dcb1bf72260600081 Mon Sep 17 00:00:00 2001
From: Maarten ter Huurne <maarten@treewalker.org>
Date: Mon, 2 May 2011 11:47:00 +0200
Subject: MIPS: JZ4740: setup: Autodetect physical memory.

Assume that the boot loader knows the physical memory of the system and
deduce that information from the contents of the SDRAM control register.
It is still possible to override with with the "mem=" parameter, but we
have a sensible default now.

Signed-off-by: Maarten ter Huurne <maarten@treewalker.org>
Acked-by: Lars-Peter Clausen <lars@metafoo.de>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2319/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/jz4740/setup.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c
index 6a9e14dab91e..d97cfbf882f5 100644
--- a/arch/mips/jz4740/setup.c
+++ b/arch/mips/jz4740/setup.c
@@ -1,5 +1,6 @@
 /*
  *  Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
+ *  Copyright (C) 2011, Maarten ter Huurne <maarten@treewalker.org>
  *  JZ4740 setup code
  *
  *  This program is free software; you can redistribute it and/or modify it
@@ -14,13 +15,44 @@
  */
 
 #include <linux/init.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 
+#include <asm/bootinfo.h>
+
+#include <asm/mach-jz4740/base.h>
+
 #include "reset.h"
 
+
+#define JZ4740_EMC_SDRAM_CTRL 0x80
+
+
+static void __init jz4740_detect_mem(void)
+{
+	void __iomem *jz_emc_base;
+	u32 ctrl, bus, bank, rows, cols;
+	phys_t size;
+
+	jz_emc_base = ioremap(JZ4740_EMC_BASE_ADDR, 0x100);
+	ctrl = readl(jz_emc_base + JZ4740_EMC_SDRAM_CTRL);
+	bus = 2 - ((ctrl >> 31) & 1);
+	bank = 1 + ((ctrl >> 19) & 1);
+	cols = 8 + ((ctrl >> 26) & 7);
+	rows = 11 + ((ctrl >> 20) & 3);
+	printk(KERN_DEBUG
+		"SDRAM preconfigured: bus:%u bank:%u rows:%u cols:%u\n",
+		bus, bank, rows, cols);
+	iounmap(jz_emc_base);
+
+	size = 1 << (bus + bank + cols + rows);
+	add_memory_region(0, size, BOOT_MEM_RAM);
+}
+
 void __init plat_mem_setup(void)
 {
 	jz4740_reset_init();
+	jz4740_detect_mem();
 }
 
 const char *get_system_type(void)
-- 
cgit v1.2.3


From c094c99e659efedcbb05a0f75b8f77145d8ec539 Mon Sep 17 00:00:00 2001
From: Robert Millan <rmh@gnu.org>
Date: Mon, 18 Apr 2011 11:37:55 -0700
Subject: MIPS: Introduce set_elf_platform() helper function

Replace these sequences:

if (cpu == 0)
	__elf_platform = "foo";

with a trivial inline function.

Signed-off-by: Robert Millan <rmh@gnu.org>
Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Cc: wu zhangjin <wuzhangjin@gmail.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/2304/
Patchwork: https://patchwork.linux-mips.org/patch/2374/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/cpu-probe.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index c7b7eb24e277..aa86250844ca 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -291,6 +291,12 @@ static inline int cpu_has_confreg(void)
 #endif
 }
 
+static inline void set_elf_platform(int cpu, const char *plat)
+{
+	if (cpu == 0)
+		__elf_platform = plat;
+}
+
 /*
  * Get the FPU Implementation/Revision.
  */
@@ -956,14 +962,12 @@ static inline void cpu_probe_cavium(struct cpuinfo_mips *c, unsigned int cpu)
 		c->cputype = CPU_CAVIUM_OCTEON_PLUS;
 		__cpu_name[cpu] = "Cavium Octeon+";
 platform:
-		if (cpu == 0)
-			__elf_platform = "octeon";
+		set_elf_platform(cpu, "octeon");
 		break;
 	case PRID_IMP_CAVIUM_CN63XX:
 		c->cputype = CPU_CAVIUM_OCTEON2;
 		__cpu_name[cpu] = "Cavium Octeon II";
-		if (cpu == 0)
-			__elf_platform = "octeon2";
+		set_elf_platform(cpu, "octeon2");
 		break;
 	default:
 		printk(KERN_INFO "Unknown Octeon chip!\n");
-- 
cgit v1.2.3


From 06785df09b18e9127d16893039b64ae118c53cb4 Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Sat, 16 Apr 2011 11:29:28 -0700
Subject: MIPS: Set ELF AT_PLATFORM string for BMIPS processors

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Cc: Robert Millan <rmh@gnu.org>
Cc: David Daney <ddaney@caviumnetworks.com>
Cc: wu zhangjin <wuzhangjin@gmail.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/2300/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/cpu-probe.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index aa86250844ca..7da861755f4f 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -917,12 +917,14 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
 	case PRID_IMP_BMIPS32_REV8:
 		c->cputype = CPU_BMIPS32;
 		__cpu_name[cpu] = "Broadcom BMIPS32";
+		set_elf_platform(cpu, "bmips32");
 		break;
 	case PRID_IMP_BMIPS3300:
 	case PRID_IMP_BMIPS3300_ALT:
 	case PRID_IMP_BMIPS3300_BUG:
 		c->cputype = CPU_BMIPS3300;
 		__cpu_name[cpu] = "Broadcom BMIPS3300";
+		set_elf_platform(cpu, "bmips3300");
 		break;
 	case PRID_IMP_BMIPS43XX: {
 		int rev = c->processor_id & 0xff;
@@ -931,15 +933,18 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
 				rev <= PRID_REV_BMIPS4380_HI) {
 			c->cputype = CPU_BMIPS4380;
 			__cpu_name[cpu] = "Broadcom BMIPS4380";
+			set_elf_platform(cpu, "bmips4380");
 		} else {
 			c->cputype = CPU_BMIPS4350;
 			__cpu_name[cpu] = "Broadcom BMIPS4350";
+			set_elf_platform(cpu, "bmips4350");
 		}
 		break;
 	}
 	case PRID_IMP_BMIPS5000:
 		c->cputype = CPU_BMIPS5000;
 		__cpu_name[cpu] = "Broadcom BMIPS5000";
+		set_elf_platform(cpu, "bmips5000");
 		c->options |= MIPS_CPU_ULRI;
 		break;
 	}
-- 
cgit v1.2.3


From 5aac1e8a381d52a977b5050369a82a547c446ee2 Mon Sep 17 00:00:00 2001
From: Robert Millan <rmh@gnu.org>
Date: Sat, 16 Apr 2011 11:29:29 -0700
Subject: MIPS: Set ELF AT_PLATFORM string for Loongson2 processors

Signed-off-by: Robert Millan <rmh@gnu.org>
Acked-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Cc: David Daney <ddaney@caviumnetworks.com>
Cc: wu zhangjin <wuzhangjin@gmail.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/2302/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/cpu-probe.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 7da861755f4f..bb133d10b145 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -620,6 +620,16 @@ static inline void cpu_probe_legacy(struct cpuinfo_mips *c, unsigned int cpu)
 	case PRID_IMP_LOONGSON2:
 		c->cputype = CPU_LOONGSON2;
 		__cpu_name[cpu] = "ICT Loongson-2";
+
+		switch (c->processor_id & PRID_REV_MASK) {
+		case PRID_REV_LOONGSON2E:
+			set_elf_platform(cpu, "loongson2e");
+			break;
+		case PRID_REV_LOONGSON2F:
+			set_elf_platform(cpu, "loongson2f");
+			break;
+		}
+
 		c->isa_level = MIPS_CPU_ISA_III;
 		c->options = R4K_OPTS |
 			     MIPS_CPU_FPU | MIPS_CPU_LLSC |
-- 
cgit v1.2.3


From 1c8da7a1107a46c94b21cc176aaf95c819aab3db Mon Sep 17 00:00:00 2001
From: Wanlong Gao <wanlong.gao@gmail.com>
Date: Sun, 10 Apr 2011 01:42:17 +0800
Subject: MIPS: Lemote 2F, Malta: Fix build warning

Since 5ada28bf76752e33dce3d807bf0dfbe6d1b943ad ["led-class: always
implement blinking"] LEDS_CLASS=m is no longer valid so change the setting
from m to y.

Signed-off-by: Wanlong Gao <wanlong.gao@gmail.com>
To: david.woodhouse@intel.com
To: akpm@linux-foundation.org
To: mingo@elte.hu
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/2276/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/configs/lemote2f_defconfig | 2 +-
 arch/mips/configs/malta_defconfig    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 167c1d07b809..cb2c5eaa60fd 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -329,7 +329,7 @@ CONFIG_USB_LED=m
 CONFIG_USB_GADGET=m
 CONFIG_USB_GADGET_M66592=y
 CONFIG_MMC=m
-CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_CLASS=y
 CONFIG_STAGING=y
 # CONFIG_STAGING_EXCLUDE_BUILD is not set
 CONFIG_FB_SM7XX=y
diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig
index 7270f3183bda..5527abbb7dea 100644
--- a/arch/mips/configs/malta_defconfig
+++ b/arch/mips/configs/malta_defconfig
@@ -374,7 +374,7 @@ CONFIG_FB_CIRRUS=y
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_HID=m
-CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_TRIGGER_TIMER=m
 CONFIG_LEDS_TRIGGER_IDE_DISK=y
 CONFIG_LEDS_TRIGGER_HEARTBEAT=m
-- 
cgit v1.2.3


From b32ee693eb106172f89639acff88dc8fee8ba3e2 Mon Sep 17 00:00:00 2001
From: Wanlong Gao <wanlong.gao@gmail.com>
Date: Sun, 10 Apr 2011 03:04:18 +0800
Subject: MIPS: Fix build warnings on defconfigs

Since d45dcef77019012fc6769e657fc2f1a5d681bbbb ["Bluetooth: Fix BT_L2CAP
and BT_SCO in Kconfig"] BT_L2CAP=m and BT_SCO=m are no longer valid so
change the settings from m to y.

[ralf@linux-mips.org: Merging only the MIPS parts of this patch.]

Signed-off-by: Wanlong Gao <wanlong.gao@gmail.com>
To: akpm@linux-foundation.org
To: manuel.lauss@googlemail.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linuxppc-dev@lists.ozlabs.org
Patchwork: https://patchwork.linux-mips.org/patch/2277/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/configs/lemote2f_defconfig | 4 ++--
 arch/mips/configs/mtx1_defconfig     | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index cb2c5eaa60fd..b6acd2f256b6 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -86,8 +86,8 @@ CONFIG_NET_SCHED=y
 CONFIG_NET_EMATCH=y
 CONFIG_NET_CLS_ACT=y
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index a97a42c6b2c8..37862b2ce363 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -225,8 +225,8 @@ CONFIG_TOSHIBA_FIR=m
 CONFIG_VLSI_FIR=m
 CONFIG_MCS_FIR=m
 CONFIG_BT=m
-CONFIG_BT_L2CAP=m
-CONFIG_BT_SCO=m
+CONFIG_BT_L2CAP=y
+CONFIG_BT_SCO=y
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
 CONFIG_BT_BNEP=m
-- 
cgit v1.2.3


From 7716e6548abed1582a7759666e79d5c612a906c7 Mon Sep 17 00:00:00 2001
From: Chandrakala Chavva <cchavva@caviumnetworks.com>
Date: Thu, 17 Feb 2011 13:57:52 -0800
Subject: Octeon: Fix interrupt irq settings for performance counters.

Octeon uses different interrupt irq for timer and performance counters.
Set CvmCtl[IPPCI] to correct irq value very early.

Signed-off-by: Chandrakala Chavva <cchavva@caviumnetworks.com>
Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
Cc: Chandrakala Chavva <cchavva@caviumnetworks.com>
Patchwork: https://patchwork.linux-mips.org/patch/2085/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/setup.c                              | 7 -------
 arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h | 5 +++++
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 0707fae3f0ee..2d9028f1474c 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -288,7 +288,6 @@ void octeon_user_io_init(void)
 	union octeon_cvmemctl cvmmemctl;
 	union cvmx_iob_fau_timeout fau_timeout;
 	union cvmx_pow_nw_tim nm_tim;
-	uint64_t cvmctl;
 
 	/* Get the current settings for CP0_CVMMEMCTL_REG */
 	cvmmemctl.u64 = read_c0_cvmmemctl();
@@ -392,12 +391,6 @@ void octeon_user_io_init(void)
 			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE,
 			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128);
 
-	/* Move the performance counter interrupts to IRQ 6 */
-	cvmctl = read_c0_cvmctl();
-	cvmctl &= ~(7 << 7);
-	cvmctl |= 6 << 7;
-	write_c0_cvmctl(cvmctl);
-
 	/* Set a default for the hardware timeouts */
 	fau_timeout.u64 = 0;
 	fau_timeout.s.tout_val = 0xfff;
diff --git a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
index 0b2b5eb22e9b..dedef7d2b01f 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
@@ -63,6 +63,11 @@
 	# CN30XX Disable instruction prefetching
 	or  v0, v0, 0x2000
 skip:
+	# First clear off CvmCtl[IPPCI] bit and move the performance
+	# counters interrupt to IRQ 6
+	li	v1, ~(7 << 7)
+	and	v0, v0, v1
+	ori	v0, v0, (6 << 7)
 	# Write the cavium control register
 	dmtc0   v0, CP0_CVMCTL_REG
 	sync
-- 
cgit v1.2.3


From e650ce0f083ff9354a10ad66e6bf8c193e8a2755 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Thu, 17 Feb 2011 14:47:52 -0800
Subject: MIPS: Octeon: Don't request interrupts for unused IPI mailbox bits.

We only use the three low-order mailbox bits.  Leave the upper bits alone
for possible use by drivers and other software.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/2090/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/cavium-octeon/smp.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index ba78b21cc8d0..716fae6f941a 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -37,7 +37,7 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
 	uint64_t action;
 
 	/* Load the mailbox register to figure out what we're supposed to do */
-	action = cvmx_read_csr(CVMX_CIU_MBOX_CLRX(coreid));
+	action = cvmx_read_csr(CVMX_CIU_MBOX_CLRX(coreid)) & 0xffff;
 
 	/* Clear the mailbox to clear the interrupt */
 	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(coreid), action);
@@ -200,16 +200,15 @@ void octeon_prepare_cpus(unsigned int max_cpus)
 	if (labi->labi_signature != LABI_SIGNATURE)
 		panic("The bootloader version on this board is incorrect.");
 #endif
-
-	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(cvmx_get_core_num()), 0xffffffff);
+	/*
+	 * Only the low order mailbox bits are used for IPIs, leave
+	 * the other bits alone.
+	 */
+	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(cvmx_get_core_num()), 0xffff);
 	if (request_irq(OCTEON_IRQ_MBOX0, mailbox_interrupt, IRQF_DISABLED,
-			"mailbox0", mailbox_interrupt)) {
+			"SMP-IPI", mailbox_interrupt)) {
 		panic("Cannot request_irq(OCTEON_IRQ_MBOX0)\n");
 	}
-	if (request_irq(OCTEON_IRQ_MBOX1, mailbox_interrupt, IRQF_DISABLED,
-			"mailbox1", mailbox_interrupt)) {
-		panic("Cannot request_irq(OCTEON_IRQ_MBOX1)\n");
-	}
 }
 
 /**
-- 
cgit v1.2.3


From 9c1e8a9138ff92a4ff816ea8a1884ad2461a993a Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 17 May 2011 16:18:09 +0100
Subject: MIPS: Cleanup arch_get_unmapped_area

As noticed by Kevin Cernekee <cernekee@gmail.com> in
http://www.linux-mips.org/cgi-bin/extract-mesg.cgi?a=linux-mips&m=2011-05&i=BANLkTikq04wuK%3Dbz%2BLieavmm3oDtoYWKxg%40mail.gmail.com

Patchwork: https://patchwork.linux-mips.org/patch/2387/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/syscall.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 58beabf50b3c..0c207e8ee601 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -79,20 +79,13 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 {
 	struct vm_area_struct * vmm;
 	int do_color_align;
-	unsigned long task_size;
 
-#ifdef CONFIG_32BIT
-	task_size = TASK_SIZE;
-#else /* Must be CONFIG_64BIT*/
-	task_size = test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE;
-#endif
-
-	if (len > task_size)
+	if (len > TASK_SIZE)
 		return -ENOMEM;
 
 	if (flags & MAP_FIXED) {
-		/* Even MAP_FIXED mappings must reside within task_size.  */
-		if (task_size - len < addr)
+		/* Even MAP_FIXED mappings must reside within TASK_SIZE.  */
+		if (TASK_SIZE - len < addr)
 			return -EINVAL;
 
 		/*
@@ -114,7 +107,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		else
 			addr = PAGE_ALIGN(addr);
 		vmm = find_vma(current->mm, addr);
-		if (task_size - len >= addr &&
+		if (TASK_SIZE - len >= addr &&
 		    (!vmm || addr + len <= vmm->vm_start))
 			return addr;
 	}
@@ -126,7 +119,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
 	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
 		/* At this point:  (!vmm || addr < vmm->vm_end). */
-		if (task_size - len < addr)
+		if (TASK_SIZE - len < addr)
 			return -ENOMEM;
 		if (!vmm || addr + len <= vmm->vm_start)
 			return addr;
-- 
cgit v1.2.3


From 6f6c3c33c027f2c83d53e8562cd9daa73fe8108b Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Thu, 19 May 2011 09:21:33 +0100
Subject: MIPS: Move arch_get_unmapped_area and gang to new file.

It never really belonged into syscall.c and it's about to become well more
complex.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/syscall.c | 113 -----------------------------------------
 arch/mips/mm/Makefile      |   3 +-
 arch/mips/mm/mmap.c        | 122 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 124 insertions(+), 114 deletions(-)
 create mode 100644 arch/mips/mm/mmap.c

(limited to 'arch')

diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 0c207e8ee601..d02765708ddb 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -10,12 +10,9 @@
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/linkage.h>
-#include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/smp.h>
-#include <linux/mman.h>
 #include <linux/ptrace.h>
-#include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/syscalls.h>
 #include <linux/file.h>
@@ -25,11 +22,9 @@
 #include <linux/msg.h>
 #include <linux/shm.h>
 #include <linux/compiler.h>
-#include <linux/module.h>
 #include <linux/ipc.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
-#include <linux/random.h>
 #include <linux/elf.h>
 
 #include <asm/asm.h>
@@ -66,114 +61,6 @@ out:
 	return res;
 }
 
-unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
-
-EXPORT_SYMBOL(shm_align_mask);
-
-#define COLOUR_ALIGN(addr,pgoff)				\
-	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
-	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
-
-unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
-	unsigned long len, unsigned long pgoff, unsigned long flags)
-{
-	struct vm_area_struct * vmm;
-	int do_color_align;
-
-	if (len > TASK_SIZE)
-		return -ENOMEM;
-
-	if (flags & MAP_FIXED) {
-		/* Even MAP_FIXED mappings must reside within TASK_SIZE.  */
-		if (TASK_SIZE - len < addr)
-			return -EINVAL;
-
-		/*
-		 * We do not accept a shared mapping if it would violate
-		 * cache aliasing constraints.
-		 */
-		if ((flags & MAP_SHARED) &&
-		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
-			return -EINVAL;
-		return addr;
-	}
-
-	do_color_align = 0;
-	if (filp || (flags & MAP_SHARED))
-		do_color_align = 1;
-	if (addr) {
-		if (do_color_align)
-			addr = COLOUR_ALIGN(addr, pgoff);
-		else
-			addr = PAGE_ALIGN(addr);
-		vmm = find_vma(current->mm, addr);
-		if (TASK_SIZE - len >= addr &&
-		    (!vmm || addr + len <= vmm->vm_start))
-			return addr;
-	}
-	addr = current->mm->mmap_base;
-	if (do_color_align)
-		addr = COLOUR_ALIGN(addr, pgoff);
-	else
-		addr = PAGE_ALIGN(addr);
-
-	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
-		/* At this point:  (!vmm || addr < vmm->vm_end). */
-		if (TASK_SIZE - len < addr)
-			return -ENOMEM;
-		if (!vmm || addr + len <= vmm->vm_start)
-			return addr;
-		addr = vmm->vm_end;
-		if (do_color_align)
-			addr = COLOUR_ALIGN(addr, pgoff);
-	}
-}
-
-void arch_pick_mmap_layout(struct mm_struct *mm)
-{
-	unsigned long random_factor = 0UL;
-
-	if (current->flags & PF_RANDOMIZE) {
-		random_factor = get_random_int();
-		random_factor = random_factor << PAGE_SHIFT;
-		if (TASK_IS_32BIT_ADDR)
-			random_factor &= 0xfffffful;
-		else
-			random_factor &= 0xffffffful;
-	}
-
-	mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
-	mm->get_unmapped_area = arch_get_unmapped_area;
-	mm->unmap_area = arch_unmap_area;
-}
-
-static inline unsigned long brk_rnd(void)
-{
-	unsigned long rnd = get_random_int();
-
-	rnd = rnd << PAGE_SHIFT;
-	/* 8MB for 32bit, 256MB for 64bit */
-	if (TASK_IS_32BIT_ADDR)
-		rnd = rnd & 0x7ffffful;
-	else
-		rnd = rnd & 0xffffffful;
-
-	return rnd;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long base = mm->brk;
-	unsigned long ret;
-
-	ret = PAGE_ALIGN(base + brk_rnd());
-
-	if (ret < mm->brk)
-		return mm->brk;
-
-	return ret;
-}
-
 SYSCALL_DEFINE6(mips_mmap, unsigned long, addr, unsigned long, len,
 	unsigned long, prot, unsigned long, flags, unsigned long,
 	fd, off_t, offset)
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
index eb4463689faa..4d8c1623eee2 100644
--- a/arch/mips/mm/Makefile
+++ b/arch/mips/mm/Makefile
@@ -3,7 +3,8 @@
 #
 
 obj-y				+= cache.o dma-default.o extable.o fault.o \
-				   init.o tlbex.o tlbex-fault.o uasm.o page.o
+				   init.o mmap.o tlbex.o tlbex-fault.o uasm.o \
+				   page.o
 
 obj-$(CONFIG_32BIT)		+= ioremap.o pgtable-32.o
 obj-$(CONFIG_64BIT)		+= pgtable-64.o
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
new file mode 100644
index 000000000000..ae3c20a9556e
--- /dev/null
+++ b/arch/mips/mm/mmap.c
@@ -0,0 +1,122 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011 Wind River Systems,
+ *   written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+
+unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
+
+EXPORT_SYMBOL(shm_align_mask);
+
+#define COLOUR_ALIGN(addr,pgoff)				\
+	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
+	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+	unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct vm_area_struct * vmm;
+	int do_color_align;
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED) {
+		/* Even MAP_FIXED mappings must reside within TASK_SIZE.  */
+		if (TASK_SIZE - len < addr)
+			return -EINVAL;
+
+		/*
+		 * We do not accept a shared mapping if it would violate
+		 * cache aliasing constraints.
+		 */
+		if ((flags & MAP_SHARED) &&
+		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+			return -EINVAL;
+		return addr;
+	}
+
+	do_color_align = 0;
+	if (filp || (flags & MAP_SHARED))
+		do_color_align = 1;
+	if (addr) {
+		if (do_color_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+		else
+			addr = PAGE_ALIGN(addr);
+		vmm = find_vma(current->mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vmm || addr + len <= vmm->vm_start))
+			return addr;
+	}
+	addr = current->mm->mmap_base;
+	if (do_color_align)
+		addr = COLOUR_ALIGN(addr, pgoff);
+	else
+		addr = PAGE_ALIGN(addr);
+
+	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
+		/* At this point:  (!vmm || addr < vmm->vm_end). */
+		if (TASK_SIZE - len < addr)
+			return -ENOMEM;
+		if (!vmm || addr + len <= vmm->vm_start)
+			return addr;
+		addr = vmm->vm_end;
+		if (do_color_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+	}
+}
+
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+	unsigned long random_factor = 0UL;
+
+	if (current->flags & PF_RANDOMIZE) {
+		random_factor = get_random_int();
+		random_factor = random_factor << PAGE_SHIFT;
+		if (TASK_IS_32BIT_ADDR)
+			random_factor &= 0xfffffful;
+		else
+			random_factor &= 0xffffffful;
+	}
+
+	mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+	mm->get_unmapped_area = arch_get_unmapped_area;
+	mm->unmap_area = arch_unmap_area;
+}
+
+static inline unsigned long brk_rnd(void)
+{
+	unsigned long rnd = get_random_int();
+
+	rnd = rnd << PAGE_SHIFT;
+	/* 8MB for 32bit, 256MB for 64bit */
+	if (TASK_IS_32BIT_ADDR)
+		rnd = rnd & 0x7ffffful;
+	else
+		rnd = rnd & 0xffffffful;
+
+	return rnd;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long base = mm->brk;
+	unsigned long ret;
+
+	ret = PAGE_ALIGN(base + brk_rnd());
+
+	if (ret < mm->brk)
+		return mm->brk;
+
+	return ret;
+}
-- 
cgit v1.2.3


From 61ee9a4ba05f0a4163d43a33dee7a0651e080b98 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 18 May 2011 21:33:42 +0000
Subject: x86: Convert PIT to clockevents_config_and_register()

Let the core do the work.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Link: http://lkml.kernel.org/r/%3C20110518210136.545615675%40linutronix.de%3E
---
 arch/x86/kernel/i8253.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 577e90cadaeb..fb66dc9e36cb 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -93,7 +93,6 @@ static struct clock_event_device pit_ce = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode	= init_pit_timer,
 	.set_next_event = pit_next_event,
-	.shift		= 32,
 	.irq		= 0,
 };
 
@@ -108,11 +107,8 @@ void __init setup_pit_timer(void)
 	 * IO_APIC has been initialized.
 	 */
 	pit_ce.cpumask = cpumask_of(smp_processor_id());
-	pit_ce.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_ce.shift);
-	pit_ce.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_ce);
-	pit_ce.min_delta_ns = clockevent_delta2ns(0xF, &pit_ce);
 
-	clockevents_register_device(&pit_ce);
+	clockevents_config_and_register(&pit_ce, CLOCK_TICK_RATE, 0xF, 0x7FFF);
 	global_clock_event = &pit_ce;
 }
 
-- 
cgit v1.2.3


From ab0e08f15d23628dd8d50bf6ce1a935a8840c7dc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 18 May 2011 21:33:43 +0000
Subject: x86: hpet: Cleanup the clockevents init and register code

No need to recalculate the frequency and the conversion factors over
and over. Calculate the frequency once and use the new config/register
interface and let the core code do the math.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Link: http://lkml.kernel.org/r/%3C20110518210136.646482357%40linutronix.de%3E
---
 arch/x86/kernel/hpet.c | 72 +++++++++++---------------------------------------
 1 file changed, 16 insertions(+), 56 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index bfe8f729e086..6781765b3a0d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -217,7 +217,7 @@ static void hpet_reserve_platform_timers(unsigned int id) { }
 /*
  * Common hpet info
  */
-static unsigned long hpet_period;
+static unsigned long hpet_freq;
 
 static void hpet_legacy_set_mode(enum clock_event_mode mode,
 			  struct clock_event_device *evt);
@@ -232,7 +232,6 @@ static struct clock_event_device hpet_clockevent = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode	= hpet_legacy_set_mode,
 	.set_next_event = hpet_legacy_next_event,
-	.shift		= 32,
 	.irq		= 0,
 	.rating		= 50,
 };
@@ -289,29 +288,13 @@ static void hpet_legacy_clockevent_register(void)
 	/* Start HPET legacy interrupts */
 	hpet_enable_legacy_int();
 
-	/*
-	 * The mult factor is defined as (include/linux/clockchips.h)
-	 *  mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h)
-	 * hpet_period is in units of femtoseconds (per cycle), so
-	 *  mult/2^shift = cyc/ns = 10^6/hpet_period
-	 *  mult = (10^6 * 2^shift)/hpet_period
-	 *  mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period
-	 */
-	hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC,
-				      hpet_period, hpet_clockevent.shift);
-	/* Calculate the min / max delta */
-	hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
-							   &hpet_clockevent);
-	/* Setup minimum reprogramming delta. */
-	hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA,
-							   &hpet_clockevent);
-
 	/*
 	 * Start hpet with the boot cpu mask and make it
 	 * global after the IO_APIC has been initialized.
 	 */
 	hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
-	clockevents_register_device(&hpet_clockevent);
+	clockevents_config_and_register(&hpet_clockevent, hpet_freq,
+					HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
 	global_clock_event = &hpet_clockevent;
 	printk(KERN_DEBUG "hpet clockevent registered\n");
 }
@@ -549,7 +532,6 @@ static int hpet_setup_irq(struct hpet_dev *dev)
 static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 {
 	struct clock_event_device *evt = &hdev->evt;
-	uint64_t hpet_freq;
 
 	WARN_ON(cpu != smp_processor_id());
 	if (!(hdev->flags & HPET_DEV_VALID))
@@ -571,24 +553,10 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 
 	evt->set_mode = hpet_msi_set_mode;
 	evt->set_next_event = hpet_msi_next_event;
-	evt->shift = 32;
-
-	/*
-	 * The period is a femto seconds value. We need to calculate the
-	 * scaled math multiplication factor for nanosecond to hpet tick
-	 * conversion.
-	 */
-	hpet_freq = FSEC_PER_SEC;
-	do_div(hpet_freq, hpet_period);
-	evt->mult = div_sc((unsigned long) hpet_freq,
-				      NSEC_PER_SEC, evt->shift);
-	/* Calculate the max delta */
-	evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
-	/* 5 usec minimum reprogramming delta. */
-	evt->min_delta_ns = 5000;
-
 	evt->cpumask = cpumask_of(hdev->cpu);
-	clockevents_register_device(evt);
+
+	clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA,
+					0x7FFFFFFF);
 }
 
 #ifdef CONFIG_HPET
@@ -792,7 +760,6 @@ static struct clocksource clocksource_hpet = {
 static int hpet_clocksource_register(void)
 {
 	u64 start, now;
-	u64 hpet_freq;
 	cycle_t t1;
 
 	/* Start the counter */
@@ -819,24 +786,7 @@ static int hpet_clocksource_register(void)
 		return -ENODEV;
 	}
 
-	/*
-	 * The definition of mult is (include/linux/clocksource.h)
-	 * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc
-	 * so we first need to convert hpet_period to ns/cyc units:
-	 *  mult/2^shift = ns/cyc = hpet_period/10^6
-	 *  mult = (hpet_period * 2^shift)/10^6
-	 *  mult = (hpet_period << shift)/FSEC_PER_NSEC
-	 */
-
-	/* Need to convert hpet_period (fsec/cyc) to cyc/sec:
-	 *
-	 * cyc/sec = FSEC_PER_SEC/hpet_period(fsec/cyc)
-	 * cyc/sec = (FSEC_PER_NSEC * NSEC_PER_SEC)/hpet_period
-	 */
-	hpet_freq = FSEC_PER_SEC;
-	do_div(hpet_freq, hpet_period);
 	clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
-
 	return 0;
 }
 
@@ -845,7 +795,9 @@ static int hpet_clocksource_register(void)
  */
 int __init hpet_enable(void)
 {
+	unsigned long hpet_period;
 	unsigned int id;
+	u64 freq;
 	int i;
 
 	if (!is_hpet_capable())
@@ -883,6 +835,14 @@ int __init hpet_enable(void)
 	if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
 		goto out_nohpet;
 
+	/*
+	 * The period is a femto seconds value. Convert it to a
+	 * frequency.
+	 */
+	freq = FSEC_PER_SEC;
+	do_div(freq, hpet_period);
+	hpet_freq = freq;
+
 	/*
 	 * Read the HPET ID register to retrieve the IRQ routing
 	 * information and the number of channels
-- 
cgit v1.2.3


From b87ba87ca26e226b2277a2d5613ed596f408e96d Mon Sep 17 00:00:00 2001
From: "Tian, Kevin" <kevin.tian@intel.com>
Date: Fri, 6 May 2011 14:43:36 +0800
Subject: x86: Skip migrating IRQF_PER_CPU irqs in fixup_irqs()

IRQF_PER_CPU means that the irq cannot be moved away from a given
cpu. So it must not be migrated when the cpu goes offline.

[ tglx: massaged changelog ]

Signed-off-by: Fengzhe Zhang <fengzhe.zhang@intel.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Cc: Ian Campbell <Ian.Campbell@citrix.com>
Cc: Jan Beulich <JBeulich@novell.com>
Cc: "xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>
Link: http://lkml.kernel.org/r/%3C625BA99ED14B2D499DC4E29D8138F1505C8ED7F7E2%40shsmsx502.ccr.corp.intel.com%3E
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 1cb0b9fc78dc..544efe2741be 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -249,7 +249,7 @@ void fixup_irqs(void)
 
 		data = irq_desc_get_irq_data(desc);
 		affinity = data->affinity;
-		if (!irq_has_action(irq) ||
+		if (!irq_has_action(irq) || irqd_is_per_cpu(data) ||
 		    cpumask_subset(affinity, cpu_online_mask)) {
 			raw_spin_unlock(&desc->lock);
 			continue;
-- 
cgit v1.2.3


From 983bbf1af0664b78689612b247acb514300f62c7 Mon Sep 17 00:00:00 2001
From: "Tian, Kevin" <kevin.tian@intel.com>
Date: Fri, 6 May 2011 14:43:56 +0800
Subject: x86: Don't unmask disabled irqs when migrating them

It doesn't make sense to unconditionally unmask a disabled irq when
migrating it from offlined cpu to another. If the irq triggers then it
will be disabled in the interrupt handler anyway. So we can just avoid
unmasking it.

[ tglx: Made masking unconditional again and fixed the changelog ]

Signed-off-by: Fengzhe Zhang <fengzhe.zhang@intel.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Cc: Ian Campbell <Ian.Campbell@citrix.com>
Cc: Jan Beulich <JBeulich@novell.com>
Cc: "xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>
Link: http://lkml.kernel.org/r/%3C625BA99ED14B2D499DC4E29D8138F1505C8ED7F7E3%40shsmsx502.ccr.corp.intel.com%3E
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/irq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 544efe2741be..6c0802eb2f7f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -276,7 +276,8 @@ void fixup_irqs(void)
 		else if (!(warned++))
 			set_affinity = 0;
 
-		if (!irqd_can_move_in_process_context(data) && chip->irq_unmask)
+		if (!irqd_can_move_in_process_context(data) &&
+		    !irqd_irq_disabled(data) && chip->irq_unmask)
 			chip->irq_unmask(data);
 
 		raw_spin_unlock(&desc->lock);
-- 
cgit v1.2.3


From 3f508953dd2ebcbcd32d28d0b6aad4d76980e722 Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Thu, 12 May 2011 17:19:53 -0400
Subject: arch/x86/xen/mmu: Cleanup code/data sections definitions

Cleanup code/data sections definitions
accordingly to include/linux/init.h.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
[v1: Rebased on top of latest linus's to include fixes in mmu.c]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 0684f3c74d53..b5f776f60b1b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1054,7 +1054,7 @@ void xen_mm_pin_all(void)
  * that's before we have page structures to store the bits.  So do all
  * the book-keeping now.
  */
-static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
+static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
 				  enum pt_level level)
 {
 	SetPagePinned(page);
@@ -1271,7 +1271,7 @@ void xen_exit_mmap(struct mm_struct *mm)
 	spin_unlock(&mm->page_table_lock);
 }
 
-static __init void xen_pagetable_setup_start(pgd_t *base)
+static void __init xen_pagetable_setup_start(pgd_t *base)
 {
 }
 
@@ -1291,7 +1291,7 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
 
 static void xen_post_allocator_init(void);
 
-static __init void xen_pagetable_setup_done(pgd_t *base)
+static void __init xen_pagetable_setup_done(pgd_t *base)
 {
 	xen_setup_shared_info();
 	xen_post_allocator_init();
@@ -1488,7 +1488,7 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 }
 
 #ifdef CONFIG_X86_32
-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
+static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
 {
 	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
 	if (pte_val_ma(*ptep) & _PAGE_PRESENT)
@@ -1498,7 +1498,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 	return pte;
 }
 #else /* CONFIG_X86_64 */
-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
+static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
 {
 	unsigned long pfn = pte_pfn(pte);
 
@@ -1519,7 +1519,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
 
 /* Init-time set_pte while constructing initial pagetables, which
    doesn't allow RO pagetable pages to be remapped RW */
-static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
+static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
 {
 	pte = mask_rw_pte(ptep, pte);
 
@@ -1537,7 +1537,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
 
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
-static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
+static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
 {
 #ifdef CONFIG_FLATMEM
 	BUG_ON(mem_map);	/* should only be used early */
@@ -1547,7 +1547,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
 }
 
 /* Used for pmd and pud */
-static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
+static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
 {
 #ifdef CONFIG_FLATMEM
 	BUG_ON(mem_map);	/* should only be used early */
@@ -1557,13 +1557,13 @@ static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
 
 /* Early release_pte assumes that all pts are pinned, since there's
    only init_mm and anything attached to that is pinned. */
-static __init void xen_release_pte_init(unsigned long pfn)
+static void __init xen_release_pte_init(unsigned long pfn)
 {
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
 
-static __init void xen_release_pmd_init(unsigned long pfn)
+static void __init xen_release_pmd_init(unsigned long pfn)
 {
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
@@ -1689,7 +1689,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
 		BUG();
 }
 
-static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
 	unsigned pmdidx, pteidx;
 	unsigned ident_pte;
@@ -1772,7 +1772,7 @@ static void convert_pfn_mfn(void *v)
  * of the physical mapping once some sort of allocator has been set
  * up.
  */
-__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 					 unsigned long max_pfn)
 {
 	pud_t *l3;
@@ -1843,7 +1843,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
 static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
 
-static __init void xen_write_cr3_init(unsigned long cr3)
+static void __init xen_write_cr3_init(unsigned long cr3)
 {
 	unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
 
@@ -1880,7 +1880,7 @@ static __init void xen_write_cr3_init(unsigned long cr3)
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
 }
 
-__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
+pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 					 unsigned long max_pfn)
 {
 	pmd_t *kernel_pmd;
@@ -1986,7 +1986,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #endif
 }
 
-__init void xen_ident_map_ISA(void)
+void __init xen_ident_map_ISA(void)
 {
 	unsigned long pa;
 
@@ -2009,7 +2009,7 @@ __init void xen_ident_map_ISA(void)
 	xen_flush_tlb();
 }
 
-static __init void xen_post_allocator_init(void)
+static void __init xen_post_allocator_init(void)
 {
 #ifdef CONFIG_XEN_DEBUG
 	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
@@ -2046,7 +2046,7 @@ static void xen_leave_lazy_mmu(void)
 	preempt_enable();
 }
 
-static const struct pv_mmu_ops xen_mmu_ops __initdata = {
+static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.read_cr2 = xen_read_cr2,
 	.write_cr2 = xen_write_cr2,
 
-- 
cgit v1.2.3


From ad7ba09e658f8e890797ed168dfb3f84be934b09 Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Wed, 4 May 2011 20:19:15 +0200
Subject: arch/x86/xen/xen-ops: Cleanup code/data sections definitions

Cleanup code/data sections definitions
accordingly to include/linux/init.h.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/xen-ops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 3112f55638c4..97dfdc8757b3 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -74,7 +74,7 @@ static inline void xen_hvm_smp_init(void) {}
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 void __init xen_init_spinlocks(void);
-__cpuinit void xen_init_lock_cpu(int cpu);
+void __cpuinit xen_init_lock_cpu(int cpu);
 void xen_uninit_lock_cpu(int cpu);
 #else
 static inline void xen_init_spinlocks(void)
-- 
cgit v1.2.3


From fb6ce5dea4bb704bfcd9fda7e6b1354da66f4d2f Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Wed, 4 May 2011 20:18:45 +0200
Subject: arch/x86/xen/time: Cleanup code/data sections definitions

Cleanup code/data sections definitions
accordingly to include/linux/init.h.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/time.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 2e2d370a47b1..bd4ffd7d9589 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -439,11 +439,11 @@ void xen_timer_resume(void)
 	}
 }
 
-static const struct pv_time_ops xen_time_ops __initdata = {
+static const struct pv_time_ops xen_time_ops __initconst = {
 	.sched_clock = xen_clocksource_read,
 };
 
-static __init void xen_time_init(void)
+static void __init xen_time_init(void)
 {
 	int cpu = smp_processor_id();
 	struct timespec tp;
@@ -468,7 +468,7 @@ static __init void xen_time_init(void)
 	xen_setup_cpu_clockevents();
 }
 
-__init void xen_init_time_ops(void)
+void __init xen_init_time_ops(void)
 {
 	pv_time_ops = xen_time_ops;
 
@@ -490,7 +490,7 @@ static void xen_hvm_setup_cpu_clockevents(void)
 	xen_setup_cpu_clockevents();
 }
 
-__init void xen_hvm_init_time_ops(void)
+void __init xen_hvm_init_time_ops(void)
 {
 	/* vector callback is needed otherwise we cannot receive interrupts
 	 * on cpu > 0 and at this point we don't know how many cpus are
-- 
cgit v1.2.3


From b53cedebd74918237176520f9157deb7ae066b71 Mon Sep 17 00:00:00 2001
From: Daniel Kiper <dkiper@net-space.pl>
Date: Wed, 4 May 2011 20:18:05 +0200
Subject: arch/x86/xen/smp: Cleanup code/data sections definitions

Cleanup code/data sections definitions
accordingly to include/linux/init.h.

Signed-off-by: Daniel Kiper <dkiper@net-space.pl>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/smp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 30612441ed99..194a3edef5cb 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -57,7 +57,7 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static __cpuinit void cpu_bringup(void)
+static void __cpuinit cpu_bringup(void)
 {
 	int cpu = smp_processor_id();
 
@@ -85,7 +85,7 @@ static __cpuinit void cpu_bringup(void)
 	wmb();			/* make sure everything is out */
 }
 
-static __cpuinit void cpu_bringup_and_idle(void)
+static void __cpuinit cpu_bringup_and_idle(void)
 {
 	cpu_bringup();
 	cpu_idle();
@@ -242,7 +242,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 	}
 }
 
-static __cpuinit int
+static int __cpuinit
 cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 {
 	struct vcpu_guest_context *ctxt;
@@ -486,7 +486,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static const struct smp_ops xen_smp_ops __initdata = {
+static const struct smp_ops xen_smp_ops __initconst = {
 	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
 	.smp_prepare_cpus = xen_smp_prepare_cpus,
 	.smp_cpus_done = xen_smp_cpus_done,
-- 
cgit v1.2.3


From 359c47ea71be21c1105ae474e8c90ec3e988bbdf Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 3 Apr 2011 13:32:00 +0200
Subject: m68k: bitops - offset == ((long)p - (long)vaddr) * 8

Hence use "offset" in find_next_{,zero_}bit(), like is already done for
find_next_{,zero_}bit_le()

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/bitops_mm.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/bitops_mm.h b/arch/m68k/include/asm/bitops_mm.h
index 9d69f6e62365..5bd3afadeafa 100644
--- a/arch/m68k/include/asm/bitops_mm.h
+++ b/arch/m68k/include/asm/bitops_mm.h
@@ -220,8 +220,7 @@ static inline int find_next_zero_bit(const unsigned long *vaddr, int size,
 		offset += 32;
 	}
 	/* No zero yet, search remaining full bytes for a zero */
-	res = find_first_zero_bit(p, size - ((long)p - (long)vaddr) * 8);
-	return offset + res;
+	return offset + find_first_zero_bit(p, size - offset);
 }
 
 static inline int find_first_bit(const unsigned long *vaddr, unsigned size)
@@ -267,8 +266,7 @@ static inline int find_next_bit(const unsigned long *vaddr, int size,
 		offset += 32;
 	}
 	/* No one yet, search remaining full bytes for a one */
-	res = find_first_bit(p, size - ((long)p - (long)vaddr) * 8);
-	return offset + res;
+	return offset + find_first_bit(p, size - offset);
 }
 
 /*
-- 
cgit v1.2.3


From f82a519f1262963d6ab30fa238721463fad2e0c8 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 3 Apr 2011 14:00:10 +0200
Subject: m68k: bitops - Never step beyond the end of the bitmap

find_next bitops on m68k (find_next_zero_bit, find_next_bit, and
find_next_bit_le) may cause out of bounds memory access
when the bitmap size in bits % 32 != 0 and offset (the bitnumber
to start searching at) is very close to the bitmap size.

For example,

       unsigned long bitmap[2] = { 0, 0 };
       find_next_bit(bitmap, 63, 62);

1. find_next_bit() tries to find any set bits in bitmap[1],
   but no bits set.

2. Then find_first_bit(bimap + 2, -1)

3. Unfortunately find_first_bit() takes unsigned int as the size argument.

4. find_first_bit will access bitmap[2~] until it find any set bits.

Add missing tests for stepping beyond the end of the bitmap to all
find_{first,next}_*() functions, and make sure they never return a value
larger than the bitmap size.

Reported-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/bitops_mm.h | 81 ++++++++++++++++++++++++++-------------
 1 file changed, 55 insertions(+), 26 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/bitops_mm.h b/arch/m68k/include/asm/bitops_mm.h
index 5bd3afadeafa..e9020f88a748 100644
--- a/arch/m68k/include/asm/bitops_mm.h
+++ b/arch/m68k/include/asm/bitops_mm.h
@@ -181,14 +181,15 @@ static inline int find_first_zero_bit(const unsigned long *vaddr,
 {
 	const unsigned long *p = vaddr;
 	int res = 32;
+	unsigned int words;
 	unsigned long num;
 
 	if (!size)
 		return 0;
 
-	size = (size + 31) >> 5;
+	words = (size + 31) >> 5;
 	while (!(num = ~*p++)) {
-		if (!--size)
+		if (!--words)
 			goto out;
 	}
 
@@ -196,7 +197,8 @@ static inline int find_first_zero_bit(const unsigned long *vaddr,
 			      : "=d" (res) : "d" (num & -num));
 	res ^= 31;
 out:
-	return ((long)p - (long)vaddr - 4) * 8 + res;
+	res += ((long)p - (long)vaddr - 4) * 8;
+	return res < size ? res : size;
 }
 
 static inline int find_next_zero_bit(const unsigned long *vaddr, int size,
@@ -215,9 +217,14 @@ static inline int find_next_zero_bit(const unsigned long *vaddr, int size,
 		/* Look for zero in first longword */
 		__asm__ __volatile__ ("bfffo %1{#0,#0},%0"
 				      : "=d" (res) : "d" (num & -num));
-		if (res < 32)
-			return offset + (res ^ 31);
+		if (res < 32) {
+			offset += res ^ 31;
+			return offset < size ? offset : size;
+		}
 		offset += 32;
+
+		if (offset >= size)
+			return size;
 	}
 	/* No zero yet, search remaining full bytes for a zero */
 	return offset + find_first_zero_bit(p, size - offset);
@@ -227,14 +234,15 @@ static inline int find_first_bit(const unsigned long *vaddr, unsigned size)
 {
 	const unsigned long *p = vaddr;
 	int res = 32;
+	unsigned int words;
 	unsigned long num;
 
 	if (!size)
 		return 0;
 
-	size = (size + 31) >> 5;
+	words = (size + 31) >> 5;
 	while (!(num = *p++)) {
-		if (!--size)
+		if (!--words)
 			goto out;
 	}
 
@@ -242,7 +250,8 @@ static inline int find_first_bit(const unsigned long *vaddr, unsigned size)
 			      : "=d" (res) : "d" (num & -num));
 	res ^= 31;
 out:
-	return ((long)p - (long)vaddr - 4) * 8 + res;
+	res += ((long)p - (long)vaddr - 4) * 8;
+	return res < size ? res : size;
 }
 
 static inline int find_next_bit(const unsigned long *vaddr, int size,
@@ -261,9 +270,14 @@ static inline int find_next_bit(const unsigned long *vaddr, int size,
 		/* Look for one in first longword */
 		__asm__ __volatile__ ("bfffo %1{#0,#0},%0"
 				      : "=d" (res) : "d" (num & -num));
-		if (res < 32)
-			return offset + (res ^ 31);
+		if (res < 32) {
+			offset += res ^ 31;
+			return offset < size ? offset : size;
+		}
 		offset += 32;
+
+		if (offset >= size)
+			return size;
 	}
 	/* No one yet, search remaining full bytes for a one */
 	return offset + find_first_bit(p, size - offset);
@@ -364,23 +378,25 @@ static inline int test_bit_le(int nr, const void *vaddr)
 static inline int find_first_zero_bit_le(const void *vaddr, unsigned size)
 {
 	const unsigned long *p = vaddr, *addr = vaddr;
-	int res;
+	int res = 0;
+	unsigned int words;
 
 	if (!size)
 		return 0;
 
-	size = (size >> 5) + ((size & 31) > 0);
-	while (*p++ == ~0UL)
-	{
-		if (--size == 0)
-			return (p - addr) << 5;
+	words = (size >> 5) + ((size & 31) > 0);
+	while (*p++ == ~0UL) {
+		if (--words == 0)
+			goto out;
 	}
 
 	--p;
 	for (res = 0; res < 32; res++)
 		if (!test_bit_le(res, p))
 			break;
-	return (p - addr) * 32 + res;
+out:
+	res += (p - addr) * 32;
+	return res < size ? res : size;
 }
 
 static inline unsigned long find_next_zero_bit_le(const void *addr,
@@ -398,10 +414,15 @@ static inline unsigned long find_next_zero_bit_le(const void *addr,
 		offset -= bit;
 		/* Look for zero in first longword */
 		for (res = bit; res < 32; res++)
-			if (!test_bit_le(res, p))
-				return offset + res;
+			if (!test_bit_le(res, p)) {
+				offset += res;
+				return offset < size ? offset : size;
+			}
 		p++;
 		offset += 32;
+
+		if (offset >= size)
+			return size;
 	}
 	/* No zero yet, search remaining full bytes for a zero */
 	return offset + find_first_zero_bit_le(p, size - offset);
@@ -410,22 +431,25 @@ static inline unsigned long find_next_zero_bit_le(const void *addr,
 static inline int find_first_bit_le(const void *vaddr, unsigned size)
 {
 	const unsigned long *p = vaddr, *addr = vaddr;
-	int res;
+	int res = 0;
+	unsigned int words;
 
 	if (!size)
 		return 0;
 
-	size = (size >> 5) + ((size & 31) > 0);
+	words = (size >> 5) + ((size & 31) > 0);
 	while (*p++ == 0UL) {
-		if (--size == 0)
-			return (p - addr) << 5;
+		if (--words == 0)
+			goto out;
 	}
 
 	--p;
 	for (res = 0; res < 32; res++)
 		if (test_bit_le(res, p))
 			break;
-	return (p - addr) * 32 + res;
+out:
+	res += (p - addr) * 32;
+	return res < size ? res : size;
 }
 
 static inline unsigned long find_next_bit_le(const void *addr,
@@ -443,10 +467,15 @@ static inline unsigned long find_next_bit_le(const void *addr,
 		offset -= bit;
 		/* Look for one in first longword */
 		for (res = bit; res < 32; res++)
-			if (test_bit_le(res, p))
-				return offset + res;
+			if (test_bit_le(res, p)) {
+				offset += res;
+				return offset < size ? offset : size;
+			}
 		p++;
 		offset += 32;
+
+		if (offset >= size)
+			return size;
 	}
 	/* No set bit yet, search remaining full bytes for a set bit */
 	return offset + find_first_bit_le(p, size - offset);
-- 
cgit v1.2.3


From c4245c9d6535f3d02fda7f6eb9adcec9f09e8fe3 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 6 Apr 2011 22:12:53 +0200
Subject: m68k: Merge mmu and non-mmu versions of sys_call_table

Impact for nommu:
  - Store table in .rodata instead of .text,
  - Let kernel/sys_ni.c handle the stubbing of MMU-only syscalls,
  - Implement sys_mremap and sys_nfsservct,
  - Remove unused padding at the end of the table.

Impact for mmu:
  - Store table in .rodata instead of .data.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
---
 arch/m68k/kernel/Makefile_mm    |   2 +-
 arch/m68k/kernel/entry_mm.S     | 348 ----------------------------------------
 arch/m68k/kernel/syscalltable.S | 191 +++++++++++-----------
 3 files changed, 96 insertions(+), 445 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/kernel/Makefile_mm b/arch/m68k/kernel/Makefile_mm
index 55d5d6b680a2..aced67804579 100644
--- a/arch/m68k/kernel/Makefile_mm
+++ b/arch/m68k/kernel/Makefile_mm
@@ -10,7 +10,7 @@ endif
 extra-y	+= vmlinux.lds
 
 obj-y	:= entry.o process.o traps.o ints.o signal.o ptrace.o module.o \
-	   sys_m68k.o time.o setup.o m68k_ksyms.o devres.o
+	   sys_m68k.o time.o setup.o m68k_ksyms.o devres.o syscalltable.o
 
 devres-y = ../../../kernel/irq/devres.o
 
diff --git a/arch/m68k/kernel/entry_mm.S b/arch/m68k/kernel/entry_mm.S
index 1359ee659574..bd0ec05263b2 100644
--- a/arch/m68k/kernel/entry_mm.S
+++ b/arch/m68k/kernel/entry_mm.S
@@ -407,351 +407,3 @@ resume:
 
 	rts
 
-.data
-ALIGN
-sys_call_table:
-	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
-	.long sys_exit
-	.long sys_fork
-	.long sys_read
-	.long sys_write
-	.long sys_open		/* 5 */
-	.long sys_close
-	.long sys_waitpid
-	.long sys_creat
-	.long sys_link
-	.long sys_unlink	/* 10 */
-	.long sys_execve
-	.long sys_chdir
-	.long sys_time
-	.long sys_mknod
-	.long sys_chmod		/* 15 */
-	.long sys_chown16
-	.long sys_ni_syscall				/* old break syscall holder */
-	.long sys_stat
-	.long sys_lseek
-	.long sys_getpid	/* 20 */
-	.long sys_mount
-	.long sys_oldumount
-	.long sys_setuid16
-	.long sys_getuid16
-	.long sys_stime		/* 25 */
-	.long sys_ptrace
-	.long sys_alarm
-	.long sys_fstat
-	.long sys_pause
-	.long sys_utime		/* 30 */
-	.long sys_ni_syscall				/* old stty syscall holder */
-	.long sys_ni_syscall				/* old gtty syscall holder */
-	.long sys_access
-	.long sys_nice
-	.long sys_ni_syscall	/* 35 */	/* old ftime syscall holder */
-	.long sys_sync
-	.long sys_kill
-	.long sys_rename
-	.long sys_mkdir
-	.long sys_rmdir		/* 40 */
-	.long sys_dup
-	.long sys_pipe
-	.long sys_times
-	.long sys_ni_syscall				/* old prof syscall holder */
-	.long sys_brk		/* 45 */
-	.long sys_setgid16
-	.long sys_getgid16
-	.long sys_signal
-	.long sys_geteuid16
-	.long sys_getegid16	/* 50 */
-	.long sys_acct
-	.long sys_umount				/* recycled never used phys() */
-	.long sys_ni_syscall				/* old lock syscall holder */
-	.long sys_ioctl
-	.long sys_fcntl		/* 55 */
-	.long sys_ni_syscall				/* old mpx syscall holder */
-	.long sys_setpgid
-	.long sys_ni_syscall				/* old ulimit syscall holder */
-	.long sys_ni_syscall
-	.long sys_umask		/* 60 */
-	.long sys_chroot
-	.long sys_ustat
-	.long sys_dup2
-	.long sys_getppid
-	.long sys_getpgrp	/* 65 */
-	.long sys_setsid
-	.long sys_sigaction
-	.long sys_sgetmask
-	.long sys_ssetmask
-	.long sys_setreuid16	/* 70 */
-	.long sys_setregid16
-	.long sys_sigsuspend
-	.long sys_sigpending
-	.long sys_sethostname
-	.long sys_setrlimit	/* 75 */
-	.long sys_old_getrlimit
-	.long sys_getrusage
-	.long sys_gettimeofday
-	.long sys_settimeofday
-	.long sys_getgroups16	/* 80 */
-	.long sys_setgroups16
-	.long sys_old_select
-	.long sys_symlink
-	.long sys_lstat
-	.long sys_readlink	/* 85 */
-	.long sys_uselib
-	.long sys_swapon
-	.long sys_reboot
-	.long sys_old_readdir
-	.long sys_old_mmap	/* 90 */
-	.long sys_munmap
-	.long sys_truncate
-	.long sys_ftruncate
-	.long sys_fchmod
-	.long sys_fchown16	/* 95 */
-	.long sys_getpriority
-	.long sys_setpriority
-	.long sys_ni_syscall				/* old profil syscall holder */
-	.long sys_statfs
-	.long sys_fstatfs	/* 100 */
-	.long sys_ni_syscall				/* ioperm for i386 */
-	.long sys_socketcall
-	.long sys_syslog
-	.long sys_setitimer
-	.long sys_getitimer	/* 105 */
-	.long sys_newstat
-	.long sys_newlstat
-	.long sys_newfstat
-	.long sys_ni_syscall
-	.long sys_ni_syscall	/* 110 */	/* iopl for i386 */
-	.long sys_vhangup
-	.long sys_ni_syscall				/* obsolete idle() syscall */
-	.long sys_ni_syscall				/* vm86old for i386 */
-	.long sys_wait4
-	.long sys_swapoff	/* 115 */
-	.long sys_sysinfo
-	.long sys_ipc
-	.long sys_fsync
-	.long sys_sigreturn
-	.long sys_clone		/* 120 */
-	.long sys_setdomainname
-	.long sys_newuname
-	.long sys_cacheflush				/* modify_ldt for i386 */
-	.long sys_adjtimex
-	.long sys_mprotect	/* 125 */
-	.long sys_sigprocmask
-	.long sys_ni_syscall		/* old "create_module" */
-	.long sys_init_module
-	.long sys_delete_module
-	.long sys_ni_syscall	/* 130 - old "get_kernel_syms" */
-	.long sys_quotactl
-	.long sys_getpgid
-	.long sys_fchdir
-	.long sys_bdflush
-	.long sys_sysfs		/* 135 */
-	.long sys_personality
-	.long sys_ni_syscall				/* for afs_syscall */
-	.long sys_setfsuid16
-	.long sys_setfsgid16
-	.long sys_llseek	/* 140 */
-	.long sys_getdents
-	.long sys_select
-	.long sys_flock
-	.long sys_msync
-	.long sys_readv		/* 145 */
-	.long sys_writev
-	.long sys_getsid
-	.long sys_fdatasync
-	.long sys_sysctl
-	.long sys_mlock		/* 150 */
-	.long sys_munlock
-	.long sys_mlockall
-	.long sys_munlockall
-	.long sys_sched_setparam
-	.long sys_sched_getparam	/* 155 */
-	.long sys_sched_setscheduler
-	.long sys_sched_getscheduler
-	.long sys_sched_yield
-	.long sys_sched_get_priority_max
-	.long sys_sched_get_priority_min  /* 160 */
-	.long sys_sched_rr_get_interval
-	.long sys_nanosleep
-	.long sys_mremap
-	.long sys_setresuid16
-	.long sys_getresuid16	/* 165 */
-	.long sys_getpagesize
-	.long sys_ni_syscall		/* old sys_query_module */
-	.long sys_poll
-	.long sys_nfsservctl
-	.long sys_setresgid16	/* 170 */
-	.long sys_getresgid16
-	.long sys_prctl
-	.long sys_rt_sigreturn
-	.long sys_rt_sigaction
-	.long sys_rt_sigprocmask	/* 175 */
-	.long sys_rt_sigpending
-	.long sys_rt_sigtimedwait
-	.long sys_rt_sigqueueinfo
-	.long sys_rt_sigsuspend
-	.long sys_pread64	/* 180 */
-	.long sys_pwrite64
-	.long sys_lchown16;
-	.long sys_getcwd
-	.long sys_capget
-	.long sys_capset	/* 185 */
-	.long sys_sigaltstack
-	.long sys_sendfile
-	.long sys_ni_syscall				/* streams1 */
-	.long sys_ni_syscall				/* streams2 */
-	.long sys_vfork		/* 190 */
-	.long sys_getrlimit
-	.long sys_mmap2
-	.long sys_truncate64
-	.long sys_ftruncate64
-	.long sys_stat64	/* 195 */
-	.long sys_lstat64
-	.long sys_fstat64
-	.long sys_chown
-	.long sys_getuid
-	.long sys_getgid	/* 200 */
-	.long sys_geteuid
-	.long sys_getegid
-	.long sys_setreuid
-	.long sys_setregid
-	.long sys_getgroups	/* 205 */
-	.long sys_setgroups
-	.long sys_fchown
-	.long sys_setresuid
-	.long sys_getresuid
-	.long sys_setresgid	/* 210 */
-	.long sys_getresgid
-	.long sys_lchown
-	.long sys_setuid
-	.long sys_setgid
-	.long sys_setfsuid	/* 215 */
-	.long sys_setfsgid
-	.long sys_pivot_root
-	.long sys_ni_syscall
-	.long sys_ni_syscall
-	.long sys_getdents64	/* 220 */
-	.long sys_gettid
-	.long sys_tkill
-	.long sys_setxattr
-	.long sys_lsetxattr
-	.long sys_fsetxattr	/* 225 */
-	.long sys_getxattr
-	.long sys_lgetxattr
-	.long sys_fgetxattr
-	.long sys_listxattr
-	.long sys_llistxattr	/* 230 */
-	.long sys_flistxattr
-	.long sys_removexattr
-	.long sys_lremovexattr
-	.long sys_fremovexattr
-	.long sys_futex		/* 235 */
-	.long sys_sendfile64
-	.long sys_mincore
-	.long sys_madvise
-	.long sys_fcntl64
-	.long sys_readahead	/* 240 */
-	.long sys_io_setup
-	.long sys_io_destroy
-	.long sys_io_getevents
-	.long sys_io_submit
-	.long sys_io_cancel	/* 245 */
-	.long sys_fadvise64
-	.long sys_exit_group
-	.long sys_lookup_dcookie
-	.long sys_epoll_create
-	.long sys_epoll_ctl	/* 250 */
-	.long sys_epoll_wait
-	.long sys_remap_file_pages
-	.long sys_set_tid_address
-	.long sys_timer_create
-	.long sys_timer_settime	/* 255 */
-	.long sys_timer_gettime
-	.long sys_timer_getoverrun
-	.long sys_timer_delete
-	.long sys_clock_settime
-	.long sys_clock_gettime	/* 260 */
-	.long sys_clock_getres
-	.long sys_clock_nanosleep
-	.long sys_statfs64
-	.long sys_fstatfs64
-	.long sys_tgkill	/* 265 */
-	.long sys_utimes
-	.long sys_fadvise64_64
-	.long sys_mbind
-	.long sys_get_mempolicy
-	.long sys_set_mempolicy	/* 270 */
-	.long sys_mq_open
-	.long sys_mq_unlink
-	.long sys_mq_timedsend
-	.long sys_mq_timedreceive
-	.long sys_mq_notify	/* 275 */
-	.long sys_mq_getsetattr
-	.long sys_waitid
-	.long sys_ni_syscall	/* for sys_vserver */
-	.long sys_add_key
-	.long sys_request_key	/* 280 */
-	.long sys_keyctl
-	.long sys_ioprio_set
-	.long sys_ioprio_get
-	.long sys_inotify_init
-	.long sys_inotify_add_watch	/* 285 */
-	.long sys_inotify_rm_watch
-	.long sys_migrate_pages
-	.long sys_openat
-	.long sys_mkdirat
-	.long sys_mknodat		/* 290 */
-	.long sys_fchownat
-	.long sys_futimesat
-	.long sys_fstatat64
-	.long sys_unlinkat
-	.long sys_renameat		/* 295 */
-	.long sys_linkat
-	.long sys_symlinkat
-	.long sys_readlinkat
-	.long sys_fchmodat
-	.long sys_faccessat		/* 300 */
-	.long sys_ni_syscall		/* Reserved for pselect6 */
-	.long sys_ni_syscall		/* Reserved for ppoll */
-	.long sys_unshare
-	.long sys_set_robust_list
-	.long sys_get_robust_list	/* 305 */
-	.long sys_splice
-	.long sys_sync_file_range
-	.long sys_tee
-	.long sys_vmsplice
-	.long sys_move_pages		/* 310 */
-	.long sys_sched_setaffinity
-	.long sys_sched_getaffinity
-	.long sys_kexec_load
-	.long sys_getcpu
-	.long sys_epoll_pwait		/* 315 */
-	.long sys_utimensat
-	.long sys_signalfd
-	.long sys_timerfd_create
-	.long sys_eventfd
-	.long sys_fallocate		/* 320 */
-	.long sys_timerfd_settime
-	.long sys_timerfd_gettime
-	.long sys_signalfd4
-	.long sys_eventfd2
-	.long sys_epoll_create1		/* 325 */
-	.long sys_dup3
-	.long sys_pipe2
-	.long sys_inotify_init1
-	.long sys_preadv
-	.long sys_pwritev		/* 330 */
-	.long sys_rt_tgsigqueueinfo
-	.long sys_perf_event_open
-	.long sys_get_thread_area
-	.long sys_set_thread_area
-	.long sys_atomic_cmpxchg_32	/* 335 */
-	.long sys_atomic_barrier
-	.long sys_fanotify_init
-	.long sys_fanotify_mark
-	.long sys_prlimit64
-	.long sys_name_to_handle_at	/* 340 */
-	.long sys_open_by_handle_at
-	.long sys_clock_adjtime
-	.long sys_syncfs
-
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 9b8393d8adb8..0284192f84f6 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -1,6 +1,4 @@
 /*
- *  linux/arch/m68knommu/kernel/syscalltable.S
- *
  *  Copyright (C) 2002, Greg Ungerer (gerg@snapgear.com)
  *
  *  Based on older entry.S files, the following copyrights apply:
@@ -9,171 +7,176 @@
  *                      Kenneth Albanowski <kjahds@kjahds.com>,
  *  Copyright (C) 2000  Lineo Inc. (www.lineo.com) 
  *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Linux/m68k support by Hamish Macdonald
  */
 
 #include <linux/sys.h>
 #include <linux/linkage.h>
-#include <asm/unistd.h>
 
-.text
+#ifndef CONFIG_MMU
+#define sys_mmap2		sys_mmap_pgoff
+#endif
+
+.section .rodata
 ALIGN
 ENTRY(sys_call_table)
-	.long sys_restart_syscall	/* 0  -  old "setup()" system call */
+	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
 	.long sys_exit
 	.long sys_fork
 	.long sys_read
 	.long sys_write
-	.long sys_open		/* 5 */
+	.long sys_open			/* 5 */
 	.long sys_close
 	.long sys_waitpid
 	.long sys_creat
 	.long sys_link
-	.long sys_unlink	/* 10 */
+	.long sys_unlink		/* 10 */
 	.long sys_execve
 	.long sys_chdir
 	.long sys_time
 	.long sys_mknod
-	.long sys_chmod		/* 15 */
+	.long sys_chmod			/* 15 */
 	.long sys_chown16
-	.long sys_ni_syscall	/* old break syscall holder */
+	.long sys_ni_syscall		/* old break syscall holder */
 	.long sys_stat
 	.long sys_lseek
-	.long sys_getpid	/* 20 */
+	.long sys_getpid		/* 20 */
 	.long sys_mount
 	.long sys_oldumount
 	.long sys_setuid16
 	.long sys_getuid16
-	.long sys_stime		/* 25 */
+	.long sys_stime			/* 25 */
 	.long sys_ptrace
 	.long sys_alarm
 	.long sys_fstat
 	.long sys_pause
-	.long sys_utime		/* 30 */
-	.long sys_ni_syscall	/* old stty syscall holder */
-	.long sys_ni_syscall	/* old gtty syscall holder */
+	.long sys_utime			/* 30 */
+	.long sys_ni_syscall		/* old stty syscall holder */
+	.long sys_ni_syscall		/* old gtty syscall holder */
 	.long sys_access
 	.long sys_nice
-	.long sys_ni_syscall	/* 35 */ /* old ftime syscall holder */
+	.long sys_ni_syscall		/* 35 - old ftime syscall holder */
 	.long sys_sync
 	.long sys_kill
 	.long sys_rename
 	.long sys_mkdir
-	.long sys_rmdir		/* 40 */
+	.long sys_rmdir			/* 40 */
 	.long sys_dup
 	.long sys_pipe
 	.long sys_times
-	.long sys_ni_syscall	/* old prof syscall holder */
-	.long sys_brk		/* 45 */
+	.long sys_ni_syscall		/* old prof syscall holder */
+	.long sys_brk			/* 45 */
 	.long sys_setgid16
 	.long sys_getgid16
 	.long sys_signal
 	.long sys_geteuid16
-	.long sys_getegid16	/* 50 */
+	.long sys_getegid16		/* 50 */
 	.long sys_acct
-	.long sys_umount	/* recycled never used phys() */
-	.long sys_ni_syscall	/* old lock syscall holder */
+	.long sys_umount		/* recycled never used phys() */
+	.long sys_ni_syscall		/* old lock syscall holder */
 	.long sys_ioctl
-	.long sys_fcntl		/* 55 */
-	.long sys_ni_syscall	/* old mpx syscall holder */
+	.long sys_fcntl			/* 55 */
+	.long sys_ni_syscall		/* old mpx syscall holder */
 	.long sys_setpgid
-	.long sys_ni_syscall	/* old ulimit syscall holder */
+	.long sys_ni_syscall		/* old ulimit syscall holder */
 	.long sys_ni_syscall
-	.long sys_umask		/* 60 */
+	.long sys_umask			/* 60 */
 	.long sys_chroot
 	.long sys_ustat
 	.long sys_dup2
 	.long sys_getppid
-	.long sys_getpgrp	/* 65 */
+	.long sys_getpgrp		/* 65 */
 	.long sys_setsid
 	.long sys_sigaction
 	.long sys_sgetmask
 	.long sys_ssetmask
-	.long sys_setreuid16	/* 70 */
+	.long sys_setreuid16		/* 70 */
 	.long sys_setregid16
 	.long sys_sigsuspend
 	.long sys_sigpending
 	.long sys_sethostname
-	.long sys_setrlimit	/* 75 */
+	.long sys_setrlimit		/* 75 */
 	.long sys_old_getrlimit
 	.long sys_getrusage
 	.long sys_gettimeofday
 	.long sys_settimeofday
-	.long sys_getgroups16	/* 80 */
+	.long sys_getgroups16		/* 80 */
 	.long sys_setgroups16
 	.long sys_old_select
 	.long sys_symlink
 	.long sys_lstat
-	.long sys_readlink	/* 85 */
+	.long sys_readlink		/* 85 */
 	.long sys_uselib
-	.long sys_ni_syscall	/* sys_swapon */
+	.long sys_swapon
 	.long sys_reboot
 	.long sys_old_readdir
-	.long sys_old_mmap	/* 90 */
+	.long sys_old_mmap		/* 90 */
 	.long sys_munmap
 	.long sys_truncate
 	.long sys_ftruncate
 	.long sys_fchmod
-	.long sys_fchown16	/* 95 */
+	.long sys_fchown16		/* 95 */
 	.long sys_getpriority
 	.long sys_setpriority
-	.long sys_ni_syscall	/* old profil syscall holder */
+	.long sys_ni_syscall		/* old profil syscall holder */
 	.long sys_statfs
-	.long sys_fstatfs	/* 100 */
-	.long sys_ni_syscall	/* ioperm for i386 */
+	.long sys_fstatfs		/* 100 */
+	.long sys_ni_syscall		/* ioperm for i386 */
 	.long sys_socketcall
 	.long sys_syslog
 	.long sys_setitimer
-	.long sys_getitimer	/* 105 */
+	.long sys_getitimer		/* 105 */
 	.long sys_newstat
 	.long sys_newlstat
 	.long sys_newfstat
 	.long sys_ni_syscall
-	.long sys_ni_syscall	/* iopl for i386 */ /* 110 */
+	.long sys_ni_syscall		/* 110 - iopl for i386 */
 	.long sys_vhangup
-	.long sys_ni_syscall	/* obsolete idle() syscall */
-	.long sys_ni_syscall	/* vm86old for i386 */
+	.long sys_ni_syscall		/* obsolete idle() syscall */
+	.long sys_ni_syscall		/* vm86old for i386 */
 	.long sys_wait4
-	.long sys_ni_syscall	/* 115 */ /* sys_swapoff */
+	.long sys_swapoff		/* 115 */
 	.long sys_sysinfo
 	.long sys_ipc
 	.long sys_fsync
 	.long sys_sigreturn
-	.long sys_clone		/* 120 */
+	.long sys_clone			/* 120 */
 	.long sys_setdomainname
 	.long sys_newuname
-	.long sys_cacheflush	/* modify_ldt for i386 */
+	.long sys_cacheflush		/* modify_ldt for i386 */
 	.long sys_adjtimex
-	.long sys_ni_syscall	/* 125 */ /* sys_mprotect */
+	.long sys_mprotect		/* 125 */
 	.long sys_sigprocmask
-	.long sys_ni_syscall	/* old "creat_module" */
+	.long sys_ni_syscall		/* old "create_module" */
 	.long sys_init_module
 	.long sys_delete_module
-	.long sys_ni_syscall	/* 130: old "get_kernel_syms" */
+	.long sys_ni_syscall		/* 130 - old "get_kernel_syms" */
 	.long sys_quotactl
 	.long sys_getpgid
 	.long sys_fchdir
 	.long sys_bdflush
-	.long sys_sysfs		/* 135 */
+	.long sys_sysfs			/* 135 */
 	.long sys_personality
-	.long sys_ni_syscall	/* for afs_syscall */
+	.long sys_ni_syscall		/* for afs_syscall */
 	.long sys_setfsuid16
 	.long sys_setfsgid16
-	.long sys_llseek	/* 140 */
+	.long sys_llseek		/* 140 */
 	.long sys_getdents
 	.long sys_select
 	.long sys_flock
-	.long sys_ni_syscall	/* sys_msync */
-	.long sys_readv		/* 145 */
+	.long sys_msync
+	.long sys_readv			/* 145 */
 	.long sys_writev
 	.long sys_getsid
 	.long sys_fdatasync
 	.long sys_sysctl
-	.long sys_ni_syscall	/* 150 */ /* sys_mlock */
-	.long sys_ni_syscall	/* sys_munlock */
-	.long sys_ni_syscall	/* sys_mlockall */
-	.long sys_ni_syscall	/* sys_munlockall */
+	.long sys_mlock			/* 150 */
+	.long sys_munlock
+	.long sys_mlockall
+	.long sys_munlockall
 	.long sys_sched_setparam
-	.long sys_sched_getparam /* 155 */
+	.long sys_sched_getparam	/* 155 */
 	.long sys_sched_setscheduler
 	.long sys_sched_getscheduler
 	.long sys_sched_yield
@@ -181,124 +184,124 @@ ENTRY(sys_call_table)
 	.long sys_sched_get_priority_min  /* 160 */
 	.long sys_sched_rr_get_interval
 	.long sys_nanosleep
-	.long sys_ni_syscall	/* sys_mremap */
+	.long sys_mremap
 	.long sys_setresuid16
-	.long sys_getresuid16	/* 165 */
-	.long sys_getpagesize	/* sys_getpagesize */
-	.long sys_ni_syscall	/* old "query_module" */
+	.long sys_getresuid16		/* 165 */
+	.long sys_getpagesize
+	.long sys_ni_syscall		/* old "query_module" */
 	.long sys_poll
-	.long sys_ni_syscall	/* sys_nfsservctl */
-	.long sys_setresgid16	/* 170 */
+	.long sys_nfsservctl
+	.long sys_setresgid16		/* 170 */
 	.long sys_getresgid16
 	.long sys_prctl
 	.long sys_rt_sigreturn
 	.long sys_rt_sigaction
-	.long sys_rt_sigprocmask /* 175 */
+	.long sys_rt_sigprocmask	/* 175 */
 	.long sys_rt_sigpending
 	.long sys_rt_sigtimedwait
 	.long sys_rt_sigqueueinfo
 	.long sys_rt_sigsuspend
-	.long sys_pread64	/* 180 */
+	.long sys_pread64		/* 180 */
 	.long sys_pwrite64
 	.long sys_lchown16
 	.long sys_getcwd
 	.long sys_capget
-	.long sys_capset	/* 185 */
+	.long sys_capset		/* 185 */
 	.long sys_sigaltstack
 	.long sys_sendfile
-	.long sys_ni_syscall	/* streams1 */
-	.long sys_ni_syscall	/* streams2 */
-	.long sys_vfork		/* 190 */
+	.long sys_ni_syscall		/* streams1 */
+	.long sys_ni_syscall		/* streams2 */
+	.long sys_vfork			/* 190 */
 	.long sys_getrlimit
-	.long sys_mmap_pgoff
+	.long sys_mmap2
 	.long sys_truncate64
 	.long sys_ftruncate64
-	.long sys_stat64	/* 195 */
+	.long sys_stat64		/* 195 */
 	.long sys_lstat64
 	.long sys_fstat64
 	.long sys_chown
 	.long sys_getuid
-	.long sys_getgid	/* 200 */
+	.long sys_getgid		/* 200 */
 	.long sys_geteuid
 	.long sys_getegid
 	.long sys_setreuid
 	.long sys_setregid
-	.long sys_getgroups	/* 205 */
+	.long sys_getgroups		/* 205 */
 	.long sys_setgroups
 	.long sys_fchown
 	.long sys_setresuid
 	.long sys_getresuid
-	.long sys_setresgid	/* 210 */
+	.long sys_setresgid		/* 210 */
 	.long sys_getresgid
 	.long sys_lchown
 	.long sys_setuid
 	.long sys_setgid
-	.long sys_setfsuid	/* 215 */
+	.long sys_setfsuid		/* 215 */
 	.long sys_setfsgid
 	.long sys_pivot_root
 	.long sys_ni_syscall
 	.long sys_ni_syscall
-	.long sys_getdents64	/* 220 */
+	.long sys_getdents64		/* 220 */
 	.long sys_gettid
 	.long sys_tkill
 	.long sys_setxattr
 	.long sys_lsetxattr
-	.long sys_fsetxattr	/* 225 */
+	.long sys_fsetxattr		/* 225 */
 	.long sys_getxattr
 	.long sys_lgetxattr
 	.long sys_fgetxattr
 	.long sys_listxattr
-	.long sys_llistxattr	/* 230 */
+	.long sys_llistxattr		/* 230 */
 	.long sys_flistxattr
 	.long sys_removexattr
 	.long sys_lremovexattr
 	.long sys_fremovexattr
-	.long sys_futex		/* 235 */
+	.long sys_futex			/* 235 */
 	.long sys_sendfile64
-	.long sys_ni_syscall	/* sys_mincore */
-	.long sys_ni_syscall	/* sys_madvise */
+	.long sys_mincore
+	.long sys_madvise
 	.long sys_fcntl64
-	.long sys_readahead	/* 240 */
+	.long sys_readahead		/* 240 */
 	.long sys_io_setup
 	.long sys_io_destroy
 	.long sys_io_getevents
 	.long sys_io_submit
-	.long sys_io_cancel	/* 245 */
+	.long sys_io_cancel		/* 245 */
 	.long sys_fadvise64
 	.long sys_exit_group
 	.long sys_lookup_dcookie
 	.long sys_epoll_create
-	.long sys_epoll_ctl	/* 250 */
+	.long sys_epoll_ctl		/* 250 */
 	.long sys_epoll_wait
-	.long sys_ni_syscall	/* sys_remap_file_pages */
+	.long sys_remap_file_pages
 	.long sys_set_tid_address
 	.long sys_timer_create
-	.long sys_timer_settime	/* 255 */
+	.long sys_timer_settime		/* 255 */
 	.long sys_timer_gettime
 	.long sys_timer_getoverrun
 	.long sys_timer_delete
 	.long sys_clock_settime
-	.long sys_clock_gettime	/* 260 */
+	.long sys_clock_gettime		/* 260 */
 	.long sys_clock_getres
 	.long sys_clock_nanosleep
 	.long sys_statfs64
 	.long sys_fstatfs64
-	.long sys_tgkill	/* 265 */
+	.long sys_tgkill		/* 265 */
 	.long sys_utimes
 	.long sys_fadvise64_64
-	.long sys_mbind	
+	.long sys_mbind
 	.long sys_get_mempolicy
-	.long sys_set_mempolicy	/* 270 */
+	.long sys_set_mempolicy		/* 270 */
 	.long sys_mq_open
 	.long sys_mq_unlink
 	.long sys_mq_timedsend
 	.long sys_mq_timedreceive
-	.long sys_mq_notify	/* 275 */
+	.long sys_mq_notify		/* 275 */
 	.long sys_mq_getsetattr
 	.long sys_waitid
-	.long sys_ni_syscall	/* for sys_vserver */
+	.long sys_ni_syscall		/* for sys_vserver */
 	.long sys_add_key
-	.long sys_request_key	/* 280 */
+	.long sys_request_key		/* 280 */
 	.long sys_keyctl
 	.long sys_ioprio_set
 	.long sys_ioprio_get
@@ -363,7 +366,3 @@ ENTRY(sys_call_table)
 	.long sys_clock_adjtime
 	.long sys_syncfs
 
-	.rept NR_syscalls-(.-sys_call_table)/4
-		.long sys_ni_syscall
-	.endr
-
-- 
cgit v1.2.3


From d6d42bb2f85d875dc0c421699de5a1401b2af6a6 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 6 May 2011 20:57:11 +0200
Subject: m68k: Really wire up sys_pselect6 and sys_ppoll

We reserved the numbers a long time ago, but never wired them up in the
syscall table as they need TIF_RESTORE_SIGMASK, which we only got last year
in commit cb6831d5d3099e772a510eb3e1ed0760ccffb45e ("m68k: Switch to saner
sigsuspend()")

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: stable@kernel.org
---
 arch/m68k/kernel/syscalltable.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 0284192f84f6..5909e392cb1e 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -322,8 +322,8 @@ ENTRY(sys_call_table)
 	.long sys_readlinkat
 	.long sys_fchmodat
 	.long sys_faccessat		/* 300 */
-	.long sys_ni_syscall		/* Reserved for pselect6 */
-	.long sys_ni_syscall		/* Reserved for ppoll */
+	.long sys_pselect6
+	.long sys_ppoll
 	.long sys_unshare
 	.long sys_set_robust_list
 	.long sys_get_robust_list	/* 305 */
-- 
cgit v1.2.3


From 1fc74ac61229edfe053fb87e8939ae9ca3794389 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 5 May 2011 20:33:02 +0200
Subject: m68k: unistd - Comment out definitions for unimplemented syscalls

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/unistd.h | 46 ++++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 29e17907d9f2..f3b649de2a1b 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -22,7 +22,7 @@
 #define __NR_mknod		 14
 #define __NR_chmod		 15
 #define __NR_chown		 16
-#define __NR_break		 17
+/*#define __NR_break		 17*/
 #define __NR_oldstat		 18
 #define __NR_lseek		 19
 #define __NR_getpid		 20
@@ -36,11 +36,11 @@
 #define __NR_oldfstat		 28
 #define __NR_pause		 29
 #define __NR_utime		 30
-#define __NR_stty		 31
-#define __NR_gtty		 32
+/*#define __NR_stty		 31*/
+/*#define __NR_gtty		 32*/
 #define __NR_access		 33
 #define __NR_nice		 34
-#define __NR_ftime		 35
+/*#define __NR_ftime		 35*/
 #define __NR_sync		 36
 #define __NR_kill		 37
 #define __NR_rename		 38
@@ -49,7 +49,7 @@
 #define __NR_dup		 41
 #define __NR_pipe		 42
 #define __NR_times		 43
-#define __NR_prof		 44
+/*#define __NR_prof		 44*/
 #define __NR_brk		 45
 #define __NR_setgid		 46
 #define __NR_getgid		 47
@@ -58,13 +58,13 @@
 #define __NR_getegid		 50
 #define __NR_acct		 51
 #define __NR_umount2		 52
-#define __NR_lock		 53
+/*#define __NR_lock		 53*/
 #define __NR_ioctl		 54
 #define __NR_fcntl		 55
-#define __NR_mpx		 56
+/*#define __NR_mpx		 56*/
 #define __NR_setpgid		 57
-#define __NR_ulimit		 58
-#define __NR_oldolduname	 59
+/*#define __NR_ulimit		 58*/
+/*#define __NR_oldolduname	 59*/
 #define __NR_umask		 60
 #define __NR_chroot		 61
 #define __NR_ustat		 62
@@ -103,10 +103,10 @@
 #define __NR_fchown		 95
 #define __NR_getpriority	 96
 #define __NR_setpriority	 97
-#define __NR_profil		 98
+/*#define __NR_profil		 98*/
 #define __NR_statfs		 99
 #define __NR_fstatfs		100
-#define __NR_ioperm		101
+/*#define __NR_ioperm		101*/
 #define __NR_socketcall		102
 #define __NR_syslog		103
 #define __NR_setitimer		104
@@ -114,11 +114,11 @@
 #define __NR_stat		106
 #define __NR_lstat		107
 #define __NR_fstat		108
-#define __NR_olduname		109
-#define __NR_iopl		/* 110 */ not supported
+/*#define __NR_olduname		109*/
+/*#define __NR_iopl		110*/ /* not supported */
 #define __NR_vhangup		111
-#define __NR_idle		/* 112 */ Obsolete
-#define __NR_vm86		/* 113 */ not supported
+/*#define __NR_idle		112*/ /* Obsolete */
+/*#define __NR_vm86		113*/ /* not supported */
 #define __NR_wait4		114
 #define __NR_swapoff		115
 #define __NR_sysinfo		116
@@ -132,17 +132,17 @@
 #define __NR_adjtimex		124
 #define __NR_mprotect		125
 #define __NR_sigprocmask	126
-#define __NR_create_module	127
+/*#define __NR_create_module	127*/
 #define __NR_init_module	128
 #define __NR_delete_module	129
-#define __NR_get_kernel_syms	130
+/*#define __NR_get_kernel_syms	130*/
 #define __NR_quotactl		131
 #define __NR_getpgid		132
 #define __NR_fchdir		133
 #define __NR_bdflush		134
 #define __NR_sysfs		135
 #define __NR_personality	136
-#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
+/*#define __NR_afs_syscall	137*/ /* Syscall for Andrew File System */
 #define __NR_setfsuid		138
 #define __NR_setfsgid		139
 #define __NR__llseek		140
@@ -172,7 +172,7 @@
 #define __NR_setresuid		164
 #define __NR_getresuid		165
 #define __NR_getpagesize	166
-#define __NR_query_module	167
+/*#define __NR_query_module	167*/
 #define __NR_poll		168
 #define __NR_nfsservctl		169
 #define __NR_setresgid		170
@@ -193,8 +193,8 @@
 #define __NR_capset		185
 #define __NR_sigaltstack	186
 #define __NR_sendfile		187
-#define __NR_getpmsg		188	/* some people actually want streams */
-#define __NR_putpmsg		189	/* some people actually want streams */
+/*#define __NR_getpmsg		188*/	/* some people actually want streams */
+/*#define __NR_putpmsg		189*/	/* some people actually want streams */
 #define __NR_vfork		190
 #define __NR_ugetrlimit		191
 #define __NR_mmap2		192
@@ -223,6 +223,8 @@
 #define __NR_setfsuid32		215
 #define __NR_setfsgid32		216
 #define __NR_pivot_root		217
+/* 218*/
+/* 219*/
 #define __NR_getdents64		220
 #define __NR_gettid		221
 #define __NR_tkill		222
@@ -281,7 +283,7 @@
 #define __NR_mq_notify		275
 #define __NR_mq_getsetattr	276
 #define __NR_waitid		277
-#define __NR_vserver		278
+/*#define __NR_vserver		278*/
 #define __NR_add_key		279
 #define __NR_request_key	280
 #define __NR_keyctl		281
-- 
cgit v1.2.3


From 79abeed6ee93231d494c191a9251c0845bd71fdd Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 4 May 2011 14:55:41 +0200
Subject: m68k/atari: Do not use "/" in interrupt names

It may trigger a warning in fs/proc/generic.c:__xlate_proc_name() when
trying to add an entry for the interrupt handler to sysfs.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/atari/atakeyb.c | 2 +-
 arch/m68k/atari/stdma.c   | 2 +-
 drivers/net/atarilance.c  | 2 +-
 drivers/video/atafb.c     | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/atari/atakeyb.c b/arch/m68k/atari/atakeyb.c
index b995513d527f..db26f0e926a4 100644
--- a/arch/m68k/atari/atakeyb.c
+++ b/arch/m68k/atari/atakeyb.c
@@ -575,7 +575,7 @@ int atari_keyb_init(void)
 	kb_state.len = 0;
 
 	error = request_irq(IRQ_MFP_ACIA, atari_keyboard_interrupt,
-			    IRQ_TYPE_SLOW, "keyboard/mouse/MIDI",
+			    IRQ_TYPE_SLOW, "keyboard,mouse,MIDI",
 			    atari_keyboard_interrupt);
 	if (error)
 		return error;
diff --git a/arch/m68k/atari/stdma.c b/arch/m68k/atari/stdma.c
index 604329fafbb8..ddbf43ca8858 100644
--- a/arch/m68k/atari/stdma.c
+++ b/arch/m68k/atari/stdma.c
@@ -180,7 +180,7 @@ void __init stdma_init(void)
 {
 	stdma_isr = NULL;
 	if (request_irq(IRQ_MFP_FDC, stdma_int, IRQ_TYPE_SLOW | IRQF_SHARED,
-			"ST-DMA: floppy/ACSI/IDE/Falcon-SCSI", stdma_int))
+			"ST-DMA floppy,ACSI,IDE,Falcon-SCSI", stdma_int))
 		pr_err("Couldn't register ST-DMA interrupt\n");
 }
 
diff --git a/drivers/net/atarilance.c b/drivers/net/atarilance.c
index ce0091eb06f5..1264d781b554 100644
--- a/drivers/net/atarilance.c
+++ b/drivers/net/atarilance.c
@@ -554,7 +554,7 @@ static unsigned long __init lance_probe1( struct net_device *dev,
 		memaddr == (unsigned short *)0xffe00000) {
 		/* PAMs card and Riebl on ST use level 5 autovector */
 		if (request_irq(IRQ_AUTO_5, lance_interrupt, IRQ_TYPE_PRIO,
-		            "PAM/Riebl-ST Ethernet", dev)) {
+		            "PAM,Riebl-ST Ethernet", dev)) {
 			printk( "Lance: request for irq %d failed\n", IRQ_AUTO_5 );
 			return 0;
 		}
diff --git a/drivers/video/atafb.c b/drivers/video/atafb.c
index 5b2b5ef4edba..64e41f5448c4 100644
--- a/drivers/video/atafb.c
+++ b/drivers/video/atafb.c
@@ -3117,7 +3117,7 @@ int __init atafb_init(void)
 			atafb_ops.fb_setcolreg = &falcon_setcolreg;
 			error = request_irq(IRQ_AUTO_4, falcon_vbl_switcher,
 					    IRQ_TYPE_PRIO,
-					    "framebuffer/modeswitch",
+					    "framebuffer:modeswitch",
 					    falcon_vbl_switcher);
 			if (error)
 				return error;
-- 
cgit v1.2.3


From 7786908c3c1bb38dcc5cd2c037251c05507eef16 Mon Sep 17 00:00:00 2001
From: Michael Schmitz <schmitz@biophys.uni-duesseldorf.de>
Date: Tue, 16 Dec 2008 21:26:03 +0100
Subject: input/atari: Use the correct mouse interrupt hook

The Atari keyboard driver calls atari_mouse_interrupt_hook if it's set, not
atari_input_mouse_interrupt_hook. Fix below.

[geert] Killed off atari_mouse_interrupt_hook completely, after fixing another
incorrect assignment in atarimouse.c.

Signed-off-by: Michael Schmitz <schmitz@debian.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/atari/atakeyb.c        | 7 ++-----
 arch/m68k/include/asm/atarikb.h  | 2 --
 drivers/input/mouse/atarimouse.c | 2 +-
 3 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/m68k/atari/atakeyb.c b/arch/m68k/atari/atakeyb.c
index db26f0e926a4..95022b04b62d 100644
--- a/arch/m68k/atari/atakeyb.c
+++ b/arch/m68k/atari/atakeyb.c
@@ -36,13 +36,10 @@
 
 /* Hook for MIDI serial driver */
 void (*atari_MIDI_interrupt_hook) (void);
-/* Hook for mouse driver */
-void (*atari_mouse_interrupt_hook) (char *);
 /* Hook for keyboard inputdev  driver */
 void (*atari_input_keyboard_interrupt_hook) (unsigned char, char);
 /* Hook for mouse inputdev  driver */
 void (*atari_input_mouse_interrupt_hook) (char *);
-EXPORT_SYMBOL(atari_mouse_interrupt_hook);
 EXPORT_SYMBOL(atari_input_keyboard_interrupt_hook);
 EXPORT_SYMBOL(atari_input_mouse_interrupt_hook);
 
@@ -263,8 +260,8 @@ repeat:
 			kb_state.buf[kb_state.len++] = scancode;
 			if (kb_state.len == 3) {
 				kb_state.state = KEYBOARD;
-				if (atari_mouse_interrupt_hook)
-					atari_mouse_interrupt_hook(kb_state.buf);
+				if (atari_input_mouse_interrupt_hook)
+					atari_input_mouse_interrupt_hook(kb_state.buf);
 			}
 			break;
 
diff --git a/arch/m68k/include/asm/atarikb.h b/arch/m68k/include/asm/atarikb.h
index 546e7da5804f..68f3622bf591 100644
--- a/arch/m68k/include/asm/atarikb.h
+++ b/arch/m68k/include/asm/atarikb.h
@@ -34,8 +34,6 @@ void ikbd_joystick_disable(void);
 
 /* Hook for MIDI serial driver */
 extern void (*atari_MIDI_interrupt_hook) (void);
-/* Hook for mouse driver */
-extern void (*atari_mouse_interrupt_hook) (char *);
 /* Hook for keyboard inputdev  driver */
 extern void (*atari_input_keyboard_interrupt_hook) (unsigned char, char);
 /* Hook for mouse inputdev  driver */
diff --git a/drivers/input/mouse/atarimouse.c b/drivers/input/mouse/atarimouse.c
index adf45b3040e9..a57143ce80b5 100644
--- a/drivers/input/mouse/atarimouse.c
+++ b/drivers/input/mouse/atarimouse.c
@@ -108,7 +108,7 @@ static int atamouse_open(struct input_dev *dev)
 static void atamouse_close(struct input_dev *dev)
 {
 	ikbd_mouse_disable();
-	atari_mouse_interrupt_hook = NULL;
+	atari_input_mouse_interrupt_hook = NULL;
 }
 
 static int __init atamouse_init(void)
-- 
cgit v1.2.3


From bb0a56ecc4ba2a3db1b6ea6949c309886e3447d3 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Thu, 19 May 2011 18:51:07 -0400
Subject: [CPUFREQ] Move x86 drivers to drivers/cpufreq/

Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/Kconfig                                 |    2 +-
 arch/x86/kernel/cpu/Makefile                     |    1 -
 arch/x86/kernel/cpu/cpufreq/Kconfig              |  266 ----
 arch/x86/kernel/cpu/cpufreq/Makefile             |   21 -
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c       |  773 -----------
 arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c    |  444 ------
 arch/x86/kernel/cpu/cpufreq/e_powersaver.c       |  367 -----
 arch/x86/kernel/cpu/cpufreq/elanfreq.c           |  309 -----
 arch/x86/kernel/cpu/cpufreq/gx-suspmod.c         |  514 -------
 arch/x86/kernel/cpu/cpufreq/longhaul.c           | 1024 --------------
 arch/x86/kernel/cpu/cpufreq/longhaul.h           |  353 -----
 arch/x86/kernel/cpu/cpufreq/longrun.c            |  324 -----
 arch/x86/kernel/cpu/cpufreq/mperf.c              |   51 -
 arch/x86/kernel/cpu/cpufreq/mperf.h              |    9 -
 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c        |  329 -----
 arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c        |  621 ---------
 arch/x86/kernel/cpu/cpufreq/powernow-k6.c        |  261 ----
 arch/x86/kernel/cpu/cpufreq/powernow-k7.c        |  747 ----------
 arch/x86/kernel/cpu/cpufreq/powernow-k7.h        |   43 -
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c        | 1607 ----------------------
 arch/x86/kernel/cpu/cpufreq/powernow-k8.h        |  222 ---
 arch/x86/kernel/cpu/cpufreq/sc520_freq.c         |  192 ---
 arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c |  633 ---------
 arch/x86/kernel/cpu/cpufreq/speedstep-ich.c      |  448 ------
 arch/x86/kernel/cpu/cpufreq/speedstep-lib.c      |  478 -------
 arch/x86/kernel/cpu/cpufreq/speedstep-lib.h      |   49 -
 arch/x86/kernel/cpu/cpufreq/speedstep-smi.c      |  464 -------
 drivers/cpufreq/Kconfig                          |   10 +-
 drivers/cpufreq/Kconfig.x86                      |  255 ++++
 drivers/cpufreq/Makefile                         |   26 +
 drivers/cpufreq/acpi-cpufreq.c                   |  773 +++++++++++
 drivers/cpufreq/cpufreq-nforce2.c                |  444 ++++++
 drivers/cpufreq/e_powersaver.c                   |  367 +++++
 drivers/cpufreq/elanfreq.c                       |  309 +++++
 drivers/cpufreq/gx-suspmod.c                     |  514 +++++++
 drivers/cpufreq/longhaul.c                       | 1024 ++++++++++++++
 drivers/cpufreq/longhaul.h                       |  353 +++++
 drivers/cpufreq/longrun.c                        |  324 +++++
 drivers/cpufreq/mperf.c                          |   51 +
 drivers/cpufreq/mperf.h                          |    9 +
 drivers/cpufreq/p4-clockmod.c                    |  329 +++++
 drivers/cpufreq/pcc-cpufreq.c                    |  621 +++++++++
 drivers/cpufreq/powernow-k6.c                    |  261 ++++
 drivers/cpufreq/powernow-k7.c                    |  747 ++++++++++
 drivers/cpufreq/powernow-k7.h                    |   43 +
 drivers/cpufreq/powernow-k8.c                    | 1607 ++++++++++++++++++++++
 drivers/cpufreq/powernow-k8.h                    |  222 +++
 drivers/cpufreq/sc520_freq.c                     |  192 +++
 drivers/cpufreq/speedstep-centrino.c             |  633 +++++++++
 drivers/cpufreq/speedstep-ich.c                  |  448 ++++++
 drivers/cpufreq/speedstep-lib.c                  |  478 +++++++
 drivers/cpufreq/speedstep-lib.h                  |   49 +
 drivers/cpufreq/speedstep-smi.c                  |  464 +++++++
 53 files changed, 10553 insertions(+), 10552 deletions(-)
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/Kconfig
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/Makefile
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/e_powersaver.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/elanfreq.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/longhaul.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/longhaul.h
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/longrun.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/mperf.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/mperf.h
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k6.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k7.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k7.h
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k8.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k8.h
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/sc520_freq.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
 delete mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
 create mode 100644 drivers/cpufreq/Kconfig.x86
 create mode 100644 drivers/cpufreq/acpi-cpufreq.c
 create mode 100644 drivers/cpufreq/cpufreq-nforce2.c
 create mode 100644 drivers/cpufreq/e_powersaver.c
 create mode 100644 drivers/cpufreq/elanfreq.c
 create mode 100644 drivers/cpufreq/gx-suspmod.c
 create mode 100644 drivers/cpufreq/longhaul.c
 create mode 100644 drivers/cpufreq/longhaul.h
 create mode 100644 drivers/cpufreq/longrun.c
 create mode 100644 drivers/cpufreq/mperf.c
 create mode 100644 drivers/cpufreq/mperf.h
 create mode 100644 drivers/cpufreq/p4-clockmod.c
 create mode 100644 drivers/cpufreq/pcc-cpufreq.c
 create mode 100644 drivers/cpufreq/powernow-k6.c
 create mode 100644 drivers/cpufreq/powernow-k7.c
 create mode 100644 drivers/cpufreq/powernow-k7.h
 create mode 100644 drivers/cpufreq/powernow-k8.c
 create mode 100644 drivers/cpufreq/powernow-k8.h
 create mode 100644 drivers/cpufreq/sc520_freq.c
 create mode 100644 drivers/cpufreq/speedstep-centrino.c
 create mode 100644 drivers/cpufreq/speedstep-ich.c
 create mode 100644 drivers/cpufreq/speedstep-lib.c
 create mode 100644 drivers/cpufreq/speedstep-lib.h
 create mode 100644 drivers/cpufreq/speedstep-smi.c

(limited to 'arch')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc6c53a95bfd..e7f94a52a5d3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1848,7 +1848,7 @@ config APM_ALLOW_INTS
 
 endif # APM
 
-source "arch/x86/kernel/cpu/cpufreq/Kconfig"
+source "drivers/cpufreq/Kconfig"
 
 source "drivers/cpuidle/Kconfig"
 
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 3f0ebe429a01..6042981d0309 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -30,7 +30,6 @@ obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
-obj-$(CONFIG_CPU_FREQ)			+= cpufreq/
 
 obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
 
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
deleted file mode 100644
index 870e6cc6ad28..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ /dev/null
@@ -1,266 +0,0 @@
-#
-# CPU Frequency scaling
-#
-
-menu "CPU Frequency scaling"
-
-source "drivers/cpufreq/Kconfig"
-
-if CPU_FREQ
-
-comment "CPUFreq processor drivers"
-
-config X86_PCC_CPUFREQ
-	tristate "Processor Clocking Control interface driver"
-	depends on ACPI && ACPI_PROCESSOR
-	help
-	  This driver adds support for the PCC interface.
-
-	  For details, take a look at:
-	  <file:Documentation/cpu-freq/pcc-cpufreq.txt>.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called pcc-cpufreq.
-
-	  If in doubt, say N.
-
-config X86_ACPI_CPUFREQ
-	tristate "ACPI Processor P-States driver"
-	select CPU_FREQ_TABLE
-	depends on ACPI_PROCESSOR
-	help
-	  This driver adds a CPUFreq driver which utilizes the ACPI
-	  Processor Performance States.
-	  This driver also supports Intel Enhanced Speedstep.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called acpi-cpufreq.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config ELAN_CPUFREQ
-	tristate "AMD Elan SC400 and SC410"
-	select CPU_FREQ_TABLE
-	depends on X86_ELAN
-	---help---
-	  This adds the CPUFreq driver for AMD Elan SC400 and SC410
-	  processors.
-
-	  You need to specify the processor maximum speed as boot
-	  parameter: elanfreq=maxspeed (in kHz) or as module
-	  parameter "max_freq".
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config SC520_CPUFREQ
-	tristate "AMD Elan SC520"
-	select CPU_FREQ_TABLE
-	depends on X86_ELAN
-	---help---
-	  This adds the CPUFreq driver for AMD Elan SC520 processor.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-
-config X86_POWERNOW_K6
-	tristate "AMD Mobile K6-2/K6-3 PowerNow!"
-	select CPU_FREQ_TABLE
-	depends on X86_32
-	help
-	  This adds the CPUFreq driver for mobile AMD K6-2+ and mobile
-	  AMD K6-3+ processors.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_POWERNOW_K7
-	tristate "AMD Mobile Athlon/Duron PowerNow!"
-	select CPU_FREQ_TABLE
-	depends on X86_32
-	help
-	  This adds the CPUFreq driver for mobile AMD K7 mobile processors.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_POWERNOW_K7_ACPI
-	bool
-	depends on X86_POWERNOW_K7 && ACPI_PROCESSOR
-	depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m)
-	depends on X86_32
-	default y
-
-config X86_POWERNOW_K8
-	tristate "AMD Opteron/Athlon64 PowerNow!"
-	select CPU_FREQ_TABLE
-	depends on ACPI && ACPI_PROCESSOR
-	help
-	  This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called powernow-k8.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-config X86_GX_SUSPMOD
-	tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
-	depends on X86_32 && PCI
-	help
-	 This add the CPUFreq driver for NatSemi Geode processors which
-	 support suspend modulation.
-
-	 For details, take a look at <file:Documentation/cpu-freq/>.
-
-	 If in doubt, say N.
-
-config X86_SPEEDSTEP_CENTRINO
-	tristate "Intel Enhanced SpeedStep (deprecated)"
-	select CPU_FREQ_TABLE
-	select X86_SPEEDSTEP_CENTRINO_TABLE if X86_32
-	depends on X86_32 || (X86_64 && ACPI_PROCESSOR)
-	help
-	  This is deprecated and this functionality is now merged into
-	  acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
-	  speedstep_centrino.
-	  This adds the CPUFreq driver for Enhanced SpeedStep enabled
-	  mobile CPUs.  This means Intel Pentium M (Centrino) CPUs
-	  or 64bit enabled Intel Xeons.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called speedstep-centrino.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_SPEEDSTEP_CENTRINO_TABLE
-	bool "Built-in tables for Banias CPUs"
-	depends on X86_32 && X86_SPEEDSTEP_CENTRINO
-	default y
-	help
-	  Use built-in tables for Banias CPUs if ACPI encoding
-	  is not available.
-
-	  If in doubt, say N.
-
-config X86_SPEEDSTEP_ICH
-	tristate "Intel Speedstep on ICH-M chipsets (ioport interface)"
-	select CPU_FREQ_TABLE
-	depends on X86_32
-	help
-	  This adds the CPUFreq driver for certain mobile Intel Pentium III
-	  (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all
-	  mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2,
-	  ICH3 or ICH4 southbridge.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_SPEEDSTEP_SMI
-	tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)"
-	select CPU_FREQ_TABLE
-	depends on X86_32 && EXPERIMENTAL
-	help
-	  This adds the CPUFreq driver for certain mobile Intel Pentium III
-	  (Coppermine), all mobile Intel Pentium III-M (Tualatin)
-	  on systems which have an Intel 440BX/ZX/MX southbridge.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_P4_CLOCKMOD
-	tristate "Intel Pentium 4 clock modulation"
-	select CPU_FREQ_TABLE
-	help
-	  This adds the CPUFreq driver for Intel Pentium 4 / XEON
-	  processors.  When enabled it will lower CPU temperature by skipping
-	  clocks.
-
-	  This driver should be only used in exceptional
-	  circumstances when very low power is needed because it causes severe
-	  slowdowns and noticeable latencies.  Normally Speedstep should be used
-	  instead.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called p4-clockmod.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  Unless you are absolutely sure say N.
-
-config X86_CPUFREQ_NFORCE2
-	tristate "nVidia nForce2 FSB changing"
-	depends on X86_32 && EXPERIMENTAL
-	help
-	  This adds the CPUFreq driver for FSB changing on nVidia nForce2
-	  platforms.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_LONGRUN
-	tristate "Transmeta LongRun"
-	depends on X86_32
-	help
-	  This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors
-	  which support LongRun.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_LONGHAUL
-	tristate "VIA Cyrix III Longhaul"
-	select CPU_FREQ_TABLE
-	depends on X86_32 && ACPI_PROCESSOR
-	help
-	  This adds the CPUFreq driver for VIA Samuel/CyrixIII,
-	  VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T
-	  processors.
-
-	  For details, take a look at <file:Documentation/cpu-freq/>.
-
-	  If in doubt, say N.
-
-config X86_E_POWERSAVER
-	tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)"
-	select CPU_FREQ_TABLE
-	depends on X86_32 && EXPERIMENTAL
-	help
-	  This adds the CPUFreq driver for VIA C7 processors.  However, this driver
-	  does not have any safeguards to prevent operating the CPU out of spec
-	  and is thus considered dangerous.  Please use the regular ACPI cpufreq
-	  driver, enabled by CONFIG_X86_ACPI_CPUFREQ.
-
-	  If in doubt, say N.
-
-comment "shared options"
-
-config X86_SPEEDSTEP_LIB
-	tristate
-	default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD)
-
-config X86_SPEEDSTEP_RELAXED_CAP_CHECK
-	bool "Relaxed speedstep capability checks"
-	depends on X86_32 && (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH)
-	help
-	  Don't perform all checks for a speedstep capable system which would
-	  normally be done. Some ancient or strange systems, though speedstep
-	  capable, don't always indicate that they are speedstep capable. This
-	  option lets the probing code bypass some of those checks if the
-	  parameter "relaxed_check=1" is passed to the module.
-
-endif	# CPU_FREQ
-
-endmenu
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
deleted file mode 100644
index bd54bf67e6fb..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
-# K8 systems. ACPI is preferred to all other hardware-specific drivers.
-# speedstep-* is preferred over p4-clockmod.
-
-obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o mperf.o
-obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o mperf.o
-obj-$(CONFIG_X86_PCC_CPUFREQ)		+= pcc-cpufreq.o
-obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
-obj-$(CONFIG_X86_POWERNOW_K7)		+= powernow-k7.o
-obj-$(CONFIG_X86_LONGHAUL)		+= longhaul.o
-obj-$(CONFIG_X86_E_POWERSAVER)		+= e_powersaver.o
-obj-$(CONFIG_ELAN_CPUFREQ)		+= elanfreq.o
-obj-$(CONFIG_SC520_CPUFREQ)		+= sc520_freq.o
-obj-$(CONFIG_X86_LONGRUN)		+= longrun.o  
-obj-$(CONFIG_X86_GX_SUSPMOD)		+= gx-suspmod.o
-obj-$(CONFIG_X86_SPEEDSTEP_ICH)		+= speedstep-ich.o
-obj-$(CONFIG_X86_SPEEDSTEP_LIB)		+= speedstep-lib.o
-obj-$(CONFIG_X86_SPEEDSTEP_SMI)		+= speedstep-smi.o
-obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO)	+= speedstep-centrino.o
-obj-$(CONFIG_X86_P4_CLOCKMOD)		+= p4-clockmod.o
-obj-$(CONFIG_X86_CPUFREQ_NFORCE2)	+= cpufreq-nforce2.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
deleted file mode 100644
index 4e04e1274388..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ /dev/null
@@ -1,773 +0,0 @@
-/*
- * acpi-cpufreq.c - ACPI Processor P-States Driver
- *
- *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
- *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
- *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
- *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or (at
- *  your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/sched.h>
-#include <linux/cpufreq.h>
-#include <linux/compiler.h>
-#include <linux/dmi.h>
-#include <linux/slab.h>
-
-#include <linux/acpi.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/uaccess.h>
-
-#include <acpi/processor.h>
-
-#include <asm/msr.h>
-#include <asm/processor.h>
-#include <asm/cpufeature.h>
-#include "mperf.h"
-
-MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
-MODULE_DESCRIPTION("ACPI Processor P-States Driver");
-MODULE_LICENSE("GPL");
-
-enum {
-	UNDEFINED_CAPABLE = 0,
-	SYSTEM_INTEL_MSR_CAPABLE,
-	SYSTEM_IO_CAPABLE,
-};
-
-#define INTEL_MSR_RANGE		(0xffff)
-
-struct acpi_cpufreq_data {
-	struct acpi_processor_performance *acpi_data;
-	struct cpufreq_frequency_table *freq_table;
-	unsigned int resume;
-	unsigned int cpu_feature;
-};
-
-static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
-
-/* acpi_perf_data is a pointer to percpu data. */
-static struct acpi_processor_performance __percpu *acpi_perf_data;
-
-static struct cpufreq_driver acpi_cpufreq_driver;
-
-static unsigned int acpi_pstate_strict;
-
-static int check_est_cpu(unsigned int cpuid)
-{
-	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
-
-	return cpu_has(cpu, X86_FEATURE_EST);
-}
-
-static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
-{
-	struct acpi_processor_performance *perf;
-	int i;
-
-	perf = data->acpi_data;
-
-	for (i = 0; i < perf->state_count; i++) {
-		if (value == perf->states[i].status)
-			return data->freq_table[i].frequency;
-	}
-	return 0;
-}
-
-static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
-{
-	int i;
-	struct acpi_processor_performance *perf;
-
-	msr &= INTEL_MSR_RANGE;
-	perf = data->acpi_data;
-
-	for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
-		if (msr == perf->states[data->freq_table[i].index].status)
-			return data->freq_table[i].frequency;
-	}
-	return data->freq_table[0].frequency;
-}
-
-static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
-{
-	switch (data->cpu_feature) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		return extract_msr(val, data);
-	case SYSTEM_IO_CAPABLE:
-		return extract_io(val, data);
-	default:
-		return 0;
-	}
-}
-
-struct msr_addr {
-	u32 reg;
-};
-
-struct io_addr {
-	u16 port;
-	u8 bit_width;
-};
-
-struct drv_cmd {
-	unsigned int type;
-	const struct cpumask *mask;
-	union {
-		struct msr_addr msr;
-		struct io_addr io;
-	} addr;
-	u32 val;
-};
-
-/* Called via smp_call_function_single(), on the target CPU */
-static void do_drv_read(void *_cmd)
-{
-	struct drv_cmd *cmd = _cmd;
-	u32 h;
-
-	switch (cmd->type) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		rdmsr(cmd->addr.msr.reg, cmd->val, h);
-		break;
-	case SYSTEM_IO_CAPABLE:
-		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
-				&cmd->val,
-				(u32)cmd->addr.io.bit_width);
-		break;
-	default:
-		break;
-	}
-}
-
-/* Called via smp_call_function_many(), on the target CPUs */
-static void do_drv_write(void *_cmd)
-{
-	struct drv_cmd *cmd = _cmd;
-	u32 lo, hi;
-
-	switch (cmd->type) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		rdmsr(cmd->addr.msr.reg, lo, hi);
-		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
-		wrmsr(cmd->addr.msr.reg, lo, hi);
-		break;
-	case SYSTEM_IO_CAPABLE:
-		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
-				cmd->val,
-				(u32)cmd->addr.io.bit_width);
-		break;
-	default:
-		break;
-	}
-}
-
-static void drv_read(struct drv_cmd *cmd)
-{
-	int err;
-	cmd->val = 0;
-
-	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
-	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
-}
-
-static void drv_write(struct drv_cmd *cmd)
-{
-	int this_cpu;
-
-	this_cpu = get_cpu();
-	if (cpumask_test_cpu(this_cpu, cmd->mask))
-		do_drv_write(cmd);
-	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
-	put_cpu();
-}
-
-static u32 get_cur_val(const struct cpumask *mask)
-{
-	struct acpi_processor_performance *perf;
-	struct drv_cmd cmd;
-
-	if (unlikely(cpumask_empty(mask)))
-		return 0;
-
-	switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
-		break;
-	case SYSTEM_IO_CAPABLE:
-		cmd.type = SYSTEM_IO_CAPABLE;
-		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
-		cmd.addr.io.port = perf->control_register.address;
-		cmd.addr.io.bit_width = perf->control_register.bit_width;
-		break;
-	default:
-		return 0;
-	}
-
-	cmd.mask = mask;
-	drv_read(&cmd);
-
-	pr_debug("get_cur_val = %u\n", cmd.val);
-
-	return cmd.val;
-}
-
-static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
-{
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
-	unsigned int freq;
-	unsigned int cached_freq;
-
-	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
-
-	if (unlikely(data == NULL ||
-		     data->acpi_data == NULL || data->freq_table == NULL)) {
-		return 0;
-	}
-
-	cached_freq = data->freq_table[data->acpi_data->state].frequency;
-	freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
-	if (freq != cached_freq) {
-		/*
-		 * The dreaded BIOS frequency change behind our back.
-		 * Force set the frequency on next target call.
-		 */
-		data->resume = 1;
-	}
-
-	pr_debug("cur freq = %u\n", freq);
-
-	return freq;
-}
-
-static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
-				struct acpi_cpufreq_data *data)
-{
-	unsigned int cur_freq;
-	unsigned int i;
-
-	for (i = 0; i < 100; i++) {
-		cur_freq = extract_freq(get_cur_val(mask), data);
-		if (cur_freq == freq)
-			return 1;
-		udelay(10);
-	}
-	return 0;
-}
-
-static int acpi_cpufreq_target(struct cpufreq_policy *policy,
-			       unsigned int target_freq, unsigned int relation)
-{
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
-	struct acpi_processor_performance *perf;
-	struct cpufreq_freqs freqs;
-	struct drv_cmd cmd;
-	unsigned int next_state = 0; /* Index into freq_table */
-	unsigned int next_perf_state = 0; /* Index into perf table */
-	unsigned int i;
-	int result = 0;
-
-	pr_debug("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
-
-	if (unlikely(data == NULL ||
-	     data->acpi_data == NULL || data->freq_table == NULL)) {
-		return -ENODEV;
-	}
-
-	perf = data->acpi_data;
-	result = cpufreq_frequency_table_target(policy,
-						data->freq_table,
-						target_freq,
-						relation, &next_state);
-	if (unlikely(result)) {
-		result = -ENODEV;
-		goto out;
-	}
-
-	next_perf_state = data->freq_table[next_state].index;
-	if (perf->state == next_perf_state) {
-		if (unlikely(data->resume)) {
-			pr_debug("Called after resume, resetting to P%d\n",
-				next_perf_state);
-			data->resume = 0;
-		} else {
-			pr_debug("Already at target state (P%d)\n",
-				next_perf_state);
-			goto out;
-		}
-	}
-
-	switch (data->cpu_feature) {
-	case SYSTEM_INTEL_MSR_CAPABLE:
-		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
-		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
-		cmd.val = (u32) perf->states[next_perf_state].control;
-		break;
-	case SYSTEM_IO_CAPABLE:
-		cmd.type = SYSTEM_IO_CAPABLE;
-		cmd.addr.io.port = perf->control_register.address;
-		cmd.addr.io.bit_width = perf->control_register.bit_width;
-		cmd.val = (u32) perf->states[next_perf_state].control;
-		break;
-	default:
-		result = -ENODEV;
-		goto out;
-	}
-
-	/* cpufreq holds the hotplug lock, so we are safe from here on */
-	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
-		cmd.mask = policy->cpus;
-	else
-		cmd.mask = cpumask_of(policy->cpu);
-
-	freqs.old = perf->states[perf->state].core_frequency * 1000;
-	freqs.new = data->freq_table[next_state].frequency;
-	for_each_cpu(i, policy->cpus) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	}
-
-	drv_write(&cmd);
-
-	if (acpi_pstate_strict) {
-		if (!check_freqs(cmd.mask, freqs.new, data)) {
-			pr_debug("acpi_cpufreq_target failed (%d)\n",
-				policy->cpu);
-			result = -EAGAIN;
-			goto out;
-		}
-	}
-
-	for_each_cpu(i, policy->cpus) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	}
-	perf->state = next_perf_state;
-
-out:
-	return result;
-}
-
-static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
-{
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
-
-	pr_debug("acpi_cpufreq_verify\n");
-
-	return cpufreq_frequency_table_verify(policy, data->freq_table);
-}
-
-static unsigned long
-acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
-{
-	struct acpi_processor_performance *perf = data->acpi_data;
-
-	if (cpu_khz) {
-		/* search the closest match to cpu_khz */
-		unsigned int i;
-		unsigned long freq;
-		unsigned long freqn = perf->states[0].core_frequency * 1000;
-
-		for (i = 0; i < (perf->state_count-1); i++) {
-			freq = freqn;
-			freqn = perf->states[i+1].core_frequency * 1000;
-			if ((2 * cpu_khz) > (freqn + freq)) {
-				perf->state = i;
-				return freq;
-			}
-		}
-		perf->state = perf->state_count-1;
-		return freqn;
-	} else {
-		/* assume CPU is at P0... */
-		perf->state = 0;
-		return perf->states[0].core_frequency * 1000;
-	}
-}
-
-static void free_acpi_perf_data(void)
-{
-	unsigned int i;
-
-	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
-	for_each_possible_cpu(i)
-		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
-				 ->shared_cpu_map);
-	free_percpu(acpi_perf_data);
-}
-
-/*
- * acpi_cpufreq_early_init - initialize ACPI P-States library
- *
- * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
- * in order to determine correct frequency and voltage pairings. We can
- * do _PDC and _PSD and find out the processor dependency for the
- * actual init that will happen later...
- */
-static int __init acpi_cpufreq_early_init(void)
-{
-	unsigned int i;
-	pr_debug("acpi_cpufreq_early_init\n");
-
-	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
-	if (!acpi_perf_data) {
-		pr_debug("Memory allocation error for acpi_perf_data.\n");
-		return -ENOMEM;
-	}
-	for_each_possible_cpu(i) {
-		if (!zalloc_cpumask_var_node(
-			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
-			GFP_KERNEL, cpu_to_node(i))) {
-
-			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
-			free_acpi_perf_data();
-			return -ENOMEM;
-		}
-	}
-
-	/* Do initialization in ACPI core */
-	acpi_processor_preregister_performance(acpi_perf_data);
-	return 0;
-}
-
-#ifdef CONFIG_SMP
-/*
- * Some BIOSes do SW_ANY coordination internally, either set it up in hw
- * or do it in BIOS firmware and won't inform about it to OS. If not
- * detected, this has a side effect of making CPU run at a different speed
- * than OS intended it to run at. Detect it and handle it cleanly.
- */
-static int bios_with_sw_any_bug;
-
-static int sw_any_bug_found(const struct dmi_system_id *d)
-{
-	bios_with_sw_any_bug = 1;
-	return 0;
-}
-
-static const struct dmi_system_id sw_any_bug_dmi_table[] = {
-	{
-		.callback = sw_any_bug_found,
-		.ident = "Supermicro Server X6DLP",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
-			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
-		},
-	},
-	{ }
-};
-
-static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
-{
-	/* Intel Xeon Processor 7100 Series Specification Update
-	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
-	 * AL30: A Machine Check Exception (MCE) Occurring during an
-	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
-	 * Both Processor Cores to Lock Up. */
-	if (c->x86_vendor == X86_VENDOR_INTEL) {
-		if ((c->x86 == 15) &&
-		    (c->x86_model == 6) &&
-		    (c->x86_mask == 8)) {
-			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
-			    "Xeon(R) 7100 Errata AL30, processors may "
-			    "lock up on frequency changes: disabling "
-			    "acpi-cpufreq.\n");
-			return -ENODEV;
-		    }
-		}
-	return 0;
-}
-#endif
-
-static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
-{
-	unsigned int i;
-	unsigned int valid_states = 0;
-	unsigned int cpu = policy->cpu;
-	struct acpi_cpufreq_data *data;
-	unsigned int result = 0;
-	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
-	struct acpi_processor_performance *perf;
-#ifdef CONFIG_SMP
-	static int blacklisted;
-#endif
-
-	pr_debug("acpi_cpufreq_cpu_init\n");
-
-#ifdef CONFIG_SMP
-	if (blacklisted)
-		return blacklisted;
-	blacklisted = acpi_cpufreq_blacklist(c);
-	if (blacklisted)
-		return blacklisted;
-#endif
-
-	data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
-	per_cpu(acfreq_data, cpu) = data;
-
-	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
-		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
-
-	result = acpi_processor_register_performance(data->acpi_data, cpu);
-	if (result)
-		goto err_free;
-
-	perf = data->acpi_data;
-	policy->shared_type = perf->shared_type;
-
-	/*
-	 * Will let policy->cpus know about dependency only when software
-	 * coordination is required.
-	 */
-	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
-	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
-		cpumask_copy(policy->cpus, perf->shared_cpu_map);
-	}
-	cpumask_copy(policy->related_cpus, perf->shared_cpu_map);
-
-#ifdef CONFIG_SMP
-	dmi_check_system(sw_any_bug_dmi_table);
-	if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) {
-		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
-		cpumask_copy(policy->cpus, cpu_core_mask(cpu));
-	}
-#endif
-
-	/* capability check */
-	if (perf->state_count <= 1) {
-		pr_debug("No P-States\n");
-		result = -ENODEV;
-		goto err_unreg;
-	}
-
-	if (perf->control_register.space_id != perf->status_register.space_id) {
-		result = -ENODEV;
-		goto err_unreg;
-	}
-
-	switch (perf->control_register.space_id) {
-	case ACPI_ADR_SPACE_SYSTEM_IO:
-		pr_debug("SYSTEM IO addr space\n");
-		data->cpu_feature = SYSTEM_IO_CAPABLE;
-		break;
-	case ACPI_ADR_SPACE_FIXED_HARDWARE:
-		pr_debug("HARDWARE addr space\n");
-		if (!check_est_cpu(cpu)) {
-			result = -ENODEV;
-			goto err_unreg;
-		}
-		data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
-		break;
-	default:
-		pr_debug("Unknown addr space %d\n",
-			(u32) (perf->control_register.space_id));
-		result = -ENODEV;
-		goto err_unreg;
-	}
-
-	data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
-		    (perf->state_count+1), GFP_KERNEL);
-	if (!data->freq_table) {
-		result = -ENOMEM;
-		goto err_unreg;
-	}
-
-	/* detect transition latency */
-	policy->cpuinfo.transition_latency = 0;
-	for (i = 0; i < perf->state_count; i++) {
-		if ((perf->states[i].transition_latency * 1000) >
-		    policy->cpuinfo.transition_latency)
-			policy->cpuinfo.transition_latency =
-			    perf->states[i].transition_latency * 1000;
-	}
-
-	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
-	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
-	    policy->cpuinfo.transition_latency > 20 * 1000) {
-		policy->cpuinfo.transition_latency = 20 * 1000;
-		printk_once(KERN_INFO
-			    "P-state transition latency capped at 20 uS\n");
-	}
-
-	/* table init */
-	for (i = 0; i < perf->state_count; i++) {
-		if (i > 0 && perf->states[i].core_frequency >=
-		    data->freq_table[valid_states-1].frequency / 1000)
-			continue;
-
-		data->freq_table[valid_states].index = i;
-		data->freq_table[valid_states].frequency =
-		    perf->states[i].core_frequency * 1000;
-		valid_states++;
-	}
-	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
-	perf->state = 0;
-
-	result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
-	if (result)
-		goto err_freqfree;
-
-	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
-		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
-
-	switch (perf->control_register.space_id) {
-	case ACPI_ADR_SPACE_SYSTEM_IO:
-		/* Current speed is unknown and not detectable by IO port */
-		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
-		break;
-	case ACPI_ADR_SPACE_FIXED_HARDWARE:
-		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
-		policy->cur = get_cur_freq_on_cpu(cpu);
-		break;
-	default:
-		break;
-	}
-
-	/* notify BIOS that we exist */
-	acpi_processor_notify_smm(THIS_MODULE);
-
-	/* Check for APERF/MPERF support in hardware */
-	if (cpu_has(c, X86_FEATURE_APERFMPERF))
-		acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
-
-	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
-	for (i = 0; i < perf->state_count; i++)
-		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
-			(i == perf->state ? '*' : ' '), i,
-			(u32) perf->states[i].core_frequency,
-			(u32) perf->states[i].power,
-			(u32) perf->states[i].transition_latency);
-
-	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
-
-	/*
-	 * the first call to ->target() should result in us actually
-	 * writing something to the appropriate registers.
-	 */
-	data->resume = 1;
-
-	return result;
-
-err_freqfree:
-	kfree(data->freq_table);
-err_unreg:
-	acpi_processor_unregister_performance(perf, cpu);
-err_free:
-	kfree(data);
-	per_cpu(acfreq_data, cpu) = NULL;
-
-	return result;
-}
-
-static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
-{
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
-
-	pr_debug("acpi_cpufreq_cpu_exit\n");
-
-	if (data) {
-		cpufreq_frequency_table_put_attr(policy->cpu);
-		per_cpu(acfreq_data, policy->cpu) = NULL;
-		acpi_processor_unregister_performance(data->acpi_data,
-						      policy->cpu);
-		kfree(data->freq_table);
-		kfree(data);
-	}
-
-	return 0;
-}
-
-static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
-{
-	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
-
-	pr_debug("acpi_cpufreq_resume\n");
-
-	data->resume = 1;
-
-	return 0;
-}
-
-static struct freq_attr *acpi_cpufreq_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver acpi_cpufreq_driver = {
-	.verify		= acpi_cpufreq_verify,
-	.target		= acpi_cpufreq_target,
-	.bios_limit	= acpi_processor_get_bios_limit,
-	.init		= acpi_cpufreq_cpu_init,
-	.exit		= acpi_cpufreq_cpu_exit,
-	.resume		= acpi_cpufreq_resume,
-	.name		= "acpi-cpufreq",
-	.owner		= THIS_MODULE,
-	.attr		= acpi_cpufreq_attr,
-};
-
-static int __init acpi_cpufreq_init(void)
-{
-	int ret;
-
-	if (acpi_disabled)
-		return 0;
-
-	pr_debug("acpi_cpufreq_init\n");
-
-	ret = acpi_cpufreq_early_init();
-	if (ret)
-		return ret;
-
-	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
-	if (ret)
-		free_acpi_perf_data();
-
-	return ret;
-}
-
-static void __exit acpi_cpufreq_exit(void)
-{
-	pr_debug("acpi_cpufreq_exit\n");
-
-	cpufreq_unregister_driver(&acpi_cpufreq_driver);
-
-	free_percpu(acpi_perf_data);
-}
-
-module_param(acpi_pstate_strict, uint, 0644);
-MODULE_PARM_DESC(acpi_pstate_strict,
-	"value 0 or non-zero. non-zero -> strict ACPI checks are "
-	"performed during frequency changes.");
-
-late_initcall(acpi_cpufreq_init);
-module_exit(acpi_cpufreq_exit);
-
-MODULE_ALIAS("acpi");
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
deleted file mode 100644
index 7bac808804f3..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ /dev/null
@@ -1,444 +0,0 @@
-/*
- * (C) 2004-2006  Sebastian Witt <se.witt@gmx.net>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *  Based upon reverse engineered information
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-
-#define NFORCE2_XTAL 25
-#define NFORCE2_BOOTFSB 0x48
-#define NFORCE2_PLLENABLE 0xa8
-#define NFORCE2_PLLREG 0xa4
-#define NFORCE2_PLLADR 0xa0
-#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div)
-
-#define NFORCE2_MIN_FSB 50
-#define NFORCE2_SAFE_DISTANCE 50
-
-/* Delay in ms between FSB changes */
-/* #define NFORCE2_DELAY 10 */
-
-/*
- * nforce2_chipset:
- * FSB is changed using the chipset
- */
-static struct pci_dev *nforce2_dev;
-
-/* fid:
- * multiplier * 10
- */
-static int fid;
-
-/* min_fsb, max_fsb:
- * minimum and maximum FSB (= FSB at boot time)
- */
-static int min_fsb;
-static int max_fsb;
-
-MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
-MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
-MODULE_LICENSE("GPL");
-
-module_param(fid, int, 0444);
-module_param(min_fsb, int, 0444);
-
-MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
-MODULE_PARM_DESC(min_fsb,
-		"Minimum FSB to use, if not defined: current FSB - 50");
-
-#define PFX "cpufreq-nforce2: "
-
-/**
- * nforce2_calc_fsb - calculate FSB
- * @pll: PLL value
- *
- *   Calculates FSB from PLL value
- */
-static int nforce2_calc_fsb(int pll)
-{
-	unsigned char mul, div;
-
-	mul = (pll >> 8) & 0xff;
-	div = pll & 0xff;
-
-	if (div > 0)
-		return NFORCE2_XTAL * mul / div;
-
-	return 0;
-}
-
-/**
- * nforce2_calc_pll - calculate PLL value
- * @fsb: FSB
- *
- *   Calculate PLL value for given FSB
- */
-static int nforce2_calc_pll(unsigned int fsb)
-{
-	unsigned char xmul, xdiv;
-	unsigned char mul = 0, div = 0;
-	int tried = 0;
-
-	/* Try to calculate multiplier and divider up to 4 times */
-	while (((mul == 0) || (div == 0)) && (tried <= 3)) {
-		for (xdiv = 2; xdiv <= 0x80; xdiv++)
-			for (xmul = 1; xmul <= 0xfe; xmul++)
-				if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) ==
-				    fsb + tried) {
-					mul = xmul;
-					div = xdiv;
-				}
-		tried++;
-	}
-
-	if ((mul == 0) || (div == 0))
-		return -1;
-
-	return NFORCE2_PLL(mul, div);
-}
-
-/**
- * nforce2_write_pll - write PLL value to chipset
- * @pll: PLL value
- *
- *   Writes new FSB PLL value to chipset
- */
-static void nforce2_write_pll(int pll)
-{
-	int temp;
-
-	/* Set the pll addr. to 0x00 */
-	pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0);
-
-	/* Now write the value in all 64 registers */
-	for (temp = 0; temp <= 0x3f; temp++)
-		pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll);
-
-	return;
-}
-
-/**
- * nforce2_fsb_read - Read FSB
- *
- *   Read FSB from chipset
- *   If bootfsb != 0, return FSB at boot-time
- */
-static unsigned int nforce2_fsb_read(int bootfsb)
-{
-	struct pci_dev *nforce2_sub5;
-	u32 fsb, temp = 0;
-
-	/* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
-	nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF,
-				PCI_ANY_ID, PCI_ANY_ID, NULL);
-	if (!nforce2_sub5)
-		return 0;
-
-	pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb);
-	fsb /= 1000000;
-
-	/* Check if PLL register is already set */
-	pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
-
-	if (bootfsb || !temp)
-		return fsb;
-
-	/* Use PLL register FSB value */
-	pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp);
-	fsb = nforce2_calc_fsb(temp);
-
-	return fsb;
-}
-
-/**
- * nforce2_set_fsb - set new FSB
- * @fsb: New FSB
- *
- *   Sets new FSB
- */
-static int nforce2_set_fsb(unsigned int fsb)
-{
-	u32 temp = 0;
-	unsigned int tfsb;
-	int diff;
-	int pll = 0;
-
-	if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
-		printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb);
-		return -EINVAL;
-	}
-
-	tfsb = nforce2_fsb_read(0);
-	if (!tfsb) {
-		printk(KERN_ERR PFX "Error while reading the FSB\n");
-		return -EINVAL;
-	}
-
-	/* First write? Then set actual value */
-	pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
-	if (!temp) {
-		pll = nforce2_calc_pll(tfsb);
-
-		if (pll < 0)
-			return -EINVAL;
-
-		nforce2_write_pll(pll);
-	}
-
-	/* Enable write access */
-	temp = 0x01;
-	pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp);
-
-	diff = tfsb - fsb;
-
-	if (!diff)
-		return 0;
-
-	while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) {
-		if (diff < 0)
-			tfsb++;
-		else
-			tfsb--;
-
-		/* Calculate the PLL reg. value */
-		pll = nforce2_calc_pll(tfsb);
-		if (pll == -1)
-			return -EINVAL;
-
-		nforce2_write_pll(pll);
-#ifdef NFORCE2_DELAY
-		mdelay(NFORCE2_DELAY);
-#endif
-	}
-
-	temp = 0x40;
-	pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp);
-
-	return 0;
-}
-
-/**
- * nforce2_get - get the CPU frequency
- * @cpu: CPU number
- *
- * Returns the CPU frequency
- */
-static unsigned int nforce2_get(unsigned int cpu)
-{
-	if (cpu)
-		return 0;
-	return nforce2_fsb_read(0) * fid * 100;
-}
-
-/**
- * nforce2_target - set a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency
- *  (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * Sets a new CPUFreq policy.
- */
-static int nforce2_target(struct cpufreq_policy *policy,
-			  unsigned int target_freq, unsigned int relation)
-{
-/*        unsigned long         flags; */
-	struct cpufreq_freqs freqs;
-	unsigned int target_fsb;
-
-	if ((target_freq > policy->max) || (target_freq < policy->min))
-		return -EINVAL;
-
-	target_fsb = target_freq / (fid * 100);
-
-	freqs.old = nforce2_get(policy->cpu);
-	freqs.new = target_fsb * fid * 100;
-	freqs.cpu = 0;		/* Only one CPU on nForce2 platforms */
-
-	if (freqs.old == freqs.new)
-		return 0;
-
-	pr_debug("Old CPU frequency %d kHz, new %d kHz\n",
-	       freqs.old, freqs.new);
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	/* Disable IRQs */
-	/* local_irq_save(flags); */
-
-	if (nforce2_set_fsb(target_fsb) < 0)
-		printk(KERN_ERR PFX "Changing FSB to %d failed\n",
-			target_fsb);
-	else
-		pr_debug("Changed FSB successfully to %d\n",
-			target_fsb);
-
-	/* Enable IRQs */
-	/* local_irq_restore(flags); */
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
-	return 0;
-}
-
-/**
- * nforce2_verify - verifies a new CPUFreq policy
- * @policy: new policy
- */
-static int nforce2_verify(struct cpufreq_policy *policy)
-{
-	unsigned int fsb_pol_max;
-
-	fsb_pol_max = policy->max / (fid * 100);
-
-	if (policy->min < (fsb_pol_max * fid * 100))
-		policy->max = (fsb_pol_max + 1) * fid * 100;
-
-	cpufreq_verify_within_limits(policy,
-				     policy->cpuinfo.min_freq,
-				     policy->cpuinfo.max_freq);
-	return 0;
-}
-
-static int nforce2_cpu_init(struct cpufreq_policy *policy)
-{
-	unsigned int fsb;
-	unsigned int rfid;
-
-	/* capability check */
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	/* Get current FSB */
-	fsb = nforce2_fsb_read(0);
-
-	if (!fsb)
-		return -EIO;
-
-	/* FIX: Get FID from CPU */
-	if (!fid) {
-		if (!cpu_khz) {
-			printk(KERN_WARNING PFX
-			"cpu_khz not set, can't calculate multiplier!\n");
-			return -ENODEV;
-		}
-
-		fid = cpu_khz / (fsb * 100);
-		rfid = fid % 5;
-
-		if (rfid) {
-			if (rfid > 2)
-				fid += 5 - rfid;
-			else
-				fid -= rfid;
-		}
-	}
-
-	printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb,
-	       fid / 10, fid % 10);
-
-	/* Set maximum FSB to FSB at boot time */
-	max_fsb = nforce2_fsb_read(1);
-
-	if (!max_fsb)
-		return -EIO;
-
-	if (!min_fsb)
-		min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE;
-
-	if (min_fsb < NFORCE2_MIN_FSB)
-		min_fsb = NFORCE2_MIN_FSB;
-
-	/* cpuinfo and default policy values */
-	policy->cpuinfo.min_freq = min_fsb * fid * 100;
-	policy->cpuinfo.max_freq = max_fsb * fid * 100;
-	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-	policy->cur = nforce2_get(policy->cpu);
-	policy->min = policy->cpuinfo.min_freq;
-	policy->max = policy->cpuinfo.max_freq;
-
-	return 0;
-}
-
-static int nforce2_cpu_exit(struct cpufreq_policy *policy)
-{
-	return 0;
-}
-
-static struct cpufreq_driver nforce2_driver = {
-	.name = "nforce2",
-	.verify = nforce2_verify,
-	.target = nforce2_target,
-	.get = nforce2_get,
-	.init = nforce2_cpu_init,
-	.exit = nforce2_cpu_exit,
-	.owner = THIS_MODULE,
-};
-
-/**
- * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic
- *
- * Detects nForce2 A2 and C1 stepping
- *
- */
-static int nforce2_detect_chipset(void)
-{
-	nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
-					PCI_DEVICE_ID_NVIDIA_NFORCE2,
-					PCI_ANY_ID, PCI_ANY_ID, NULL);
-
-	if (nforce2_dev == NULL)
-		return -ENODEV;
-
-	printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n",
-	       nforce2_dev->revision);
-	printk(KERN_INFO PFX
-	       "FSB changing is maybe unstable and can lead to "
-	       "crashes and data loss.\n");
-
-	return 0;
-}
-
-/**
- * nforce2_init - initializes the nForce2 CPUFreq driver
- *
- * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported
- * devices, -EINVAL on problems during initiatization, and zero on
- * success.
- */
-static int __init nforce2_init(void)
-{
-	/* TODO: do we need to detect the processor? */
-
-	/* detect chipset */
-	if (nforce2_detect_chipset()) {
-		printk(KERN_INFO PFX "No nForce2 chipset.\n");
-		return -ENODEV;
-	}
-
-	return cpufreq_register_driver(&nforce2_driver);
-}
-
-/**
- * nforce2_exit - unregisters cpufreq module
- *
- *   Unregisters nForce2 FSB change support.
- */
-static void __exit nforce2_exit(void)
-{
-	cpufreq_unregister_driver(&nforce2_driver);
-}
-
-module_init(nforce2_init);
-module_exit(nforce2_exit);
-
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
deleted file mode 100644
index 35a257dd4bb7..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- *  Based on documentation provided by Dave Jones. Thanks!
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/ioport.h>
-#include <linux/slab.h>
-#include <linux/timex.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-
-#include <asm/msr.h>
-#include <asm/tsc.h>
-
-#define EPS_BRAND_C7M	0
-#define EPS_BRAND_C7	1
-#define EPS_BRAND_EDEN	2
-#define EPS_BRAND_C3	3
-#define EPS_BRAND_C7D	4
-
-struct eps_cpu_data {
-	u32 fsb;
-	struct cpufreq_frequency_table freq_table[];
-};
-
-static struct eps_cpu_data *eps_cpu[NR_CPUS];
-
-
-static unsigned int eps_get(unsigned int cpu)
-{
-	struct eps_cpu_data *centaur;
-	u32 lo, hi;
-
-	if (cpu)
-		return 0;
-	centaur = eps_cpu[cpu];
-	if (centaur == NULL)
-		return 0;
-
-	/* Return current frequency */
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	return centaur->fsb * ((lo >> 8) & 0xff);
-}
-
-static int eps_set_state(struct eps_cpu_data *centaur,
-			 unsigned int cpu,
-			 u32 dest_state)
-{
-	struct cpufreq_freqs freqs;
-	u32 lo, hi;
-	int err = 0;
-	int i;
-
-	freqs.old = eps_get(cpu);
-	freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff);
-	freqs.cpu = cpu;
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	/* Wait while CPU is busy */
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	i = 0;
-	while (lo & ((1 << 16) | (1 << 17))) {
-		udelay(16);
-		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-		i++;
-		if (unlikely(i > 64)) {
-			err = -ENODEV;
-			goto postchange;
-		}
-	}
-	/* Set new multiplier and voltage */
-	wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0);
-	/* Wait until transition end */
-	i = 0;
-	do {
-		udelay(16);
-		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-		i++;
-		if (unlikely(i > 64)) {
-			err = -ENODEV;
-			goto postchange;
-		}
-	} while (lo & ((1 << 16) | (1 << 17)));
-
-	/* Return current frequency */
-postchange:
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	freqs.new = centaur->fsb * ((lo >> 8) & 0xff);
-
-#ifdef DEBUG
-	{
-	u8 current_multiplier, current_voltage;
-
-	/* Print voltage and multiplier */
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	current_voltage = lo & 0xff;
-	printk(KERN_INFO "eps: Current voltage = %dmV\n",
-		current_voltage * 16 + 700);
-	current_multiplier = (lo >> 8) & 0xff;
-	printk(KERN_INFO "eps: Current multiplier = %d\n",
-		current_multiplier);
-	}
-#endif
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	return err;
-}
-
-static int eps_target(struct cpufreq_policy *policy,
-			       unsigned int target_freq,
-			       unsigned int relation)
-{
-	struct eps_cpu_data *centaur;
-	unsigned int newstate = 0;
-	unsigned int cpu = policy->cpu;
-	unsigned int dest_state;
-	int ret;
-
-	if (unlikely(eps_cpu[cpu] == NULL))
-		return -ENODEV;
-	centaur = eps_cpu[cpu];
-
-	if (unlikely(cpufreq_frequency_table_target(policy,
-			&eps_cpu[cpu]->freq_table[0],
-			target_freq,
-			relation,
-			&newstate))) {
-		return -EINVAL;
-	}
-
-	/* Make frequency transition */
-	dest_state = centaur->freq_table[newstate].index & 0xffff;
-	ret = eps_set_state(centaur, cpu, dest_state);
-	if (ret)
-		printk(KERN_ERR "eps: Timeout!\n");
-	return ret;
-}
-
-static int eps_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy,
-			&eps_cpu[policy->cpu]->freq_table[0]);
-}
-
-static int eps_cpu_init(struct cpufreq_policy *policy)
-{
-	unsigned int i;
-	u32 lo, hi;
-	u64 val;
-	u8 current_multiplier, current_voltage;
-	u8 max_multiplier, max_voltage;
-	u8 min_multiplier, min_voltage;
-	u8 brand = 0;
-	u32 fsb;
-	struct eps_cpu_data *centaur;
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	struct cpufreq_frequency_table *f_table;
-	int k, step, voltage;
-	int ret;
-	int states;
-
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	/* Check brand */
-	printk(KERN_INFO "eps: Detected VIA ");
-
-	switch (c->x86_model) {
-	case 10:
-		rdmsr(0x1153, lo, hi);
-		brand = (((lo >> 2) ^ lo) >> 18) & 3;
-		printk(KERN_CONT "Model A ");
-		break;
-	case 13:
-		rdmsr(0x1154, lo, hi);
-		brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff;
-		printk(KERN_CONT "Model D ");
-		break;
-	}
-
-	switch (brand) {
-	case EPS_BRAND_C7M:
-		printk(KERN_CONT "C7-M\n");
-		break;
-	case EPS_BRAND_C7:
-		printk(KERN_CONT "C7\n");
-		break;
-	case EPS_BRAND_EDEN:
-		printk(KERN_CONT "Eden\n");
-		break;
-	case EPS_BRAND_C7D:
-		printk(KERN_CONT "C7-D\n");
-		break;
-	case EPS_BRAND_C3:
-		printk(KERN_CONT "C3\n");
-		return -ENODEV;
-		break;
-	}
-	/* Enable Enhanced PowerSaver */
-	rdmsrl(MSR_IA32_MISC_ENABLE, val);
-	if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-		val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
-		wrmsrl(MSR_IA32_MISC_ENABLE, val);
-		/* Can be locked at 0 */
-		rdmsrl(MSR_IA32_MISC_ENABLE, val);
-		if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-			printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
-			return -ENODEV;
-		}
-	}
-
-	/* Print voltage and multiplier */
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	current_voltage = lo & 0xff;
-	printk(KERN_INFO "eps: Current voltage = %dmV\n",
-			current_voltage * 16 + 700);
-	current_multiplier = (lo >> 8) & 0xff;
-	printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
-
-	/* Print limits */
-	max_voltage = hi & 0xff;
-	printk(KERN_INFO "eps: Highest voltage = %dmV\n",
-			max_voltage * 16 + 700);
-	max_multiplier = (hi >> 8) & 0xff;
-	printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
-	min_voltage = (hi >> 16) & 0xff;
-	printk(KERN_INFO "eps: Lowest voltage = %dmV\n",
-			min_voltage * 16 + 700);
-	min_multiplier = (hi >> 24) & 0xff;
-	printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
-
-	/* Sanity checks */
-	if (current_multiplier == 0 || max_multiplier == 0
-	    || min_multiplier == 0)
-		return -EINVAL;
-	if (current_multiplier > max_multiplier
-	    || max_multiplier <= min_multiplier)
-		return -EINVAL;
-	if (current_voltage > 0x1f || max_voltage > 0x1f)
-		return -EINVAL;
-	if (max_voltage < min_voltage)
-		return -EINVAL;
-
-	/* Calc FSB speed */
-	fsb = cpu_khz / current_multiplier;
-	/* Calc number of p-states supported */
-	if (brand == EPS_BRAND_C7M)
-		states = max_multiplier - min_multiplier + 1;
-	else
-		states = 2;
-
-	/* Allocate private data and frequency table for current cpu */
-	centaur = kzalloc(sizeof(struct eps_cpu_data)
-		    + (states + 1) * sizeof(struct cpufreq_frequency_table),
-		    GFP_KERNEL);
-	if (!centaur)
-		return -ENOMEM;
-	eps_cpu[0] = centaur;
-
-	/* Copy basic values */
-	centaur->fsb = fsb;
-
-	/* Fill frequency and MSR value table */
-	f_table = &centaur->freq_table[0];
-	if (brand != EPS_BRAND_C7M) {
-		f_table[0].frequency = fsb * min_multiplier;
-		f_table[0].index = (min_multiplier << 8) | min_voltage;
-		f_table[1].frequency = fsb * max_multiplier;
-		f_table[1].index = (max_multiplier << 8) | max_voltage;
-		f_table[2].frequency = CPUFREQ_TABLE_END;
-	} else {
-		k = 0;
-		step = ((max_voltage - min_voltage) * 256)
-			/ (max_multiplier - min_multiplier);
-		for (i = min_multiplier; i <= max_multiplier; i++) {
-			voltage = (k * step) / 256 + min_voltage;
-			f_table[k].frequency = fsb * i;
-			f_table[k].index = (i << 8) | voltage;
-			k++;
-		}
-		f_table[k].frequency = CPUFREQ_TABLE_END;
-	}
-
-	policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */
-	policy->cur = fsb * current_multiplier;
-
-	ret = cpufreq_frequency_table_cpuinfo(policy, &centaur->freq_table[0]);
-	if (ret) {
-		kfree(centaur);
-		return ret;
-	}
-
-	cpufreq_frequency_table_get_attr(&centaur->freq_table[0], policy->cpu);
-	return 0;
-}
-
-static int eps_cpu_exit(struct cpufreq_policy *policy)
-{
-	unsigned int cpu = policy->cpu;
-	struct eps_cpu_data *centaur;
-	u32 lo, hi;
-
-	if (eps_cpu[cpu] == NULL)
-		return -ENODEV;
-	centaur = eps_cpu[cpu];
-
-	/* Get max frequency */
-	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
-	/* Set max frequency */
-	eps_set_state(centaur, cpu, hi & 0xffff);
-	/* Bye */
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	kfree(eps_cpu[cpu]);
-	eps_cpu[cpu] = NULL;
-	return 0;
-}
-
-static struct freq_attr *eps_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver eps_driver = {
-	.verify		= eps_verify,
-	.target		= eps_target,
-	.init		= eps_cpu_init,
-	.exit		= eps_cpu_exit,
-	.get		= eps_get,
-	.name		= "e_powersaver",
-	.owner		= THIS_MODULE,
-	.attr		= eps_attr,
-};
-
-static int __init eps_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	/* This driver will work only on Centaur C7 processors with
-	 * Enhanced SpeedStep/PowerSaver registers */
-	if (c->x86_vendor != X86_VENDOR_CENTAUR
-	    || c->x86 != 6 || c->x86_model < 10)
-		return -ENODEV;
-	if (!cpu_has(c, X86_FEATURE_EST))
-		return -ENODEV;
-
-	if (cpufreq_register_driver(&eps_driver))
-		return -EINVAL;
-	return 0;
-}
-
-static void __exit eps_exit(void)
-{
-	cpufreq_unregister_driver(&eps_driver);
-}
-
-MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>");
-MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's.");
-MODULE_LICENSE("GPL");
-
-module_init(eps_init);
-module_exit(eps_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
deleted file mode 100644
index c587db472a75..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ /dev/null
@@ -1,309 +0,0 @@
-/*
- *	elanfreq:	cpufreq driver for the AMD ELAN family
- *
- *	(c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de>
- *
- *	Parts of this code are (c) Sven Geggus <sven@geggus.net>
- *
- *      All Rights Reserved.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- *	2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-
-#include <linux/delay.h>
-#include <linux/cpufreq.h>
-
-#include <asm/msr.h>
-#include <linux/timex.h>
-#include <linux/io.h>
-
-#define REG_CSCIR 0x22		/* Chip Setup and Control Index Register    */
-#define REG_CSCDR 0x23		/* Chip Setup and Control Data  Register    */
-
-/* Module parameter */
-static int max_freq;
-
-struct s_elan_multiplier {
-	int clock;		/* frequency in kHz                         */
-	int val40h;		/* PMU Force Mode register                  */
-	int val80h;		/* CPU Clock Speed Register                 */
-};
-
-/*
- * It is important that the frequencies
- * are listed in ascending order here!
- */
-static struct s_elan_multiplier elan_multiplier[] = {
-	{1000,	0x02,	0x18},
-	{2000,	0x02,	0x10},
-	{4000,	0x02,	0x08},
-	{8000,	0x00,	0x00},
-	{16000,	0x00,	0x02},
-	{33000,	0x00,	0x04},
-	{66000,	0x01,	0x04},
-	{99000,	0x01,	0x05}
-};
-
-static struct cpufreq_frequency_table elanfreq_table[] = {
-	{0,	1000},
-	{1,	2000},
-	{2,	4000},
-	{3,	8000},
-	{4,	16000},
-	{5,	33000},
-	{6,	66000},
-	{7,	99000},
-	{0,	CPUFREQ_TABLE_END},
-};
-
-
-/**
- *	elanfreq_get_cpu_frequency: determine current cpu speed
- *
- *	Finds out at which frequency the CPU of the Elan SOC runs
- *	at the moment. Frequencies from 1 to 33 MHz are generated
- *	the normal way, 66 and 99 MHz are called "Hyperspeed Mode"
- *	and have the rest of the chip running with 33 MHz.
- */
-
-static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
-{
-	u8 clockspeed_reg;    /* Clock Speed Register */
-
-	local_irq_disable();
-	outb_p(0x80, REG_CSCIR);
-	clockspeed_reg = inb_p(REG_CSCDR);
-	local_irq_enable();
-
-	if ((clockspeed_reg & 0xE0) == 0xE0)
-		return 0;
-
-	/* Are we in CPU clock multiplied mode (66/99 MHz)? */
-	if ((clockspeed_reg & 0xE0) == 0xC0) {
-		if ((clockspeed_reg & 0x01) == 0)
-			return 66000;
-		else
-			return 99000;
-	}
-
-	/* 33 MHz is not 32 MHz... */
-	if ((clockspeed_reg & 0xE0) == 0xA0)
-		return 33000;
-
-	return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000;
-}
-
-
-/**
- *	elanfreq_set_cpu_frequency: Change the CPU core frequency
- *	@cpu: cpu number
- *	@freq: frequency in kHz
- *
- *	This function takes a frequency value and changes the CPU frequency
- *	according to this. Note that the frequency has to be checked by
- *	elanfreq_validatespeed() for correctness!
- *
- *	There is no return value.
- */
-
-static void elanfreq_set_cpu_state(unsigned int state)
-{
-	struct cpufreq_freqs    freqs;
-
-	freqs.old = elanfreq_get_cpu_frequency(0);
-	freqs.new = elan_multiplier[state].clock;
-	freqs.cpu = 0; /* elanfreq.c is UP only driver */
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n",
-			elan_multiplier[state].clock);
-
-
-	/*
-	 * Access to the Elan's internal registers is indexed via
-	 * 0x22: Chip Setup & Control Register Index Register (CSCI)
-	 * 0x23: Chip Setup & Control Register Data  Register (CSCD)
-	 *
-	 */
-
-	/*
-	 * 0x40 is the Power Management Unit's Force Mode Register.
-	 * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency)
-	 */
-
-	local_irq_disable();
-	outb_p(0x40, REG_CSCIR);		/* Disable hyperspeed mode */
-	outb_p(0x00, REG_CSCDR);
-	local_irq_enable();		/* wait till internal pipelines and */
-	udelay(1000);			/* buffers have cleaned up          */
-
-	local_irq_disable();
-
-	/* now, set the CPU clock speed register (0x80) */
-	outb_p(0x80, REG_CSCIR);
-	outb_p(elan_multiplier[state].val80h, REG_CSCDR);
-
-	/* now, the hyperspeed bit in PMU Force Mode Register (0x40) */
-	outb_p(0x40, REG_CSCIR);
-	outb_p(elan_multiplier[state].val40h, REG_CSCDR);
-	udelay(10000);
-	local_irq_enable();
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-};
-
-
-/**
- *	elanfreq_validatespeed: test if frequency range is valid
- *	@policy: the policy to validate
- *
- *	This function checks if a given frequency range in kHz is valid
- *	for the hardware supported by the driver.
- */
-
-static int elanfreq_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]);
-}
-
-static int elanfreq_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int newstate = 0;
-
-	if (cpufreq_frequency_table_target(policy, &elanfreq_table[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	elanfreq_set_cpu_state(newstate);
-
-	return 0;
-}
-
-
-/*
- *	Module init and exit code
- */
-
-static int elanfreq_cpu_init(struct cpufreq_policy *policy)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	unsigned int i;
-	int result;
-
-	/* capability check */
-	if ((c->x86_vendor != X86_VENDOR_AMD) ||
-	    (c->x86 != 4) || (c->x86_model != 10))
-		return -ENODEV;
-
-	/* max freq */
-	if (!max_freq)
-		max_freq = elanfreq_get_cpu_frequency(0);
-
-	/* table init */
-	for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
-		if (elanfreq_table[i].frequency > max_freq)
-			elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID;
-	}
-
-	/* cpuinfo and default policy values */
-	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-	policy->cur = elanfreq_get_cpu_frequency(0);
-
-	result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table);
-	if (result)
-		return result;
-
-	cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu);
-	return 0;
-}
-
-
-static int elanfreq_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-
-#ifndef MODULE
-/**
- * elanfreq_setup - elanfreq command line parameter parsing
- *
- * elanfreq command line parameter.  Use:
- *  elanfreq=66000
- * to set the maximum CPU frequency to 66 MHz. Note that in
- * case you do not give this boot parameter, the maximum
- * frequency will fall back to _current_ CPU frequency which
- * might be lower. If you build this as a module, use the
- * max_freq module parameter instead.
- */
-static int __init elanfreq_setup(char *str)
-{
-	max_freq = simple_strtoul(str, &str, 0);
-	printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
-	return 1;
-}
-__setup("elanfreq=", elanfreq_setup);
-#endif
-
-
-static struct freq_attr *elanfreq_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-
-static struct cpufreq_driver elanfreq_driver = {
-	.get		= elanfreq_get_cpu_frequency,
-	.verify		= elanfreq_verify,
-	.target		= elanfreq_target,
-	.init		= elanfreq_cpu_init,
-	.exit		= elanfreq_cpu_exit,
-	.name		= "elanfreq",
-	.owner		= THIS_MODULE,
-	.attr		= elanfreq_attr,
-};
-
-
-static int __init elanfreq_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	/* Test if we have the right hardware */
-	if ((c->x86_vendor != X86_VENDOR_AMD) ||
-		(c->x86 != 4) || (c->x86_model != 10)) {
-		printk(KERN_INFO "elanfreq: error: no Elan processor found!\n");
-		return -ENODEV;
-	}
-	return cpufreq_register_driver(&elanfreq_driver);
-}
-
-
-static void __exit elanfreq_exit(void)
-{
-	cpufreq_unregister_driver(&elanfreq_driver);
-}
-
-
-module_param(max_freq, int, 0444);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, "
-		"Sven Geggus <sven@geggus.net>");
-MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
-
-module_init(elanfreq_init);
-module_exit(elanfreq_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
deleted file mode 100644
index ffe1f2c92ed3..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ /dev/null
@@ -1,514 +0,0 @@
-/*
- *	Cyrix MediaGX and NatSemi Geode Suspend Modulation
- *	(C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
- *	(C) 2002 Hiroshi Miura   <miura@da-cha.org>
- *	All Rights Reserved
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      version 2 as published by the Free Software Foundation
- *
- *      The author(s) of this software shall not be held liable for damages
- *      of any nature resulting due to the use of this software. This
- *      software is provided AS-IS with no warranties.
- *
- * Theoretical note:
- *
- *	(see Geode(tm) CS5530 manual (rev.4.1) page.56)
- *
- *	CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0
- *	are based on Suspend Modulation.
- *
- *	Suspend Modulation works by asserting and de-asserting the SUSP# pin
- *	to CPU(GX1/GXLV) for configurable durations. When asserting SUSP#
- *	the CPU enters an idle state. GX1 stops its core clock when SUSP# is
- *	asserted then power consumption is reduced.
- *
- *	Suspend Modulation's OFF/ON duration are configurable
- *	with 'Suspend Modulation OFF Count Register'
- *	and 'Suspend Modulation ON Count Register'.
- *	These registers are 8bit counters that represent the number of
- *	32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF)
- *	to the processor.
- *
- *	These counters define a ratio which is the effective frequency
- *	of operation of the system.
- *
- *			       OFF Count
- *	F_eff = Fgx * ----------------------
- *	                OFF Count + ON Count
- *
- *	0 <= On Count, Off Count <= 255
- *
- *	From these limits, we can get register values
- *
- *	off_duration + on_duration <= MAX_DURATION
- *	on_duration = off_duration * (stock_freq - freq) / freq
- *
- *      off_duration  =  (freq * DURATION) / stock_freq
- *      on_duration = DURATION - off_duration
- *
- *
- *---------------------------------------------------------------------------
- *
- * ChangeLog:
- *	Dec. 12, 2003	Hiroshi Miura <miura@da-cha.org>
- *		- fix on/off register mistake
- *		- fix cpu_khz calc when it stops cpu modulation.
- *
- *	Dec. 11, 2002	Hiroshi Miura <miura@da-cha.org>
- *		- rewrite for Cyrix MediaGX Cx5510/5520 and
- *		  NatSemi Geode Cs5530(A).
- *
- *	Jul. ??, 2002  Zwane Mwaikambo <zwane@commfireservices.com>
- *		- cs5530_mod patch for 2.4.19-rc1.
- *
- *---------------------------------------------------------------------------
- *
- * Todo
- *	Test on machines with 5510, 5530, 5530A
- */
-
-/************************************************************************
- *			Suspend Modulation - Definitions		*
- ************************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-
-#include <asm/processor-cyrix.h>
-
-/* PCI config registers, all at F0 */
-#define PCI_PMER1	0x80	/* power management enable register 1 */
-#define PCI_PMER2	0x81	/* power management enable register 2 */
-#define PCI_PMER3	0x82	/* power management enable register 3 */
-#define PCI_IRQTC	0x8c	/* irq speedup timer counter register:typical 2 to 4ms */
-#define PCI_VIDTC	0x8d	/* video speedup timer counter register: typical 50 to 100ms */
-#define PCI_MODOFF	0x94	/* suspend modulation OFF counter register, 1 = 32us */
-#define PCI_MODON	0x95	/* suspend modulation ON counter register */
-#define PCI_SUSCFG	0x96	/* suspend configuration register */
-
-/* PMER1 bits */
-#define GPM		(1<<0)	/* global power management */
-#define GIT		(1<<1)	/* globally enable PM device idle timers */
-#define GTR		(1<<2)	/* globally enable IO traps */
-#define IRQ_SPDUP	(1<<3)	/* disable clock throttle during interrupt handling */
-#define VID_SPDUP	(1<<4)	/* disable clock throttle during vga video handling */
-
-/* SUSCFG bits */
-#define SUSMOD		(1<<0)	/* enable/disable suspend modulation */
-/* the below is supported only with cs5530 (after rev.1.2)/cs5530A */
-#define SMISPDUP	(1<<1)	/* select how SMI re-enable suspend modulation: */
-				/* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */
-#define SUSCFG		(1<<2)	/* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */
-/* the below is supported only with cs5530A */
-#define PWRSVE_ISA	(1<<3)	/* stop ISA clock  */
-#define PWRSVE		(1<<4)	/* active idle */
-
-struct gxfreq_params {
-	u8 on_duration;
-	u8 off_duration;
-	u8 pci_suscfg;
-	u8 pci_pmer1;
-	u8 pci_pmer2;
-	struct pci_dev *cs55x0;
-};
-
-static struct gxfreq_params *gx_params;
-static int stock_freq;
-
-/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
-static int pci_busclk;
-module_param(pci_busclk, int, 0444);
-
-/* maximum duration for which the cpu may be suspended
- * (32us * MAX_DURATION). If no parameter is given, this defaults
- * to 255.
- * Note that this leads to a maximum of 8 ms(!) where the CPU clock
- * is suspended -- processing power is just 0.39% of what it used to be,
- * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
-static int max_duration = 255;
-module_param(max_duration, int, 0444);
-
-/* For the default policy, we want at least some processing power
- * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
- */
-#define POLICY_MIN_DIV 20
-
-
-/**
- * we can detect a core multipiler from dir0_lsb
- * from GX1 datasheet p.56,
- *	MULT[3:0]:
- *	0000 = SYSCLK multiplied by 4 (test only)
- *	0001 = SYSCLK multiplied by 10
- *	0010 = SYSCLK multiplied by 4
- *	0011 = SYSCLK multiplied by 6
- *	0100 = SYSCLK multiplied by 9
- *	0101 = SYSCLK multiplied by 5
- *	0110 = SYSCLK multiplied by 7
- *	0111 = SYSCLK multiplied by 8
- *              of 33.3MHz
- **/
-static int gx_freq_mult[16] = {
-		4, 10, 4, 6, 9, 5, 7, 8,
-		0, 0, 0, 0, 0, 0, 0, 0
-};
-
-
-/****************************************************************
- *	Low Level chipset interface				*
- ****************************************************************/
-static struct pci_device_id gx_chipset_tbl[] __initdata = {
-	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), },
-	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), },
-	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), },
-	{ 0, },
-};
-
-static void gx_write_byte(int reg, int value)
-{
-	pci_write_config_byte(gx_params->cs55x0, reg, value);
-}
-
-/**
- * gx_detect_chipset:
- *
- **/
-static __init struct pci_dev *gx_detect_chipset(void)
-{
-	struct pci_dev *gx_pci = NULL;
-
-	/* check if CPU is a MediaGX or a Geode. */
-	if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
-	    (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
-		pr_debug("error: no MediaGX/Geode processor found!\n");
-		return NULL;
-	}
-
-	/* detect which companion chip is used */
-	for_each_pci_dev(gx_pci) {
-		if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL)
-			return gx_pci;
-	}
-
-	pr_debug("error: no supported chipset found!\n");
-	return NULL;
-}
-
-/**
- * gx_get_cpuspeed:
- *
- * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi
- * Geode CPU runs.
- */
-static unsigned int gx_get_cpuspeed(unsigned int cpu)
-{
-	if ((gx_params->pci_suscfg & SUSMOD) == 0)
-		return stock_freq;
-
-	return (stock_freq * gx_params->off_duration)
-		/ (gx_params->on_duration + gx_params->off_duration);
-}
-
-/**
- *      gx_validate_speed:
- *      determine current cpu speed
- *
- **/
-
-static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration,
-		u8 *off_duration)
-{
-	unsigned int i;
-	u8 tmp_on, tmp_off;
-	int old_tmp_freq = stock_freq;
-	int tmp_freq;
-
-	*off_duration = 1;
-	*on_duration = 0;
-
-	for (i = max_duration; i > 0; i--) {
-		tmp_off = ((khz * i) / stock_freq) & 0xff;
-		tmp_on = i - tmp_off;
-		tmp_freq = (stock_freq * tmp_off) / i;
-		/* if this relation is closer to khz, use this. If it's equal,
-		 * prefer it, too - lower latency */
-		if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) {
-			*on_duration = tmp_on;
-			*off_duration = tmp_off;
-			old_tmp_freq = tmp_freq;
-		}
-	}
-
-	return old_tmp_freq;
-}
-
-
-/**
- * gx_set_cpuspeed:
- * set cpu speed in khz.
- **/
-
-static void gx_set_cpuspeed(unsigned int khz)
-{
-	u8 suscfg, pmer1;
-	unsigned int new_khz;
-	unsigned long flags;
-	struct cpufreq_freqs freqs;
-
-	freqs.cpu = 0;
-	freqs.old = gx_get_cpuspeed(0);
-
-	new_khz = gx_validate_speed(khz, &gx_params->on_duration,
-			&gx_params->off_duration);
-
-	freqs.new = new_khz;
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	local_irq_save(flags);
-
-
-
-	if (new_khz != stock_freq) {
-		/* if new khz == 100% of CPU speed, it is special case */
-		switch (gx_params->cs55x0->device) {
-		case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
-			pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
-			/* FIXME: need to test other values -- Zwane,Miura */
-			/* typical 2 to 4ms */
-			gx_write_byte(PCI_IRQTC, 4);
-			/* typical 50 to 100ms */
-			gx_write_byte(PCI_VIDTC, 100);
-			gx_write_byte(PCI_PMER1, pmer1);
-
-			if (gx_params->cs55x0->revision < 0x10) {
-				/* CS5530(rev 1.2, 1.3) */
-				suscfg = gx_params->pci_suscfg|SUSMOD;
-			} else {
-				/* CS5530A,B.. */
-				suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE;
-			}
-			break;
-		case PCI_DEVICE_ID_CYRIX_5520:
-		case PCI_DEVICE_ID_CYRIX_5510:
-			suscfg = gx_params->pci_suscfg | SUSMOD;
-			break;
-		default:
-			local_irq_restore(flags);
-			pr_debug("fatal: try to set unknown chipset.\n");
-			return;
-		}
-	} else {
-		suscfg = gx_params->pci_suscfg & ~(SUSMOD);
-		gx_params->off_duration = 0;
-		gx_params->on_duration = 0;
-		pr_debug("suspend modulation disabled: cpu runs 100%% speed.\n");
-	}
-
-	gx_write_byte(PCI_MODOFF, gx_params->off_duration);
-	gx_write_byte(PCI_MODON, gx_params->on_duration);
-
-	gx_write_byte(PCI_SUSCFG, suscfg);
-	pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
-
-	local_irq_restore(flags);
-
-	gx_params->pci_suscfg = suscfg;
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
-	pr_debug("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
-		gx_params->on_duration * 32, gx_params->off_duration * 32);
-	pr_debug("suspend modulation w/ clock speed: %d kHz.\n", freqs.new);
-}
-
-/****************************************************************
- *             High level functions                             *
- ****************************************************************/
-
-/*
- *	cpufreq_gx_verify: test if frequency range is valid
- *
- *	This function checks if a given frequency range in kHz is valid
- *      for the hardware supported by the driver.
- */
-
-static int cpufreq_gx_verify(struct cpufreq_policy *policy)
-{
-	unsigned int tmp_freq = 0;
-	u8 tmp1, tmp2;
-
-	if (!stock_freq || !policy)
-		return -EINVAL;
-
-	policy->cpu = 0;
-	cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
-			stock_freq);
-
-	/* it needs to be assured that at least one supported frequency is
-	 * within policy->min and policy->max. If it is not, policy->max
-	 * needs to be increased until one freuqency is supported.
-	 * policy->min may not be decreased, though. This way we guarantee a
-	 * specific processing capacity.
-	 */
-	tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2);
-	if (tmp_freq < policy->min)
-		tmp_freq += stock_freq / max_duration;
-	policy->min = tmp_freq;
-	if (policy->min > policy->max)
-		policy->max = tmp_freq;
-	tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2);
-	if (tmp_freq > policy->max)
-		tmp_freq -= stock_freq / max_duration;
-	policy->max = tmp_freq;
-	if (policy->max < policy->min)
-		policy->max = policy->min;
-	cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
-			stock_freq);
-
-	return 0;
-}
-
-/*
- *      cpufreq_gx_target:
- *
- */
-static int cpufreq_gx_target(struct cpufreq_policy *policy,
-			     unsigned int target_freq,
-			     unsigned int relation)
-{
-	u8 tmp1, tmp2;
-	unsigned int tmp_freq;
-
-	if (!stock_freq || !policy)
-		return -EINVAL;
-
-	policy->cpu = 0;
-
-	tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2);
-	while (tmp_freq < policy->min) {
-		tmp_freq += stock_freq / max_duration;
-		tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
-	}
-	while (tmp_freq > policy->max) {
-		tmp_freq -= stock_freq / max_duration;
-		tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
-	}
-
-	gx_set_cpuspeed(tmp_freq);
-
-	return 0;
-}
-
-static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
-{
-	unsigned int maxfreq, curfreq;
-
-	if (!policy || policy->cpu != 0)
-		return -ENODEV;
-
-	/* determine maximum frequency */
-	if (pci_busclk)
-		maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
-	else if (cpu_khz)
-		maxfreq = cpu_khz;
-	else
-		maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
-
-	stock_freq = maxfreq;
-	curfreq = gx_get_cpuspeed(0);
-
-	pr_debug("cpu max frequency is %d.\n", maxfreq);
-	pr_debug("cpu current frequency is %dkHz.\n", curfreq);
-
-	/* setup basic struct for cpufreq API */
-	policy->cpu = 0;
-
-	if (max_duration < POLICY_MIN_DIV)
-		policy->min = maxfreq / max_duration;
-	else
-		policy->min = maxfreq / POLICY_MIN_DIV;
-	policy->max = maxfreq;
-	policy->cur = curfreq;
-	policy->cpuinfo.min_freq = maxfreq / max_duration;
-	policy->cpuinfo.max_freq = maxfreq;
-	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-
-	return 0;
-}
-
-/*
- * cpufreq_gx_init:
- *   MediaGX/Geode GX initialize cpufreq driver
- */
-static struct cpufreq_driver gx_suspmod_driver = {
-	.get		= gx_get_cpuspeed,
-	.verify		= cpufreq_gx_verify,
-	.target		= cpufreq_gx_target,
-	.init		= cpufreq_gx_cpu_init,
-	.name		= "gx-suspmod",
-	.owner		= THIS_MODULE,
-};
-
-static int __init cpufreq_gx_init(void)
-{
-	int ret;
-	struct gxfreq_params *params;
-	struct pci_dev *gx_pci;
-
-	/* Test if we have the right hardware */
-	gx_pci = gx_detect_chipset();
-	if (gx_pci == NULL)
-		return -ENODEV;
-
-	/* check whether module parameters are sane */
-	if (max_duration > 0xff)
-		max_duration = 0xff;
-
-	pr_debug("geode suspend modulation available.\n");
-
-	params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL);
-	if (params == NULL)
-		return -ENOMEM;
-
-	params->cs55x0 = gx_pci;
-	gx_params = params;
-
-	/* keep cs55x0 configurations */
-	pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg));
-	pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
-	pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
-	pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
-	pci_read_config_byte(params->cs55x0, PCI_MODOFF,
-			&(params->off_duration));
-
-	ret = cpufreq_register_driver(&gx_suspmod_driver);
-	if (ret) {
-		kfree(params);
-		return ret;                   /* register error! */
-	}
-
-	return 0;
-}
-
-static void __exit cpufreq_gx_exit(void)
-{
-	cpufreq_unregister_driver(&gx_suspmod_driver);
-	pci_dev_put(gx_params->cs55x0);
-	kfree(gx_params);
-}
-
-MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>");
-MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
-MODULE_LICENSE("GPL");
-
-module_init(cpufreq_gx_init);
-module_exit(cpufreq_gx_exit);
-
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
deleted file mode 100644
index f47d26e2a135..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ /dev/null
@@ -1,1024 +0,0 @@
-/*
- *  (C) 2001-2004  Dave Jones. <davej@redhat.com>
- *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *  Based upon datasheets & sample CPUs kindly provided by VIA.
- *
- *  VIA have currently 3 different versions of Longhaul.
- *  Version 1 (Longhaul) uses the BCR2 MSR at 0x1147.
- *   It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0.
- *  Version 2 of longhaul is backward compatible with v1, but adds
- *   LONGHAUL MSR for purpose of both frequency and voltage scaling.
- *   Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C).
- *  Version 3 of longhaul got renamed to Powersaver and redesigned
- *   to use only the POWERSAVER MSR at 0x110a.
- *   It is present in Ezra-T (C5M), Nehemiah (C5X) and above.
- *   It's pretty much the same feature wise to longhaul v2, though
- *   there is provision for scaling FSB too, but this doesn't work
- *   too well in practice so we don't even try to use this.
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/delay.h>
-#include <linux/timex.h>
-#include <linux/io.h>
-#include <linux/acpi.h>
-
-#include <asm/msr.h>
-#include <acpi/processor.h>
-
-#include "longhaul.h"
-
-#define PFX "longhaul: "
-
-#define TYPE_LONGHAUL_V1	1
-#define TYPE_LONGHAUL_V2	2
-#define TYPE_POWERSAVER		3
-
-#define	CPU_SAMUEL	1
-#define	CPU_SAMUEL2	2
-#define	CPU_EZRA	3
-#define	CPU_EZRA_T	4
-#define	CPU_NEHEMIAH	5
-#define	CPU_NEHEMIAH_C	6
-
-/* Flags */
-#define USE_ACPI_C3		(1 << 1)
-#define USE_NORTHBRIDGE		(1 << 2)
-
-static int cpu_model;
-static unsigned int numscales = 16;
-static unsigned int fsb;
-
-static const struct mV_pos *vrm_mV_table;
-static const unsigned char *mV_vrm_table;
-
-static unsigned int highest_speed, lowest_speed; /* kHz */
-static unsigned int minmult, maxmult;
-static int can_scale_voltage;
-static struct acpi_processor *pr;
-static struct acpi_processor_cx *cx;
-static u32 acpi_regs_addr;
-static u8 longhaul_flags;
-static unsigned int longhaul_index;
-
-/* Module parameters */
-static int scale_voltage;
-static int disable_acpi_c3;
-static int revid_errata;
-
-
-/* Clock ratios multiplied by 10 */
-static int mults[32];
-static int eblcr[32];
-static int longhaul_version;
-static struct cpufreq_frequency_table *longhaul_table;
-
-static char speedbuffer[8];
-
-static char *print_speed(int speed)
-{
-	if (speed < 1000) {
-		snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed);
-		return speedbuffer;
-	}
-
-	if (speed%1000 == 0)
-		snprintf(speedbuffer, sizeof(speedbuffer),
-			"%dGHz", speed/1000);
-	else
-		snprintf(speedbuffer, sizeof(speedbuffer),
-			"%d.%dGHz", speed/1000, (speed%1000)/100);
-
-	return speedbuffer;
-}
-
-
-static unsigned int calc_speed(int mult)
-{
-	int khz;
-	khz = (mult/10)*fsb;
-	if (mult%10)
-		khz += fsb/2;
-	khz *= 1000;
-	return khz;
-}
-
-
-static int longhaul_get_cpu_mult(void)
-{
-	unsigned long invalue = 0, lo, hi;
-
-	rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi);
-	invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22;
-	if (longhaul_version == TYPE_LONGHAUL_V2 ||
-	    longhaul_version == TYPE_POWERSAVER) {
-		if (lo & (1<<27))
-			invalue += 16;
-	}
-	return eblcr[invalue];
-}
-
-/* For processor with BCR2 MSR */
-
-static void do_longhaul1(unsigned int mults_index)
-{
-	union msr_bcr2 bcr2;
-
-	rdmsrl(MSR_VIA_BCR2, bcr2.val);
-	/* Enable software clock multiplier */
-	bcr2.bits.ESOFTBF = 1;
-	bcr2.bits.CLOCKMUL = mults_index & 0xff;
-
-	/* Sync to timer tick */
-	safe_halt();
-	/* Change frequency on next halt or sleep */
-	wrmsrl(MSR_VIA_BCR2, bcr2.val);
-	/* Invoke transition */
-	ACPI_FLUSH_CPU_CACHE();
-	halt();
-
-	/* Disable software clock multiplier */
-	local_irq_disable();
-	rdmsrl(MSR_VIA_BCR2, bcr2.val);
-	bcr2.bits.ESOFTBF = 0;
-	wrmsrl(MSR_VIA_BCR2, bcr2.val);
-}
-
-/* For processor with Longhaul MSR */
-
-static void do_powersaver(int cx_address, unsigned int mults_index,
-			  unsigned int dir)
-{
-	union msr_longhaul longhaul;
-	u32 t;
-
-	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-	/* Setup new frequency */
-	if (!revid_errata)
-		longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
-	else
-		longhaul.bits.RevisionKey = 0;
-	longhaul.bits.SoftBusRatio = mults_index & 0xf;
-	longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4;
-	/* Setup new voltage */
-	if (can_scale_voltage)
-		longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f;
-	/* Sync to timer tick */
-	safe_halt();
-	/* Raise voltage if necessary */
-	if (can_scale_voltage && dir) {
-		longhaul.bits.EnableSoftVID = 1;
-		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-		/* Change voltage */
-		if (!cx_address) {
-			ACPI_FLUSH_CPU_CACHE();
-			halt();
-		} else {
-			ACPI_FLUSH_CPU_CACHE();
-			/* Invoke C3 */
-			inb(cx_address);
-			/* Dummy op - must do something useless after P_LVL3
-			 * read */
-			t = inl(acpi_gbl_FADT.xpm_timer_block.address);
-		}
-		longhaul.bits.EnableSoftVID = 0;
-		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-	}
-
-	/* Change frequency on next halt or sleep */
-	longhaul.bits.EnableSoftBusRatio = 1;
-	wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-	if (!cx_address) {
-		ACPI_FLUSH_CPU_CACHE();
-		halt();
-	} else {
-		ACPI_FLUSH_CPU_CACHE();
-		/* Invoke C3 */
-		inb(cx_address);
-		/* Dummy op - must do something useless after P_LVL3 read */
-		t = inl(acpi_gbl_FADT.xpm_timer_block.address);
-	}
-	/* Disable bus ratio bit */
-	longhaul.bits.EnableSoftBusRatio = 0;
-	wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-
-	/* Reduce voltage if necessary */
-	if (can_scale_voltage && !dir) {
-		longhaul.bits.EnableSoftVID = 1;
-		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-		/* Change voltage */
-		if (!cx_address) {
-			ACPI_FLUSH_CPU_CACHE();
-			halt();
-		} else {
-			ACPI_FLUSH_CPU_CACHE();
-			/* Invoke C3 */
-			inb(cx_address);
-			/* Dummy op - must do something useless after P_LVL3
-			 * read */
-			t = inl(acpi_gbl_FADT.xpm_timer_block.address);
-		}
-		longhaul.bits.EnableSoftVID = 0;
-		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-	}
-}
-
-/**
- * longhaul_set_cpu_frequency()
- * @mults_index : bitpattern of the new multiplier.
- *
- * Sets a new clock ratio.
- */
-
-static void longhaul_setstate(unsigned int table_index)
-{
-	unsigned int mults_index;
-	int speed, mult;
-	struct cpufreq_freqs freqs;
-	unsigned long flags;
-	unsigned int pic1_mask, pic2_mask;
-	u16 bm_status = 0;
-	u32 bm_timeout = 1000;
-	unsigned int dir = 0;
-
-	mults_index = longhaul_table[table_index].index;
-	/* Safety precautions */
-	mult = mults[mults_index & 0x1f];
-	if (mult == -1)
-		return;
-	speed = calc_speed(mult);
-	if ((speed > highest_speed) || (speed < lowest_speed))
-		return;
-	/* Voltage transition before frequency transition? */
-	if (can_scale_voltage && longhaul_index < table_index)
-		dir = 1;
-
-	freqs.old = calc_speed(longhaul_get_cpu_mult());
-	freqs.new = speed;
-	freqs.cpu = 0; /* longhaul.c is UP only driver */
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	pr_debug("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
-			fsb, mult/10, mult%10, print_speed(speed/1000));
-retry_loop:
-	preempt_disable();
-	local_irq_save(flags);
-
-	pic2_mask = inb(0xA1);
-	pic1_mask = inb(0x21);	/* works on C3. save mask. */
-	outb(0xFF, 0xA1);	/* Overkill */
-	outb(0xFE, 0x21);	/* TMR0 only */
-
-	/* Wait while PCI bus is busy. */
-	if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE
-	    || ((pr != NULL) && pr->flags.bm_control))) {
-		bm_status = inw(acpi_regs_addr);
-		bm_status &= 1 << 4;
-		while (bm_status && bm_timeout) {
-			outw(1 << 4, acpi_regs_addr);
-			bm_timeout--;
-			bm_status = inw(acpi_regs_addr);
-			bm_status &= 1 << 4;
-		}
-	}
-
-	if (longhaul_flags & USE_NORTHBRIDGE) {
-		/* Disable AGP and PCI arbiters */
-		outb(3, 0x22);
-	} else if ((pr != NULL) && pr->flags.bm_control) {
-		/* Disable bus master arbitration */
-		acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
-	}
-	switch (longhaul_version) {
-
-	/*
-	 * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B])
-	 * Software controlled multipliers only.
-	 */
-	case TYPE_LONGHAUL_V1:
-		do_longhaul1(mults_index);
-		break;
-
-	/*
-	 * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C]
-	 *
-	 * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N])
-	 * Nehemiah can do FSB scaling too, but this has never been proven
-	 * to work in practice.
-	 */
-	case TYPE_LONGHAUL_V2:
-	case TYPE_POWERSAVER:
-		if (longhaul_flags & USE_ACPI_C3) {
-			/* Don't allow wakeup */
-			acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
-			do_powersaver(cx->address, mults_index, dir);
-		} else {
-			do_powersaver(0, mults_index, dir);
-		}
-		break;
-	}
-
-	if (longhaul_flags & USE_NORTHBRIDGE) {
-		/* Enable arbiters */
-		outb(0, 0x22);
-	} else if ((pr != NULL) && pr->flags.bm_control) {
-		/* Enable bus master arbitration */
-		acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
-	}
-	outb(pic2_mask, 0xA1);	/* restore mask */
-	outb(pic1_mask, 0x21);
-
-	local_irq_restore(flags);
-	preempt_enable();
-
-	freqs.new = calc_speed(longhaul_get_cpu_mult());
-	/* Check if requested frequency is set. */
-	if (unlikely(freqs.new != speed)) {
-		printk(KERN_INFO PFX "Failed to set requested frequency!\n");
-		/* Revision ID = 1 but processor is expecting revision key
-		 * equal to 0. Jumpers at the bottom of processor will change
-		 * multiplier and FSB, but will not change bits in Longhaul
-		 * MSR nor enable voltage scaling. */
-		if (!revid_errata) {
-			printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" "
-						"option.\n");
-			revid_errata = 1;
-			msleep(200);
-			goto retry_loop;
-		}
-		/* Why ACPI C3 sometimes doesn't work is a mystery for me.
-		 * But it does happen. Processor is entering ACPI C3 state,
-		 * but it doesn't change frequency. I tried poking various
-		 * bits in northbridge registers, but without success. */
-		if (longhaul_flags & USE_ACPI_C3) {
-			printk(KERN_INFO PFX "Disabling ACPI C3 support.\n");
-			longhaul_flags &= ~USE_ACPI_C3;
-			if (revid_errata) {
-				printk(KERN_INFO PFX "Disabling \"Ignore "
-						"Revision ID\" option.\n");
-				revid_errata = 0;
-			}
-			msleep(200);
-			goto retry_loop;
-		}
-		/* This shouldn't happen. Longhaul ver. 2 was reported not
-		 * working on processors without voltage scaling, but with
-		 * RevID = 1. RevID errata will make things right. Just
-		 * to be 100% sure. */
-		if (longhaul_version == TYPE_LONGHAUL_V2) {
-			printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n");
-			longhaul_version = TYPE_LONGHAUL_V1;
-			msleep(200);
-			goto retry_loop;
-		}
-	}
-	/* Report true CPU frequency */
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
-	if (!bm_timeout)
-		printk(KERN_INFO PFX "Warning: Timeout while waiting for "
-				"idle PCI bus.\n");
-}
-
-/*
- * Centaur decided to make life a little more tricky.
- * Only longhaul v1 is allowed to read EBLCR BSEL[0:1].
- * Samuel2 and above have to try and guess what the FSB is.
- * We do this by assuming we booted at maximum multiplier, and interpolate
- * between that value multiplied by possible FSBs and cpu_mhz which
- * was calculated at boot time. Really ugly, but no other way to do this.
- */
-
-#define ROUNDING	0xf
-
-static int guess_fsb(int mult)
-{
-	int speed = cpu_khz / 1000;
-	int i;
-	int speeds[] = { 666, 1000, 1333, 2000 };
-	int f_max, f_min;
-
-	for (i = 0; i < 4; i++) {
-		f_max = ((speeds[i] * mult) + 50) / 100;
-		f_max += (ROUNDING / 2);
-		f_min = f_max - ROUNDING;
-		if ((speed <= f_max) && (speed >= f_min))
-			return speeds[i] / 10;
-	}
-	return 0;
-}
-
-
-static int __cpuinit longhaul_get_ranges(void)
-{
-	unsigned int i, j, k = 0;
-	unsigned int ratio;
-	int mult;
-
-	/* Get current frequency */
-	mult = longhaul_get_cpu_mult();
-	if (mult == -1) {
-		printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n");
-		return -EINVAL;
-	}
-	fsb = guess_fsb(mult);
-	if (fsb == 0) {
-		printk(KERN_INFO PFX "Invalid (reserved) FSB!\n");
-		return -EINVAL;
-	}
-	/* Get max multiplier - as we always did.
-	 * Longhaul MSR is useful only when voltage scaling is enabled.
-	 * C3 is booting at max anyway. */
-	maxmult = mult;
-	/* Get min multiplier */
-	switch (cpu_model) {
-	case CPU_NEHEMIAH:
-		minmult = 50;
-		break;
-	case CPU_NEHEMIAH_C:
-		minmult = 40;
-		break;
-	default:
-		minmult = 30;
-		break;
-	}
-
-	pr_debug("MinMult:%d.%dx MaxMult:%d.%dx\n",
-		 minmult/10, minmult%10, maxmult/10, maxmult%10);
-
-	highest_speed = calc_speed(maxmult);
-	lowest_speed = calc_speed(minmult);
-	pr_debug("FSB:%dMHz  Lowest speed: %s   Highest speed:%s\n", fsb,
-		 print_speed(lowest_speed/1000),
-		 print_speed(highest_speed/1000));
-
-	if (lowest_speed == highest_speed) {
-		printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n");
-		return -EINVAL;
-	}
-	if (lowest_speed > highest_speed) {
-		printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
-			lowest_speed, highest_speed);
-		return -EINVAL;
-	}
-
-	longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table),
-			GFP_KERNEL);
-	if (!longhaul_table)
-		return -ENOMEM;
-
-	for (j = 0; j < numscales; j++) {
-		ratio = mults[j];
-		if (ratio == -1)
-			continue;
-		if (ratio > maxmult || ratio < minmult)
-			continue;
-		longhaul_table[k].frequency = calc_speed(ratio);
-		longhaul_table[k].index	= j;
-		k++;
-	}
-	if (k <= 1) {
-		kfree(longhaul_table);
-		return -ENODEV;
-	}
-	/* Sort */
-	for (j = 0; j < k - 1; j++) {
-		unsigned int min_f, min_i;
-		min_f = longhaul_table[j].frequency;
-		min_i = j;
-		for (i = j + 1; i < k; i++) {
-			if (longhaul_table[i].frequency < min_f) {
-				min_f = longhaul_table[i].frequency;
-				min_i = i;
-			}
-		}
-		if (min_i != j) {
-			swap(longhaul_table[j].frequency,
-			     longhaul_table[min_i].frequency);
-			swap(longhaul_table[j].index,
-			     longhaul_table[min_i].index);
-		}
-	}
-
-	longhaul_table[k].frequency = CPUFREQ_TABLE_END;
-
-	/* Find index we are running on */
-	for (j = 0; j < k; j++) {
-		if (mults[longhaul_table[j].index & 0x1f] == mult) {
-			longhaul_index = j;
-			break;
-		}
-	}
-	return 0;
-}
-
-
-static void __cpuinit longhaul_setup_voltagescaling(void)
-{
-	union msr_longhaul longhaul;
-	struct mV_pos minvid, maxvid, vid;
-	unsigned int j, speed, pos, kHz_step, numvscales;
-	int min_vid_speed;
-
-	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
-	if (!(longhaul.bits.RevisionID & 1)) {
-		printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
-		return;
-	}
-
-	if (!longhaul.bits.VRMRev) {
-		printk(KERN_INFO PFX "VRM 8.5\n");
-		vrm_mV_table = &vrm85_mV[0];
-		mV_vrm_table = &mV_vrm85[0];
-	} else {
-		printk(KERN_INFO PFX "Mobile VRM\n");
-		if (cpu_model < CPU_NEHEMIAH)
-			return;
-		vrm_mV_table = &mobilevrm_mV[0];
-		mV_vrm_table = &mV_mobilevrm[0];
-	}
-
-	minvid = vrm_mV_table[longhaul.bits.MinimumVID];
-	maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
-
-	if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
-		printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
-					"Voltage scaling disabled.\n",
-					minvid.mV/1000, minvid.mV%1000,
-					maxvid.mV/1000, maxvid.mV%1000);
-		return;
-	}
-
-	if (minvid.mV == maxvid.mV) {
-		printk(KERN_INFO PFX "Claims to support voltage scaling but "
-				"min & max are both %d.%03d. "
-				"Voltage scaling disabled\n",
-				maxvid.mV/1000, maxvid.mV%1000);
-		return;
-	}
-
-	/* How many voltage steps*/
-	numvscales = maxvid.pos - minvid.pos + 1;
-	printk(KERN_INFO PFX
-		"Max VID=%d.%03d  "
-		"Min VID=%d.%03d, "
-		"%d possible voltage scales\n",
-		maxvid.mV/1000, maxvid.mV%1000,
-		minvid.mV/1000, minvid.mV%1000,
-		numvscales);
-
-	/* Calculate max frequency at min voltage */
-	j = longhaul.bits.MinMHzBR;
-	if (longhaul.bits.MinMHzBR4)
-		j += 16;
-	min_vid_speed = eblcr[j];
-	if (min_vid_speed == -1)
-		return;
-	switch (longhaul.bits.MinMHzFSB) {
-	case 0:
-		min_vid_speed *= 13333;
-		break;
-	case 1:
-		min_vid_speed *= 10000;
-		break;
-	case 3:
-		min_vid_speed *= 6666;
-		break;
-	default:
-		return;
-		break;
-	}
-	if (min_vid_speed >= highest_speed)
-		return;
-	/* Calculate kHz for one voltage step */
-	kHz_step = (highest_speed - min_vid_speed) / numvscales;
-
-	j = 0;
-	while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
-		speed = longhaul_table[j].frequency;
-		if (speed > min_vid_speed)
-			pos = (speed - min_vid_speed) / kHz_step + minvid.pos;
-		else
-			pos = minvid.pos;
-		longhaul_table[j].index |= mV_vrm_table[pos] << 8;
-		vid = vrm_mV_table[mV_vrm_table[pos]];
-		printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n",
-				speed, j, vid.mV);
-		j++;
-	}
-
-	can_scale_voltage = 1;
-	printk(KERN_INFO PFX "Voltage scaling enabled.\n");
-}
-
-
-static int longhaul_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, longhaul_table);
-}
-
-
-static int longhaul_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq, unsigned int relation)
-{
-	unsigned int table_index = 0;
-	unsigned int i;
-	unsigned int dir = 0;
-	u8 vid, current_vid;
-
-	if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq,
-				relation, &table_index))
-		return -EINVAL;
-
-	/* Don't set same frequency again */
-	if (longhaul_index == table_index)
-		return 0;
-
-	if (!can_scale_voltage)
-		longhaul_setstate(table_index);
-	else {
-		/* On test system voltage transitions exceeding single
-		 * step up or down were turning motherboard off. Both
-		 * "ondemand" and "userspace" are unsafe. C7 is doing
-		 * this in hardware, C3 is old and we need to do this
-		 * in software. */
-		i = longhaul_index;
-		current_vid = (longhaul_table[longhaul_index].index >> 8);
-		current_vid &= 0x1f;
-		if (table_index > longhaul_index)
-			dir = 1;
-		while (i != table_index) {
-			vid = (longhaul_table[i].index >> 8) & 0x1f;
-			if (vid != current_vid) {
-				longhaul_setstate(i);
-				current_vid = vid;
-				msleep(200);
-			}
-			if (dir)
-				i++;
-			else
-				i--;
-		}
-		longhaul_setstate(table_index);
-	}
-	longhaul_index = table_index;
-	return 0;
-}
-
-
-static unsigned int longhaul_get(unsigned int cpu)
-{
-	if (cpu)
-		return 0;
-	return calc_speed(longhaul_get_cpu_mult());
-}
-
-static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
-					  u32 nesting_level,
-					  void *context, void **return_value)
-{
-	struct acpi_device *d;
-
-	if (acpi_bus_get_device(obj_handle, &d))
-		return 0;
-
-	*return_value = acpi_driver_data(d);
-	return 1;
-}
-
-/* VIA don't support PM2 reg, but have something similar */
-static int enable_arbiter_disable(void)
-{
-	struct pci_dev *dev;
-	int status = 1;
-	int reg;
-	u8 pci_cmd;
-
-	/* Find PLE133 host bridge */
-	reg = 0x78;
-	dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0,
-			     NULL);
-	/* Find PM133/VT8605 host bridge */
-	if (dev == NULL)
-		dev = pci_get_device(PCI_VENDOR_ID_VIA,
-				     PCI_DEVICE_ID_VIA_8605_0, NULL);
-	/* Find CLE266 host bridge */
-	if (dev == NULL) {
-		reg = 0x76;
-		dev = pci_get_device(PCI_VENDOR_ID_VIA,
-				     PCI_DEVICE_ID_VIA_862X_0, NULL);
-		/* Find CN400 V-Link host bridge */
-		if (dev == NULL)
-			dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL);
-	}
-	if (dev != NULL) {
-		/* Enable access to port 0x22 */
-		pci_read_config_byte(dev, reg, &pci_cmd);
-		if (!(pci_cmd & 1<<7)) {
-			pci_cmd |= 1<<7;
-			pci_write_config_byte(dev, reg, pci_cmd);
-			pci_read_config_byte(dev, reg, &pci_cmd);
-			if (!(pci_cmd & 1<<7)) {
-				printk(KERN_ERR PFX
-					"Can't enable access to port 0x22.\n");
-				status = 0;
-			}
-		}
-		pci_dev_put(dev);
-		return status;
-	}
-	return 0;
-}
-
-static int longhaul_setup_southbridge(void)
-{
-	struct pci_dev *dev;
-	u8 pci_cmd;
-
-	/* Find VT8235 southbridge */
-	dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
-	if (dev == NULL)
-		/* Find VT8237 southbridge */
-		dev = pci_get_device(PCI_VENDOR_ID_VIA,
-				     PCI_DEVICE_ID_VIA_8237, NULL);
-	if (dev != NULL) {
-		/* Set transition time to max */
-		pci_read_config_byte(dev, 0xec, &pci_cmd);
-		pci_cmd &= ~(1 << 2);
-		pci_write_config_byte(dev, 0xec, pci_cmd);
-		pci_read_config_byte(dev, 0xe4, &pci_cmd);
-		pci_cmd &= ~(1 << 7);
-		pci_write_config_byte(dev, 0xe4, pci_cmd);
-		pci_read_config_byte(dev, 0xe5, &pci_cmd);
-		pci_cmd |= 1 << 7;
-		pci_write_config_byte(dev, 0xe5, pci_cmd);
-		/* Get address of ACPI registers block*/
-		pci_read_config_byte(dev, 0x81, &pci_cmd);
-		if (pci_cmd & 1 << 7) {
-			pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
-			acpi_regs_addr &= 0xff00;
-			printk(KERN_INFO PFX "ACPI I/O at 0x%x\n",
-					acpi_regs_addr);
-		}
-
-		pci_dev_put(dev);
-		return 1;
-	}
-	return 0;
-}
-
-static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	char *cpuname = NULL;
-	int ret;
-	u32 lo, hi;
-
-	/* Check what we have on this motherboard */
-	switch (c->x86_model) {
-	case 6:
-		cpu_model = CPU_SAMUEL;
-		cpuname = "C3 'Samuel' [C5A]";
-		longhaul_version = TYPE_LONGHAUL_V1;
-		memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
-		memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr));
-		break;
-
-	case 7:
-		switch (c->x86_mask) {
-		case 0:
-			longhaul_version = TYPE_LONGHAUL_V1;
-			cpu_model = CPU_SAMUEL2;
-			cpuname = "C3 'Samuel 2' [C5B]";
-			/* Note, this is not a typo, early Samuel2's had
-			 * Samuel1 ratios. */
-			memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
-			memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
-			break;
-		case 1 ... 15:
-			longhaul_version = TYPE_LONGHAUL_V2;
-			if (c->x86_mask < 8) {
-				cpu_model = CPU_SAMUEL2;
-				cpuname = "C3 'Samuel 2' [C5B]";
-			} else {
-				cpu_model = CPU_EZRA;
-				cpuname = "C3 'Ezra' [C5C]";
-			}
-			memcpy(mults, ezra_mults, sizeof(ezra_mults));
-			memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr));
-			break;
-		}
-		break;
-
-	case 8:
-		cpu_model = CPU_EZRA_T;
-		cpuname = "C3 'Ezra-T' [C5M]";
-		longhaul_version = TYPE_POWERSAVER;
-		numscales = 32;
-		memcpy(mults, ezrat_mults, sizeof(ezrat_mults));
-		memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr));
-		break;
-
-	case 9:
-		longhaul_version = TYPE_POWERSAVER;
-		numscales = 32;
-		memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
-		memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
-		switch (c->x86_mask) {
-		case 0 ... 1:
-			cpu_model = CPU_NEHEMIAH;
-			cpuname = "C3 'Nehemiah A' [C5XLOE]";
-			break;
-		case 2 ... 4:
-			cpu_model = CPU_NEHEMIAH;
-			cpuname = "C3 'Nehemiah B' [C5XLOH]";
-			break;
-		case 5 ... 15:
-			cpu_model = CPU_NEHEMIAH_C;
-			cpuname = "C3 'Nehemiah C' [C5P]";
-			break;
-		}
-		break;
-
-	default:
-		cpuname = "Unknown";
-		break;
-	}
-	/* Check Longhaul ver. 2 */
-	if (longhaul_version == TYPE_LONGHAUL_V2) {
-		rdmsr(MSR_VIA_LONGHAUL, lo, hi);
-		if (lo == 0 && hi == 0)
-			/* Looks like MSR isn't present */
-			longhaul_version = TYPE_LONGHAUL_V1;
-	}
-
-	printk(KERN_INFO PFX "VIA %s CPU detected.  ", cpuname);
-	switch (longhaul_version) {
-	case TYPE_LONGHAUL_V1:
-	case TYPE_LONGHAUL_V2:
-		printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version);
-		break;
-	case TYPE_POWERSAVER:
-		printk(KERN_CONT "Powersaver supported.\n");
-		break;
-	};
-
-	/* Doesn't hurt */
-	longhaul_setup_southbridge();
-
-	/* Find ACPI data for processor */
-	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
-				ACPI_UINT32_MAX, &longhaul_walk_callback, NULL,
-				NULL, (void *)&pr);
-
-	/* Check ACPI support for C3 state */
-	if (pr != NULL && longhaul_version == TYPE_POWERSAVER) {
-		cx = &pr->power.states[ACPI_STATE_C3];
-		if (cx->address > 0 && cx->latency <= 1000)
-			longhaul_flags |= USE_ACPI_C3;
-	}
-	/* Disable if it isn't working */
-	if (disable_acpi_c3)
-		longhaul_flags &= ~USE_ACPI_C3;
-	/* Check if northbridge is friendly */
-	if (enable_arbiter_disable())
-		longhaul_flags |= USE_NORTHBRIDGE;
-
-	/* Check ACPI support for bus master arbiter disable */
-	if (!(longhaul_flags & USE_ACPI_C3
-	     || longhaul_flags & USE_NORTHBRIDGE)
-	    && ((pr == NULL) || !(pr->flags.bm_control))) {
-		printk(KERN_ERR PFX
-			"No ACPI support. Unsupported northbridge.\n");
-		return -ENODEV;
-	}
-
-	if (longhaul_flags & USE_NORTHBRIDGE)
-		printk(KERN_INFO PFX "Using northbridge support.\n");
-	if (longhaul_flags & USE_ACPI_C3)
-		printk(KERN_INFO PFX "Using ACPI support.\n");
-
-	ret = longhaul_get_ranges();
-	if (ret != 0)
-		return ret;
-
-	if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0))
-		longhaul_setup_voltagescaling();
-
-	policy->cpuinfo.transition_latency = 200000;	/* nsec */
-	policy->cur = calc_speed(longhaul_get_cpu_mult());
-
-	ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table);
-	if (ret)
-		return ret;
-
-	cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu);
-
-	return 0;
-}
-
-static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-static struct freq_attr *longhaul_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver longhaul_driver = {
-	.verify	= longhaul_verify,
-	.target	= longhaul_target,
-	.get	= longhaul_get,
-	.init	= longhaul_cpu_init,
-	.exit	= __devexit_p(longhaul_cpu_exit),
-	.name	= "longhaul",
-	.owner	= THIS_MODULE,
-	.attr	= longhaul_attr,
-};
-
-
-static int __init longhaul_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6)
-		return -ENODEV;
-
-#ifdef CONFIG_SMP
-	if (num_online_cpus() > 1) {
-		printk(KERN_ERR PFX "More than 1 CPU detected, "
-				"longhaul disabled.\n");
-		return -ENODEV;
-	}
-#endif
-#ifdef CONFIG_X86_IO_APIC
-	if (cpu_has_apic) {
-		printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
-				"broken in this configuration.\n");
-		return -ENODEV;
-	}
-#endif
-	switch (c->x86_model) {
-	case 6 ... 9:
-		return cpufreq_register_driver(&longhaul_driver);
-	case 10:
-		printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
-	default:
-		;
-	}
-
-	return -ENODEV;
-}
-
-
-static void __exit longhaul_exit(void)
-{
-	int i;
-
-	for (i = 0; i < numscales; i++) {
-		if (mults[i] == maxmult) {
-			longhaul_setstate(i);
-			break;
-		}
-	}
-
-	cpufreq_unregister_driver(&longhaul_driver);
-	kfree(longhaul_table);
-}
-
-/* Even if BIOS is exporting ACPI C3 state, and it is used
- * with success when CPU is idle, this state doesn't
- * trigger frequency transition in some cases. */
-module_param(disable_acpi_c3, int, 0644);
-MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
-/* Change CPU voltage with frequency. Very useful to save
- * power, but most VIA C3 processors aren't supporting it. */
-module_param(scale_voltage, int, 0644);
-MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
-/* Force revision key to 0 for processors which doesn't
- * support voltage scaling, but are introducing itself as
- * such. */
-module_param(revid_errata, int, 0644);
-MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
-
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
-MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors.");
-MODULE_LICENSE("GPL");
-
-late_initcall(longhaul_init);
-module_exit(longhaul_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h
deleted file mode 100644
index cbf48fbca881..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.h
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- *  longhaul.h
- *  (C) 2003 Dave Jones.
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  VIA-specific information
- */
-
-union msr_bcr2 {
-	struct {
-		unsigned Reseved:19,	// 18:0
-		ESOFTBF:1,		// 19
-		Reserved2:3,		// 22:20
-		CLOCKMUL:4,		// 26:23
-		Reserved3:5;		// 31:27
-	} bits;
-	unsigned long val;
-};
-
-union msr_longhaul {
-	struct {
-		unsigned RevisionID:4,	// 3:0
-		RevisionKey:4,		// 7:4
-		EnableSoftBusRatio:1,	// 8
-		EnableSoftVID:1,	// 9
-		EnableSoftBSEL:1,	// 10
-		Reserved:3,		// 11:13
-		SoftBusRatio4:1,	// 14
-		VRMRev:1,		// 15
-		SoftBusRatio:4,		// 19:16
-		SoftVID:5,		// 24:20
-		Reserved2:3,		// 27:25
-		SoftBSEL:2,		// 29:28
-		Reserved3:2,		// 31:30
-		MaxMHzBR:4,		// 35:32
-		MaximumVID:5,		// 40:36
-		MaxMHzFSB:2,		// 42:41
-		MaxMHzBR4:1,		// 43
-		Reserved4:4,		// 47:44
-		MinMHzBR:4,		// 51:48
-		MinimumVID:5,		// 56:52
-		MinMHzFSB:2,		// 58:57
-		MinMHzBR4:1,		// 59
-		Reserved5:4;		// 63:60
-	} bits;
-	unsigned long long val;
-};
-
-/*
- * Clock ratio tables. Div/Mod by 10 to get ratio.
- * The eblcr values specify the ratio read from the CPU.
- * The mults values specify what to write to the CPU.
- */
-
-/*
- * VIA C3 Samuel 1  & Samuel 2 (stepping 0)
- */
-static const int __cpuinitdata samuel1_mults[16] = {
-	-1, /* 0000 -> RESERVED */
-	30, /* 0001 ->  3.0x */
-	40, /* 0010 ->  4.0x */
-	-1, /* 0011 -> RESERVED */
-	-1, /* 0100 -> RESERVED */
-	35, /* 0101 ->  3.5x */
-	45, /* 0110 ->  4.5x */
-	55, /* 0111 ->  5.5x */
-	60, /* 1000 ->  6.0x */
-	70, /* 1001 ->  7.0x */
-	80, /* 1010 ->  8.0x */
-	50, /* 1011 ->  5.0x */
-	65, /* 1100 ->  6.5x */
-	75, /* 1101 ->  7.5x */
-	-1, /* 1110 -> RESERVED */
-	-1, /* 1111 -> RESERVED */
-};
-
-static const int __cpuinitdata samuel1_eblcr[16] = {
-	50, /* 0000 -> RESERVED */
-	30, /* 0001 ->  3.0x */
-	40, /* 0010 ->  4.0x */
-	-1, /* 0011 -> RESERVED */
-	55, /* 0100 ->  5.5x */
-	35, /* 0101 ->  3.5x */
-	45, /* 0110 ->  4.5x */
-	-1, /* 0111 -> RESERVED */
-	-1, /* 1000 -> RESERVED */
-	70, /* 1001 ->  7.0x */
-	80, /* 1010 ->  8.0x */
-	60, /* 1011 ->  6.0x */
-	-1, /* 1100 -> RESERVED */
-	75, /* 1101 ->  7.5x */
-	-1, /* 1110 -> RESERVED */
-	65, /* 1111 ->  6.5x */
-};
-
-/*
- * VIA C3 Samuel2 Stepping 1->15
- */
-static const int __cpuinitdata samuel2_eblcr[16] = {
-	50,  /* 0000 ->  5.0x */
-	30,  /* 0001 ->  3.0x */
-	40,  /* 0010 ->  4.0x */
-	100, /* 0011 -> 10.0x */
-	55,  /* 0100 ->  5.5x */
-	35,  /* 0101 ->  3.5x */
-	45,  /* 0110 ->  4.5x */
-	110, /* 0111 -> 11.0x */
-	90,  /* 1000 ->  9.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	60,  /* 1011 ->  6.0x */
-	120, /* 1100 -> 12.0x */
-	75,  /* 1101 ->  7.5x */
-	130, /* 1110 -> 13.0x */
-	65,  /* 1111 ->  6.5x */
-};
-
-/*
- * VIA C3 Ezra
- */
-static const int __cpuinitdata ezra_mults[16] = {
-	100, /* 0000 -> 10.0x */
-	30,  /* 0001 ->  3.0x */
-	40,  /* 0010 ->  4.0x */
-	90,  /* 0011 ->  9.0x */
-	95,  /* 0100 ->  9.5x */
-	35,  /* 0101 ->  3.5x */
-	45,  /* 0110 ->  4.5x */
-	55,  /* 0111 ->  5.5x */
-	60,  /* 1000 ->  6.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	50,  /* 1011 ->  5.0x */
-	65,  /* 1100 ->  6.5x */
-	75,  /* 1101 ->  7.5x */
-	85,  /* 1110 ->  8.5x */
-	120, /* 1111 -> 12.0x */
-};
-
-static const int __cpuinitdata ezra_eblcr[16] = {
-	50,  /* 0000 ->  5.0x */
-	30,  /* 0001 ->  3.0x */
-	40,  /* 0010 ->  4.0x */
-	100, /* 0011 -> 10.0x */
-	55,  /* 0100 ->  5.5x */
-	35,  /* 0101 ->  3.5x */
-	45,  /* 0110 ->  4.5x */
-	95,  /* 0111 ->  9.5x */
-	90,  /* 1000 ->  9.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	60,  /* 1011 ->  6.0x */
-	120, /* 1100 -> 12.0x */
-	75,  /* 1101 ->  7.5x */
-	85,  /* 1110 ->  8.5x */
-	65,  /* 1111 ->  6.5x */
-};
-
-/*
- * VIA C3 (Ezra-T) [C5M].
- */
-static const int __cpuinitdata ezrat_mults[32] = {
-	100, /* 0000 -> 10.0x */
-	30,  /* 0001 ->  3.0x */
-	40,  /* 0010 ->  4.0x */
-	90,  /* 0011 ->  9.0x */
-	95,  /* 0100 ->  9.5x */
-	35,  /* 0101 ->  3.5x */
-	45,  /* 0110 ->  4.5x */
-	55,  /* 0111 ->  5.5x */
-	60,  /* 1000 ->  6.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	50,  /* 1011 ->  5.0x */
-	65,  /* 1100 ->  6.5x */
-	75,  /* 1101 ->  7.5x */
-	85,  /* 1110 ->  8.5x */
-	120, /* 1111 ->  12.0x */
-
-	-1,  /* 0000 -> RESERVED (10.0x) */
-	110, /* 0001 -> 11.0x */
-	-1, /* 0010 -> 12.0x */
-	-1,  /* 0011 -> RESERVED (9.0x)*/
-	105, /* 0100 -> 10.5x */
-	115, /* 0101 -> 11.5x */
-	125, /* 0110 -> 12.5x */
-	135, /* 0111 -> 13.5x */
-	140, /* 1000 -> 14.0x */
-	150, /* 1001 -> 15.0x */
-	160, /* 1010 -> 16.0x */
-	130, /* 1011 -> 13.0x */
-	145, /* 1100 -> 14.5x */
-	155, /* 1101 -> 15.5x */
-	-1,  /* 1110 -> RESERVED (13.0x) */
-	-1,  /* 1111 -> RESERVED (12.0x) */
-};
-
-static const int __cpuinitdata ezrat_eblcr[32] = {
-	50,  /* 0000 ->  5.0x */
-	30,  /* 0001 ->  3.0x */
-	40,  /* 0010 ->  4.0x */
-	100, /* 0011 -> 10.0x */
-	55,  /* 0100 ->  5.5x */
-	35,  /* 0101 ->  3.5x */
-	45,  /* 0110 ->  4.5x */
-	95,  /* 0111 ->  9.5x */
-	90,  /* 1000 ->  9.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	60,  /* 1011 ->  6.0x */
-	120, /* 1100 -> 12.0x */
-	75,  /* 1101 ->  7.5x */
-	85,  /* 1110 ->  8.5x */
-	65,  /* 1111 ->  6.5x */
-
-	-1,  /* 0000 -> RESERVED (9.0x) */
-	110, /* 0001 -> 11.0x */
-	120, /* 0010 -> 12.0x */
-	-1,  /* 0011 -> RESERVED (10.0x)*/
-	135, /* 0100 -> 13.5x */
-	115, /* 0101 -> 11.5x */
-	125, /* 0110 -> 12.5x */
-	105, /* 0111 -> 10.5x */
-	130, /* 1000 -> 13.0x */
-	150, /* 1001 -> 15.0x */
-	160, /* 1010 -> 16.0x */
-	140, /* 1011 -> 14.0x */
-	-1,  /* 1100 -> RESERVED (12.0x) */
-	155, /* 1101 -> 15.5x */
-	-1,  /* 1110 -> RESERVED (13.0x) */
-	145, /* 1111 -> 14.5x */
-};
-
-/*
- * VIA C3 Nehemiah */
-
-static const int __cpuinitdata nehemiah_mults[32] = {
-	100, /* 0000 -> 10.0x */
-	-1, /* 0001 -> 16.0x */
-	40,  /* 0010 ->  4.0x */
-	90,  /* 0011 ->  9.0x */
-	95,  /* 0100 ->  9.5x */
-	-1,  /* 0101 ->  RESERVED */
-	45,  /* 0110 ->  4.5x */
-	55,  /* 0111 ->  5.5x */
-	60,  /* 1000 ->  6.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	50,  /* 1011 ->  5.0x */
-	65,  /* 1100 ->  6.5x */
-	75,  /* 1101 ->  7.5x */
-	85,  /* 1110 ->  8.5x */
-	120, /* 1111 -> 12.0x */
-	-1, /* 0000 -> 10.0x */
-	110, /* 0001 -> 11.0x */
-	-1, /* 0010 -> 12.0x */
-	-1,  /* 0011 ->  9.0x */
-	105, /* 0100 -> 10.5x */
-	115, /* 0101 -> 11.5x */
-	125, /* 0110 -> 12.5x */
-	135, /* 0111 -> 13.5x */
-	140, /* 1000 -> 14.0x */
-	150, /* 1001 -> 15.0x */
-	160, /* 1010 -> 16.0x */
-	130, /* 1011 -> 13.0x */
-	145, /* 1100 -> 14.5x */
-	155, /* 1101 -> 15.5x */
-	-1,  /* 1110 -> RESERVED (13.0x) */
-	-1, /* 1111 -> 12.0x */
-};
-
-static const int __cpuinitdata nehemiah_eblcr[32] = {
-	50,  /* 0000 ->  5.0x */
-	160, /* 0001 -> 16.0x */
-	40,  /* 0010 ->  4.0x */
-	100, /* 0011 -> 10.0x */
-	55,  /* 0100 ->  5.5x */
-	-1,  /* 0101 ->  RESERVED */
-	45,  /* 0110 ->  4.5x */
-	95,  /* 0111 ->  9.5x */
-	90,  /* 1000 ->  9.0x */
-	70,  /* 1001 ->  7.0x */
-	80,  /* 1010 ->  8.0x */
-	60,  /* 1011 ->  6.0x */
-	120, /* 1100 -> 12.0x */
-	75,  /* 1101 ->  7.5x */
-	85,  /* 1110 ->  8.5x */
-	65,  /* 1111 ->  6.5x */
-	90,  /* 0000 ->  9.0x */
-	110, /* 0001 -> 11.0x */
-	120, /* 0010 -> 12.0x */
-	100, /* 0011 -> 10.0x */
-	135, /* 0100 -> 13.5x */
-	115, /* 0101 -> 11.5x */
-	125, /* 0110 -> 12.5x */
-	105, /* 0111 -> 10.5x */
-	130, /* 1000 -> 13.0x */
-	150, /* 1001 -> 15.0x */
-	160, /* 1010 -> 16.0x */
-	140, /* 1011 -> 14.0x */
-	120, /* 1100 -> 12.0x */
-	155, /* 1101 -> 15.5x */
-	-1,  /* 1110 -> RESERVED (13.0x) */
-	145 /* 1111 -> 14.5x */
-};
-
-/*
- * Voltage scales. Div/Mod by 1000 to get actual voltage.
- * Which scale to use depends on the VRM type in use.
- */
-
-struct mV_pos {
-	unsigned short mV;
-	unsigned short pos;
-};
-
-static const struct mV_pos __cpuinitdata vrm85_mV[32] = {
-	{1250, 8},	{1200, 6},	{1150, 4},	{1100, 2},
-	{1050, 0},	{1800, 30},	{1750, 28},	{1700, 26},
-	{1650, 24},	{1600, 22},	{1550, 20},	{1500, 18},
-	{1450, 16},	{1400, 14},	{1350, 12},	{1300, 10},
-	{1275, 9},	{1225, 7},	{1175, 5},	{1125, 3},
-	{1075, 1},	{1825, 31},	{1775, 29},	{1725, 27},
-	{1675, 25},	{1625, 23},	{1575, 21},	{1525, 19},
-	{1475, 17},	{1425, 15},	{1375, 13},	{1325, 11}
-};
-
-static const unsigned char __cpuinitdata mV_vrm85[32] = {
-	0x04,	0x14,	0x03,	0x13,	0x02,	0x12,	0x01,	0x11,
-	0x00,	0x10,	0x0f,	0x1f,	0x0e,	0x1e,	0x0d,	0x1d,
-	0x0c,	0x1c,	0x0b,	0x1b,	0x0a,	0x1a,	0x09,	0x19,
-	0x08,	0x18,	0x07,	0x17,	0x06,	0x16,	0x05,	0x15
-};
-
-static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = {
-	{1750, 31},	{1700, 30},	{1650, 29},	{1600, 28},
-	{1550, 27},	{1500, 26},	{1450, 25},	{1400, 24},
-	{1350, 23},	{1300, 22},	{1250, 21},	{1200, 20},
-	{1150, 19},	{1100, 18},	{1050, 17},	{1000, 16},
-	{975, 15},	{950, 14},	{925, 13},	{900, 12},
-	{875, 11},	{850, 10},	{825, 9},	{800, 8},
-	{775, 7},	{750, 6},	{725, 5},	{700, 4},
-	{675, 3},	{650, 2},	{625, 1},	{600, 0}
-};
-
-static const unsigned char __cpuinitdata mV_mobilevrm[32] = {
-	0x1f,	0x1e,	0x1d,	0x1c,	0x1b,	0x1a,	0x19,	0x18,
-	0x17,	0x16,	0x15,	0x14,	0x13,	0x12,	0x11,	0x10,
-	0x0f,	0x0e,	0x0d,	0x0c,	0x0b,	0x0a,	0x09,	0x08,
-	0x07,	0x06,	0x05,	0x04,	0x03,	0x02,	0x01,	0x00
-};
-
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
deleted file mode 100644
index 34ea359b370e..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * (C) 2002 - 2003  Dominik Brodowski <linux@brodo.de>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/timex.h>
-
-#include <asm/msr.h>
-#include <asm/processor.h>
-
-static struct cpufreq_driver	longrun_driver;
-
-/**
- * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz
- * values into per cent values. In TMTA microcode, the following is valid:
- * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
- */
-static unsigned int longrun_low_freq, longrun_high_freq;
-
-
-/**
- * longrun_get_policy - get the current LongRun policy
- * @policy: struct cpufreq_policy where current policy is written into
- *
- * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
- * and MSR_TMTA_LONGRUN_CTRL
- */
-static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy)
-{
-	u32 msr_lo, msr_hi;
-
-	rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
-	pr_debug("longrun flags are %x - %x\n", msr_lo, msr_hi);
-	if (msr_lo & 0x01)
-		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
-	else
-		policy->policy = CPUFREQ_POLICY_POWERSAVE;
-
-	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-	pr_debug("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
-	msr_lo &= 0x0000007F;
-	msr_hi &= 0x0000007F;
-
-	if (longrun_high_freq <= longrun_low_freq) {
-		/* Assume degenerate Longrun table */
-		policy->min = policy->max = longrun_high_freq;
-	} else {
-		policy->min = longrun_low_freq + msr_lo *
-			((longrun_high_freq - longrun_low_freq) / 100);
-		policy->max = longrun_low_freq + msr_hi *
-			((longrun_high_freq - longrun_low_freq) / 100);
-	}
-	policy->cpu = 0;
-}
-
-
-/**
- * longrun_set_policy - sets a new CPUFreq policy
- * @policy: new policy
- *
- * Sets a new CPUFreq policy on LongRun-capable processors. This function
- * has to be called with cpufreq_driver locked.
- */
-static int longrun_set_policy(struct cpufreq_policy *policy)
-{
-	u32 msr_lo, msr_hi;
-	u32 pctg_lo, pctg_hi;
-
-	if (!policy)
-		return -EINVAL;
-
-	if (longrun_high_freq <= longrun_low_freq) {
-		/* Assume degenerate Longrun table */
-		pctg_lo = pctg_hi = 100;
-	} else {
-		pctg_lo = (policy->min - longrun_low_freq) /
-			((longrun_high_freq - longrun_low_freq) / 100);
-		pctg_hi = (policy->max - longrun_low_freq) /
-			((longrun_high_freq - longrun_low_freq) / 100);
-	}
-
-	if (pctg_hi > 100)
-		pctg_hi = 100;
-	if (pctg_lo > pctg_hi)
-		pctg_lo = pctg_hi;
-
-	/* performance or economy mode */
-	rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
-	msr_lo &= 0xFFFFFFFE;
-	switch (policy->policy) {
-	case CPUFREQ_POLICY_PERFORMANCE:
-		msr_lo |= 0x00000001;
-		break;
-	case CPUFREQ_POLICY_POWERSAVE:
-		break;
-	}
-	wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
-
-	/* lower and upper boundary */
-	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-	msr_lo &= 0xFFFFFF80;
-	msr_hi &= 0xFFFFFF80;
-	msr_lo |= pctg_lo;
-	msr_hi |= pctg_hi;
-	wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-
-	return 0;
-}
-
-
-/**
- * longrun_verify_poliy - verifies a new CPUFreq policy
- * @policy: the policy to verify
- *
- * Validates a new CPUFreq policy. This function has to be called with
- * cpufreq_driver locked.
- */
-static int longrun_verify_policy(struct cpufreq_policy *policy)
-{
-	if (!policy)
-		return -EINVAL;
-
-	policy->cpu = 0;
-	cpufreq_verify_within_limits(policy,
-		policy->cpuinfo.min_freq,
-		policy->cpuinfo.max_freq);
-
-	if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
-	    (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
-		return -EINVAL;
-
-	return 0;
-}
-
-static unsigned int longrun_get(unsigned int cpu)
-{
-	u32 eax, ebx, ecx, edx;
-
-	if (cpu)
-		return 0;
-
-	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
-	pr_debug("cpuid eax is %u\n", eax);
-
-	return eax * 1000;
-}
-
-/**
- * longrun_determine_freqs - determines the lowest and highest possible core frequency
- * @low_freq: an int to put the lowest frequency into
- * @high_freq: an int to put the highest frequency into
- *
- * Determines the lowest and highest possible core frequencies on this CPU.
- * This is necessary to calculate the performance percentage according to
- * TMTA rules:
- * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
- */
-static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
-						      unsigned int *high_freq)
-{
-	u32 msr_lo, msr_hi;
-	u32 save_lo, save_hi;
-	u32 eax, ebx, ecx, edx;
-	u32 try_hi;
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if (!low_freq || !high_freq)
-		return -EINVAL;
-
-	if (cpu_has(c, X86_FEATURE_LRTI)) {
-		/* if the LongRun Table Interface is present, the
-		 * detection is a bit easier:
-		 * For minimum frequency, read out the maximum
-		 * level (msr_hi), write that into "currently
-		 * selected level", and read out the frequency.
-		 * For maximum frequency, read out level zero.
-		 */
-		/* minimum */
-		rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi);
-		wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi);
-		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
-		*low_freq = msr_lo * 1000; /* to kHz */
-
-		/* maximum */
-		wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi);
-		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
-		*high_freq = msr_lo * 1000; /* to kHz */
-
-		pr_debug("longrun table interface told %u - %u kHz\n",
-				*low_freq, *high_freq);
-
-		if (*low_freq > *high_freq)
-			*low_freq = *high_freq;
-		return 0;
-	}
-
-	/* set the upper border to the value determined during TSC init */
-	*high_freq = (cpu_khz / 1000);
-	*high_freq = *high_freq * 1000;
-	pr_debug("high frequency is %u kHz\n", *high_freq);
-
-	/* get current borders */
-	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-	save_lo = msr_lo & 0x0000007F;
-	save_hi = msr_hi & 0x0000007F;
-
-	/* if current perf_pctg is larger than 90%, we need to decrease the
-	 * upper limit to make the calculation more accurate.
-	 */
-	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
-	/* try decreasing in 10% steps, some processors react only
-	 * on some barrier values */
-	for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) {
-		/* set to 0 to try_hi perf_pctg */
-		msr_lo &= 0xFFFFFF80;
-		msr_hi &= 0xFFFFFF80;
-		msr_hi |= try_hi;
-		wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
-
-		/* read out current core MHz and current perf_pctg */
-		cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
-
-		/* restore values */
-		wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi);
-	}
-	pr_debug("percentage is %u %%, freq is %u MHz\n", ecx, eax);
-
-	/* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
-	 * eqals
-	 * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
-	 *
-	 * high_freq * perf_pctg is stored tempoarily into "ebx".
-	 */
-	ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */
-
-	if ((ecx > 95) || (ecx == 0) || (eax < ebx))
-		return -EIO;
-
-	edx = ((eax - ebx) * 100) / (100 - ecx);
-	*low_freq = edx * 1000; /* back to kHz */
-
-	pr_debug("low frequency is %u kHz\n", *low_freq);
-
-	if (*low_freq > *high_freq)
-		*low_freq = *high_freq;
-
-	return 0;
-}
-
-
-static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy)
-{
-	int result = 0;
-
-	/* capability check */
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	/* detect low and high frequency */
-	result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq);
-	if (result)
-		return result;
-
-	/* cpuinfo and default policy values */
-	policy->cpuinfo.min_freq = longrun_low_freq;
-	policy->cpuinfo.max_freq = longrun_high_freq;
-	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-	longrun_get_policy(policy);
-
-	return 0;
-}
-
-
-static struct cpufreq_driver longrun_driver = {
-	.flags		= CPUFREQ_CONST_LOOPS,
-	.verify		= longrun_verify_policy,
-	.setpolicy	= longrun_set_policy,
-	.get		= longrun_get,
-	.init		= longrun_cpu_init,
-	.name		= "longrun",
-	.owner		= THIS_MODULE,
-};
-
-
-/**
- * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver
- *
- * Initializes the LongRun support.
- */
-static int __init longrun_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if (c->x86_vendor != X86_VENDOR_TRANSMETA ||
-	    !cpu_has(c, X86_FEATURE_LONGRUN))
-		return -ENODEV;
-
-	return cpufreq_register_driver(&longrun_driver);
-}
-
-
-/**
- * longrun_exit - unregisters LongRun support
- */
-static void __exit longrun_exit(void)
-{
-	cpufreq_unregister_driver(&longrun_driver);
-}
-
-
-MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and "
-		"Efficeon processors.");
-MODULE_LICENSE("GPL");
-
-module_init(longrun_init);
-module_exit(longrun_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c
deleted file mode 100644
index 911e193018ae..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/mperf.c
+++ /dev/null
@@ -1,51 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
-
-#include "mperf.h"
-
-static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
-
-/* Called via smp_call_function_single(), on the target CPU */
-static void read_measured_perf_ctrs(void *_cur)
-{
-	struct aperfmperf *am = _cur;
-
-	get_aperfmperf(am);
-}
-
-/*
- * Return the measured active (C0) frequency on this CPU since last call
- * to this function.
- * Input: cpu number
- * Return: Average CPU frequency in terms of max frequency (zero on error)
- *
- * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
- * over a period of time, while CPU is in C0 state.
- * IA32_MPERF counts at the rate of max advertised frequency
- * IA32_APERF counts at the rate of actual CPU frequency
- * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
- * no meaning should be associated with absolute values of these MSRs.
- */
-unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
-					unsigned int cpu)
-{
-	struct aperfmperf perf;
-	unsigned long ratio;
-	unsigned int retval;
-
-	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
-		return 0;
-
-	ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
-	per_cpu(acfreq_old_perf, cpu) = perf;
-
-	retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
-
-	return retval;
-}
-EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf);
-MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h
deleted file mode 100644
index 5dbf2950dc22..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/mperf.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- *  (c) 2010 Advanced Micro Devices, Inc.
- *  Your use of this code is subject to the terms and conditions of the
- *  GNU general public license version 2. See "COPYING" or
- *  http://www.gnu.org/licenses/gpl.html
- */
-
-unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
-					unsigned int cpu);
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
deleted file mode 100644
index 6be3e0760c26..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- *	Pentium 4/Xeon CPU on demand clock modulation/speed scaling
- *	(C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
- *	(C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
- *	(C) 2002 Arjan van de Ven <arjanv@redhat.com>
- *	(C) 2002 Tora T. Engstad
- *	All Rights Reserved
- *
- *	This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
- *      The author(s) of this software shall not be held liable for damages
- *      of any nature resulting due to the use of this software. This
- *      software is provided AS-IS with no warranties.
- *
- *	Date		Errata			Description
- *	20020525	N44, O17	12.5% or 25% DC causes lockup
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/cpufreq.h>
-#include <linux/cpumask.h>
-#include <linux/timex.h>
-
-#include <asm/processor.h>
-#include <asm/msr.h>
-#include <asm/timer.h>
-
-#include "speedstep-lib.h"
-
-#define PFX	"p4-clockmod: "
-
-/*
- * Duty Cycle (3bits), note DC_DISABLE is not specified in
- * intel docs i just use it to mean disable
- */
-enum {
-	DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT,
-	DC_64PT, DC_75PT, DC_88PT, DC_DISABLE
-};
-
-#define DC_ENTRIES	8
-
-
-static int has_N44_O17_errata[NR_CPUS];
-static unsigned int stock_freq;
-static struct cpufreq_driver p4clockmod_driver;
-static unsigned int cpufreq_p4_get(unsigned int cpu);
-
-static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
-{
-	u32 l, h;
-
-	if (!cpu_online(cpu) ||
-	    (newstate > DC_DISABLE) || (newstate == DC_RESV))
-		return -EINVAL;
-
-	rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
-
-	if (l & 0x01)
-		pr_debug("CPU#%d currently thermal throttled\n", cpu);
-
-	if (has_N44_O17_errata[cpu] &&
-	    (newstate == DC_25PT || newstate == DC_DFLT))
-		newstate = DC_38PT;
-
-	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
-	if (newstate == DC_DISABLE) {
-		pr_debug("CPU#%d disabling modulation\n", cpu);
-		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
-	} else {
-		pr_debug("CPU#%d setting duty cycle to %d%%\n",
-			cpu, ((125 * newstate) / 10));
-		/* bits 63 - 5	: reserved
-		 * bit  4	: enable/disable
-		 * bits 3-1	: duty cycle
-		 * bit  0	: reserved
-		 */
-		l = (l & ~14);
-		l = l | (1<<4) | ((newstate & 0x7)<<1);
-		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h);
-	}
-
-	return 0;
-}
-
-
-static struct cpufreq_frequency_table p4clockmod_table[] = {
-	{DC_RESV, CPUFREQ_ENTRY_INVALID},
-	{DC_DFLT, 0},
-	{DC_25PT, 0},
-	{DC_38PT, 0},
-	{DC_50PT, 0},
-	{DC_64PT, 0},
-	{DC_75PT, 0},
-	{DC_88PT, 0},
-	{DC_DISABLE, 0},
-	{DC_RESV, CPUFREQ_TABLE_END},
-};
-
-
-static int cpufreq_p4_target(struct cpufreq_policy *policy,
-			     unsigned int target_freq,
-			     unsigned int relation)
-{
-	unsigned int    newstate = DC_RESV;
-	struct cpufreq_freqs freqs;
-	int i;
-
-	if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	freqs.old = cpufreq_p4_get(policy->cpu);
-	freqs.new = stock_freq * p4clockmod_table[newstate].index / 8;
-
-	if (freqs.new == freqs.old)
-		return 0;
-
-	/* notifiers */
-	for_each_cpu(i, policy->cpus) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	}
-
-	/* run on each logical CPU,
-	 * see section 13.15.3 of IA32 Intel Architecture Software
-	 * Developer's Manual, Volume 3
-	 */
-	for_each_cpu(i, policy->cpus)
-		cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
-
-	/* notifiers */
-	for_each_cpu(i, policy->cpus) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	}
-
-	return 0;
-}
-
-
-static int cpufreq_p4_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]);
-}
-
-
-static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
-{
-	if (c->x86 == 0x06) {
-		if (cpu_has(c, X86_FEATURE_EST))
-			printk_once(KERN_WARNING PFX "Warning: EST-capable "
-			       "CPU detected. The acpi-cpufreq module offers "
-			       "voltage scaling in addition to frequency "
-			       "scaling. You should use that instead of "
-			       "p4-clockmod, if possible.\n");
-		switch (c->x86_model) {
-		case 0x0E: /* Core */
-		case 0x0F: /* Core Duo */
-		case 0x16: /* Celeron Core */
-		case 0x1C: /* Atom */
-			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
-			return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
-		case 0x0D: /* Pentium M (Dothan) */
-			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
-			/* fall through */
-		case 0x09: /* Pentium M (Banias) */
-			return speedstep_get_frequency(SPEEDSTEP_CPU_PM);
-		}
-	}
-
-	if (c->x86 != 0xF)
-		return 0;
-
-	/* on P-4s, the TSC runs with constant frequency independent whether
-	 * throttling is active or not. */
-	p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
-
-	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
-		printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
-		       "The speedstep-ich or acpi cpufreq modules offer "
-		       "voltage scaling in addition of frequency scaling. "
-		       "You should use either one instead of p4-clockmod, "
-		       "if possible.\n");
-		return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
-	}
-
-	return speedstep_get_frequency(SPEEDSTEP_CPU_P4D);
-}
-
-
-
-static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
-{
-	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
-	int cpuid = 0;
-	unsigned int i;
-
-#ifdef CONFIG_SMP
-	cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
-#endif
-
-	/* Errata workaround */
-	cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
-	switch (cpuid) {
-	case 0x0f07:
-	case 0x0f0a:
-	case 0x0f11:
-	case 0x0f12:
-		has_N44_O17_errata[policy->cpu] = 1;
-		pr_debug("has errata -- disabling low frequencies\n");
-	}
-
-	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
-	    c->x86_model < 2) {
-		/* switch to maximum frequency and measure result */
-		cpufreq_p4_setdc(policy->cpu, DC_DISABLE);
-		recalibrate_cpu_khz();
-	}
-	/* get max frequency */
-	stock_freq = cpufreq_p4_get_frequency(c);
-	if (!stock_freq)
-		return -EINVAL;
-
-	/* table init */
-	for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
-		if ((i < 2) && (has_N44_O17_errata[policy->cpu]))
-			p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
-		else
-			p4clockmod_table[i].frequency = (stock_freq * i)/8;
-	}
-	cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
-
-	/* cpuinfo and default policy values */
-
-	/* the transition latency is set to be 1 higher than the maximum
-	 * transition latency of the ondemand governor */
-	policy->cpuinfo.transition_latency = 10000001;
-	policy->cur = stock_freq;
-
-	return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
-}
-
-
-static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-static unsigned int cpufreq_p4_get(unsigned int cpu)
-{
-	u32 l, h;
-
-	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
-
-	if (l & 0x10) {
-		l = l >> 1;
-		l &= 0x7;
-	} else
-		l = DC_DISABLE;
-
-	if (l != DC_DISABLE)
-		return stock_freq * l / 8;
-
-	return stock_freq;
-}
-
-static struct freq_attr *p4clockmod_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver p4clockmod_driver = {
-	.verify		= cpufreq_p4_verify,
-	.target		= cpufreq_p4_target,
-	.init		= cpufreq_p4_cpu_init,
-	.exit		= cpufreq_p4_cpu_exit,
-	.get		= cpufreq_p4_get,
-	.name		= "p4-clockmod",
-	.owner		= THIS_MODULE,
-	.attr		= p4clockmod_attr,
-};
-
-
-static int __init cpufreq_p4_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	int ret;
-
-	/*
-	 * THERM_CONTROL is architectural for IA32 now, so
-	 * we can rely on the capability checks
-	 */
-	if (c->x86_vendor != X86_VENDOR_INTEL)
-		return -ENODEV;
-
-	if (!test_cpu_cap(c, X86_FEATURE_ACPI) ||
-				!test_cpu_cap(c, X86_FEATURE_ACC))
-		return -ENODEV;
-
-	ret = cpufreq_register_driver(&p4clockmod_driver);
-	if (!ret)
-		printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
-				"Modulation available\n");
-
-	return ret;
-}
-
-
-static void __exit cpufreq_p4_exit(void)
-{
-	cpufreq_unregister_driver(&p4clockmod_driver);
-}
-
-
-MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>");
-MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
-MODULE_LICENSE("GPL");
-
-late_initcall(cpufreq_p4_init);
-module_exit(cpufreq_p4_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
deleted file mode 100644
index 7b0603eb0129..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ /dev/null
@@ -1,621 +0,0 @@
-/*
- *  pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface
- *
- *  Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
- *  Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
- *	Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
- *  INFRINGEMENT. See the GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/sched.h>
-#include <linux/cpufreq.h>
-#include <linux/compiler.h>
-#include <linux/slab.h>
-
-#include <linux/acpi.h>
-#include <linux/io.h>
-#include <linux/spinlock.h>
-#include <linux/uaccess.h>
-
-#include <acpi/processor.h>
-
-#define PCC_VERSION	"1.10.00"
-#define POLL_LOOPS 	300
-
-#define CMD_COMPLETE 	0x1
-#define CMD_GET_FREQ 	0x0
-#define CMD_SET_FREQ 	0x1
-
-#define BUF_SZ		4
-
-struct pcc_register_resource {
-	u8 descriptor;
-	u16 length;
-	u8 space_id;
-	u8 bit_width;
-	u8 bit_offset;
-	u8 access_size;
-	u64 address;
-} __attribute__ ((packed));
-
-struct pcc_memory_resource {
-	u8 descriptor;
-	u16 length;
-	u8 space_id;
-	u8 resource_usage;
-	u8 type_specific;
-	u64 granularity;
-	u64 minimum;
-	u64 maximum;
-	u64 translation_offset;
-	u64 address_length;
-} __attribute__ ((packed));
-
-static struct cpufreq_driver pcc_cpufreq_driver;
-
-struct pcc_header {
-	u32 signature;
-	u16 length;
-	u8 major;
-	u8 minor;
-	u32 features;
-	u16 command;
-	u16 status;
-	u32 latency;
-	u32 minimum_time;
-	u32 maximum_time;
-	u32 nominal;
-	u32 throttled_frequency;
-	u32 minimum_frequency;
-};
-
-static void __iomem *pcch_virt_addr;
-static struct pcc_header __iomem *pcch_hdr;
-
-static DEFINE_SPINLOCK(pcc_lock);
-
-static struct acpi_generic_address doorbell;
-
-static u64 doorbell_preserve;
-static u64 doorbell_write;
-
-static u8 OSC_UUID[16] = {0x9F, 0x2C, 0x9B, 0x63, 0x91, 0x70, 0x1f, 0x49,
-			  0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
-
-struct pcc_cpu {
-	u32 input_offset;
-	u32 output_offset;
-};
-
-static struct pcc_cpu __percpu *pcc_cpu_info;
-
-static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
-{
-	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-				     policy->cpuinfo.max_freq);
-	return 0;
-}
-
-static inline void pcc_cmd(void)
-{
-	u64 doorbell_value;
-	int i;
-
-	acpi_read(&doorbell_value, &doorbell);
-	acpi_write((doorbell_value & doorbell_preserve) | doorbell_write,
-		   &doorbell);
-
-	for (i = 0; i < POLL_LOOPS; i++) {
-		if (ioread16(&pcch_hdr->status) & CMD_COMPLETE)
-			break;
-	}
-}
-
-static inline void pcc_clear_mapping(void)
-{
-	if (pcch_virt_addr)
-		iounmap(pcch_virt_addr);
-	pcch_virt_addr = NULL;
-}
-
-static unsigned int pcc_get_freq(unsigned int cpu)
-{
-	struct pcc_cpu *pcc_cpu_data;
-	unsigned int curr_freq;
-	unsigned int freq_limit;
-	u16 status;
-	u32 input_buffer;
-	u32 output_buffer;
-
-	spin_lock(&pcc_lock);
-
-	pr_debug("get: get_freq for CPU %d\n", cpu);
-	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
-
-	input_buffer = 0x1;
-	iowrite32(input_buffer,
-			(pcch_virt_addr + pcc_cpu_data->input_offset));
-	iowrite16(CMD_GET_FREQ, &pcch_hdr->command);
-
-	pcc_cmd();
-
-	output_buffer =
-		ioread32(pcch_virt_addr + pcc_cpu_data->output_offset);
-
-	/* Clear the input buffer - we are done with the current command */
-	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
-
-	status = ioread16(&pcch_hdr->status);
-	if (status != CMD_COMPLETE) {
-		pr_debug("get: FAILED: for CPU %d, status is %d\n",
-			cpu, status);
-		goto cmd_incomplete;
-	}
-	iowrite16(0, &pcch_hdr->status);
-	curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
-			/ 100) * 1000);
-
-	pr_debug("get: SUCCESS: (virtual) output_offset for cpu %d is "
-		"0x%p, contains a value of: 0x%x. Speed is: %d MHz\n",
-		cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
-		output_buffer, curr_freq);
-
-	freq_limit = (output_buffer >> 8) & 0xff;
-	if (freq_limit != 0xff) {
-		pr_debug("get: frequency for cpu %d is being temporarily"
-			" capped at %d\n", cpu, curr_freq);
-	}
-
-	spin_unlock(&pcc_lock);
-	return curr_freq;
-
-cmd_incomplete:
-	iowrite16(0, &pcch_hdr->status);
-	spin_unlock(&pcc_lock);
-	return 0;
-}
-
-static int pcc_cpufreq_target(struct cpufreq_policy *policy,
-			      unsigned int target_freq,
-			      unsigned int relation)
-{
-	struct pcc_cpu *pcc_cpu_data;
-	struct cpufreq_freqs freqs;
-	u16 status;
-	u32 input_buffer;
-	int cpu;
-
-	spin_lock(&pcc_lock);
-	cpu = policy->cpu;
-	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
-
-	pr_debug("target: CPU %d should go to target freq: %d "
-		"(virtual) input_offset is 0x%p\n",
-		cpu, target_freq,
-		(pcch_virt_addr + pcc_cpu_data->input_offset));
-
-	freqs.new = target_freq;
-	freqs.cpu = cpu;
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	input_buffer = 0x1 | (((target_freq * 100)
-			       / (ioread32(&pcch_hdr->nominal) * 1000)) << 8);
-	iowrite32(input_buffer,
-			(pcch_virt_addr + pcc_cpu_data->input_offset));
-	iowrite16(CMD_SET_FREQ, &pcch_hdr->command);
-
-	pcc_cmd();
-
-	/* Clear the input buffer - we are done with the current command */
-	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
-
-	status = ioread16(&pcch_hdr->status);
-	if (status != CMD_COMPLETE) {
-		pr_debug("target: FAILED for cpu %d, with status: 0x%x\n",
-			cpu, status);
-		goto cmd_incomplete;
-	}
-	iowrite16(0, &pcch_hdr->status);
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	pr_debug("target: was SUCCESSFUL for cpu %d\n", cpu);
-	spin_unlock(&pcc_lock);
-
-	return 0;
-
-cmd_incomplete:
-	iowrite16(0, &pcch_hdr->status);
-	spin_unlock(&pcc_lock);
-	return -EINVAL;
-}
-
-static int pcc_get_offset(int cpu)
-{
-	acpi_status status;
-	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
-	union acpi_object *pccp, *offset;
-	struct pcc_cpu *pcc_cpu_data;
-	struct acpi_processor *pr;
-	int ret = 0;
-
-	pr = per_cpu(processors, cpu);
-	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
-
-	status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	pccp = buffer.pointer;
-	if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) {
-		ret = -ENODEV;
-		goto out_free;
-	};
-
-	offset = &(pccp->package.elements[0]);
-	if (!offset || offset->type != ACPI_TYPE_INTEGER) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	pcc_cpu_data->input_offset = offset->integer.value;
-
-	offset = &(pccp->package.elements[1]);
-	if (!offset || offset->type != ACPI_TYPE_INTEGER) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	pcc_cpu_data->output_offset = offset->integer.value;
-
-	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
-	memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
-
-	pr_debug("pcc_get_offset: for CPU %d: pcc_cpu_data "
-		"input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
-		cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
-out_free:
-	kfree(buffer.pointer);
-	return ret;
-}
-
-static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
-{
-	acpi_status status;
-	struct acpi_object_list input;
-	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
-	union acpi_object in_params[4];
-	union acpi_object *out_obj;
-	u32 capabilities[2];
-	u32 errors;
-	u32 supported;
-	int ret = 0;
-
-	input.count = 4;
-	input.pointer = in_params;
-	in_params[0].type               = ACPI_TYPE_BUFFER;
-	in_params[0].buffer.length      = 16;
-	in_params[0].buffer.pointer     = OSC_UUID;
-	in_params[1].type               = ACPI_TYPE_INTEGER;
-	in_params[1].integer.value      = 1;
-	in_params[2].type               = ACPI_TYPE_INTEGER;
-	in_params[2].integer.value      = 2;
-	in_params[3].type               = ACPI_TYPE_BUFFER;
-	in_params[3].buffer.length      = 8;
-	in_params[3].buffer.pointer     = (u8 *)&capabilities;
-
-	capabilities[0] = OSC_QUERY_ENABLE;
-	capabilities[1] = 0x1;
-
-	status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	if (!output.length)
-		return -ENODEV;
-
-	out_obj = output.pointer;
-	if (out_obj->type != ACPI_TYPE_BUFFER) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
-	if (errors) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	supported = *((u32 *)(out_obj->buffer.pointer + 4));
-	if (!(supported & 0x1)) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	kfree(output.pointer);
-	capabilities[0] = 0x0;
-	capabilities[1] = 0x1;
-
-	status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	if (!output.length)
-		return -ENODEV;
-
-	out_obj = output.pointer;
-	if (out_obj->type != ACPI_TYPE_BUFFER) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
-	if (errors) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	supported = *((u32 *)(out_obj->buffer.pointer + 4));
-	if (!(supported & 0x1)) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-out_free:
-	kfree(output.pointer);
-	return ret;
-}
-
-static int __init pcc_cpufreq_probe(void)
-{
-	acpi_status status;
-	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
-	struct pcc_memory_resource *mem_resource;
-	struct pcc_register_resource *reg_resource;
-	union acpi_object *out_obj, *member;
-	acpi_handle handle, osc_handle, pcch_handle;
-	int ret = 0;
-
-	status = acpi_get_handle(NULL, "\\_SB", &handle);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	status = acpi_get_handle(handle, "PCCH", &pcch_handle);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	status = acpi_get_handle(handle, "_OSC", &osc_handle);
-	if (ACPI_SUCCESS(status)) {
-		ret = pcc_cpufreq_do_osc(&osc_handle);
-		if (ret)
-			pr_debug("probe: _OSC evaluation did not succeed\n");
-		/* Firmware's use of _OSC is optional */
-		ret = 0;
-	}
-
-	status = acpi_evaluate_object(handle, "PCCH", NULL, &output);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	out_obj = output.pointer;
-	if (out_obj->type != ACPI_TYPE_PACKAGE) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	member = &out_obj->package.elements[0];
-	if (member->type != ACPI_TYPE_BUFFER) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
-
-	pr_debug("probe: mem_resource descriptor: 0x%x,"
-		" length: %d, space_id: %d, resource_usage: %d,"
-		" type_specific: %d, granularity: 0x%llx,"
-		" minimum: 0x%llx, maximum: 0x%llx,"
-		" translation_offset: 0x%llx, address_length: 0x%llx\n",
-		mem_resource->descriptor, mem_resource->length,
-		mem_resource->space_id, mem_resource->resource_usage,
-		mem_resource->type_specific, mem_resource->granularity,
-		mem_resource->minimum, mem_resource->maximum,
-		mem_resource->translation_offset,
-		mem_resource->address_length);
-
-	if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
-		ret = -ENODEV;
-		goto out_free;
-	}
-
-	pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
-					mem_resource->address_length);
-	if (pcch_virt_addr == NULL) {
-		pr_debug("probe: could not map shared mem region\n");
-		goto out_free;
-	}
-	pcch_hdr = pcch_virt_addr;
-
-	pr_debug("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
-	pr_debug("probe: PCCH header is at physical address: 0x%llx,"
-		" signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
-		" supported features: 0x%x, command field: 0x%x,"
-		" status field: 0x%x, nominal latency: %d us\n",
-		mem_resource->minimum, ioread32(&pcch_hdr->signature),
-		ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major),
-		ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features),
-		ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
-		ioread32(&pcch_hdr->latency));
-
-	pr_debug("probe: min time between commands: %d us,"
-		" max time between commands: %d us,"
-		" nominal CPU frequency: %d MHz,"
-		" minimum CPU frequency: %d MHz,"
-		" minimum CPU frequency without throttling: %d MHz\n",
-		ioread32(&pcch_hdr->minimum_time),
-		ioread32(&pcch_hdr->maximum_time),
-		ioread32(&pcch_hdr->nominal),
-		ioread32(&pcch_hdr->throttled_frequency),
-		ioread32(&pcch_hdr->minimum_frequency));
-
-	member = &out_obj->package.elements[1];
-	if (member->type != ACPI_TYPE_BUFFER) {
-		ret = -ENODEV;
-		goto pcch_free;
-	}
-
-	reg_resource = (struct pcc_register_resource *)member->buffer.pointer;
-
-	doorbell.space_id = reg_resource->space_id;
-	doorbell.bit_width = reg_resource->bit_width;
-	doorbell.bit_offset = reg_resource->bit_offset;
-	doorbell.access_width = 64;
-	doorbell.address = reg_resource->address;
-
-	pr_debug("probe: doorbell: space_id is %d, bit_width is %d, "
-		"bit_offset is %d, access_width is %d, address is 0x%llx\n",
-		doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
-		doorbell.access_width, reg_resource->address);
-
-	member = &out_obj->package.elements[2];
-	if (member->type != ACPI_TYPE_INTEGER) {
-		ret = -ENODEV;
-		goto pcch_free;
-	}
-
-	doorbell_preserve = member->integer.value;
-
-	member = &out_obj->package.elements[3];
-	if (member->type != ACPI_TYPE_INTEGER) {
-		ret = -ENODEV;
-		goto pcch_free;
-	}
-
-	doorbell_write = member->integer.value;
-
-	pr_debug("probe: doorbell_preserve: 0x%llx,"
-		" doorbell_write: 0x%llx\n",
-		doorbell_preserve, doorbell_write);
-
-	pcc_cpu_info = alloc_percpu(struct pcc_cpu);
-	if (!pcc_cpu_info) {
-		ret = -ENOMEM;
-		goto pcch_free;
-	}
-
-	printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency"
-	       " limits: %d MHz, %d MHz\n", PCC_VERSION,
-	       ioread32(&pcch_hdr->minimum_frequency),
-	       ioread32(&pcch_hdr->nominal));
-	kfree(output.pointer);
-	return ret;
-pcch_free:
-	pcc_clear_mapping();
-out_free:
-	kfree(output.pointer);
-	return ret;
-}
-
-static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
-{
-	unsigned int cpu = policy->cpu;
-	unsigned int result = 0;
-
-	if (!pcch_virt_addr) {
-		result = -1;
-		goto out;
-	}
-
-	result = pcc_get_offset(cpu);
-	if (result) {
-		pr_debug("init: PCCP evaluation failed\n");
-		goto out;
-	}
-
-	policy->max = policy->cpuinfo.max_freq =
-		ioread32(&pcch_hdr->nominal) * 1000;
-	policy->min = policy->cpuinfo.min_freq =
-		ioread32(&pcch_hdr->minimum_frequency) * 1000;
-	policy->cur = pcc_get_freq(cpu);
-
-	if (!policy->cur) {
-		pr_debug("init: Unable to get current CPU frequency\n");
-		result = -EINVAL;
-		goto out;
-	}
-
-	pr_debug("init: policy->max is %d, policy->min is %d\n",
-		policy->max, policy->min);
-out:
-	return result;
-}
-
-static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
-{
-	return 0;
-}
-
-static struct cpufreq_driver pcc_cpufreq_driver = {
-	.flags = CPUFREQ_CONST_LOOPS,
-	.get = pcc_get_freq,
-	.verify = pcc_cpufreq_verify,
-	.target = pcc_cpufreq_target,
-	.init = pcc_cpufreq_cpu_init,
-	.exit = pcc_cpufreq_cpu_exit,
-	.name = "pcc-cpufreq",
-	.owner = THIS_MODULE,
-};
-
-static int __init pcc_cpufreq_init(void)
-{
-	int ret;
-
-	if (acpi_disabled)
-		return 0;
-
-	ret = pcc_cpufreq_probe();
-	if (ret) {
-		pr_debug("pcc_cpufreq_init: PCCH evaluation failed\n");
-		return ret;
-	}
-
-	ret = cpufreq_register_driver(&pcc_cpufreq_driver);
-
-	return ret;
-}
-
-static void __exit pcc_cpufreq_exit(void)
-{
-	cpufreq_unregister_driver(&pcc_cpufreq_driver);
-
-	pcc_clear_mapping();
-
-	free_percpu(pcc_cpu_info);
-}
-
-MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar");
-MODULE_VERSION(PCC_VERSION);
-MODULE_DESCRIPTION("Processor Clocking Control interface driver");
-MODULE_LICENSE("GPL");
-
-late_initcall(pcc_cpufreq_init);
-module_exit(pcc_cpufreq_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
deleted file mode 100644
index b3379d6a5c57..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- *  This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
- *  (C) 2000-2003  Dave Jones, Arjan van de Ven, Janne Pänkälä,
- *                 Dominik Brodowski.
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/io.h>
-
-#include <asm/msr.h>
-
-#define POWERNOW_IOPORT 0xfff0          /* it doesn't matter where, as long
-					   as it is unused */
-
-#define PFX "powernow-k6: "
-static unsigned int                     busfreq;   /* FSB, in 10 kHz */
-static unsigned int                     max_multiplier;
-
-
-/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */
-static struct cpufreq_frequency_table clock_ratio[] = {
-	{45,  /* 000 -> 4.5x */ 0},
-	{50,  /* 001 -> 5.0x */ 0},
-	{40,  /* 010 -> 4.0x */ 0},
-	{55,  /* 011 -> 5.5x */ 0},
-	{20,  /* 100 -> 2.0x */ 0},
-	{30,  /* 101 -> 3.0x */ 0},
-	{60,  /* 110 -> 6.0x */ 0},
-	{35,  /* 111 -> 3.5x */ 0},
-	{0, CPUFREQ_TABLE_END}
-};
-
-
-/**
- * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier
- *
- *   Returns the current setting of the frequency multiplier. Core clock
- * speed is frequency of the Front-Side Bus multiplied with this value.
- */
-static int powernow_k6_get_cpu_multiplier(void)
-{
-	u64 invalue = 0;
-	u32 msrval;
-
-	msrval = POWERNOW_IOPORT + 0x1;
-	wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
-	invalue = inl(POWERNOW_IOPORT + 0x8);
-	msrval = POWERNOW_IOPORT + 0x0;
-	wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
-
-	return clock_ratio[(invalue >> 5)&7].index;
-}
-
-
-/**
- * powernow_k6_set_state - set the PowerNow! multiplier
- * @best_i: clock_ratio[best_i] is the target multiplier
- *
- *   Tries to change the PowerNow! multiplier
- */
-static void powernow_k6_set_state(unsigned int best_i)
-{
-	unsigned long outvalue = 0, invalue = 0;
-	unsigned long msrval;
-	struct cpufreq_freqs freqs;
-
-	if (clock_ratio[best_i].index > max_multiplier) {
-		printk(KERN_ERR PFX "invalid target frequency\n");
-		return;
-	}
-
-	freqs.old = busfreq * powernow_k6_get_cpu_multiplier();
-	freqs.new = busfreq * clock_ratio[best_i].index;
-	freqs.cpu = 0; /* powernow-k6.c is UP only driver */
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	/* we now need to transform best_i to the BVC format, see AMD#23446 */
-
-	outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5);
-
-	msrval = POWERNOW_IOPORT + 0x1;
-	wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
-	invalue = inl(POWERNOW_IOPORT + 0x8);
-	invalue = invalue & 0xf;
-	outvalue = outvalue | invalue;
-	outl(outvalue , (POWERNOW_IOPORT + 0x8));
-	msrval = POWERNOW_IOPORT + 0x0;
-	wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
-	return;
-}
-
-
-/**
- * powernow_k6_verify - verifies a new CPUfreq policy
- * @policy: new policy
- *
- * Policy must be within lowest and highest possible CPU Frequency,
- * and at least one possible state must be within min and max.
- */
-static int powernow_k6_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, &clock_ratio[0]);
-}
-
-
-/**
- * powernow_k6_setpolicy - sets a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency
- *  (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * sets a new CPUFreq policy
- */
-static int powernow_k6_target(struct cpufreq_policy *policy,
-			       unsigned int target_freq,
-			       unsigned int relation)
-{
-	unsigned int newstate = 0;
-
-	if (cpufreq_frequency_table_target(policy, &clock_ratio[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	powernow_k6_set_state(newstate);
-
-	return 0;
-}
-
-
-static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
-{
-	unsigned int i, f;
-	int result;
-
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	/* get frequencies */
-	max_multiplier = powernow_k6_get_cpu_multiplier();
-	busfreq = cpu_khz / max_multiplier;
-
-	/* table init */
-	for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
-		f = clock_ratio[i].index;
-		if (f > max_multiplier)
-			clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
-		else
-			clock_ratio[i].frequency = busfreq * f;
-	}
-
-	/* cpuinfo and default policy values */
-	policy->cpuinfo.transition_latency = 200000;
-	policy->cur = busfreq * max_multiplier;
-
-	result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
-	if (result)
-		return result;
-
-	cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu);
-
-	return 0;
-}
-
-
-static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
-{
-	unsigned int i;
-	for (i = 0; i < 8; i++) {
-		if (i == max_multiplier)
-			powernow_k6_set_state(i);
-	}
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-static unsigned int powernow_k6_get(unsigned int cpu)
-{
-	unsigned int ret;
-	ret = (busfreq * powernow_k6_get_cpu_multiplier());
-	return ret;
-}
-
-static struct freq_attr *powernow_k6_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver powernow_k6_driver = {
-	.verify		= powernow_k6_verify,
-	.target		= powernow_k6_target,
-	.init		= powernow_k6_cpu_init,
-	.exit		= powernow_k6_cpu_exit,
-	.get		= powernow_k6_get,
-	.name		= "powernow-k6",
-	.owner		= THIS_MODULE,
-	.attr		= powernow_k6_attr,
-};
-
-
-/**
- * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver
- *
- *   Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported
- * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero
- * on success.
- */
-static int __init powernow_k6_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) ||
-		((c->x86_model != 12) && (c->x86_model != 13)))
-		return -ENODEV;
-
-	if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
-		printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n");
-		return -EIO;
-	}
-
-	if (cpufreq_register_driver(&powernow_k6_driver)) {
-		release_region(POWERNOW_IOPORT, 16);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-
-/**
- * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support
- *
- *   Unregisters AMD K6-2+ / K6-3+ PowerNow! support.
- */
-static void __exit powernow_k6_exit(void)
-{
-	cpufreq_unregister_driver(&powernow_k6_driver);
-	release_region(POWERNOW_IOPORT, 16);
-}
-
-
-MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, "
-		"Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
-MODULE_LICENSE("GPL");
-
-module_init(powernow_k6_init);
-module_exit(powernow_k6_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
deleted file mode 100644
index d71d9f372359..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ /dev/null
@@ -1,747 +0,0 @@
-/*
- *  AMD K7 Powernow driver.
- *  (C) 2003 Dave Jones on behalf of SuSE Labs.
- *  (C) 2003-2004 Dave Jones <davej@redhat.com>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *  Based upon datasheets & sample CPUs kindly provided by AMD.
- *
- * Errata 5:
- *  CPU may fail to execute a FID/VID change in presence of interrupt.
- *  - We cli/sti on stepping A0 CPUs around the FID/VID transition.
- * Errata 15:
- *  CPU with half frequency multipliers may hang upon wakeup from disconnect.
- *  - We disable half multipliers if ACPI is used on A0 stepping CPUs.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/dmi.h>
-#include <linux/timex.h>
-#include <linux/io.h>
-
-#include <asm/timer.h>		/* Needed for recalibrate_cpu_khz() */
-#include <asm/msr.h>
-#include <asm/system.h>
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-#include <linux/acpi.h>
-#include <acpi/processor.h>
-#endif
-
-#include "powernow-k7.h"
-
-#define PFX "powernow: "
-
-
-struct psb_s {
-	u8 signature[10];
-	u8 tableversion;
-	u8 flags;
-	u16 settlingtime;
-	u8 reserved1;
-	u8 numpst;
-};
-
-struct pst_s {
-	u32 cpuid;
-	u8 fsbspeed;
-	u8 maxfid;
-	u8 startvid;
-	u8 numpstates;
-};
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-union powernow_acpi_control_t {
-	struct {
-		unsigned long fid:5,
-			vid:5,
-			sgtc:20,
-			res1:2;
-	} bits;
-	unsigned long val;
-};
-#endif
-
-/* divide by 1000 to get VCore voltage in V. */
-static const int mobile_vid_table[32] = {
-    2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
-    1600, 1550, 1500, 1450, 1400, 1350, 1300, 0,
-    1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
-    1075, 1050, 1025, 1000, 975, 950, 925, 0,
-};
-
-/* divide by 10 to get FID. */
-static const int fid_codes[32] = {
-    110, 115, 120, 125, 50, 55, 60, 65,
-    70, 75, 80, 85, 90, 95, 100, 105,
-    30, 190, 40, 200, 130, 135, 140, 210,
-    150, 225, 160, 165, 170, 180, -1, -1,
-};
-
-/* This parameter is used in order to force ACPI instead of legacy method for
- * configuration purpose.
- */
-
-static int acpi_force;
-
-static struct cpufreq_frequency_table *powernow_table;
-
-static unsigned int can_scale_bus;
-static unsigned int can_scale_vid;
-static unsigned int minimum_speed = -1;
-static unsigned int maximum_speed;
-static unsigned int number_scales;
-static unsigned int fsb;
-static unsigned int latency;
-static char have_a0;
-
-static int check_fsb(unsigned int fsbspeed)
-{
-	int delta;
-	unsigned int f = fsb / 1000;
-
-	delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
-	return delta < 5;
-}
-
-static int check_powernow(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	unsigned int maxei, eax, ebx, ecx, edx;
-
-	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) {
-#ifdef MODULE
-		printk(KERN_INFO PFX "This module only works with "
-				"AMD K7 CPUs\n");
-#endif
-		return 0;
-	}
-
-	/* Get maximum capabilities */
-	maxei = cpuid_eax(0x80000000);
-	if (maxei < 0x80000007) {	/* Any powernow info ? */
-#ifdef MODULE
-		printk(KERN_INFO PFX "No powernow capabilities detected\n");
-#endif
-		return 0;
-	}
-
-	if ((c->x86_model == 6) && (c->x86_mask == 0)) {
-		printk(KERN_INFO PFX "K7 660[A0] core detected, "
-				"enabling errata workarounds\n");
-		have_a0 = 1;
-	}
-
-	cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
-
-	/* Check we can actually do something before we say anything.*/
-	if (!(edx & (1 << 1 | 1 << 2)))
-		return 0;
-
-	printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
-
-	if (edx & 1 << 1) {
-		printk("frequency");
-		can_scale_bus = 1;
-	}
-
-	if ((edx & (1 << 1 | 1 << 2)) == 0x6)
-		printk(" and ");
-
-	if (edx & 1 << 2) {
-		printk("voltage");
-		can_scale_vid = 1;
-	}
-
-	printk(".\n");
-	return 1;
-}
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-static void invalidate_entry(unsigned int entry)
-{
-	powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
-}
-#endif
-
-static int get_ranges(unsigned char *pst)
-{
-	unsigned int j;
-	unsigned int speed;
-	u8 fid, vid;
-
-	powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
-				(number_scales + 1)), GFP_KERNEL);
-	if (!powernow_table)
-		return -ENOMEM;
-
-	for (j = 0 ; j < number_scales; j++) {
-		fid = *pst++;
-
-		powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
-		powernow_table[j].index = fid; /* lower 8 bits */
-
-		speed = powernow_table[j].frequency;
-
-		if ((fid_codes[fid] % 10) == 5) {
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-			if (have_a0 == 1)
-				invalidate_entry(j);
-#endif
-		}
-
-		if (speed < minimum_speed)
-			minimum_speed = speed;
-		if (speed > maximum_speed)
-			maximum_speed = speed;
-
-		vid = *pst++;
-		powernow_table[j].index |= (vid << 8); /* upper 8 bits */
-
-		pr_debug("   FID: 0x%x (%d.%dx [%dMHz])  "
-			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
-			 fid_codes[fid] % 10, speed/1000, vid,
-			 mobile_vid_table[vid]/1000,
-			 mobile_vid_table[vid]%1000);
-	}
-	powernow_table[number_scales].frequency = CPUFREQ_TABLE_END;
-	powernow_table[number_scales].index = 0;
-
-	return 0;
-}
-
-
-static void change_FID(int fid)
-{
-	union msr_fidvidctl fidvidctl;
-
-	rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
-	if (fidvidctl.bits.FID != fid) {
-		fidvidctl.bits.SGTC = latency;
-		fidvidctl.bits.FID = fid;
-		fidvidctl.bits.VIDC = 0;
-		fidvidctl.bits.FIDC = 1;
-		wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
-	}
-}
-
-
-static void change_VID(int vid)
-{
-	union msr_fidvidctl fidvidctl;
-
-	rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
-	if (fidvidctl.bits.VID != vid) {
-		fidvidctl.bits.SGTC = latency;
-		fidvidctl.bits.VID = vid;
-		fidvidctl.bits.FIDC = 0;
-		fidvidctl.bits.VIDC = 1;
-		wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
-	}
-}
-
-
-static void change_speed(unsigned int index)
-{
-	u8 fid, vid;
-	struct cpufreq_freqs freqs;
-	union msr_fidvidstatus fidvidstatus;
-	int cfid;
-
-	/* fid are the lower 8 bits of the index we stored into
-	 * the cpufreq frequency table in powernow_decode_bios,
-	 * vid are the upper 8 bits.
-	 */
-
-	fid = powernow_table[index].index & 0xFF;
-	vid = (powernow_table[index].index & 0xFF00) >> 8;
-
-	freqs.cpu = 0;
-
-	rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
-	cfid = fidvidstatus.bits.CFID;
-	freqs.old = fsb * fid_codes[cfid] / 10;
-
-	freqs.new = powernow_table[index].frequency;
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	/* Now do the magic poking into the MSRs.  */
-
-	if (have_a0 == 1)	/* A0 errata 5 */
-		local_irq_disable();
-
-	if (freqs.old > freqs.new) {
-		/* Going down, so change FID first */
-		change_FID(fid);
-		change_VID(vid);
-	} else {
-		/* Going up, so change VID first */
-		change_VID(vid);
-		change_FID(fid);
-	}
-
-
-	if (have_a0 == 1)
-		local_irq_enable();
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-}
-
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-
-static struct acpi_processor_performance *acpi_processor_perf;
-
-static int powernow_acpi_init(void)
-{
-	int i;
-	int retval = 0;
-	union powernow_acpi_control_t pc;
-
-	if (acpi_processor_perf != NULL && powernow_table != NULL) {
-		retval = -EINVAL;
-		goto err0;
-	}
-
-	acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance),
-				      GFP_KERNEL);
-	if (!acpi_processor_perf) {
-		retval = -ENOMEM;
-		goto err0;
-	}
-
-	if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
-								GFP_KERNEL)) {
-		retval = -ENOMEM;
-		goto err05;
-	}
-
-	if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
-		retval = -EIO;
-		goto err1;
-	}
-
-	if (acpi_processor_perf->control_register.space_id !=
-			ACPI_ADR_SPACE_FIXED_HARDWARE) {
-		retval = -ENODEV;
-		goto err2;
-	}
-
-	if (acpi_processor_perf->status_register.space_id !=
-			ACPI_ADR_SPACE_FIXED_HARDWARE) {
-		retval = -ENODEV;
-		goto err2;
-	}
-
-	number_scales = acpi_processor_perf->state_count;
-
-	if (number_scales < 2) {
-		retval = -ENODEV;
-		goto err2;
-	}
-
-	powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
-				(number_scales + 1)), GFP_KERNEL);
-	if (!powernow_table) {
-		retval = -ENOMEM;
-		goto err2;
-	}
-
-	pc.val = (unsigned long) acpi_processor_perf->states[0].control;
-	for (i = 0; i < number_scales; i++) {
-		u8 fid, vid;
-		struct acpi_processor_px *state =
-			&acpi_processor_perf->states[i];
-		unsigned int speed, speed_mhz;
-
-		pc.val = (unsigned long) state->control;
-		pr_debug("acpi:  P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
-			 i,
-			 (u32) state->core_frequency,
-			 (u32) state->power,
-			 (u32) state->transition_latency,
-			 (u32) state->control,
-			 pc.bits.sgtc);
-
-		vid = pc.bits.vid;
-		fid = pc.bits.fid;
-
-		powernow_table[i].frequency = fsb * fid_codes[fid] / 10;
-		powernow_table[i].index = fid; /* lower 8 bits */
-		powernow_table[i].index |= (vid << 8); /* upper 8 bits */
-
-		speed = powernow_table[i].frequency;
-		speed_mhz = speed / 1000;
-
-		/* processor_perflib will multiply the MHz value by 1000 to
-		 * get a KHz value (e.g. 1266000). However, powernow-k7 works
-		 * with true KHz values (e.g. 1266768). To ensure that all
-		 * powernow frequencies are available, we must ensure that
-		 * ACPI doesn't restrict them, so we round up the MHz value
-		 * to ensure that perflib's computed KHz value is greater than
-		 * or equal to powernow's KHz value.
-		 */
-		if (speed % 1000 > 0)
-			speed_mhz++;
-
-		if ((fid_codes[fid] % 10) == 5) {
-			if (have_a0 == 1)
-				invalidate_entry(i);
-		}
-
-		pr_debug("   FID: 0x%x (%d.%dx [%dMHz])  "
-			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
-			 fid_codes[fid] % 10, speed_mhz, vid,
-			 mobile_vid_table[vid]/1000,
-			 mobile_vid_table[vid]%1000);
-
-		if (state->core_frequency != speed_mhz) {
-			state->core_frequency = speed_mhz;
-			pr_debug("   Corrected ACPI frequency to %d\n",
-				speed_mhz);
-		}
-
-		if (latency < pc.bits.sgtc)
-			latency = pc.bits.sgtc;
-
-		if (speed < minimum_speed)
-			minimum_speed = speed;
-		if (speed > maximum_speed)
-			maximum_speed = speed;
-	}
-
-	powernow_table[i].frequency = CPUFREQ_TABLE_END;
-	powernow_table[i].index = 0;
-
-	/* notify BIOS that we exist */
-	acpi_processor_notify_smm(THIS_MODULE);
-
-	return 0;
-
-err2:
-	acpi_processor_unregister_performance(acpi_processor_perf, 0);
-err1:
-	free_cpumask_var(acpi_processor_perf->shared_cpu_map);
-err05:
-	kfree(acpi_processor_perf);
-err0:
-	printk(KERN_WARNING PFX "ACPI perflib can not be used on "
-			"this platform\n");
-	acpi_processor_perf = NULL;
-	return retval;
-}
-#else
-static int powernow_acpi_init(void)
-{
-	printk(KERN_INFO PFX "no support for ACPI processor found."
-	       "  Please recompile your kernel with ACPI processor\n");
-	return -EINVAL;
-}
-#endif
-
-static void print_pst_entry(struct pst_s *pst, unsigned int j)
-{
-	pr_debug("PST:%d (@%p)\n", j, pst);
-	pr_debug(" cpuid: 0x%x  fsb: %d  maxFID: 0x%x  startvid: 0x%x\n",
-		pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
-}
-
-static int powernow_decode_bios(int maxfid, int startvid)
-{
-	struct psb_s *psb;
-	struct pst_s *pst;
-	unsigned int i, j;
-	unsigned char *p;
-	unsigned int etuple;
-	unsigned int ret;
-
-	etuple = cpuid_eax(0x80000001);
-
-	for (i = 0xC0000; i < 0xffff0 ; i += 16) {
-
-		p = phys_to_virt(i);
-
-		if (memcmp(p, "AMDK7PNOW!",  10) == 0) {
-			pr_debug("Found PSB header at %p\n", p);
-			psb = (struct psb_s *) p;
-			pr_debug("Table version: 0x%x\n", psb->tableversion);
-			if (psb->tableversion != 0x12) {
-				printk(KERN_INFO PFX "Sorry, only v1.2 tables"
-						" supported right now\n");
-				return -ENODEV;
-			}
-
-			pr_debug("Flags: 0x%x\n", psb->flags);
-			if ((psb->flags & 1) == 0)
-				pr_debug("Mobile voltage regulator\n");
-			else
-				pr_debug("Desktop voltage regulator\n");
-
-			latency = psb->settlingtime;
-			if (latency < 100) {
-				printk(KERN_INFO PFX "BIOS set settling time "
-						"to %d microseconds. "
-						"Should be at least 100. "
-						"Correcting.\n", latency);
-				latency = 100;
-			}
-			pr_debug("Settling Time: %d microseconds.\n",
-					psb->settlingtime);
-			pr_debug("Has %d PST tables. (Only dumping ones "
-					"relevant to this CPU).\n",
-					psb->numpst);
-
-			p += sizeof(struct psb_s);
-
-			pst = (struct pst_s *) p;
-
-			for (j = 0; j < psb->numpst; j++) {
-				pst = (struct pst_s *) p;
-				number_scales = pst->numpstates;
-
-				if ((etuple == pst->cpuid) &&
-				    check_fsb(pst->fsbspeed) &&
-				    (maxfid == pst->maxfid) &&
-				    (startvid == pst->startvid)) {
-					print_pst_entry(pst, j);
-					p = (char *)pst + sizeof(struct pst_s);
-					ret = get_ranges(p);
-					return ret;
-				} else {
-					unsigned int k;
-					p = (char *)pst + sizeof(struct pst_s);
-					for (k = 0; k < number_scales; k++)
-						p += 2;
-				}
-			}
-			printk(KERN_INFO PFX "No PST tables match this cpuid "
-					"(0x%x)\n", etuple);
-			printk(KERN_INFO PFX "This is indicative of a broken "
-					"BIOS.\n");
-
-			return -EINVAL;
-		}
-		p++;
-	}
-
-	return -ENODEV;
-}
-
-
-static int powernow_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int newstate;
-
-	if (cpufreq_frequency_table_target(policy, powernow_table, target_freq,
-				relation, &newstate))
-		return -EINVAL;
-
-	change_speed(newstate);
-
-	return 0;
-}
-
-
-static int powernow_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, powernow_table);
-}
-
-/*
- * We use the fact that the bus frequency is somehow
- * a multiple of 100000/3 khz, then we compute sgtc according
- * to this multiple.
- * That way, we match more how AMD thinks all of that work.
- * We will then get the same kind of behaviour already tested under
- * the "well-known" other OS.
- */
-static int __cpuinit fixup_sgtc(void)
-{
-	unsigned int sgtc;
-	unsigned int m;
-
-	m = fsb / 3333;
-	if ((m % 10) >= 5)
-		m += 5;
-
-	m /= 10;
-
-	sgtc = 100 * m * latency;
-	sgtc = sgtc / 3;
-	if (sgtc > 0xfffff) {
-		printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
-		sgtc = 0xfffff;
-	}
-	return sgtc;
-}
-
-static unsigned int powernow_get(unsigned int cpu)
-{
-	union msr_fidvidstatus fidvidstatus;
-	unsigned int cfid;
-
-	if (cpu)
-		return 0;
-	rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
-	cfid = fidvidstatus.bits.CFID;
-
-	return fsb * fid_codes[cfid] / 10;
-}
-
-
-static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d)
-{
-	printk(KERN_WARNING PFX
-		"%s laptop with broken PST tables in BIOS detected.\n",
-		d->ident);
-	printk(KERN_WARNING PFX
-		"You need to downgrade to 3A21 (09/09/2002), or try a newer "
-		"BIOS than 3A71 (01/20/2003)\n");
-	printk(KERN_WARNING PFX
-		"cpufreq scaling has been disabled as a result of this.\n");
-	return 0;
-}
-
-/*
- * Some Athlon laptops have really fucked PST tables.
- * A BIOS update is all that can save them.
- * Mention this, and disable cpufreq.
- */
-static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = {
-	{
-		.callback = acer_cpufreq_pst,
-		.ident = "Acer Aspire",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"),
-			DMI_MATCH(DMI_BIOS_VERSION, "3A71"),
-		},
-	},
-	{ }
-};
-
-static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy)
-{
-	union msr_fidvidstatus fidvidstatus;
-	int result;
-
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
-
-	recalibrate_cpu_khz();
-
-	fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
-	if (!fsb) {
-		printk(KERN_WARNING PFX "can not determine bus frequency\n");
-		return -EINVAL;
-	}
-	pr_debug("FSB: %3dMHz\n", fsb/1000);
-
-	if (dmi_check_system(powernow_dmi_table) || acpi_force) {
-		printk(KERN_INFO PFX "PSB/PST known to be broken.  "
-				"Trying ACPI instead\n");
-		result = powernow_acpi_init();
-	} else {
-		result = powernow_decode_bios(fidvidstatus.bits.MFID,
-				fidvidstatus.bits.SVID);
-		if (result) {
-			printk(KERN_INFO PFX "Trying ACPI perflib\n");
-			maximum_speed = 0;
-			minimum_speed = -1;
-			latency = 0;
-			result = powernow_acpi_init();
-			if (result) {
-				printk(KERN_INFO PFX
-					"ACPI and legacy methods failed\n");
-			}
-		} else {
-			/* SGTC use the bus clock as timer */
-			latency = fixup_sgtc();
-			printk(KERN_INFO PFX "SGTC: %d\n", latency);
-		}
-	}
-
-	if (result)
-		return result;
-
-	printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
-				minimum_speed/1000, maximum_speed/1000);
-
-	policy->cpuinfo.transition_latency =
-		cpufreq_scale(2000000UL, fsb, latency);
-
-	policy->cur = powernow_get(0);
-
-	cpufreq_frequency_table_get_attr(powernow_table, policy->cpu);
-
-	return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
-}
-
-static int powernow_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-	if (acpi_processor_perf) {
-		acpi_processor_unregister_performance(acpi_processor_perf, 0);
-		free_cpumask_var(acpi_processor_perf->shared_cpu_map);
-		kfree(acpi_processor_perf);
-	}
-#endif
-
-	kfree(powernow_table);
-	return 0;
-}
-
-static struct freq_attr *powernow_table_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver powernow_driver = {
-	.verify		= powernow_verify,
-	.target		= powernow_target,
-	.get		= powernow_get,
-#ifdef CONFIG_X86_POWERNOW_K7_ACPI
-	.bios_limit	= acpi_processor_get_bios_limit,
-#endif
-	.init		= powernow_cpu_init,
-	.exit		= powernow_cpu_exit,
-	.name		= "powernow-k7",
-	.owner		= THIS_MODULE,
-	.attr		= powernow_table_attr,
-};
-
-static int __init powernow_init(void)
-{
-	if (check_powernow() == 0)
-		return -ENODEV;
-	return cpufreq_register_driver(&powernow_driver);
-}
-
-
-static void __exit powernow_exit(void)
-{
-	cpufreq_unregister_driver(&powernow_driver);
-}
-
-module_param(acpi_force,  int, 0444);
-MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
-
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
-MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
-MODULE_LICENSE("GPL");
-
-late_initcall(powernow_init);
-module_exit(powernow_exit);
-
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
deleted file mode 100644
index 35fb4eaf6e1c..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- *  (C) 2003 Dave Jones.
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  AMD-specific information
- *
- */
-
-union msr_fidvidctl {
-	struct {
-		unsigned FID:5,			// 4:0
-		reserved1:3,	// 7:5
-		VID:5,			// 12:8
-		reserved2:3,	// 15:13
-		FIDC:1,			// 16
-		VIDC:1,			// 17
-		reserved3:2,	// 19:18
-		FIDCHGRATIO:1,	// 20
-		reserved4:11,	// 31-21
-		SGTC:20,		// 32:51
-		reserved5:12;	// 63:52
-	} bits;
-	unsigned long long val;
-};
-
-union msr_fidvidstatus {
-	struct {
-		unsigned CFID:5,			// 4:0
-		reserved1:3,	// 7:5
-		SFID:5,			// 12:8
-		reserved2:3,	// 15:13
-		MFID:5,			// 20:16
-		reserved3:11,	// 31:21
-		CVID:5,			// 36:32
-		reserved4:3,	// 39:37
-		SVID:5,			// 44:40
-		reserved5:3,	// 47:45
-		MVID:5,			// 52:48
-		reserved6:11;	// 63:53
-	} bits;
-	unsigned long long val;
-};
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
deleted file mode 100644
index 83479b6fb9a1..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ /dev/null
@@ -1,1607 +0,0 @@
-/*
- *   (c) 2003-2010 Advanced Micro Devices, Inc.
- *  Your use of this code is subject to the terms and conditions of the
- *  GNU general public license version 2. See "COPYING" or
- *  http://www.gnu.org/licenses/gpl.html
- *
- *  Support : mark.langsdorf@amd.com
- *
- *  Based on the powernow-k7.c module written by Dave Jones.
- *  (C) 2003 Dave Jones on behalf of SuSE Labs
- *  (C) 2004 Dominik Brodowski <linux@brodo.de>
- *  (C) 2004 Pavel Machek <pavel@ucw.cz>
- *  Licensed under the terms of the GNU GPL License version 2.
- *  Based upon datasheets & sample CPUs kindly provided by AMD.
- *
- *  Valuable input gratefully received from Dave Jones, Pavel Machek,
- *  Dominik Brodowski, Jacob Shin, and others.
- *  Originally developed by Paul Devriendt.
- *  Processor information obtained from Chapter 9 (Power and Thermal Management)
- *  of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
- *  Opteron Processors" available for download from www.amd.com
- *
- *  Tables for specific CPUs can be inferred from
- *     http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
- */
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/cpumask.h>
-#include <linux/sched.h>	/* for current / set_cpus_allowed() */
-#include <linux/io.h>
-#include <linux/delay.h>
-
-#include <asm/msr.h>
-
-#include <linux/acpi.h>
-#include <linux/mutex.h>
-#include <acpi/processor.h>
-
-#define PFX "powernow-k8: "
-#define VERSION "version 2.20.00"
-#include "powernow-k8.h"
-#include "mperf.h"
-
-/* serialize freq changes  */
-static DEFINE_MUTEX(fidvid_mutex);
-
-static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
-
-static int cpu_family = CPU_OPTERON;
-
-/* core performance boost */
-static bool cpb_capable, cpb_enabled;
-static struct msr __percpu *msrs;
-
-static struct cpufreq_driver cpufreq_amd64_driver;
-
-#ifndef CONFIG_SMP
-static inline const struct cpumask *cpu_core_mask(int cpu)
-{
-	return cpumask_of(0);
-}
-#endif
-
-/* Return a frequency in MHz, given an input fid */
-static u32 find_freq_from_fid(u32 fid)
-{
-	return 800 + (fid * 100);
-}
-
-/* Return a frequency in KHz, given an input fid */
-static u32 find_khz_freq_from_fid(u32 fid)
-{
-	return 1000 * find_freq_from_fid(fid);
-}
-
-static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
-		u32 pstate)
-{
-	return data[pstate].frequency;
-}
-
-/* Return the vco fid for an input fid
- *
- * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
- * only from corresponding high fids. This returns "high" fid corresponding to
- * "low" one.
- */
-static u32 convert_fid_to_vco_fid(u32 fid)
-{
-	if (fid < HI_FID_TABLE_BOTTOM)
-		return 8 + (2 * fid);
-	else
-		return fid;
-}
-
-/*
- * Return 1 if the pending bit is set. Unless we just instructed the processor
- * to transition to a new state, seeing this bit set is really bad news.
- */
-static int pending_bit_stuck(void)
-{
-	u32 lo, hi;
-
-	if (cpu_family == CPU_HW_PSTATE)
-		return 0;
-
-	rdmsr(MSR_FIDVID_STATUS, lo, hi);
-	return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
-}
-
-/*
- * Update the global current fid / vid values from the status msr.
- * Returns 1 on error.
- */
-static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
-{
-	u32 lo, hi;
-	u32 i = 0;
-
-	if (cpu_family == CPU_HW_PSTATE) {
-		rdmsr(MSR_PSTATE_STATUS, lo, hi);
-		i = lo & HW_PSTATE_MASK;
-		data->currpstate = i;
-
-		/*
-		 * a workaround for family 11h erratum 311 might cause
-		 * an "out-of-range Pstate if the core is in Pstate-0
-		 */
-		if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
-			data->currpstate = HW_PSTATE_0;
-
-		return 0;
-	}
-	do {
-		if (i++ > 10000) {
-			pr_debug("detected change pending stuck\n");
-			return 1;
-		}
-		rdmsr(MSR_FIDVID_STATUS, lo, hi);
-	} while (lo & MSR_S_LO_CHANGE_PENDING);
-
-	data->currvid = hi & MSR_S_HI_CURRENT_VID;
-	data->currfid = lo & MSR_S_LO_CURRENT_FID;
-
-	return 0;
-}
-
-/* the isochronous relief time */
-static void count_off_irt(struct powernow_k8_data *data)
-{
-	udelay((1 << data->irt) * 10);
-	return;
-}
-
-/* the voltage stabilization time */
-static void count_off_vst(struct powernow_k8_data *data)
-{
-	udelay(data->vstable * VST_UNITS_20US);
-	return;
-}
-
-/* need to init the control msr to a safe value (for each cpu) */
-static void fidvid_msr_init(void)
-{
-	u32 lo, hi;
-	u8 fid, vid;
-
-	rdmsr(MSR_FIDVID_STATUS, lo, hi);
-	vid = hi & MSR_S_HI_CURRENT_VID;
-	fid = lo & MSR_S_LO_CURRENT_FID;
-	lo = fid | (vid << MSR_C_LO_VID_SHIFT);
-	hi = MSR_C_HI_STP_GNT_BENIGN;
-	pr_debug("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
-	wrmsr(MSR_FIDVID_CTL, lo, hi);
-}
-
-/* write the new fid value along with the other control fields to the msr */
-static int write_new_fid(struct powernow_k8_data *data, u32 fid)
-{
-	u32 lo;
-	u32 savevid = data->currvid;
-	u32 i = 0;
-
-	if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
-		printk(KERN_ERR PFX "internal error - overflow on fid write\n");
-		return 1;
-	}
-
-	lo = fid;
-	lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
-	lo |= MSR_C_LO_INIT_FID_VID;
-
-	pr_debug("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
-		fid, lo, data->plllock * PLL_LOCK_CONVERSION);
-
-	do {
-		wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
-		if (i++ > 100) {
-			printk(KERN_ERR PFX
-				"Hardware error - pending bit very stuck - "
-				"no further pstate changes possible\n");
-			return 1;
-		}
-	} while (query_current_values_with_pending_wait(data));
-
-	count_off_irt(data);
-
-	if (savevid != data->currvid) {
-		printk(KERN_ERR PFX
-			"vid change on fid trans, old 0x%x, new 0x%x\n",
-			savevid, data->currvid);
-		return 1;
-	}
-
-	if (fid != data->currfid) {
-		printk(KERN_ERR PFX
-			"fid trans failed, fid 0x%x, curr 0x%x\n", fid,
-			data->currfid);
-		return 1;
-	}
-
-	return 0;
-}
-
-/* Write a new vid to the hardware */
-static int write_new_vid(struct powernow_k8_data *data, u32 vid)
-{
-	u32 lo;
-	u32 savefid = data->currfid;
-	int i = 0;
-
-	if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
-		printk(KERN_ERR PFX "internal error - overflow on vid write\n");
-		return 1;
-	}
-
-	lo = data->currfid;
-	lo |= (vid << MSR_C_LO_VID_SHIFT);
-	lo |= MSR_C_LO_INIT_FID_VID;
-
-	pr_debug("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
-		vid, lo, STOP_GRANT_5NS);
-
-	do {
-		wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
-		if (i++ > 100) {
-			printk(KERN_ERR PFX "internal error - pending bit "
-					"very stuck - no further pstate "
-					"changes possible\n");
-			return 1;
-		}
-	} while (query_current_values_with_pending_wait(data));
-
-	if (savefid != data->currfid) {
-		printk(KERN_ERR PFX "fid changed on vid trans, old "
-			"0x%x new 0x%x\n",
-		       savefid, data->currfid);
-		return 1;
-	}
-
-	if (vid != data->currvid) {
-		printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
-				"curr 0x%x\n",
-				vid, data->currvid);
-		return 1;
-	}
-
-	return 0;
-}
-
-/*
- * Reduce the vid by the max of step or reqvid.
- * Decreasing vid codes represent increasing voltages:
- * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
- */
-static int decrease_vid_code_by_step(struct powernow_k8_data *data,
-		u32 reqvid, u32 step)
-{
-	if ((data->currvid - reqvid) > step)
-		reqvid = data->currvid - step;
-
-	if (write_new_vid(data, reqvid))
-		return 1;
-
-	count_off_vst(data);
-
-	return 0;
-}
-
-/* Change hardware pstate by single MSR write */
-static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
-{
-	wrmsr(MSR_PSTATE_CTRL, pstate, 0);
-	data->currpstate = pstate;
-	return 0;
-}
-
-/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
-static int transition_fid_vid(struct powernow_k8_data *data,
-		u32 reqfid, u32 reqvid)
-{
-	if (core_voltage_pre_transition(data, reqvid, reqfid))
-		return 1;
-
-	if (core_frequency_transition(data, reqfid))
-		return 1;
-
-	if (core_voltage_post_transition(data, reqvid))
-		return 1;
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
-
-	if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
-		printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
-				"curr 0x%x 0x%x\n",
-				smp_processor_id(),
-				reqfid, reqvid, data->currfid, data->currvid);
-		return 1;
-	}
-
-	pr_debug("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
-		smp_processor_id(), data->currfid, data->currvid);
-
-	return 0;
-}
-
-/* Phase 1 - core voltage transition ... setup voltage */
-static int core_voltage_pre_transition(struct powernow_k8_data *data,
-		u32 reqvid, u32 reqfid)
-{
-	u32 rvosteps = data->rvo;
-	u32 savefid = data->currfid;
-	u32 maxvid, lo, rvomult = 1;
-
-	pr_debug("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
-		"reqvid 0x%x, rvo 0x%x\n",
-		smp_processor_id(),
-		data->currfid, data->currvid, reqvid, data->rvo);
-
-	if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP))
-		rvomult = 2;
-	rvosteps *= rvomult;
-	rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
-	maxvid = 0x1f & (maxvid >> 16);
-	pr_debug("ph1 maxvid=0x%x\n", maxvid);
-	if (reqvid < maxvid) /* lower numbers are higher voltages */
-		reqvid = maxvid;
-
-	while (data->currvid > reqvid) {
-		pr_debug("ph1: curr 0x%x, req vid 0x%x\n",
-			data->currvid, reqvid);
-		if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
-			return 1;
-	}
-
-	while ((rvosteps > 0) &&
-			((rvomult * data->rvo + data->currvid) > reqvid)) {
-		if (data->currvid == maxvid) {
-			rvosteps = 0;
-		} else {
-			pr_debug("ph1: changing vid for rvo, req 0x%x\n",
-				data->currvid - 1);
-			if (decrease_vid_code_by_step(data, data->currvid-1, 1))
-				return 1;
-			rvosteps--;
-		}
-	}
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
-
-	if (savefid != data->currfid) {
-		printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
-				data->currfid);
-		return 1;
-	}
-
-	pr_debug("ph1 complete, currfid 0x%x, currvid 0x%x\n",
-		data->currfid, data->currvid);
-
-	return 0;
-}
-
-/* Phase 2 - core frequency transition */
-static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
-{
-	u32 vcoreqfid, vcocurrfid, vcofiddiff;
-	u32 fid_interval, savevid = data->currvid;
-
-	if (data->currfid == reqfid) {
-		printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
-				data->currfid);
-		return 0;
-	}
-
-	pr_debug("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
-		"reqfid 0x%x\n",
-		smp_processor_id(),
-		data->currfid, data->currvid, reqfid);
-
-	vcoreqfid = convert_fid_to_vco_fid(reqfid);
-	vcocurrfid = convert_fid_to_vco_fid(data->currfid);
-	vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
-	    : vcoreqfid - vcocurrfid;
-
-	if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP))
-		vcofiddiff = 0;
-
-	while (vcofiddiff > 2) {
-		(data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
-
-		if (reqfid > data->currfid) {
-			if (data->currfid > LO_FID_TABLE_TOP) {
-				if (write_new_fid(data,
-						data->currfid + fid_interval))
-					return 1;
-			} else {
-				if (write_new_fid
-				    (data,
-				     2 + convert_fid_to_vco_fid(data->currfid)))
-					return 1;
-			}
-		} else {
-			if (write_new_fid(data, data->currfid - fid_interval))
-				return 1;
-		}
-
-		vcocurrfid = convert_fid_to_vco_fid(data->currfid);
-		vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
-		    : vcoreqfid - vcocurrfid;
-	}
-
-	if (write_new_fid(data, reqfid))
-		return 1;
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
-
-	if (data->currfid != reqfid) {
-		printk(KERN_ERR PFX
-			"ph2: mismatch, failed fid transition, "
-			"curr 0x%x, req 0x%x\n",
-			data->currfid, reqfid);
-		return 1;
-	}
-
-	if (savevid != data->currvid) {
-		printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
-			savevid, data->currvid);
-		return 1;
-	}
-
-	pr_debug("ph2 complete, currfid 0x%x, currvid 0x%x\n",
-		data->currfid, data->currvid);
-
-	return 0;
-}
-
-/* Phase 3 - core voltage transition flow ... jump to the final vid. */
-static int core_voltage_post_transition(struct powernow_k8_data *data,
-		u32 reqvid)
-{
-	u32 savefid = data->currfid;
-	u32 savereqvid = reqvid;
-
-	pr_debug("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
-		smp_processor_id(),
-		data->currfid, data->currvid);
-
-	if (reqvid != data->currvid) {
-		if (write_new_vid(data, reqvid))
-			return 1;
-
-		if (savefid != data->currfid) {
-			printk(KERN_ERR PFX
-			       "ph3: bad fid change, save 0x%x, curr 0x%x\n",
-			       savefid, data->currfid);
-			return 1;
-		}
-
-		if (data->currvid != reqvid) {
-			printk(KERN_ERR PFX
-			       "ph3: failed vid transition\n, "
-			       "req 0x%x, curr 0x%x",
-			       reqvid, data->currvid);
-			return 1;
-		}
-	}
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
-
-	if (savereqvid != data->currvid) {
-		pr_debug("ph3 failed, currvid 0x%x\n", data->currvid);
-		return 1;
-	}
-
-	if (savefid != data->currfid) {
-		pr_debug("ph3 failed, currfid changed 0x%x\n",
-			data->currfid);
-		return 1;
-	}
-
-	pr_debug("ph3 complete, currfid 0x%x, currvid 0x%x\n",
-		data->currfid, data->currvid);
-
-	return 0;
-}
-
-static void check_supported_cpu(void *_rc)
-{
-	u32 eax, ebx, ecx, edx;
-	int *rc = _rc;
-
-	*rc = -ENODEV;
-
-	if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD)
-		return;
-
-	eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
-	if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
-	    ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
-		return;
-
-	if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
-		if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
-		    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
-			printk(KERN_INFO PFX
-				"Processor cpuid %x not supported\n", eax);
-			return;
-		}
-
-		eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
-		if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
-			printk(KERN_INFO PFX
-			       "No frequency change capabilities detected\n");
-			return;
-		}
-
-		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
-		if ((edx & P_STATE_TRANSITION_CAPABLE)
-			!= P_STATE_TRANSITION_CAPABLE) {
-			printk(KERN_INFO PFX
-				"Power state transitions not supported\n");
-			return;
-		}
-	} else { /* must be a HW Pstate capable processor */
-		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
-		if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
-			cpu_family = CPU_HW_PSTATE;
-		else
-			return;
-	}
-
-	*rc = 0;
-}
-
-static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
-		u8 maxvid)
-{
-	unsigned int j;
-	u8 lastfid = 0xff;
-
-	for (j = 0; j < data->numps; j++) {
-		if (pst[j].vid > LEAST_VID) {
-			printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n",
-			       j, pst[j].vid);
-			return -EINVAL;
-		}
-		if (pst[j].vid < data->rvo) {
-			/* vid + rvo >= 0 */
-			printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
-			       " %d\n", j);
-			return -ENODEV;
-		}
-		if (pst[j].vid < maxvid + data->rvo) {
-			/* vid + rvo >= maxvid */
-			printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
-			       " %d\n", j);
-			return -ENODEV;
-		}
-		if (pst[j].fid > MAX_FID) {
-			printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate"
-			       " %d\n", j);
-			return -ENODEV;
-		}
-		if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) {
-			/* Only first fid is allowed to be in "low" range */
-			printk(KERN_ERR FW_BUG PFX "two low fids - %d : "
-			       "0x%x\n", j, pst[j].fid);
-			return -EINVAL;
-		}
-		if (pst[j].fid < lastfid)
-			lastfid = pst[j].fid;
-	}
-	if (lastfid & 1) {
-		printk(KERN_ERR FW_BUG PFX "lastfid invalid\n");
-		return -EINVAL;
-	}
-	if (lastfid > LO_FID_TABLE_TOP)
-		printk(KERN_INFO FW_BUG PFX
-			"first fid not from lo freq table\n");
-
-	return 0;
-}
-
-static void invalidate_entry(struct cpufreq_frequency_table *powernow_table,
-		unsigned int entry)
-{
-	powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
-}
-
-static void print_basics(struct powernow_k8_data *data)
-{
-	int j;
-	for (j = 0; j < data->numps; j++) {
-		if (data->powernow_table[j].frequency !=
-				CPUFREQ_ENTRY_INVALID) {
-			if (cpu_family == CPU_HW_PSTATE) {
-				printk(KERN_INFO PFX
-					"   %d : pstate %d (%d MHz)\n", j,
-					data->powernow_table[j].index,
-					data->powernow_table[j].frequency/1000);
-			} else {
-				printk(KERN_INFO PFX
-					"fid 0x%x (%d MHz), vid 0x%x\n",
-					data->powernow_table[j].index & 0xff,
-					data->powernow_table[j].frequency/1000,
-					data->powernow_table[j].index >> 8);
-			}
-		}
-	}
-	if (data->batps)
-		printk(KERN_INFO PFX "Only %d pstates on battery\n",
-				data->batps);
-}
-
-static u32 freq_from_fid_did(u32 fid, u32 did)
-{
-	u32 mhz = 0;
-
-	if (boot_cpu_data.x86 == 0x10)
-		mhz = (100 * (fid + 0x10)) >> did;
-	else if (boot_cpu_data.x86 == 0x11)
-		mhz = (100 * (fid + 8)) >> did;
-	else
-		BUG();
-
-	return mhz * 1000;
-}
-
-static int fill_powernow_table(struct powernow_k8_data *data,
-		struct pst_s *pst, u8 maxvid)
-{
-	struct cpufreq_frequency_table *powernow_table;
-	unsigned int j;
-
-	if (data->batps) {
-		/* use ACPI support to get full speed on mains power */
-		printk(KERN_WARNING PFX
-			"Only %d pstates usable (use ACPI driver for full "
-			"range\n", data->batps);
-		data->numps = data->batps;
-	}
-
-	for (j = 1; j < data->numps; j++) {
-		if (pst[j-1].fid >= pst[j].fid) {
-			printk(KERN_ERR PFX "PST out of sequence\n");
-			return -EINVAL;
-		}
-	}
-
-	if (data->numps < 2) {
-		printk(KERN_ERR PFX "no p states to transition\n");
-		return -ENODEV;
-	}
-
-	if (check_pst_table(data, pst, maxvid))
-		return -EINVAL;
-
-	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
-		* (data->numps + 1)), GFP_KERNEL);
-	if (!powernow_table) {
-		printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
-		return -ENOMEM;
-	}
-
-	for (j = 0; j < data->numps; j++) {
-		int freq;
-		powernow_table[j].index = pst[j].fid; /* lower 8 bits */
-		powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
-		freq = find_khz_freq_from_fid(pst[j].fid);
-		powernow_table[j].frequency = freq;
-	}
-	powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
-	powernow_table[data->numps].index = 0;
-
-	if (query_current_values_with_pending_wait(data)) {
-		kfree(powernow_table);
-		return -EIO;
-	}
-
-	pr_debug("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
-	data->powernow_table = powernow_table;
-	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
-		print_basics(data);
-
-	for (j = 0; j < data->numps; j++)
-		if ((pst[j].fid == data->currfid) &&
-		    (pst[j].vid == data->currvid))
-			return 0;
-
-	pr_debug("currfid/vid do not match PST, ignoring\n");
-	return 0;
-}
-
-/* Find and validate the PSB/PST table in BIOS. */
-static int find_psb_table(struct powernow_k8_data *data)
-{
-	struct psb_s *psb;
-	unsigned int i;
-	u32 mvs;
-	u8 maxvid;
-	u32 cpst = 0;
-	u32 thiscpuid;
-
-	for (i = 0xc0000; i < 0xffff0; i += 0x10) {
-		/* Scan BIOS looking for the signature. */
-		/* It can not be at ffff0 - it is too big. */
-
-		psb = phys_to_virt(i);
-		if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
-			continue;
-
-		pr_debug("found PSB header at 0x%p\n", psb);
-
-		pr_debug("table vers: 0x%x\n", psb->tableversion);
-		if (psb->tableversion != PSB_VERSION_1_4) {
-			printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n");
-			return -ENODEV;
-		}
-
-		pr_debug("flags: 0x%x\n", psb->flags1);
-		if (psb->flags1) {
-			printk(KERN_ERR FW_BUG PFX "unknown flags\n");
-			return -ENODEV;
-		}
-
-		data->vstable = psb->vstable;
-		pr_debug("voltage stabilization time: %d(*20us)\n",
-				data->vstable);
-
-		pr_debug("flags2: 0x%x\n", psb->flags2);
-		data->rvo = psb->flags2 & 3;
-		data->irt = ((psb->flags2) >> 2) & 3;
-		mvs = ((psb->flags2) >> 4) & 3;
-		data->vidmvs = 1 << mvs;
-		data->batps = ((psb->flags2) >> 6) & 3;
-
-		pr_debug("ramp voltage offset: %d\n", data->rvo);
-		pr_debug("isochronous relief time: %d\n", data->irt);
-		pr_debug("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
-
-		pr_debug("numpst: 0x%x\n", psb->num_tables);
-		cpst = psb->num_tables;
-		if ((psb->cpuid == 0x00000fc0) ||
-		    (psb->cpuid == 0x00000fe0)) {
-			thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
-			if ((thiscpuid == 0x00000fc0) ||
-			    (thiscpuid == 0x00000fe0))
-				cpst = 1;
-		}
-		if (cpst != 1) {
-			printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
-			return -ENODEV;
-		}
-
-		data->plllock = psb->plllocktime;
-		pr_debug("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
-		pr_debug("maxfid: 0x%x\n", psb->maxfid);
-		pr_debug("maxvid: 0x%x\n", psb->maxvid);
-		maxvid = psb->maxvid;
-
-		data->numps = psb->numps;
-		pr_debug("numpstates: 0x%x\n", data->numps);
-		return fill_powernow_table(data,
-				(struct pst_s *)(psb+1), maxvid);
-	}
-	/*
-	 * If you see this message, complain to BIOS manufacturer. If
-	 * he tells you "we do not support Linux" or some similar
-	 * nonsense, remember that Windows 2000 uses the same legacy
-	 * mechanism that the old Linux PSB driver uses. Tell them it
-	 * is broken with Windows 2000.
-	 *
-	 * The reference to the AMD documentation is chapter 9 in the
-	 * BIOS and Kernel Developer's Guide, which is available on
-	 * www.amd.com
-	 */
-	printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
-	printk(KERN_ERR PFX "Make sure that your BIOS is up to date"
-		" and Cool'N'Quiet support is enabled in BIOS setup\n");
-	return -ENODEV;
-}
-
-static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
-		unsigned int index)
-{
-	u64 control;
-
-	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
-		return;
-
-	control = data->acpi_data.states[index].control;
-	data->irt = (control >> IRT_SHIFT) & IRT_MASK;
-	data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
-	data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
-	data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
-	data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
-	data->vstable = (control >> VST_SHIFT) & VST_MASK;
-}
-
-static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
-{
-	struct cpufreq_frequency_table *powernow_table;
-	int ret_val = -ENODEV;
-	u64 control, status;
-
-	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
-		pr_debug("register performance failed: bad ACPI data\n");
-		return -EIO;
-	}
-
-	/* verify the data contained in the ACPI structures */
-	if (data->acpi_data.state_count <= 1) {
-		pr_debug("No ACPI P-States\n");
-		goto err_out;
-	}
-
-	control = data->acpi_data.control_register.space_id;
-	status = data->acpi_data.status_register.space_id;
-
-	if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-	    (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
-		pr_debug("Invalid control/status registers (%llx - %llx)\n",
-			control, status);
-		goto err_out;
-	}
-
-	/* fill in data->powernow_table */
-	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
-		* (data->acpi_data.state_count + 1)), GFP_KERNEL);
-	if (!powernow_table) {
-		pr_debug("powernow_table memory alloc failure\n");
-		goto err_out;
-	}
-
-	/* fill in data */
-	data->numps = data->acpi_data.state_count;
-	powernow_k8_acpi_pst_values(data, 0);
-
-	if (cpu_family == CPU_HW_PSTATE)
-		ret_val = fill_powernow_table_pstate(data, powernow_table);
-	else
-		ret_val = fill_powernow_table_fidvid(data, powernow_table);
-	if (ret_val)
-		goto err_out_mem;
-
-	powernow_table[data->acpi_data.state_count].frequency =
-		CPUFREQ_TABLE_END;
-	powernow_table[data->acpi_data.state_count].index = 0;
-	data->powernow_table = powernow_table;
-
-	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
-		print_basics(data);
-
-	/* notify BIOS that we exist */
-	acpi_processor_notify_smm(THIS_MODULE);
-
-	if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
-		printk(KERN_ERR PFX
-				"unable to alloc powernow_k8_data cpumask\n");
-		ret_val = -ENOMEM;
-		goto err_out_mem;
-	}
-
-	return 0;
-
-err_out_mem:
-	kfree(powernow_table);
-
-err_out:
-	acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
-
-	/* data->acpi_data.state_count informs us at ->exit()
-	 * whether ACPI was used */
-	data->acpi_data.state_count = 0;
-
-	return ret_val;
-}
-
-static int fill_powernow_table_pstate(struct powernow_k8_data *data,
-		struct cpufreq_frequency_table *powernow_table)
-{
-	int i;
-	u32 hi = 0, lo = 0;
-	rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
-	data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
-
-	for (i = 0; i < data->acpi_data.state_count; i++) {
-		u32 index;
-
-		index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
-		if (index > data->max_hw_pstate) {
-			printk(KERN_ERR PFX "invalid pstate %d - "
-					"bad value %d.\n", i, index);
-			printk(KERN_ERR PFX "Please report to BIOS "
-					"manufacturer\n");
-			invalidate_entry(powernow_table, i);
-			continue;
-		}
-		rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
-		if (!(hi & HW_PSTATE_VALID_MASK)) {
-			pr_debug("invalid pstate %d, ignoring\n", index);
-			invalidate_entry(powernow_table, i);
-			continue;
-		}
-
-		powernow_table[i].index = index;
-
-		/* Frequency may be rounded for these */
-		if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
-				 || boot_cpu_data.x86 == 0x11) {
-			powernow_table[i].frequency =
-				freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
-		} else
-			powernow_table[i].frequency =
-				data->acpi_data.states[i].core_frequency * 1000;
-	}
-	return 0;
-}
-
-static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
-		struct cpufreq_frequency_table *powernow_table)
-{
-	int i;
-
-	for (i = 0; i < data->acpi_data.state_count; i++) {
-		u32 fid;
-		u32 vid;
-		u32 freq, index;
-		u64 status, control;
-
-		if (data->exttype) {
-			status =  data->acpi_data.states[i].status;
-			fid = status & EXT_FID_MASK;
-			vid = (status >> VID_SHIFT) & EXT_VID_MASK;
-		} else {
-			control =  data->acpi_data.states[i].control;
-			fid = control & FID_MASK;
-			vid = (control >> VID_SHIFT) & VID_MASK;
-		}
-
-		pr_debug("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
-
-		index = fid | (vid<<8);
-		powernow_table[i].index = index;
-
-		freq = find_khz_freq_from_fid(fid);
-		powernow_table[i].frequency = freq;
-
-		/* verify frequency is OK */
-		if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
-			pr_debug("invalid freq %u kHz, ignoring\n", freq);
-			invalidate_entry(powernow_table, i);
-			continue;
-		}
-
-		/* verify voltage is OK -
-		 * BIOSs are using "off" to indicate invalid */
-		if (vid == VID_OFF) {
-			pr_debug("invalid vid %u, ignoring\n", vid);
-			invalidate_entry(powernow_table, i);
-			continue;
-		}
-
-		if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
-			printk(KERN_INFO PFX "invalid freq entries "
-				"%u kHz vs. %u kHz\n", freq,
-				(unsigned int)
-				(data->acpi_data.states[i].core_frequency
-				 * 1000));
-			invalidate_entry(powernow_table, i);
-			continue;
-		}
-	}
-	return 0;
-}
-
-static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
-{
-	if (data->acpi_data.state_count)
-		acpi_processor_unregister_performance(&data->acpi_data,
-				data->cpu);
-	free_cpumask_var(data->acpi_data.shared_cpu_map);
-}
-
-static int get_transition_latency(struct powernow_k8_data *data)
-{
-	int max_latency = 0;
-	int i;
-	for (i = 0; i < data->acpi_data.state_count; i++) {
-		int cur_latency = data->acpi_data.states[i].transition_latency
-			+ data->acpi_data.states[i].bus_master_latency;
-		if (cur_latency > max_latency)
-			max_latency = cur_latency;
-	}
-	if (max_latency == 0) {
-		/*
-		 * Fam 11h and later may return 0 as transition latency. This
-		 * is intended and means "very fast". While cpufreq core and
-		 * governors currently can handle that gracefully, better set it
-		 * to 1 to avoid problems in the future.
-		 */
-		if (boot_cpu_data.x86 < 0x11)
-			printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
-				"latency\n");
-		max_latency = 1;
-	}
-	/* value in usecs, needs to be in nanoseconds */
-	return 1000 * max_latency;
-}
-
-/* Take a frequency, and issue the fid/vid transition command */
-static int transition_frequency_fidvid(struct powernow_k8_data *data,
-		unsigned int index)
-{
-	u32 fid = 0;
-	u32 vid = 0;
-	int res, i;
-	struct cpufreq_freqs freqs;
-
-	pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
-
-	/* fid/vid correctness check for k8 */
-	/* fid are the lower 8 bits of the index we stored into
-	 * the cpufreq frequency table in find_psb_table, vid
-	 * are the upper 8 bits.
-	 */
-	fid = data->powernow_table[index].index & 0xFF;
-	vid = (data->powernow_table[index].index & 0xFF00) >> 8;
-
-	pr_debug("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
-
-	if ((data->currvid == vid) && (data->currfid == fid)) {
-		pr_debug("target matches current values (fid 0x%x, vid 0x%x)\n",
-			fid, vid);
-		return 0;
-	}
-
-	pr_debug("cpu %d, changing to fid 0x%x, vid 0x%x\n",
-		smp_processor_id(), fid, vid);
-	freqs.old = find_khz_freq_from_fid(data->currfid);
-	freqs.new = find_khz_freq_from_fid(fid);
-
-	for_each_cpu(i, data->available_cores) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	}
-
-	res = transition_fid_vid(data, fid, vid);
-	freqs.new = find_khz_freq_from_fid(data->currfid);
-
-	for_each_cpu(i, data->available_cores) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	}
-	return res;
-}
-
-/* Take a frequency, and issue the hardware pstate transition command */
-static int transition_frequency_pstate(struct powernow_k8_data *data,
-		unsigned int index)
-{
-	u32 pstate = 0;
-	int res, i;
-	struct cpufreq_freqs freqs;
-
-	pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
-
-	/* get MSR index for hardware pstate transition */
-	pstate = index & HW_PSTATE_MASK;
-	if (pstate > data->max_hw_pstate)
-		return 0;
-	freqs.old = find_khz_freq_from_pstate(data->powernow_table,
-			data->currpstate);
-	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
-
-	for_each_cpu(i, data->available_cores) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	}
-
-	res = transition_pstate(data, pstate);
-	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
-
-	for_each_cpu(i, data->available_cores) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	}
-	return res;
-}
-
-/* Driver entry point to switch to the target frequency */
-static int powernowk8_target(struct cpufreq_policy *pol,
-		unsigned targfreq, unsigned relation)
-{
-	cpumask_var_t oldmask;
-	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
-	u32 checkfid;
-	u32 checkvid;
-	unsigned int newstate;
-	int ret = -EIO;
-
-	if (!data)
-		return -EINVAL;
-
-	checkfid = data->currfid;
-	checkvid = data->currvid;
-
-	/* only run on specific CPU from here on. */
-	/* This is poor form: use a workqueue or smp_call_function_single */
-	if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
-		return -ENOMEM;
-
-	cpumask_copy(oldmask, tsk_cpus_allowed(current));
-	set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
-
-	if (smp_processor_id() != pol->cpu) {
-		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
-		goto err_out;
-	}
-
-	if (pending_bit_stuck()) {
-		printk(KERN_ERR PFX "failing targ, change pending bit set\n");
-		goto err_out;
-	}
-
-	pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
-		pol->cpu, targfreq, pol->min, pol->max, relation);
-
-	if (query_current_values_with_pending_wait(data))
-		goto err_out;
-
-	if (cpu_family != CPU_HW_PSTATE) {
-		pr_debug("targ: curr fid 0x%x, vid 0x%x\n",
-		data->currfid, data->currvid);
-
-		if ((checkvid != data->currvid) ||
-		    (checkfid != data->currfid)) {
-			printk(KERN_INFO PFX
-				"error - out of sync, fix 0x%x 0x%x, "
-				"vid 0x%x 0x%x\n",
-				checkfid, data->currfid,
-				checkvid, data->currvid);
-		}
-	}
-
-	if (cpufreq_frequency_table_target(pol, data->powernow_table,
-				targfreq, relation, &newstate))
-		goto err_out;
-
-	mutex_lock(&fidvid_mutex);
-
-	powernow_k8_acpi_pst_values(data, newstate);
-
-	if (cpu_family == CPU_HW_PSTATE)
-		ret = transition_frequency_pstate(data, newstate);
-	else
-		ret = transition_frequency_fidvid(data, newstate);
-	if (ret) {
-		printk(KERN_ERR PFX "transition frequency failed\n");
-		ret = 1;
-		mutex_unlock(&fidvid_mutex);
-		goto err_out;
-	}
-	mutex_unlock(&fidvid_mutex);
-
-	if (cpu_family == CPU_HW_PSTATE)
-		pol->cur = find_khz_freq_from_pstate(data->powernow_table,
-				newstate);
-	else
-		pol->cur = find_khz_freq_from_fid(data->currfid);
-	ret = 0;
-
-err_out:
-	set_cpus_allowed_ptr(current, oldmask);
-	free_cpumask_var(oldmask);
-	return ret;
-}
-
-/* Driver entry point to verify the policy and range of frequencies */
-static int powernowk8_verify(struct cpufreq_policy *pol)
-{
-	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
-
-	if (!data)
-		return -EINVAL;
-
-	return cpufreq_frequency_table_verify(pol, data->powernow_table);
-}
-
-struct init_on_cpu {
-	struct powernow_k8_data *data;
-	int rc;
-};
-
-static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
-{
-	struct init_on_cpu *init_on_cpu = _init_on_cpu;
-
-	if (pending_bit_stuck()) {
-		printk(KERN_ERR PFX "failing init, change pending bit set\n");
-		init_on_cpu->rc = -ENODEV;
-		return;
-	}
-
-	if (query_current_values_with_pending_wait(init_on_cpu->data)) {
-		init_on_cpu->rc = -ENODEV;
-		return;
-	}
-
-	if (cpu_family == CPU_OPTERON)
-		fidvid_msr_init();
-
-	init_on_cpu->rc = 0;
-}
-
-/* per CPU init entry point to the driver */
-static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
-{
-	static const char ACPI_PSS_BIOS_BUG_MSG[] =
-		KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
-		FW_BUG PFX "Try again with latest BIOS.\n";
-	struct powernow_k8_data *data;
-	struct init_on_cpu init_on_cpu;
-	int rc;
-	struct cpuinfo_x86 *c = &cpu_data(pol->cpu);
-
-	if (!cpu_online(pol->cpu))
-		return -ENODEV;
-
-	smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
-	if (rc)
-		return -ENODEV;
-
-	data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
-	if (!data) {
-		printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
-		return -ENOMEM;
-	}
-
-	data->cpu = pol->cpu;
-	data->currpstate = HW_PSTATE_INVALID;
-
-	if (powernow_k8_cpu_init_acpi(data)) {
-		/*
-		 * Use the PSB BIOS structure. This is only available on
-		 * an UP version, and is deprecated by AMD.
-		 */
-		if (num_online_cpus() != 1) {
-			printk_once(ACPI_PSS_BIOS_BUG_MSG);
-			goto err_out;
-		}
-		if (pol->cpu != 0) {
-			printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
-			       "CPU other than CPU0. Complain to your BIOS "
-			       "vendor.\n");
-			goto err_out;
-		}
-		rc = find_psb_table(data);
-		if (rc)
-			goto err_out;
-
-		/* Take a crude guess here.
-		 * That guess was in microseconds, so multiply with 1000 */
-		pol->cpuinfo.transition_latency = (
-			 ((data->rvo + 8) * data->vstable * VST_UNITS_20US) +
-			 ((1 << data->irt) * 30)) * 1000;
-	} else /* ACPI _PSS objects available */
-		pol->cpuinfo.transition_latency = get_transition_latency(data);
-
-	/* only run on specific CPU from here on */
-	init_on_cpu.data = data;
-	smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
-				 &init_on_cpu, 1);
-	rc = init_on_cpu.rc;
-	if (rc != 0)
-		goto err_out_exit_acpi;
-
-	if (cpu_family == CPU_HW_PSTATE)
-		cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
-	else
-		cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
-	data->available_cores = pol->cpus;
-
-	if (cpu_family == CPU_HW_PSTATE)
-		pol->cur = find_khz_freq_from_pstate(data->powernow_table,
-				data->currpstate);
-	else
-		pol->cur = find_khz_freq_from_fid(data->currfid);
-	pr_debug("policy current frequency %d kHz\n", pol->cur);
-
-	/* min/max the cpu is capable of */
-	if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
-		printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n");
-		powernow_k8_cpu_exit_acpi(data);
-		kfree(data->powernow_table);
-		kfree(data);
-		return -EINVAL;
-	}
-
-	/* Check for APERF/MPERF support in hardware */
-	if (cpu_has(c, X86_FEATURE_APERFMPERF))
-		cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf;
-
-	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
-
-	if (cpu_family == CPU_HW_PSTATE)
-		pr_debug("cpu_init done, current pstate 0x%x\n",
-				data->currpstate);
-	else
-		pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n",
-			data->currfid, data->currvid);
-
-	per_cpu(powernow_data, pol->cpu) = data;
-
-	return 0;
-
-err_out_exit_acpi:
-	powernow_k8_cpu_exit_acpi(data);
-
-err_out:
-	kfree(data);
-	return -ENODEV;
-}
-
-static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
-{
-	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
-
-	if (!data)
-		return -EINVAL;
-
-	powernow_k8_cpu_exit_acpi(data);
-
-	cpufreq_frequency_table_put_attr(pol->cpu);
-
-	kfree(data->powernow_table);
-	kfree(data);
-	per_cpu(powernow_data, pol->cpu) = NULL;
-
-	return 0;
-}
-
-static void query_values_on_cpu(void *_err)
-{
-	int *err = _err;
-	struct powernow_k8_data *data = __this_cpu_read(powernow_data);
-
-	*err = query_current_values_with_pending_wait(data);
-}
-
-static unsigned int powernowk8_get(unsigned int cpu)
-{
-	struct powernow_k8_data *data = per_cpu(powernow_data, cpu);
-	unsigned int khz = 0;
-	int err;
-
-	if (!data)
-		return 0;
-
-	smp_call_function_single(cpu, query_values_on_cpu, &err, true);
-	if (err)
-		goto out;
-
-	if (cpu_family == CPU_HW_PSTATE)
-		khz = find_khz_freq_from_pstate(data->powernow_table,
-						data->currpstate);
-	else
-		khz = find_khz_freq_from_fid(data->currfid);
-
-
-out:
-	return khz;
-}
-
-static void _cpb_toggle_msrs(bool t)
-{
-	int cpu;
-
-	get_online_cpus();
-
-	rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
-
-	for_each_cpu(cpu, cpu_online_mask) {
-		struct msr *reg = per_cpu_ptr(msrs, cpu);
-		if (t)
-			reg->l &= ~BIT(25);
-		else
-			reg->l |= BIT(25);
-	}
-	wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
-
-	put_online_cpus();
-}
-
-/*
- * Switch on/off core performance boosting.
- *
- * 0=disable
- * 1=enable.
- */
-static void cpb_toggle(bool t)
-{
-	if (!cpb_capable)
-		return;
-
-	if (t && !cpb_enabled) {
-		cpb_enabled = true;
-		_cpb_toggle_msrs(t);
-		printk(KERN_INFO PFX "Core Boosting enabled.\n");
-	} else if (!t && cpb_enabled) {
-		cpb_enabled = false;
-		_cpb_toggle_msrs(t);
-		printk(KERN_INFO PFX "Core Boosting disabled.\n");
-	}
-}
-
-static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
-				 size_t count)
-{
-	int ret = -EINVAL;
-	unsigned long val = 0;
-
-	ret = strict_strtoul(buf, 10, &val);
-	if (!ret && (val == 0 || val == 1) && cpb_capable)
-		cpb_toggle(val);
-	else
-		return -EINVAL;
-
-	return count;
-}
-
-static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
-{
-	return sprintf(buf, "%u\n", cpb_enabled);
-}
-
-#define define_one_rw(_name) \
-static struct freq_attr _name = \
-__ATTR(_name, 0644, show_##_name, store_##_name)
-
-define_one_rw(cpb);
-
-static struct freq_attr *powernow_k8_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	&cpb,
-	NULL,
-};
-
-static struct cpufreq_driver cpufreq_amd64_driver = {
-	.verify		= powernowk8_verify,
-	.target		= powernowk8_target,
-	.bios_limit	= acpi_processor_get_bios_limit,
-	.init		= powernowk8_cpu_init,
-	.exit		= __devexit_p(powernowk8_cpu_exit),
-	.get		= powernowk8_get,
-	.name		= "powernow-k8",
-	.owner		= THIS_MODULE,
-	.attr		= powernow_k8_attr,
-};
-
-/*
- * Clear the boost-disable flag on the CPU_DOWN path so that this cpu
- * cannot block the remaining ones from boosting. On the CPU_UP path we
- * simply keep the boost-disable flag in sync with the current global
- * state.
- */
-static int cpb_notify(struct notifier_block *nb, unsigned long action,
-		      void *hcpu)
-{
-	unsigned cpu = (long)hcpu;
-	u32 lo, hi;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-
-		if (!cpb_enabled) {
-			rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
-			lo |= BIT(25);
-			wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
-		}
-		break;
-
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
-		lo &= ~BIT(25);
-		wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
-		break;
-
-	default:
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block cpb_nb = {
-	.notifier_call		= cpb_notify,
-};
-
-/* driver entry point for init */
-static int __cpuinit powernowk8_init(void)
-{
-	unsigned int i, supported_cpus = 0, cpu;
-	int rv;
-
-	for_each_online_cpu(i) {
-		int rc;
-		smp_call_function_single(i, check_supported_cpu, &rc, 1);
-		if (rc == 0)
-			supported_cpus++;
-	}
-
-	if (supported_cpus != num_online_cpus())
-		return -ENODEV;
-
-	printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
-		num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);
-
-	if (boot_cpu_has(X86_FEATURE_CPB)) {
-
-		cpb_capable = true;
-
-		msrs = msrs_alloc();
-		if (!msrs) {
-			printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
-			return -ENOMEM;
-		}
-
-		register_cpu_notifier(&cpb_nb);
-
-		rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
-
-		for_each_cpu(cpu, cpu_online_mask) {
-			struct msr *reg = per_cpu_ptr(msrs, cpu);
-			cpb_enabled |= !(!!(reg->l & BIT(25)));
-		}
-
-		printk(KERN_INFO PFX "Core Performance Boosting: %s.\n",
-			(cpb_enabled ? "on" : "off"));
-	}
-
-	rv = cpufreq_register_driver(&cpufreq_amd64_driver);
-	if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
-		unregister_cpu_notifier(&cpb_nb);
-		msrs_free(msrs);
-		msrs = NULL;
-	}
-	return rv;
-}
-
-/* driver entry point for term */
-static void __exit powernowk8_exit(void)
-{
-	pr_debug("exit\n");
-
-	if (boot_cpu_has(X86_FEATURE_CPB)) {
-		msrs_free(msrs);
-		msrs = NULL;
-
-		unregister_cpu_notifier(&cpb_nb);
-	}
-
-	cpufreq_unregister_driver(&cpufreq_amd64_driver);
-}
-
-MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
-		"Mark Langsdorf <mark.langsdorf@amd.com>");
-MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
-MODULE_LICENSE("GPL");
-
-late_initcall(powernowk8_init);
-module_exit(powernowk8_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
deleted file mode 100644
index 3744d26cdc2b..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- *  (c) 2003-2006 Advanced Micro Devices, Inc.
- *  Your use of this code is subject to the terms and conditions of the
- *  GNU general public license version 2. See "COPYING" or
- *  http://www.gnu.org/licenses/gpl.html
- */
-
-enum pstate {
-	HW_PSTATE_INVALID = 0xff,
-	HW_PSTATE_0 = 0,
-	HW_PSTATE_1 = 1,
-	HW_PSTATE_2 = 2,
-	HW_PSTATE_3 = 3,
-	HW_PSTATE_4 = 4,
-	HW_PSTATE_5 = 5,
-	HW_PSTATE_6 = 6,
-	HW_PSTATE_7 = 7,
-};
-
-struct powernow_k8_data {
-	unsigned int cpu;
-
-	u32 numps;  /* number of p-states */
-	u32 batps;  /* number of p-states supported on battery */
-	u32 max_hw_pstate; /* maximum legal hardware pstate */
-
-	/* these values are constant when the PSB is used to determine
-	 * vid/fid pairings, but are modified during the ->target() call
-	 * when ACPI is used */
-	u32 rvo;     /* ramp voltage offset */
-	u32 irt;     /* isochronous relief time */
-	u32 vidmvs;  /* usable value calculated from mvs */
-	u32 vstable; /* voltage stabilization time, units 20 us */
-	u32 plllock; /* pll lock time, units 1 us */
-        u32 exttype; /* extended interface = 1 */
-
-	/* keep track of the current fid / vid or pstate */
-	u32 currvid;
-	u32 currfid;
-	enum pstate currpstate;
-
-	/* the powernow_table includes all frequency and vid/fid pairings:
-	 * fid are the lower 8 bits of the index, vid are the upper 8 bits.
-	 * frequency is in kHz */
-	struct cpufreq_frequency_table  *powernow_table;
-
-	/* the acpi table needs to be kept. it's only available if ACPI was
-	 * used to determine valid frequency/vid/fid states */
-	struct acpi_processor_performance acpi_data;
-
-	/* we need to keep track of associated cores, but let cpufreq
-	 * handle hotplug events - so just point at cpufreq pol->cpus
-	 * structure */
-	struct cpumask *available_cores;
-};
-
-/* processor's cpuid instruction support */
-#define CPUID_PROCESSOR_SIGNATURE	1	/* function 1 */
-#define CPUID_XFAM			0x0ff00000	/* extended family */
-#define CPUID_XFAM_K8			0
-#define CPUID_XMOD			0x000f0000	/* extended model */
-#define CPUID_XMOD_REV_MASK		0x000c0000
-#define CPUID_XFAM_10H			0x00100000	/* family 0x10 */
-#define CPUID_USE_XFAM_XMOD		0x00000f00
-#define CPUID_GET_MAX_CAPABILITIES	0x80000000
-#define CPUID_FREQ_VOLT_CAPABILITIES	0x80000007
-#define P_STATE_TRANSITION_CAPABLE	6
-
-/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For     */
-/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and   */
-/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */
-/* the register number is placed in ecx, and the data is returned in edx:eax. */
-
-#define MSR_FIDVID_CTL      0xc0010041
-#define MSR_FIDVID_STATUS   0xc0010042
-
-/* Field definitions within the FID VID Low Control MSR : */
-#define MSR_C_LO_INIT_FID_VID     0x00010000
-#define MSR_C_LO_NEW_VID          0x00003f00
-#define MSR_C_LO_NEW_FID          0x0000003f
-#define MSR_C_LO_VID_SHIFT        8
-
-/* Field definitions within the FID VID High Control MSR : */
-#define MSR_C_HI_STP_GNT_TO	  0x000fffff
-
-/* Field definitions within the FID VID Low Status MSR : */
-#define MSR_S_LO_CHANGE_PENDING   0x80000000   /* cleared when completed */
-#define MSR_S_LO_MAX_RAMP_VID     0x3f000000
-#define MSR_S_LO_MAX_FID          0x003f0000
-#define MSR_S_LO_START_FID        0x00003f00
-#define MSR_S_LO_CURRENT_FID      0x0000003f
-
-/* Field definitions within the FID VID High Status MSR : */
-#define MSR_S_HI_MIN_WORKING_VID  0x3f000000
-#define MSR_S_HI_MAX_WORKING_VID  0x003f0000
-#define MSR_S_HI_START_VID        0x00003f00
-#define MSR_S_HI_CURRENT_VID      0x0000003f
-#define MSR_C_HI_STP_GNT_BENIGN	  0x00000001
-
-
-/* Hardware Pstate _PSS and MSR definitions */
-#define USE_HW_PSTATE		0x00000080
-#define HW_PSTATE_MASK 		0x00000007
-#define HW_PSTATE_VALID_MASK 	0x80000000
-#define HW_PSTATE_MAX_MASK	0x000000f0
-#define HW_PSTATE_MAX_SHIFT	4
-#define MSR_PSTATE_DEF_BASE 	0xc0010064 /* base of Pstate MSRs */
-#define MSR_PSTATE_STATUS 	0xc0010063 /* Pstate Status MSR */
-#define MSR_PSTATE_CTRL 	0xc0010062 /* Pstate control MSR */
-#define MSR_PSTATE_CUR_LIMIT	0xc0010061 /* pstate current limit MSR */
-
-/* define the two driver architectures */
-#define CPU_OPTERON 0
-#define CPU_HW_PSTATE 1
-
-
-/*
- * There are restrictions frequencies have to follow:
- * - only 1 entry in the low fid table ( <=1.4GHz )
- * - lowest entry in the high fid table must be >= 2 * the entry in the
- *   low fid table
- * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry
- *   in the low fid table
- * - the parts can only step at <= 200 MHz intervals, odd fid values are
- *   supported in revision G and later revisions.
- * - lowest frequency must be >= interprocessor hypertransport link speed
- *   (only applies to MP systems obviously)
- */
-
-/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
-#define LO_FID_TABLE_TOP     7	/* fid values marking the boundary    */
-#define HI_FID_TABLE_BOTTOM  8	/* between the low and high tables    */
-
-#define LO_VCOFREQ_TABLE_TOP    1400	/* corresponding vco frequency values */
-#define HI_VCOFREQ_TABLE_BOTTOM 1600
-
-#define MIN_FREQ_RESOLUTION  200 /* fids jump by 2 matching freq jumps by 200 */
-
-#define MAX_FID 0x2a	/* Spec only gives FID values as far as 5 GHz */
-#define LEAST_VID 0x3e	/* Lowest (numerically highest) useful vid value */
-
-#define MIN_FREQ 800	/* Min and max freqs, per spec */
-#define MAX_FREQ 5000
-
-#define INVALID_FID_MASK 0xffffffc0  /* not a valid fid if these bits are set */
-#define INVALID_VID_MASK 0xffffffc0  /* not a valid vid if these bits are set */
-
-#define VID_OFF 0x3f
-
-#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */
-
-#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */
-
-#define MAXIMUM_VID_STEPS 1  /* Current cpus only allow a single step of 25mV */
-#define VST_UNITS_20US 20   /* Voltage Stabilization Time is in units of 20us */
-
-/*
- * Most values of interest are encoded in a single field of the _PSS
- * entries: the "control" value.
- */
-
-#define IRT_SHIFT      30
-#define RVO_SHIFT      28
-#define EXT_TYPE_SHIFT 27
-#define PLL_L_SHIFT    20
-#define MVS_SHIFT      18
-#define VST_SHIFT      11
-#define VID_SHIFT       6
-#define IRT_MASK        3
-#define RVO_MASK        3
-#define EXT_TYPE_MASK   1
-#define PLL_L_MASK   0x7f
-#define MVS_MASK        3
-#define VST_MASK     0x7f
-#define VID_MASK     0x1f
-#define FID_MASK     0x1f
-#define EXT_VID_MASK 0x3f
-#define EXT_FID_MASK 0x3f
-
-
-/*
- * Version 1.4 of the PSB table. This table is constructed by BIOS and is
- * to tell the OS's power management driver which VIDs and FIDs are
- * supported by this particular processor.
- * If the data in the PSB / PST is wrong, then this driver will program the
- * wrong values into hardware, which is very likely to lead to a crash.
- */
-
-#define PSB_ID_STRING      "AMDK7PNOW!"
-#define PSB_ID_STRING_LEN  10
-
-#define PSB_VERSION_1_4  0x14
-
-struct psb_s {
-	u8 signature[10];
-	u8 tableversion;
-	u8 flags1;
-	u16 vstable;
-	u8 flags2;
-	u8 num_tables;
-	u32 cpuid;
-	u8 plllocktime;
-	u8 maxfid;
-	u8 maxvid;
-	u8 numps;
-};
-
-/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */
-struct pst_s {
-	u8 fid;
-	u8 vid;
-};
-
-static int core_voltage_pre_transition(struct powernow_k8_data *data,
-	u32 reqvid, u32 regfid);
-static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
-static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
-
-static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
-
-static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
-static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
deleted file mode 100644
index 1e205e6b1727..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- *	sc520_freq.c: cpufreq driver for the AMD Elan sc520
- *
- *	Copyright (C) 2005 Sean Young <sean@mess.org>
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- *	Based on elanfreq.c
- *
- *	2005-03-30: - initial revision
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-
-#include <linux/delay.h>
-#include <linux/cpufreq.h>
-#include <linux/timex.h>
-#include <linux/io.h>
-
-#include <asm/msr.h>
-
-#define MMCR_BASE	0xfffef000	/* The default base address */
-#define OFFS_CPUCTL	0x2   /* CPU Control Register */
-
-static __u8 __iomem *cpuctl;
-
-#define PFX "sc520_freq: "
-
-static struct cpufreq_frequency_table sc520_freq_table[] = {
-	{0x01,	100000},
-	{0x02,	133000},
-	{0,	CPUFREQ_TABLE_END},
-};
-
-static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
-{
-	u8 clockspeed_reg = *cpuctl;
-
-	switch (clockspeed_reg & 0x03) {
-	default:
-		printk(KERN_ERR PFX "error: cpuctl register has unexpected "
-				"value %02x\n", clockspeed_reg);
-	case 0x01:
-		return 100000;
-	case 0x02:
-		return 133000;
-	}
-}
-
-static void sc520_freq_set_cpu_state(unsigned int state)
-{
-
-	struct cpufreq_freqs	freqs;
-	u8 clockspeed_reg;
-
-	freqs.old = sc520_freq_get_cpu_frequency(0);
-	freqs.new = sc520_freq_table[state].frequency;
-	freqs.cpu = 0; /* AMD Elan is UP */
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-
-	pr_debug("attempting to set frequency to %i kHz\n",
-			sc520_freq_table[state].frequency);
-
-	local_irq_disable();
-
-	clockspeed_reg = *cpuctl & ~0x03;
-	*cpuctl = clockspeed_reg | sc520_freq_table[state].index;
-
-	local_irq_enable();
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-};
-
-static int sc520_freq_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]);
-}
-
-static int sc520_freq_target(struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int newstate = 0;
-
-	if (cpufreq_frequency_table_target(policy, sc520_freq_table,
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	sc520_freq_set_cpu_state(newstate);
-
-	return 0;
-}
-
-
-/*
- *	Module init and exit code
- */
-
-static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	int result;
-
-	/* capability check */
-	if (c->x86_vendor != X86_VENDOR_AMD ||
-	    c->x86 != 4 || c->x86_model != 9)
-		return -ENODEV;
-
-	/* cpuinfo and default policy values */
-	policy->cpuinfo.transition_latency = 1000000; /* 1ms */
-	policy->cur = sc520_freq_get_cpu_frequency(0);
-
-	result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table);
-	if (result)
-		return result;
-
-	cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu);
-
-	return 0;
-}
-
-
-static int sc520_freq_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-
-static struct freq_attr *sc520_freq_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-
-static struct cpufreq_driver sc520_freq_driver = {
-	.get	= sc520_freq_get_cpu_frequency,
-	.verify	= sc520_freq_verify,
-	.target	= sc520_freq_target,
-	.init	= sc520_freq_cpu_init,
-	.exit	= sc520_freq_cpu_exit,
-	.name	= "sc520_freq",
-	.owner	= THIS_MODULE,
-	.attr	= sc520_freq_attr,
-};
-
-
-static int __init sc520_freq_init(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	int err;
-
-	/* Test if we have the right hardware */
-	if (c->x86_vendor != X86_VENDOR_AMD ||
-	    c->x86 != 4 || c->x86_model != 9) {
-		pr_debug("no Elan SC520 processor found!\n");
-		return -ENODEV;
-	}
-	cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
-	if (!cpuctl) {
-		printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
-		return -ENOMEM;
-	}
-
-	err = cpufreq_register_driver(&sc520_freq_driver);
-	if (err)
-		iounmap(cpuctl);
-
-	return err;
-}
-
-
-static void __exit sc520_freq_exit(void)
-{
-	cpufreq_unregister_driver(&sc520_freq_driver);
-	iounmap(cpuctl);
-}
-
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Sean Young <sean@mess.org>");
-MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU");
-
-module_init(sc520_freq_init);
-module_exit(sc520_freq_exit);
-
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
deleted file mode 100644
index 6ea3455def21..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ /dev/null
@@ -1,633 +0,0 @@
-/*
- * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium
- * M (part of the Centrino chipset).
- *
- * Since the original Pentium M, most new Intel CPUs support Enhanced
- * SpeedStep.
- *
- * Despite the "SpeedStep" in the name, this is almost entirely unlike
- * traditional SpeedStep.
- *
- * Modelled on speedstep.c
- *
- * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/sched.h>	/* current */
-#include <linux/delay.h>
-#include <linux/compiler.h>
-#include <linux/gfp.h>
-
-#include <asm/msr.h>
-#include <asm/processor.h>
-#include <asm/cpufeature.h>
-
-#define PFX		"speedstep-centrino: "
-#define MAINTAINER	"cpufreq@vger.kernel.org"
-
-#define INTEL_MSR_RANGE	(0xffff)
-
-struct cpu_id
-{
-	__u8	x86;            /* CPU family */
-	__u8	x86_model;	/* model */
-	__u8	x86_mask;	/* stepping */
-};
-
-enum {
-	CPU_BANIAS,
-	CPU_DOTHAN_A1,
-	CPU_DOTHAN_A2,
-	CPU_DOTHAN_B0,
-	CPU_MP4HT_D0,
-	CPU_MP4HT_E0,
-};
-
-static const struct cpu_id cpu_ids[] = {
-	[CPU_BANIAS]	= { 6,  9, 5 },
-	[CPU_DOTHAN_A1]	= { 6, 13, 1 },
-	[CPU_DOTHAN_A2]	= { 6, 13, 2 },
-	[CPU_DOTHAN_B0]	= { 6, 13, 6 },
-	[CPU_MP4HT_D0]	= {15,  3, 4 },
-	[CPU_MP4HT_E0]	= {15,  4, 1 },
-};
-#define N_IDS	ARRAY_SIZE(cpu_ids)
-
-struct cpu_model
-{
-	const struct cpu_id *cpu_id;
-	const char	*model_name;
-	unsigned	max_freq; /* max clock in kHz */
-
-	struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
-};
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
-				  const struct cpu_id *x);
-
-/* Operating points for current CPU */
-static DEFINE_PER_CPU(struct cpu_model *, centrino_model);
-static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu);
-
-static struct cpufreq_driver centrino_driver;
-
-#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE
-
-/* Computes the correct form for IA32_PERF_CTL MSR for a particular
-   frequency/voltage operating point; frequency in MHz, volts in mV.
-   This is stored as "index" in the structure. */
-#define OP(mhz, mv)							\
-	{								\
-		.frequency = (mhz) * 1000,				\
-		.index = (((mhz)/100) << 8) | ((mv - 700) / 16)		\
-	}
-
-/*
- * These voltage tables were derived from the Intel Pentium M
- * datasheet, document 25261202.pdf, Table 5.  I have verified they
- * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium
- * M.
- */
-
-/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */
-static struct cpufreq_frequency_table banias_900[] =
-{
-	OP(600,  844),
-	OP(800,  988),
-	OP(900, 1004),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */
-static struct cpufreq_frequency_table banias_1000[] =
-{
-	OP(600,   844),
-	OP(800,   972),
-	OP(900,   988),
-	OP(1000, 1004),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */
-static struct cpufreq_frequency_table banias_1100[] =
-{
-	OP( 600,  956),
-	OP( 800, 1020),
-	OP( 900, 1100),
-	OP(1000, 1164),
-	OP(1100, 1180),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-
-/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */
-static struct cpufreq_frequency_table banias_1200[] =
-{
-	OP( 600,  956),
-	OP( 800, 1004),
-	OP( 900, 1020),
-	OP(1000, 1100),
-	OP(1100, 1164),
-	OP(1200, 1180),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.30GHz (Banias) */
-static struct cpufreq_frequency_table banias_1300[] =
-{
-	OP( 600,  956),
-	OP( 800, 1260),
-	OP(1000, 1292),
-	OP(1200, 1356),
-	OP(1300, 1388),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.40GHz (Banias) */
-static struct cpufreq_frequency_table banias_1400[] =
-{
-	OP( 600,  956),
-	OP( 800, 1180),
-	OP(1000, 1308),
-	OP(1200, 1436),
-	OP(1400, 1484),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.50GHz (Banias) */
-static struct cpufreq_frequency_table banias_1500[] =
-{
-	OP( 600,  956),
-	OP( 800, 1116),
-	OP(1000, 1228),
-	OP(1200, 1356),
-	OP(1400, 1452),
-	OP(1500, 1484),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.60GHz (Banias) */
-static struct cpufreq_frequency_table banias_1600[] =
-{
-	OP( 600,  956),
-	OP( 800, 1036),
-	OP(1000, 1164),
-	OP(1200, 1276),
-	OP(1400, 1420),
-	OP(1600, 1484),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-
-/* Intel Pentium M processor 1.70GHz (Banias) */
-static struct cpufreq_frequency_table banias_1700[] =
-{
-	OP( 600,  956),
-	OP( 800, 1004),
-	OP(1000, 1116),
-	OP(1200, 1228),
-	OP(1400, 1308),
-	OP(1700, 1484),
-	{ .frequency = CPUFREQ_TABLE_END }
-};
-#undef OP
-
-#define _BANIAS(cpuid, max, name)	\
-{	.cpu_id		= cpuid,	\
-	.model_name	= "Intel(R) Pentium(R) M processor " name "MHz", \
-	.max_freq	= (max)*1000,	\
-	.op_points	= banias_##max,	\
-}
-#define BANIAS(max)	_BANIAS(&cpu_ids[CPU_BANIAS], max, #max)
-
-/* CPU models, their operating frequency range, and freq/voltage
-   operating points */
-static struct cpu_model models[] =
-{
-	_BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"),
-	BANIAS(1000),
-	BANIAS(1100),
-	BANIAS(1200),
-	BANIAS(1300),
-	BANIAS(1400),
-	BANIAS(1500),
-	BANIAS(1600),
-	BANIAS(1700),
-
-	/* NULL model_name is a wildcard */
-	{ &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL },
-	{ &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL },
-	{ &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL },
-	{ &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL },
-	{ &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL },
-
-	{ NULL, }
-};
-#undef _BANIAS
-#undef BANIAS
-
-static int centrino_cpu_init_table(struct cpufreq_policy *policy)
-{
-	struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
-	struct cpu_model *model;
-
-	for(model = models; model->cpu_id != NULL; model++)
-		if (centrino_verify_cpu_id(cpu, model->cpu_id) &&
-		    (model->model_name == NULL ||
-		     strcmp(cpu->x86_model_id, model->model_name) == 0))
-			break;
-
-	if (model->cpu_id == NULL) {
-		/* No match at all */
-		pr_debug("no support for CPU model \"%s\": "
-		       "send /proc/cpuinfo to " MAINTAINER "\n",
-		       cpu->x86_model_id);
-		return -ENOENT;
-	}
-
-	if (model->op_points == NULL) {
-		/* Matched a non-match */
-		pr_debug("no table support for CPU model \"%s\"\n",
-		       cpu->x86_model_id);
-		pr_debug("try using the acpi-cpufreq driver\n");
-		return -ENOENT;
-	}
-
-	per_cpu(centrino_model, policy->cpu) = model;
-
-	pr_debug("found \"%s\": max frequency: %dkHz\n",
-	       model->model_name, model->max_freq);
-
-	return 0;
-}
-
-#else
-static inline int centrino_cpu_init_table(struct cpufreq_policy *policy)
-{
-	return -ENODEV;
-}
-#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
-
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
-				  const struct cpu_id *x)
-{
-	if ((c->x86 == x->x86) &&
-	    (c->x86_model == x->x86_model) &&
-	    (c->x86_mask == x->x86_mask))
-		return 1;
-	return 0;
-}
-
-/* To be called only after centrino_model is initialized */
-static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
-{
-	int i;
-
-	/*
-	 * Extract clock in kHz from PERF_CTL value
-	 * for centrino, as some DSDTs are buggy.
-	 * Ideally, this can be done using the acpi_data structure.
-	 */
-	if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) ||
-	    (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) ||
-	    (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) {
-		msr = (msr >> 8) & 0xff;
-		return msr * 100000;
-	}
-
-	if ((!per_cpu(centrino_model, cpu)) ||
-	    (!per_cpu(centrino_model, cpu)->op_points))
-		return 0;
-
-	msr &= 0xffff;
-	for (i = 0;
-		per_cpu(centrino_model, cpu)->op_points[i].frequency
-							!= CPUFREQ_TABLE_END;
-	     i++) {
-		if (msr == per_cpu(centrino_model, cpu)->op_points[i].index)
-			return per_cpu(centrino_model, cpu)->
-							op_points[i].frequency;
-	}
-	if (failsafe)
-		return per_cpu(centrino_model, cpu)->op_points[i-1].frequency;
-	else
-		return 0;
-}
-
-/* Return the current CPU frequency in kHz */
-static unsigned int get_cur_freq(unsigned int cpu)
-{
-	unsigned l, h;
-	unsigned clock_freq;
-
-	rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h);
-	clock_freq = extract_clock(l, cpu, 0);
-
-	if (unlikely(clock_freq == 0)) {
-		/*
-		 * On some CPUs, we can see transient MSR values (which are
-		 * not present in _PSS), while CPU is doing some automatic
-		 * P-state transition (like TM2). Get the last freq set 
-		 * in PERF_CTL.
-		 */
-		rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h);
-		clock_freq = extract_clock(l, cpu, 1);
-	}
-	return clock_freq;
-}
-
-
-static int centrino_cpu_init(struct cpufreq_policy *policy)
-{
-	struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
-	unsigned freq;
-	unsigned l, h;
-	int ret;
-	int i;
-
-	/* Only Intel makes Enhanced Speedstep-capable CPUs */
-	if (cpu->x86_vendor != X86_VENDOR_INTEL ||
-	    !cpu_has(cpu, X86_FEATURE_EST))
-		return -ENODEV;
-
-	if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
-		centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
-
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	for (i = 0; i < N_IDS; i++)
-		if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
-			break;
-
-	if (i != N_IDS)
-		per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i];
-
-	if (!per_cpu(centrino_cpu, policy->cpu)) {
-		pr_debug("found unsupported CPU with "
-		"Enhanced SpeedStep: send /proc/cpuinfo to "
-		MAINTAINER "\n");
-		return -ENODEV;
-	}
-
-	if (centrino_cpu_init_table(policy)) {
-		return -ENODEV;
-	}
-
-	/* Check to see if Enhanced SpeedStep is enabled, and try to
-	   enable it if not. */
-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-
-	if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-		l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
-		pr_debug("trying to enable Enhanced SpeedStep (%x)\n", l);
-		wrmsr(MSR_IA32_MISC_ENABLE, l, h);
-
-		/* check to see if it stuck */
-		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-		if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-			printk(KERN_INFO PFX
-				"couldn't enable Enhanced SpeedStep\n");
-			return -ENODEV;
-		}
-	}
-
-	freq = get_cur_freq(policy->cpu);
-	policy->cpuinfo.transition_latency = 10000;
-						/* 10uS transition latency */
-	policy->cur = freq;
-
-	pr_debug("centrino_cpu_init: cur=%dkHz\n", policy->cur);
-
-	ret = cpufreq_frequency_table_cpuinfo(policy,
-		per_cpu(centrino_model, policy->cpu)->op_points);
-	if (ret)
-		return (ret);
-
-	cpufreq_frequency_table_get_attr(
-		per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu);
-
-	return 0;
-}
-
-static int centrino_cpu_exit(struct cpufreq_policy *policy)
-{
-	unsigned int cpu = policy->cpu;
-
-	if (!per_cpu(centrino_model, cpu))
-		return -ENODEV;
-
-	cpufreq_frequency_table_put_attr(cpu);
-
-	per_cpu(centrino_model, cpu) = NULL;
-
-	return 0;
-}
-
-/**
- * centrino_verify - verifies a new CPUFreq policy
- * @policy: new policy
- *
- * Limit must be within this model's frequency range at least one
- * border included.
- */
-static int centrino_verify (struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy,
-			per_cpu(centrino_model, policy->cpu)->op_points);
-}
-
-/**
- * centrino_setpolicy - set a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency
- *	(CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * Sets a new CPUFreq policy.
- */
-static int centrino_target (struct cpufreq_policy *policy,
-			    unsigned int target_freq,
-			    unsigned int relation)
-{
-	unsigned int    newstate = 0;
-	unsigned int	msr, oldmsr = 0, h = 0, cpu = policy->cpu;
-	struct cpufreq_freqs	freqs;
-	int			retval = 0;
-	unsigned int		j, k, first_cpu, tmp;
-	cpumask_var_t covered_cpus;
-
-	if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)))
-		return -ENOMEM;
-
-	if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
-		retval = -ENODEV;
-		goto out;
-	}
-
-	if (unlikely(cpufreq_frequency_table_target(policy,
-			per_cpu(centrino_model, cpu)->op_points,
-			target_freq,
-			relation,
-			&newstate))) {
-		retval = -EINVAL;
-		goto out;
-	}
-
-	first_cpu = 1;
-	for_each_cpu(j, policy->cpus) {
-		int good_cpu;
-
-		/* cpufreq holds the hotplug lock, so we are safe here */
-		if (!cpu_online(j))
-			continue;
-
-		/*
-		 * Support for SMP systems.
-		 * Make sure we are running on CPU that wants to change freq
-		 */
-		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
-			good_cpu = cpumask_any_and(policy->cpus,
-						   cpu_online_mask);
-		else
-			good_cpu = j;
-
-		if (good_cpu >= nr_cpu_ids) {
-			pr_debug("couldn't limit to CPUs in this domain\n");
-			retval = -EAGAIN;
-			if (first_cpu) {
-				/* We haven't started the transition yet. */
-				goto out;
-			}
-			break;
-		}
-
-		msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
-
-		if (first_cpu) {
-			rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h);
-			if (msr == (oldmsr & 0xffff)) {
-				pr_debug("no change needed - msr was and needs "
-					"to be %x\n", oldmsr);
-				retval = 0;
-				goto out;
-			}
-
-			freqs.old = extract_clock(oldmsr, cpu, 0);
-			freqs.new = extract_clock(msr, cpu, 0);
-
-			pr_debug("target=%dkHz old=%d new=%d msr=%04x\n",
-				target_freq, freqs.old, freqs.new, msr);
-
-			for_each_cpu(k, policy->cpus) {
-				if (!cpu_online(k))
-					continue;
-				freqs.cpu = k;
-				cpufreq_notify_transition(&freqs,
-					CPUFREQ_PRECHANGE);
-			}
-
-			first_cpu = 0;
-			/* all but 16 LSB are reserved, treat them with care */
-			oldmsr &= ~0xffff;
-			msr &= 0xffff;
-			oldmsr |= msr;
-		}
-
-		wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h);
-		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
-			break;
-
-		cpumask_set_cpu(j, covered_cpus);
-	}
-
-	for_each_cpu(k, policy->cpus) {
-		if (!cpu_online(k))
-			continue;
-		freqs.cpu = k;
-		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	}
-
-	if (unlikely(retval)) {
-		/*
-		 * We have failed halfway through the frequency change.
-		 * We have sent callbacks to policy->cpus and
-		 * MSRs have already been written on coverd_cpus.
-		 * Best effort undo..
-		 */
-
-		for_each_cpu(j, covered_cpus)
-			wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h);
-
-		tmp = freqs.new;
-		freqs.new = freqs.old;
-		freqs.old = tmp;
-		for_each_cpu(j, policy->cpus) {
-			if (!cpu_online(j))
-				continue;
-			cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-			cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-		}
-	}
-	retval = 0;
-
-out:
-	free_cpumask_var(covered_cpus);
-	return retval;
-}
-
-static struct freq_attr* centrino_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver centrino_driver = {
-	.name		= "centrino", /* should be speedstep-centrino,
-					 but there's a 16 char limit */
-	.init		= centrino_cpu_init,
-	.exit		= centrino_cpu_exit,
-	.verify		= centrino_verify,
-	.target		= centrino_target,
-	.get		= get_cur_freq,
-	.attr           = centrino_attr,
-	.owner		= THIS_MODULE,
-};
-
-
-/**
- * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver
- *
- * Initializes the Enhanced SpeedStep support. Returns -ENODEV on
- * unsupported devices, -ENOENT if there's no voltage table for this
- * particular CPU model, -EINVAL on problems during initiatization,
- * and zero on success.
- *
- * This is quite picky.  Not only does the CPU have to advertise the
- * "est" flag in the cpuid capability flags, we look for a specific
- * CPU model and stepping, and we need to have the exact model name in
- * our voltage tables.  That is, be paranoid about not releasing
- * someone's valuable magic smoke.
- */
-static int __init centrino_init(void)
-{
-	struct cpuinfo_x86 *cpu = &cpu_data(0);
-
-	if (!cpu_has(cpu, X86_FEATURE_EST))
-		return -ENODEV;
-
-	return cpufreq_register_driver(&centrino_driver);
-}
-
-static void __exit centrino_exit(void)
-{
-	cpufreq_unregister_driver(&centrino_driver);
-}
-
-MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
-MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors.");
-MODULE_LICENSE ("GPL");
-
-late_initcall(centrino_init);
-module_exit(centrino_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
deleted file mode 100644
index a748ce782fee..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * (C) 2001  Dave Jones, Arjan van de ven.
- * (C) 2002 - 2003  Dominik Brodowski <linux@brodo.de>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *  Based upon reverse engineered information, and on Intel documentation
- *  for chipsets ICH2-M and ICH3-M.
- *
- *  Many thanks to Ducrot Bruno for finding and fixing the last
- *  "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler
- *  for extensive testing.
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-
-/*********************************************************************
- *                        SPEEDSTEP - DEFINITIONS                    *
- *********************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/pci.h>
-#include <linux/sched.h>
-
-#include "speedstep-lib.h"
-
-
-/* speedstep_chipset:
- *   It is necessary to know which chipset is used. As accesses to
- * this device occur at various places in this module, we need a
- * static struct pci_dev * pointing to that device.
- */
-static struct pci_dev *speedstep_chipset_dev;
-
-
-/* speedstep_processor
- */
-static enum speedstep_processor speedstep_processor;
-
-static u32 pmbase;
-
-/*
- *   There are only two frequency states for each processor. Values
- * are in kHz for the time being.
- */
-static struct cpufreq_frequency_table speedstep_freqs[] = {
-	{SPEEDSTEP_HIGH,	0},
-	{SPEEDSTEP_LOW,		0},
-	{0,			CPUFREQ_TABLE_END},
-};
-
-
-/**
- * speedstep_find_register - read the PMBASE address
- *
- * Returns: -ENODEV if no register could be found
- */
-static int speedstep_find_register(void)
-{
-	if (!speedstep_chipset_dev)
-		return -ENODEV;
-
-	/* get PMBASE */
-	pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
-	if (!(pmbase & 0x01)) {
-		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
-		return -ENODEV;
-	}
-
-	pmbase &= 0xFFFFFFFE;
-	if (!pmbase) {
-		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
-		return -ENODEV;
-	}
-
-	pr_debug("pmbase is 0x%x\n", pmbase);
-	return 0;
-}
-
-/**
- * speedstep_set_state - set the SpeedStep state
- * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
- *
- *   Tries to change the SpeedStep state.  Can be called from
- *   smp_call_function_single.
- */
-static void speedstep_set_state(unsigned int state)
-{
-	u8 pm2_blk;
-	u8 value;
-	unsigned long flags;
-
-	if (state > 0x1)
-		return;
-
-	/* Disable IRQs */
-	local_irq_save(flags);
-
-	/* read state */
-	value = inb(pmbase + 0x50);
-
-	pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
-
-	/* write new state */
-	value &= 0xFE;
-	value |= state;
-
-	pr_debug("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
-
-	/* Disable bus master arbitration */
-	pm2_blk = inb(pmbase + 0x20);
-	pm2_blk |= 0x01;
-	outb(pm2_blk, (pmbase + 0x20));
-
-	/* Actual transition */
-	outb(value, (pmbase + 0x50));
-
-	/* Restore bus master arbitration */
-	pm2_blk &= 0xfe;
-	outb(pm2_blk, (pmbase + 0x20));
-
-	/* check if transition was successful */
-	value = inb(pmbase + 0x50);
-
-	/* Enable IRQs */
-	local_irq_restore(flags);
-
-	pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
-
-	if (state == (value & 0x1))
-		pr_debug("change to %u MHz succeeded\n",
-			speedstep_get_frequency(speedstep_processor) / 1000);
-	else
-		printk(KERN_ERR "cpufreq: change failed - I/O error\n");
-
-	return;
-}
-
-/* Wrapper for smp_call_function_single. */
-static void _speedstep_set_state(void *_state)
-{
-	speedstep_set_state(*(unsigned int *)_state);
-}
-
-/**
- * speedstep_activate - activate SpeedStep control in the chipset
- *
- *   Tries to activate the SpeedStep status and control registers.
- * Returns -EINVAL on an unsupported chipset, and zero on success.
- */
-static int speedstep_activate(void)
-{
-	u16 value = 0;
-
-	if (!speedstep_chipset_dev)
-		return -EINVAL;
-
-	pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value);
-	if (!(value & 0x08)) {
-		value |= 0x08;
-		pr_debug("activating SpeedStep (TM) registers\n");
-		pci_write_config_word(speedstep_chipset_dev, 0x00A0, value);
-	}
-
-	return 0;
-}
-
-
-/**
- * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic
- *
- *   Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to
- * the LPC bridge / PM module which contains all power-management
- * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
- * chipset, or zero on failure.
- */
-static unsigned int speedstep_detect_chipset(void)
-{
-	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
-			      PCI_DEVICE_ID_INTEL_82801DB_12,
-			      PCI_ANY_ID, PCI_ANY_ID,
-			      NULL);
-	if (speedstep_chipset_dev)
-		return 4; /* 4-M */
-
-	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
-			      PCI_DEVICE_ID_INTEL_82801CA_12,
-			      PCI_ANY_ID, PCI_ANY_ID,
-			      NULL);
-	if (speedstep_chipset_dev)
-		return 3; /* 3-M */
-
-
-	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
-			      PCI_DEVICE_ID_INTEL_82801BA_10,
-			      PCI_ANY_ID, PCI_ANY_ID,
-			      NULL);
-	if (speedstep_chipset_dev) {
-		/* speedstep.c causes lockups on Dell Inspirons 8000 and
-		 * 8100 which use a pretty old revision of the 82815
-		 * host brige. Abort on these systems.
-		 */
-		static struct pci_dev *hostbridge;
-
-		hostbridge  = pci_get_subsys(PCI_VENDOR_ID_INTEL,
-			      PCI_DEVICE_ID_INTEL_82815_MC,
-			      PCI_ANY_ID, PCI_ANY_ID,
-			      NULL);
-
-		if (!hostbridge)
-			return 2; /* 2-M */
-
-		if (hostbridge->revision < 5) {
-			pr_debug("hostbridge does not support speedstep\n");
-			speedstep_chipset_dev = NULL;
-			pci_dev_put(hostbridge);
-			return 0;
-		}
-
-		pci_dev_put(hostbridge);
-		return 2; /* 2-M */
-	}
-
-	return 0;
-}
-
-static void get_freq_data(void *_speed)
-{
-	unsigned int *speed = _speed;
-
-	*speed = speedstep_get_frequency(speedstep_processor);
-}
-
-static unsigned int speedstep_get(unsigned int cpu)
-{
-	unsigned int speed;
-
-	/* You're supposed to ensure CPU is online. */
-	if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0)
-		BUG();
-
-	pr_debug("detected %u kHz as current frequency\n", speed);
-	return speed;
-}
-
-/**
- * speedstep_target - set a new CPUFreq policy
- * @policy: new policy
- * @target_freq: the target frequency
- * @relation: how that frequency relates to achieved frequency
- *	(CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
- *
- * Sets a new CPUFreq policy.
- */
-static int speedstep_target(struct cpufreq_policy *policy,
-			     unsigned int target_freq,
-			     unsigned int relation)
-{
-	unsigned int newstate = 0, policy_cpu;
-	struct cpufreq_freqs freqs;
-	int i;
-
-	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
-	freqs.old = speedstep_get(policy_cpu);
-	freqs.new = speedstep_freqs[newstate].frequency;
-	freqs.cpu = policy->cpu;
-
-	pr_debug("transiting from %u to %u kHz\n", freqs.old, freqs.new);
-
-	/* no transition necessary */
-	if (freqs.old == freqs.new)
-		return 0;
-
-	for_each_cpu(i, policy->cpus) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	}
-
-	smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate,
-				 true);
-
-	for_each_cpu(i, policy->cpus) {
-		freqs.cpu = i;
-		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-	}
-
-	return 0;
-}
-
-
-/**
- * speedstep_verify - verifies a new CPUFreq policy
- * @policy: new policy
- *
- * Limit must be within speedstep_low_freq and speedstep_high_freq, with
- * at least one border included.
- */
-static int speedstep_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
-}
-
-struct get_freqs {
-	struct cpufreq_policy *policy;
-	int ret;
-};
-
-static void get_freqs_on_cpu(void *_get_freqs)
-{
-	struct get_freqs *get_freqs = _get_freqs;
-
-	get_freqs->ret =
-		speedstep_get_freqs(speedstep_processor,
-			    &speedstep_freqs[SPEEDSTEP_LOW].frequency,
-			    &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
-			    &get_freqs->policy->cpuinfo.transition_latency,
-			    &speedstep_set_state);
-}
-
-static int speedstep_cpu_init(struct cpufreq_policy *policy)
-{
-	int result;
-	unsigned int policy_cpu, speed;
-	struct get_freqs gf;
-
-	/* only run on CPU to be set, or on its sibling */
-#ifdef CONFIG_SMP
-	cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
-#endif
-	policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
-
-	/* detect low and high frequency and transition latency */
-	gf.policy = policy;
-	smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1);
-	if (gf.ret)
-		return gf.ret;
-
-	/* get current speed setting */
-	speed = speedstep_get(policy_cpu);
-	if (!speed)
-		return -EIO;
-
-	pr_debug("currently at %s speed setting - %i MHz\n",
-		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
-		? "low" : "high",
-		(speed / 1000));
-
-	/* cpuinfo and default policy values */
-	policy->cur = speed;
-
-	result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
-	if (result)
-		return result;
-
-	cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
-
-	return 0;
-}
-
-
-static int speedstep_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-static struct freq_attr *speedstep_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-
-static struct cpufreq_driver speedstep_driver = {
-	.name	= "speedstep-ich",
-	.verify	= speedstep_verify,
-	.target	= speedstep_target,
-	.init	= speedstep_cpu_init,
-	.exit	= speedstep_cpu_exit,
-	.get	= speedstep_get,
-	.owner	= THIS_MODULE,
-	.attr	= speedstep_attr,
-};
-
-
-/**
- * speedstep_init - initializes the SpeedStep CPUFreq driver
- *
- *   Initializes the SpeedStep support. Returns -ENODEV on unsupported
- * devices, -EINVAL on problems during initiatization, and zero on
- * success.
- */
-static int __init speedstep_init(void)
-{
-	/* detect processor */
-	speedstep_processor = speedstep_detect_processor();
-	if (!speedstep_processor) {
-		pr_debug("Intel(R) SpeedStep(TM) capable processor "
-				"not found\n");
-		return -ENODEV;
-	}
-
-	/* detect chipset */
-	if (!speedstep_detect_chipset()) {
-		pr_debug("Intel(R) SpeedStep(TM) for this chipset not "
-				"(yet) available.\n");
-		return -ENODEV;
-	}
-
-	/* activate speedstep support */
-	if (speedstep_activate()) {
-		pci_dev_put(speedstep_chipset_dev);
-		return -EINVAL;
-	}
-
-	if (speedstep_find_register())
-		return -ENODEV;
-
-	return cpufreq_register_driver(&speedstep_driver);
-}
-
-
-/**
- * speedstep_exit - unregisters SpeedStep support
- *
- *   Unregisters SpeedStep support.
- */
-static void __exit speedstep_exit(void)
-{
-	pci_dev_put(speedstep_chipset_dev);
-	cpufreq_unregister_driver(&speedstep_driver);
-}
-
-
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
-		"Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
-		"with ICH-M southbridges.");
-MODULE_LICENSE("GPL");
-
-module_init(speedstep_init);
-module_exit(speedstep_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
deleted file mode 100644
index 8af2d2fd9d51..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ /dev/null
@@ -1,478 +0,0 @@
-/*
- * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  Library for common functions for Intel SpeedStep v.1 and v.2 support
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-
-#include <asm/msr.h>
-#include <asm/tsc.h>
-#include "speedstep-lib.h"
-
-#define PFX "speedstep-lib: "
-
-#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
-static int relaxed_check;
-#else
-#define relaxed_check 0
-#endif
-
-/*********************************************************************
- *                   GET PROCESSOR CORE SPEED IN KHZ                 *
- *********************************************************************/
-
-static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
-{
-	/* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
-	struct {
-		unsigned int ratio;	/* Frequency Multiplier (x10) */
-		u8 bitmap;		/* power on configuration bits
-					[27, 25:22] (in MSR 0x2a) */
-	} msr_decode_mult[] = {
-		{ 30, 0x01 },
-		{ 35, 0x05 },
-		{ 40, 0x02 },
-		{ 45, 0x06 },
-		{ 50, 0x00 },
-		{ 55, 0x04 },
-		{ 60, 0x0b },
-		{ 65, 0x0f },
-		{ 70, 0x09 },
-		{ 75, 0x0d },
-		{ 80, 0x0a },
-		{ 85, 0x26 },
-		{ 90, 0x20 },
-		{ 100, 0x2b },
-		{ 0, 0xff }	/* error or unknown value */
-	};
-
-	/* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
-	struct {
-		unsigned int value;	/* Front Side Bus speed in MHz */
-		u8 bitmap;		/* power on configuration bits [18: 19]
-					(in MSR 0x2a) */
-	} msr_decode_fsb[] = {
-		{  66, 0x0 },
-		{ 100, 0x2 },
-		{ 133, 0x1 },
-		{   0, 0xff}
-	};
-
-	u32 msr_lo, msr_tmp;
-	int i = 0, j = 0;
-
-	/* read MSR 0x2a - we only need the low 32 bits */
-	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	pr_debug("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
-	msr_tmp = msr_lo;
-
-	/* decode the FSB */
-	msr_tmp &= 0x00c0000;
-	msr_tmp >>= 18;
-	while (msr_tmp != msr_decode_fsb[i].bitmap) {
-		if (msr_decode_fsb[i].bitmap == 0xff)
-			return 0;
-		i++;
-	}
-
-	/* decode the multiplier */
-	if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) {
-		pr_debug("workaround for early PIIIs\n");
-		msr_lo &= 0x03c00000;
-	} else
-		msr_lo &= 0x0bc00000;
-	msr_lo >>= 22;
-	while (msr_lo != msr_decode_mult[j].bitmap) {
-		if (msr_decode_mult[j].bitmap == 0xff)
-			return 0;
-		j++;
-	}
-
-	pr_debug("speed is %u\n",
-		(msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
-
-	return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100;
-}
-
-
-static unsigned int pentiumM_get_frequency(void)
-{
-	u32 msr_lo, msr_tmp;
-
-	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	pr_debug("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
-
-	/* see table B-2 of 24547212.pdf */
-	if (msr_lo & 0x00040000) {
-		printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n",
-				msr_lo, msr_tmp);
-		return 0;
-	}
-
-	msr_tmp = (msr_lo >> 22) & 0x1f;
-	pr_debug("bits 22-26 are 0x%x, speed is %u\n",
-			msr_tmp, (msr_tmp * 100 * 1000));
-
-	return msr_tmp * 100 * 1000;
-}
-
-static unsigned int pentium_core_get_frequency(void)
-{
-	u32 fsb = 0;
-	u32 msr_lo, msr_tmp;
-	int ret;
-
-	rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp);
-	/* see table B-2 of 25366920.pdf */
-	switch (msr_lo & 0x07) {
-	case 5:
-		fsb = 100000;
-		break;
-	case 1:
-		fsb = 133333;
-		break;
-	case 3:
-		fsb = 166667;
-		break;
-	case 2:
-		fsb = 200000;
-		break;
-	case 0:
-		fsb = 266667;
-		break;
-	case 4:
-		fsb = 333333;
-		break;
-	default:
-		printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
-	}
-
-	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
-	pr_debug("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
-			msr_lo, msr_tmp);
-
-	msr_tmp = (msr_lo >> 22) & 0x1f;
-	pr_debug("bits 22-26 are 0x%x, speed is %u\n",
-			msr_tmp, (msr_tmp * fsb));
-
-	ret = (msr_tmp * fsb);
-	return ret;
-}
-
-
-static unsigned int pentium4_get_frequency(void)
-{
-	struct cpuinfo_x86 *c = &boot_cpu_data;
-	u32 msr_lo, msr_hi, mult;
-	unsigned int fsb = 0;
-	unsigned int ret;
-	u8 fsb_code;
-
-	/* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency
-	 * to System Bus Frequency Ratio Field in the Processor Frequency
-	 * Configuration Register of the MSR. Therefore the current
-	 * frequency cannot be calculated and has to be measured.
-	 */
-	if (c->x86_model < 2)
-		return cpu_khz;
-
-	rdmsr(0x2c, msr_lo, msr_hi);
-
-	pr_debug("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
-
-	/* decode the FSB: see IA-32 Intel (C) Architecture Software
-	 * Developer's Manual, Volume 3: System Prgramming Guide,
-	 * revision #12 in Table B-1: MSRs in the Pentium 4 and
-	 * Intel Xeon Processors, on page B-4 and B-5.
-	 */
-	fsb_code = (msr_lo >> 16) & 0x7;
-	switch (fsb_code) {
-	case 0:
-		fsb = 100 * 1000;
-		break;
-	case 1:
-		fsb = 13333 * 10;
-		break;
-	case 2:
-		fsb = 200 * 1000;
-		break;
-	}
-
-	if (!fsb)
-		printk(KERN_DEBUG PFX "couldn't detect FSB speed. "
-				"Please send an e-mail to <linux@brodo.de>\n");
-
-	/* Multiplier. */
-	mult = msr_lo >> 24;
-
-	pr_debug("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
-			fsb, mult, (fsb * mult));
-
-	ret = (fsb * mult);
-	return ret;
-}
-
-
-/* Warning: may get called from smp_call_function_single. */
-unsigned int speedstep_get_frequency(enum speedstep_processor processor)
-{
-	switch (processor) {
-	case SPEEDSTEP_CPU_PCORE:
-		return pentium_core_get_frequency();
-	case SPEEDSTEP_CPU_PM:
-		return pentiumM_get_frequency();
-	case SPEEDSTEP_CPU_P4D:
-	case SPEEDSTEP_CPU_P4M:
-		return pentium4_get_frequency();
-	case SPEEDSTEP_CPU_PIII_T:
-	case SPEEDSTEP_CPU_PIII_C:
-	case SPEEDSTEP_CPU_PIII_C_EARLY:
-		return pentium3_get_frequency(processor);
-	default:
-		return 0;
-	};
-	return 0;
-}
-EXPORT_SYMBOL_GPL(speedstep_get_frequency);
-
-
-/*********************************************************************
- *                 DETECT SPEEDSTEP-CAPABLE PROCESSOR                *
- *********************************************************************/
-
-unsigned int speedstep_detect_processor(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-	u32 ebx, msr_lo, msr_hi;
-
-	pr_debug("x86: %x, model: %x\n", c->x86, c->x86_model);
-
-	if ((c->x86_vendor != X86_VENDOR_INTEL) ||
-	    ((c->x86 != 6) && (c->x86 != 0xF)))
-		return 0;
-
-	if (c->x86 == 0xF) {
-		/* Intel Mobile Pentium 4-M
-		 * or Intel Mobile Pentium 4 with 533 MHz FSB */
-		if (c->x86_model != 2)
-			return 0;
-
-		ebx = cpuid_ebx(0x00000001);
-		ebx &= 0x000000FF;
-
-		pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
-
-		switch (c->x86_mask) {
-		case 4:
-			/*
-			 * B-stepping [M-P4-M]
-			 * sample has ebx = 0x0f, production has 0x0e.
-			 */
-			if ((ebx == 0x0e) || (ebx == 0x0f))
-				return SPEEDSTEP_CPU_P4M;
-			break;
-		case 7:
-			/*
-			 * C-stepping [M-P4-M]
-			 * needs to have ebx=0x0e, else it's a celeron:
-			 * cf. 25130917.pdf / page 7, footnote 5 even
-			 * though 25072120.pdf / page 7 doesn't say
-			 * samples are only of B-stepping...
-			 */
-			if (ebx == 0x0e)
-				return SPEEDSTEP_CPU_P4M;
-			break;
-		case 9:
-			/*
-			 * D-stepping [M-P4-M or M-P4/533]
-			 *
-			 * this is totally strange: CPUID 0x0F29 is
-			 * used by M-P4-M, M-P4/533 and(!) Celeron CPUs.
-			 * The latter need to be sorted out as they don't
-			 * support speedstep.
-			 * Celerons with CPUID 0x0F29 may have either
-			 * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything
-			 * specific.
-			 * M-P4-Ms may have either ebx=0xe or 0xf [see above]
-			 * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
-			 * also, M-P4M HTs have ebx=0x8, too
-			 * For now, they are distinguished by the model_id
-			 * string
-			 */
-			if ((ebx == 0x0e) ||
-				(strstr(c->x86_model_id,
-				    "Mobile Intel(R) Pentium(R) 4") != NULL))
-				return SPEEDSTEP_CPU_P4M;
-			break;
-		default:
-			break;
-		}
-		return 0;
-	}
-
-	switch (c->x86_model) {
-	case 0x0B: /* Intel PIII [Tualatin] */
-		/* cpuid_ebx(1) is 0x04 for desktop PIII,
-		 * 0x06 for mobile PIII-M */
-		ebx = cpuid_ebx(0x00000001);
-		pr_debug("ebx is %x\n", ebx);
-
-		ebx &= 0x000000FF;
-
-		if (ebx != 0x06)
-			return 0;
-
-		/* So far all PIII-M processors support SpeedStep. See
-		 * Intel's 24540640.pdf of June 2003
-		 */
-		return SPEEDSTEP_CPU_PIII_T;
-
-	case 0x08: /* Intel PIII [Coppermine] */
-
-		/* all mobile PIII Coppermines have FSB 100 MHz
-		 * ==> sort out a few desktop PIIIs. */
-		rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
-		pr_debug("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
-				msr_lo, msr_hi);
-		msr_lo &= 0x00c0000;
-		if (msr_lo != 0x0080000)
-			return 0;
-
-		/*
-		 * If the processor is a mobile version,
-		 * platform ID has bit 50 set
-		 * it has SpeedStep technology if either
-		 * bit 56 or 57 is set
-		 */
-		rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
-		pr_debug("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
-				msr_lo, msr_hi);
-		if ((msr_hi & (1<<18)) &&
-		    (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
-			if (c->x86_mask == 0x01) {
-				pr_debug("early PIII version\n");
-				return SPEEDSTEP_CPU_PIII_C_EARLY;
-			} else
-				return SPEEDSTEP_CPU_PIII_C;
-		}
-
-	default:
-		return 0;
-	}
-}
-EXPORT_SYMBOL_GPL(speedstep_detect_processor);
-
-
-/*********************************************************************
- *                     DETECT SPEEDSTEP SPEEDS                       *
- *********************************************************************/
-
-unsigned int speedstep_get_freqs(enum speedstep_processor processor,
-				  unsigned int *low_speed,
-				  unsigned int *high_speed,
-				  unsigned int *transition_latency,
-				  void (*set_state) (unsigned int state))
-{
-	unsigned int prev_speed;
-	unsigned int ret = 0;
-	unsigned long flags;
-	struct timeval tv1, tv2;
-
-	if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
-		return -EINVAL;
-
-	pr_debug("trying to determine both speeds\n");
-
-	/* get current speed */
-	prev_speed = speedstep_get_frequency(processor);
-	if (!prev_speed)
-		return -EIO;
-
-	pr_debug("previous speed is %u\n", prev_speed);
-
-	local_irq_save(flags);
-
-	/* switch to low state */
-	set_state(SPEEDSTEP_LOW);
-	*low_speed = speedstep_get_frequency(processor);
-	if (!*low_speed) {
-		ret = -EIO;
-		goto out;
-	}
-
-	pr_debug("low speed is %u\n", *low_speed);
-
-	/* start latency measurement */
-	if (transition_latency)
-		do_gettimeofday(&tv1);
-
-	/* switch to high state */
-	set_state(SPEEDSTEP_HIGH);
-
-	/* end latency measurement */
-	if (transition_latency)
-		do_gettimeofday(&tv2);
-
-	*high_speed = speedstep_get_frequency(processor);
-	if (!*high_speed) {
-		ret = -EIO;
-		goto out;
-	}
-
-	pr_debug("high speed is %u\n", *high_speed);
-
-	if (*low_speed == *high_speed) {
-		ret = -ENODEV;
-		goto out;
-	}
-
-	/* switch to previous state, if necessary */
-	if (*high_speed != prev_speed)
-		set_state(SPEEDSTEP_LOW);
-
-	if (transition_latency) {
-		*transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC +
-			tv2.tv_usec - tv1.tv_usec;
-		pr_debug("transition latency is %u uSec\n", *transition_latency);
-
-		/* convert uSec to nSec and add 20% for safety reasons */
-		*transition_latency *= 1200;
-
-		/* check if the latency measurement is too high or too low
-		 * and set it to a safe value (500uSec) in that case
-		 */
-		if (*transition_latency > 10000000 ||
-		    *transition_latency < 50000) {
-			printk(KERN_WARNING PFX "frequency transition "
-					"measured seems out of range (%u "
-					"nSec), falling back to a safe one of"
-					"%u nSec.\n",
-					*transition_latency, 500000);
-			*transition_latency = 500000;
-		}
-	}
-
-out:
-	local_irq_restore(flags);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(speedstep_get_freqs);
-
-#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
-module_param(relaxed_check, int, 0444);
-MODULE_PARM_DESC(relaxed_check,
-		"Don't do all checks for speedstep capability.");
-#endif
-
-MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
-MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
deleted file mode 100644
index 70d9cea1219d..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- *  Library for common functions for Intel SpeedStep v.1 and v.2 support
- *
- *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
- */
-
-
-
-/* processors */
-enum speedstep_processor {
-	SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001,  /* Coppermine core */
-	SPEEDSTEP_CPU_PIII_C	   = 0x00000002,  /* Coppermine core */
-	SPEEDSTEP_CPU_PIII_T	   = 0x00000003,  /* Tualatin core */
-	SPEEDSTEP_CPU_P4M	   = 0x00000004,  /* P4-M  */
-/* the following processors are not speedstep-capable and are not auto-detected
- * in speedstep_detect_processor(). However, their speed can be detected using
- * the speedstep_get_frequency() call. */
-	SPEEDSTEP_CPU_PM	   = 0xFFFFFF03,  /* Pentium M  */
-	SPEEDSTEP_CPU_P4D	   = 0xFFFFFF04,  /* desktop P4  */
-	SPEEDSTEP_CPU_PCORE	   = 0xFFFFFF05,  /* Core */
-};
-
-/* speedstep states -- only two of them */
-
-#define SPEEDSTEP_HIGH	0x00000000
-#define SPEEDSTEP_LOW	0x00000001
-
-
-/* detect a speedstep-capable processor */
-extern enum speedstep_processor speedstep_detect_processor(void);
-
-/* detect the current speed (in khz) of the processor */
-extern unsigned int speedstep_get_frequency(enum speedstep_processor processor);
-
-
-/* detect the low and high speeds of the processor. The callback
- * set_state"'s first argument is either SPEEDSTEP_HIGH or
- * SPEEDSTEP_LOW; the second argument is zero so that no
- * cpufreq_notify_transition calls are initiated.
- */
-extern unsigned int speedstep_get_freqs(enum speedstep_processor processor,
-	unsigned int *low_speed,
-	unsigned int *high_speed,
-	unsigned int *transition_latency,
-	void (*set_state) (unsigned int state));
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
deleted file mode 100644
index c76ead3490bf..000000000000
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ /dev/null
@@ -1,464 +0,0 @@
-/*
- * Intel SpeedStep SMI driver.
- *
- * (C) 2003  Hiroshi Miura <miura@da-cha.org>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- *
- */
-
-
-/*********************************************************************
- *                        SPEEDSTEP - DEFINITIONS                    *
- *********************************************************************/
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <asm/ist.h>
-
-#include "speedstep-lib.h"
-
-/* speedstep system management interface port/command.
- *
- * These parameters are got from IST-SMI BIOS call.
- * If user gives it, these are used.
- *
- */
-static int smi_port;
-static int smi_cmd;
-static unsigned int smi_sig;
-
-/* info about the processor */
-static enum speedstep_processor speedstep_processor;
-
-/*
- * There are only two frequency states for each processor. Values
- * are in kHz for the time being.
- */
-static struct cpufreq_frequency_table speedstep_freqs[] = {
-	{SPEEDSTEP_HIGH,	0},
-	{SPEEDSTEP_LOW,		0},
-	{0,			CPUFREQ_TABLE_END},
-};
-
-#define GET_SPEEDSTEP_OWNER 0
-#define GET_SPEEDSTEP_STATE 1
-#define SET_SPEEDSTEP_STATE 2
-#define GET_SPEEDSTEP_FREQS 4
-
-/* how often shall the SMI call be tried if it failed, e.g. because
- * of DMA activity going on? */
-#define SMI_TRIES 5
-
-/**
- * speedstep_smi_ownership
- */
-static int speedstep_smi_ownership(void)
-{
-	u32 command, result, magic, dummy;
-	u32 function = GET_SPEEDSTEP_OWNER;
-	unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
-
-	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
-	magic = virt_to_phys(magic_data);
-
-	pr_debug("trying to obtain ownership with command %x at port %x\n",
-			command, smi_port);
-
-	__asm__ __volatile__(
-		"push %%ebp\n"
-		"out %%al, (%%dx)\n"
-		"pop %%ebp\n"
-		: "=D" (result),
-		  "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
-		  "=S" (dummy)
-		: "a" (command), "b" (function), "c" (0), "d" (smi_port),
-		  "D" (0), "S" (magic)
-		: "memory"
-	);
-
-	pr_debug("result is %x\n", result);
-
-	return result;
-}
-
-/**
- * speedstep_smi_get_freqs - get SpeedStep preferred & current freq.
- * @low: the low frequency value is placed here
- * @high: the high frequency value is placed here
- *
- * Only available on later SpeedStep-enabled systems, returns false results or
- * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
- * shows that the latter occurs if !(ist_info.event & 0xFFFF).
- */
-static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
-{
-	u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
-	u32 state = 0;
-	u32 function = GET_SPEEDSTEP_FREQS;
-
-	if (!(ist_info.event & 0xFFFF)) {
-		pr_debug("bug #1422 -- can't read freqs from BIOS\n");
-		return -ENODEV;
-	}
-
-	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
-
-	pr_debug("trying to determine frequencies with command %x at port %x\n",
-			command, smi_port);
-
-	__asm__ __volatile__(
-		"push %%ebp\n"
-		"out %%al, (%%dx)\n"
-		"pop %%ebp"
-		: "=a" (result),
-		  "=b" (high_mhz),
-		  "=c" (low_mhz),
-		  "=d" (state), "=D" (edi), "=S" (dummy)
-		: "a" (command),
-		  "b" (function),
-		  "c" (state),
-		  "d" (smi_port), "S" (0), "D" (0)
-	);
-
-	pr_debug("result %x, low_freq %u, high_freq %u\n",
-			result, low_mhz, high_mhz);
-
-	/* abort if results are obviously incorrect... */
-	if ((high_mhz + low_mhz) < 600)
-		return -EINVAL;
-
-	*high = high_mhz * 1000;
-	*low  = low_mhz  * 1000;
-
-	return result;
-}
-
-/**
- * speedstep_get_state - set the SpeedStep state
- * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
- *
- */
-static int speedstep_get_state(void)
-{
-	u32 function = GET_SPEEDSTEP_STATE;
-	u32 result, state, edi, command, dummy;
-
-	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
-
-	pr_debug("trying to determine current setting with command %x "
-		"at port %x\n", command, smi_port);
-
-	__asm__ __volatile__(
-		"push %%ebp\n"
-		"out %%al, (%%dx)\n"
-		"pop %%ebp\n"
-		: "=a" (result),
-		  "=b" (state), "=D" (edi),
-		  "=c" (dummy), "=d" (dummy), "=S" (dummy)
-		: "a" (command), "b" (function), "c" (0),
-		  "d" (smi_port), "S" (0), "D" (0)
-	);
-
-	pr_debug("state is %x, result is %x\n", state, result);
-
-	return state & 1;
-}
-
-
-/**
- * speedstep_set_state - set the SpeedStep state
- * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
- *
- */
-static void speedstep_set_state(unsigned int state)
-{
-	unsigned int result = 0, command, new_state, dummy;
-	unsigned long flags;
-	unsigned int function = SET_SPEEDSTEP_STATE;
-	unsigned int retry = 0;
-
-	if (state > 0x1)
-		return;
-
-	/* Disable IRQs */
-	local_irq_save(flags);
-
-	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
-
-	pr_debug("trying to set frequency to state %u "
-		"with command %x at port %x\n",
-		state, command, smi_port);
-
-	do {
-		if (retry) {
-			pr_debug("retry %u, previous result %u, waiting...\n",
-					retry, result);
-			mdelay(retry * 50);
-		}
-		retry++;
-		__asm__ __volatile__(
-			"push %%ebp\n"
-			"out %%al, (%%dx)\n"
-			"pop %%ebp"
-			: "=b" (new_state), "=D" (result),
-			  "=c" (dummy), "=a" (dummy),
-			  "=d" (dummy), "=S" (dummy)
-			: "a" (command), "b" (function), "c" (state),
-			  "d" (smi_port), "S" (0), "D" (0)
-			);
-	} while ((new_state != state) && (retry <= SMI_TRIES));
-
-	/* enable IRQs */
-	local_irq_restore(flags);
-
-	if (new_state == state)
-		pr_debug("change to %u MHz succeeded after %u tries "
-			"with result %u\n",
-			(speedstep_freqs[new_state].frequency / 1000),
-			retry, result);
-	else
-		printk(KERN_ERR "cpufreq: change to state %u "
-			"failed with new_state %u and result %u\n",
-			state, new_state, result);
-
-	return;
-}
-
-
-/**
- * speedstep_target - set a new CPUFreq policy
- * @policy: new policy
- * @target_freq: new freq
- * @relation:
- *
- * Sets a new CPUFreq policy/freq.
- */
-static int speedstep_target(struct cpufreq_policy *policy,
-			unsigned int target_freq, unsigned int relation)
-{
-	unsigned int newstate = 0;
-	struct cpufreq_freqs freqs;
-
-	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
-				target_freq, relation, &newstate))
-		return -EINVAL;
-
-	freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
-	freqs.new = speedstep_freqs[newstate].frequency;
-	freqs.cpu = 0; /* speedstep.c is UP only driver */
-
-	if (freqs.old == freqs.new)
-		return 0;
-
-	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
-	speedstep_set_state(newstate);
-	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
-
-	return 0;
-}
-
-
-/**
- * speedstep_verify - verifies a new CPUFreq policy
- * @policy: new policy
- *
- * Limit must be within speedstep_low_freq and speedstep_high_freq, with
- * at least one border included.
- */
-static int speedstep_verify(struct cpufreq_policy *policy)
-{
-	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
-}
-
-
-static int speedstep_cpu_init(struct cpufreq_policy *policy)
-{
-	int result;
-	unsigned int speed, state;
-	unsigned int *low, *high;
-
-	/* capability check */
-	if (policy->cpu != 0)
-		return -ENODEV;
-
-	result = speedstep_smi_ownership();
-	if (result) {
-		pr_debug("fails in acquiring ownership of a SMI interface.\n");
-		return -EINVAL;
-	}
-
-	/* detect low and high frequency */
-	low = &speedstep_freqs[SPEEDSTEP_LOW].frequency;
-	high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency;
-
-	result = speedstep_smi_get_freqs(low, high);
-	if (result) {
-		/* fall back to speedstep_lib.c dection mechanism:
-		 * try both states out */
-		pr_debug("could not detect low and high frequencies "
-				"by SMI call.\n");
-		result = speedstep_get_freqs(speedstep_processor,
-				low, high,
-				NULL,
-				&speedstep_set_state);
-
-		if (result) {
-			pr_debug("could not detect two different speeds"
-					" -- aborting.\n");
-			return result;
-		} else
-			pr_debug("workaround worked.\n");
-	}
-
-	/* get current speed setting */
-	state = speedstep_get_state();
-	speed = speedstep_freqs[state].frequency;
-
-	pr_debug("currently at %s speed setting - %i MHz\n",
-		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
-		? "low" : "high",
-		(speed / 1000));
-
-	/* cpuinfo and default policy values */
-	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-	policy->cur = speed;
-
-	result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
-	if (result)
-		return result;
-
-	cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
-
-	return 0;
-}
-
-static int speedstep_cpu_exit(struct cpufreq_policy *policy)
-{
-	cpufreq_frequency_table_put_attr(policy->cpu);
-	return 0;
-}
-
-static unsigned int speedstep_get(unsigned int cpu)
-{
-	if (cpu)
-		return -ENODEV;
-	return speedstep_get_frequency(speedstep_processor);
-}
-
-
-static int speedstep_resume(struct cpufreq_policy *policy)
-{
-	int result = speedstep_smi_ownership();
-
-	if (result)
-		pr_debug("fails in re-acquiring ownership of a SMI interface.\n");
-
-	return result;
-}
-
-static struct freq_attr *speedstep_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-static struct cpufreq_driver speedstep_driver = {
-	.name		= "speedstep-smi",
-	.verify		= speedstep_verify,
-	.target		= speedstep_target,
-	.init		= speedstep_cpu_init,
-	.exit		= speedstep_cpu_exit,
-	.get		= speedstep_get,
-	.resume		= speedstep_resume,
-	.owner		= THIS_MODULE,
-	.attr		= speedstep_attr,
-};
-
-/**
- * speedstep_init - initializes the SpeedStep CPUFreq driver
- *
- *   Initializes the SpeedStep support. Returns -ENODEV on unsupported
- * BIOS, -EINVAL on problems during initiatization, and zero on
- * success.
- */
-static int __init speedstep_init(void)
-{
-	speedstep_processor = speedstep_detect_processor();
-
-	switch (speedstep_processor) {
-	case SPEEDSTEP_CPU_PIII_T:
-	case SPEEDSTEP_CPU_PIII_C:
-	case SPEEDSTEP_CPU_PIII_C_EARLY:
-		break;
-	default:
-		speedstep_processor = 0;
-	}
-
-	if (!speedstep_processor) {
-		pr_debug("No supported Intel CPU detected.\n");
-		return -ENODEV;
-	}
-
-	pr_debug("signature:0x%.8ulx, command:0x%.8ulx, "
-		"event:0x%.8ulx, perf_level:0x%.8ulx.\n",
-		ist_info.signature, ist_info.command,
-		ist_info.event, ist_info.perf_level);
-
-	/* Error if no IST-SMI BIOS or no PARM
-		 sig= 'ISGE' aka 'Intel Speedstep Gate E' */
-	if ((ist_info.signature !=  0x47534943) && (
-	    (smi_port == 0) || (smi_cmd == 0)))
-		return -ENODEV;
-
-	if (smi_sig == 1)
-		smi_sig = 0x47534943;
-	else
-		smi_sig = ist_info.signature;
-
-	/* setup smi_port from MODLULE_PARM or BIOS */
-	if ((smi_port > 0xff) || (smi_port < 0))
-		return -EINVAL;
-	else if (smi_port == 0)
-		smi_port = ist_info.command & 0xff;
-
-	if ((smi_cmd > 0xff) || (smi_cmd < 0))
-		return -EINVAL;
-	else if (smi_cmd == 0)
-		smi_cmd = (ist_info.command >> 16) & 0xff;
-
-	return cpufreq_register_driver(&speedstep_driver);
-}
-
-
-/**
- * speedstep_exit - unregisters SpeedStep support
- *
- *   Unregisters SpeedStep support.
- */
-static void __exit speedstep_exit(void)
-{
-	cpufreq_unregister_driver(&speedstep_driver);
-}
-
-module_param(smi_port, int, 0444);
-module_param(smi_cmd,  int, 0444);
-module_param(smi_sig, uint, 0444);
-
-MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value "
-		"-- Intel's default setting is 0xb2");
-MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value "
-		"-- Intel's default setting is 0x82");
-MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the "
-		"SMI interface.");
-
-MODULE_AUTHOR("Hiroshi Miura");
-MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface.");
-MODULE_LICENSE("GPL");
-
-module_init(speedstep_init);
-module_exit(speedstep_exit);
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index b78baa547ef5..9fb84853d8e3 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -1,3 +1,5 @@
+menu "CPU Frequency scaling"
+
 config CPU_FREQ
 	bool "CPU Frequency scaling"
 	help
@@ -177,4 +179,10 @@ config CPU_FREQ_GOV_CONSERVATIVE
 
 	  If in doubt, say N.
 
-endif	# CPU_FREQ
+menu "x86 CPU frequency scaling drivers"
+depends on X86
+source "drivers/cpufreq/Kconfig.x86"
+endmenu
+
+endif
+endmenu
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
new file mode 100644
index 000000000000..343f84760487
--- /dev/null
+++ b/drivers/cpufreq/Kconfig.x86
@@ -0,0 +1,255 @@
+#
+# x86 CPU Frequency scaling drivers
+#
+
+config X86_PCC_CPUFREQ
+	tristate "Processor Clocking Control interface driver"
+	depends on ACPI && ACPI_PROCESSOR
+	help
+	  This driver adds support for the PCC interface.
+
+	  For details, take a look at:
+	  <file:Documentation/cpu-freq/pcc-cpufreq.txt>.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called pcc-cpufreq.
+
+	  If in doubt, say N.
+
+config X86_ACPI_CPUFREQ
+	tristate "ACPI Processor P-States driver"
+	select CPU_FREQ_TABLE
+	depends on ACPI_PROCESSOR
+	help
+	  This driver adds a CPUFreq driver which utilizes the ACPI
+	  Processor Performance States.
+	  This driver also supports Intel Enhanced Speedstep.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called acpi-cpufreq.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config ELAN_CPUFREQ
+	tristate "AMD Elan SC400 and SC410"
+	select CPU_FREQ_TABLE
+	depends on X86_ELAN
+	---help---
+	  This adds the CPUFreq driver for AMD Elan SC400 and SC410
+	  processors.
+
+	  You need to specify the processor maximum speed as boot
+	  parameter: elanfreq=maxspeed (in kHz) or as module
+	  parameter "max_freq".
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config SC520_CPUFREQ
+	tristate "AMD Elan SC520"
+	select CPU_FREQ_TABLE
+	depends on X86_ELAN
+	---help---
+	  This adds the CPUFreq driver for AMD Elan SC520 processor.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+
+config X86_POWERNOW_K6
+	tristate "AMD Mobile K6-2/K6-3 PowerNow!"
+	select CPU_FREQ_TABLE
+	depends on X86_32
+	help
+	  This adds the CPUFreq driver for mobile AMD K6-2+ and mobile
+	  AMD K6-3+ processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_POWERNOW_K7
+	tristate "AMD Mobile Athlon/Duron PowerNow!"
+	select CPU_FREQ_TABLE
+	depends on X86_32
+	help
+	  This adds the CPUFreq driver for mobile AMD K7 mobile processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_POWERNOW_K7_ACPI
+	bool
+	depends on X86_POWERNOW_K7 && ACPI_PROCESSOR
+	depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m)
+	depends on X86_32
+	default y
+
+config X86_POWERNOW_K8
+	tristate "AMD Opteron/Athlon64 PowerNow!"
+	select CPU_FREQ_TABLE
+	depends on ACPI && ACPI_PROCESSOR
+	help
+	  This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called powernow-k8.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+config X86_GX_SUSPMOD
+	tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
+	depends on X86_32 && PCI
+	help
+	 This add the CPUFreq driver for NatSemi Geode processors which
+	 support suspend modulation.
+
+	 For details, take a look at <file:Documentation/cpu-freq/>.
+
+	 If in doubt, say N.
+
+config X86_SPEEDSTEP_CENTRINO
+	tristate "Intel Enhanced SpeedStep (deprecated)"
+	select CPU_FREQ_TABLE
+	select X86_SPEEDSTEP_CENTRINO_TABLE if X86_32
+	depends on X86_32 || (X86_64 && ACPI_PROCESSOR)
+	help
+	  This is deprecated and this functionality is now merged into
+	  acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of
+	  speedstep_centrino.
+	  This adds the CPUFreq driver for Enhanced SpeedStep enabled
+	  mobile CPUs.  This means Intel Pentium M (Centrino) CPUs
+	  or 64bit enabled Intel Xeons.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called speedstep-centrino.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_SPEEDSTEP_CENTRINO_TABLE
+	bool "Built-in tables for Banias CPUs"
+	depends on X86_32 && X86_SPEEDSTEP_CENTRINO
+	default y
+	help
+	  Use built-in tables for Banias CPUs if ACPI encoding
+	  is not available.
+
+	  If in doubt, say N.
+
+config X86_SPEEDSTEP_ICH
+	tristate "Intel Speedstep on ICH-M chipsets (ioport interface)"
+	select CPU_FREQ_TABLE
+	depends on X86_32
+	help
+	  This adds the CPUFreq driver for certain mobile Intel Pentium III
+	  (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all
+	  mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2,
+	  ICH3 or ICH4 southbridge.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_SPEEDSTEP_SMI
+	tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)"
+	select CPU_FREQ_TABLE
+	depends on X86_32 && EXPERIMENTAL
+	help
+	  This adds the CPUFreq driver for certain mobile Intel Pentium III
+	  (Coppermine), all mobile Intel Pentium III-M (Tualatin)
+	  on systems which have an Intel 440BX/ZX/MX southbridge.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_P4_CLOCKMOD
+	tristate "Intel Pentium 4 clock modulation"
+	select CPU_FREQ_TABLE
+	help
+	  This adds the CPUFreq driver for Intel Pentium 4 / XEON
+	  processors.  When enabled it will lower CPU temperature by skipping
+	  clocks.
+
+	  This driver should be only used in exceptional
+	  circumstances when very low power is needed because it causes severe
+	  slowdowns and noticeable latencies.  Normally Speedstep should be used
+	  instead.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called p4-clockmod.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  Unless you are absolutely sure say N.
+
+config X86_CPUFREQ_NFORCE2
+	tristate "nVidia nForce2 FSB changing"
+	depends on X86_32 && EXPERIMENTAL
+	help
+	  This adds the CPUFreq driver for FSB changing on nVidia nForce2
+	  platforms.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_LONGRUN
+	tristate "Transmeta LongRun"
+	depends on X86_32
+	help
+	  This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors
+	  which support LongRun.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_LONGHAUL
+	tristate "VIA Cyrix III Longhaul"
+	select CPU_FREQ_TABLE
+	depends on X86_32 && ACPI_PROCESSOR
+	help
+	  This adds the CPUFreq driver for VIA Samuel/CyrixIII,
+	  VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T
+	  processors.
+
+	  For details, take a look at <file:Documentation/cpu-freq/>.
+
+	  If in doubt, say N.
+
+config X86_E_POWERSAVER
+	tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)"
+	select CPU_FREQ_TABLE
+	depends on X86_32 && EXPERIMENTAL
+	help
+	  This adds the CPUFreq driver for VIA C7 processors.  However, this driver
+	  does not have any safeguards to prevent operating the CPU out of spec
+	  and is thus considered dangerous.  Please use the regular ACPI cpufreq
+	  driver, enabled by CONFIG_X86_ACPI_CPUFREQ.
+
+	  If in doubt, say N.
+
+comment "shared options"
+
+config X86_SPEEDSTEP_LIB
+	tristate
+	default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD)
+
+config X86_SPEEDSTEP_RELAXED_CAP_CHECK
+	bool "Relaxed speedstep capability checks"
+	depends on X86_32 && (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH)
+	help
+	  Don't perform all checks for a speedstep capable system which would
+	  normally be done. Some ancient or strange systems, though speedstep
+	  capable, don't always indicate that they are speedstep capable. This
+	  option lets the probing code bypass some of those checks if the
+	  parameter "relaxed_check=1" is passed to the module.
+
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 71fc3b4173f1..c7f1a6f16b6e 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -13,3 +13,29 @@ obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o
 # CPUfreq cross-arch helpers
 obj-$(CONFIG_CPU_FREQ_TABLE)		+= freq_table.o
 
+##################################################################################d
+# x86 drivers.
+# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
+# K8 systems. ACPI is preferred to all other hardware-specific drivers.
+# speedstep-* is preferred over p4-clockmod.
+
+obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o mperf.o
+obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o mperf.o
+obj-$(CONFIG_X86_PCC_CPUFREQ)		+= pcc-cpufreq.o
+obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
+obj-$(CONFIG_X86_POWERNOW_K7)		+= powernow-k7.o
+obj-$(CONFIG_X86_LONGHAUL)		+= longhaul.o
+obj-$(CONFIG_X86_E_POWERSAVER)		+= e_powersaver.o
+obj-$(CONFIG_ELAN_CPUFREQ)		+= elanfreq.o
+obj-$(CONFIG_SC520_CPUFREQ)		+= sc520_freq.o
+obj-$(CONFIG_X86_LONGRUN)		+= longrun.o
+obj-$(CONFIG_X86_GX_SUSPMOD)		+= gx-suspmod.o
+obj-$(CONFIG_X86_SPEEDSTEP_ICH)		+= speedstep-ich.o
+obj-$(CONFIG_X86_SPEEDSTEP_LIB)		+= speedstep-lib.o
+obj-$(CONFIG_X86_SPEEDSTEP_SMI)		+= speedstep-smi.o
+obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO)	+= speedstep-centrino.o
+obj-$(CONFIG_X86_P4_CLOCKMOD)		+= p4-clockmod.o
+obj-$(CONFIG_X86_CPUFREQ_NFORCE2)	+= cpufreq-nforce2.o
+
+##################################################################################d
+
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
new file mode 100644
index 000000000000..4e04e1274388
--- /dev/null
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -0,0 +1,773 @@
+/*
+ * acpi-cpufreq.c - ACPI Processor P-States Driver
+ *
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
+ *  Copyright (C) 2006       Denis Sadykov <denis.m.sadykov@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <linux/cpufreq.h>
+#include <linux/compiler.h>
+#include <linux/dmi.h>
+#include <linux/slab.h>
+
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+
+#include <acpi/processor.h>
+
+#include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+#include "mperf.h"
+
+MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
+MODULE_DESCRIPTION("ACPI Processor P-States Driver");
+MODULE_LICENSE("GPL");
+
+enum {
+	UNDEFINED_CAPABLE = 0,
+	SYSTEM_INTEL_MSR_CAPABLE,
+	SYSTEM_IO_CAPABLE,
+};
+
+#define INTEL_MSR_RANGE		(0xffff)
+
+struct acpi_cpufreq_data {
+	struct acpi_processor_performance *acpi_data;
+	struct cpufreq_frequency_table *freq_table;
+	unsigned int resume;
+	unsigned int cpu_feature;
+};
+
+static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
+
+/* acpi_perf_data is a pointer to percpu data. */
+static struct acpi_processor_performance __percpu *acpi_perf_data;
+
+static struct cpufreq_driver acpi_cpufreq_driver;
+
+static unsigned int acpi_pstate_strict;
+
+static int check_est_cpu(unsigned int cpuid)
+{
+	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
+
+	return cpu_has(cpu, X86_FEATURE_EST);
+}
+
+static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
+{
+	struct acpi_processor_performance *perf;
+	int i;
+
+	perf = data->acpi_data;
+
+	for (i = 0; i < perf->state_count; i++) {
+		if (value == perf->states[i].status)
+			return data->freq_table[i].frequency;
+	}
+	return 0;
+}
+
+static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
+{
+	int i;
+	struct acpi_processor_performance *perf;
+
+	msr &= INTEL_MSR_RANGE;
+	perf = data->acpi_data;
+
+	for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		if (msr == perf->states[data->freq_table[i].index].status)
+			return data->freq_table[i].frequency;
+	}
+	return data->freq_table[0].frequency;
+}
+
+static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
+{
+	switch (data->cpu_feature) {
+	case SYSTEM_INTEL_MSR_CAPABLE:
+		return extract_msr(val, data);
+	case SYSTEM_IO_CAPABLE:
+		return extract_io(val, data);
+	default:
+		return 0;
+	}
+}
+
+struct msr_addr {
+	u32 reg;
+};
+
+struct io_addr {
+	u16 port;
+	u8 bit_width;
+};
+
+struct drv_cmd {
+	unsigned int type;
+	const struct cpumask *mask;
+	union {
+		struct msr_addr msr;
+		struct io_addr io;
+	} addr;
+	u32 val;
+};
+
+/* Called via smp_call_function_single(), on the target CPU */
+static void do_drv_read(void *_cmd)
+{
+	struct drv_cmd *cmd = _cmd;
+	u32 h;
+
+	switch (cmd->type) {
+	case SYSTEM_INTEL_MSR_CAPABLE:
+		rdmsr(cmd->addr.msr.reg, cmd->val, h);
+		break;
+	case SYSTEM_IO_CAPABLE:
+		acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
+				&cmd->val,
+				(u32)cmd->addr.io.bit_width);
+		break;
+	default:
+		break;
+	}
+}
+
+/* Called via smp_call_function_many(), on the target CPUs */
+static void do_drv_write(void *_cmd)
+{
+	struct drv_cmd *cmd = _cmd;
+	u32 lo, hi;
+
+	switch (cmd->type) {
+	case SYSTEM_INTEL_MSR_CAPABLE:
+		rdmsr(cmd->addr.msr.reg, lo, hi);
+		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
+		wrmsr(cmd->addr.msr.reg, lo, hi);
+		break;
+	case SYSTEM_IO_CAPABLE:
+		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
+				cmd->val,
+				(u32)cmd->addr.io.bit_width);
+		break;
+	default:
+		break;
+	}
+}
+
+static void drv_read(struct drv_cmd *cmd)
+{
+	int err;
+	cmd->val = 0;
+
+	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
+	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
+}
+
+static void drv_write(struct drv_cmd *cmd)
+{
+	int this_cpu;
+
+	this_cpu = get_cpu();
+	if (cpumask_test_cpu(this_cpu, cmd->mask))
+		do_drv_write(cmd);
+	smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
+	put_cpu();
+}
+
+static u32 get_cur_val(const struct cpumask *mask)
+{
+	struct acpi_processor_performance *perf;
+	struct drv_cmd cmd;
+
+	if (unlikely(cpumask_empty(mask)))
+		return 0;
+
+	switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
+	case SYSTEM_INTEL_MSR_CAPABLE:
+		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
+		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
+		break;
+	case SYSTEM_IO_CAPABLE:
+		cmd.type = SYSTEM_IO_CAPABLE;
+		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
+		cmd.addr.io.port = perf->control_register.address;
+		cmd.addr.io.bit_width = perf->control_register.bit_width;
+		break;
+	default:
+		return 0;
+	}
+
+	cmd.mask = mask;
+	drv_read(&cmd);
+
+	pr_debug("get_cur_val = %u\n", cmd.val);
+
+	return cmd.val;
+}
+
+static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
+{
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
+	unsigned int freq;
+	unsigned int cached_freq;
+
+	pr_debug("get_cur_freq_on_cpu (%d)\n", cpu);
+
+	if (unlikely(data == NULL ||
+		     data->acpi_data == NULL || data->freq_table == NULL)) {
+		return 0;
+	}
+
+	cached_freq = data->freq_table[data->acpi_data->state].frequency;
+	freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
+	if (freq != cached_freq) {
+		/*
+		 * The dreaded BIOS frequency change behind our back.
+		 * Force set the frequency on next target call.
+		 */
+		data->resume = 1;
+	}
+
+	pr_debug("cur freq = %u\n", freq);
+
+	return freq;
+}
+
+static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
+				struct acpi_cpufreq_data *data)
+{
+	unsigned int cur_freq;
+	unsigned int i;
+
+	for (i = 0; i < 100; i++) {
+		cur_freq = extract_freq(get_cur_val(mask), data);
+		if (cur_freq == freq)
+			return 1;
+		udelay(10);
+	}
+	return 0;
+}
+
+static int acpi_cpufreq_target(struct cpufreq_policy *policy,
+			       unsigned int target_freq, unsigned int relation)
+{
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
+	struct acpi_processor_performance *perf;
+	struct cpufreq_freqs freqs;
+	struct drv_cmd cmd;
+	unsigned int next_state = 0; /* Index into freq_table */
+	unsigned int next_perf_state = 0; /* Index into perf table */
+	unsigned int i;
+	int result = 0;
+
+	pr_debug("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
+
+	if (unlikely(data == NULL ||
+	     data->acpi_data == NULL || data->freq_table == NULL)) {
+		return -ENODEV;
+	}
+
+	perf = data->acpi_data;
+	result = cpufreq_frequency_table_target(policy,
+						data->freq_table,
+						target_freq,
+						relation, &next_state);
+	if (unlikely(result)) {
+		result = -ENODEV;
+		goto out;
+	}
+
+	next_perf_state = data->freq_table[next_state].index;
+	if (perf->state == next_perf_state) {
+		if (unlikely(data->resume)) {
+			pr_debug("Called after resume, resetting to P%d\n",
+				next_perf_state);
+			data->resume = 0;
+		} else {
+			pr_debug("Already at target state (P%d)\n",
+				next_perf_state);
+			goto out;
+		}
+	}
+
+	switch (data->cpu_feature) {
+	case SYSTEM_INTEL_MSR_CAPABLE:
+		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
+		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
+		cmd.val = (u32) perf->states[next_perf_state].control;
+		break;
+	case SYSTEM_IO_CAPABLE:
+		cmd.type = SYSTEM_IO_CAPABLE;
+		cmd.addr.io.port = perf->control_register.address;
+		cmd.addr.io.bit_width = perf->control_register.bit_width;
+		cmd.val = (u32) perf->states[next_perf_state].control;
+		break;
+	default:
+		result = -ENODEV;
+		goto out;
+	}
+
+	/* cpufreq holds the hotplug lock, so we are safe from here on */
+	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
+		cmd.mask = policy->cpus;
+	else
+		cmd.mask = cpumask_of(policy->cpu);
+
+	freqs.old = perf->states[perf->state].core_frequency * 1000;
+	freqs.new = data->freq_table[next_state].frequency;
+	for_each_cpu(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	drv_write(&cmd);
+
+	if (acpi_pstate_strict) {
+		if (!check_freqs(cmd.mask, freqs.new, data)) {
+			pr_debug("acpi_cpufreq_target failed (%d)\n",
+				policy->cpu);
+			result = -EAGAIN;
+			goto out;
+		}
+	}
+
+	for_each_cpu(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+	perf->state = next_perf_state;
+
+out:
+	return result;
+}
+
+static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
+{
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
+
+	pr_debug("acpi_cpufreq_verify\n");
+
+	return cpufreq_frequency_table_verify(policy, data->freq_table);
+}
+
+static unsigned long
+acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
+{
+	struct acpi_processor_performance *perf = data->acpi_data;
+
+	if (cpu_khz) {
+		/* search the closest match to cpu_khz */
+		unsigned int i;
+		unsigned long freq;
+		unsigned long freqn = perf->states[0].core_frequency * 1000;
+
+		for (i = 0; i < (perf->state_count-1); i++) {
+			freq = freqn;
+			freqn = perf->states[i+1].core_frequency * 1000;
+			if ((2 * cpu_khz) > (freqn + freq)) {
+				perf->state = i;
+				return freq;
+			}
+		}
+		perf->state = perf->state_count-1;
+		return freqn;
+	} else {
+		/* assume CPU is at P0... */
+		perf->state = 0;
+		return perf->states[0].core_frequency * 1000;
+	}
+}
+
+static void free_acpi_perf_data(void)
+{
+	unsigned int i;
+
+	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
+	for_each_possible_cpu(i)
+		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
+				 ->shared_cpu_map);
+	free_percpu(acpi_perf_data);
+}
+
+/*
+ * acpi_cpufreq_early_init - initialize ACPI P-States library
+ *
+ * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
+ * in order to determine correct frequency and voltage pairings. We can
+ * do _PDC and _PSD and find out the processor dependency for the
+ * actual init that will happen later...
+ */
+static int __init acpi_cpufreq_early_init(void)
+{
+	unsigned int i;
+	pr_debug("acpi_cpufreq_early_init\n");
+
+	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
+	if (!acpi_perf_data) {
+		pr_debug("Memory allocation error for acpi_perf_data.\n");
+		return -ENOMEM;
+	}
+	for_each_possible_cpu(i) {
+		if (!zalloc_cpumask_var_node(
+			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
+			GFP_KERNEL, cpu_to_node(i))) {
+
+			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
+			free_acpi_perf_data();
+			return -ENOMEM;
+		}
+	}
+
+	/* Do initialization in ACPI core */
+	acpi_processor_preregister_performance(acpi_perf_data);
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Some BIOSes do SW_ANY coordination internally, either set it up in hw
+ * or do it in BIOS firmware and won't inform about it to OS. If not
+ * detected, this has a side effect of making CPU run at a different speed
+ * than OS intended it to run at. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+
+static int sw_any_bug_found(const struct dmi_system_id *d)
+{
+	bios_with_sw_any_bug = 1;
+	return 0;
+}
+
+static const struct dmi_system_id sw_any_bug_dmi_table[] = {
+	{
+		.callback = sw_any_bug_found,
+		.ident = "Supermicro Server X6DLP",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+		},
+	},
+	{ }
+};
+
+static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
+{
+	/* Intel Xeon Processor 7100 Series Specification Update
+	 * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
+	 * AL30: A Machine Check Exception (MCE) Occurring during an
+	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
+	 * Both Processor Cores to Lock Up. */
+	if (c->x86_vendor == X86_VENDOR_INTEL) {
+		if ((c->x86 == 15) &&
+		    (c->x86_model == 6) &&
+		    (c->x86_mask == 8)) {
+			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
+			    "Xeon(R) 7100 Errata AL30, processors may "
+			    "lock up on frequency changes: disabling "
+			    "acpi-cpufreq.\n");
+			return -ENODEV;
+		    }
+		}
+	return 0;
+}
+#endif
+
+static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int i;
+	unsigned int valid_states = 0;
+	unsigned int cpu = policy->cpu;
+	struct acpi_cpufreq_data *data;
+	unsigned int result = 0;
+	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
+	struct acpi_processor_performance *perf;
+#ifdef CONFIG_SMP
+	static int blacklisted;
+#endif
+
+	pr_debug("acpi_cpufreq_cpu_init\n");
+
+#ifdef CONFIG_SMP
+	if (blacklisted)
+		return blacklisted;
+	blacklisted = acpi_cpufreq_blacklist(c);
+	if (blacklisted)
+		return blacklisted;
+#endif
+
+	data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
+	per_cpu(acfreq_data, cpu) = data;
+
+	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
+		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
+
+	result = acpi_processor_register_performance(data->acpi_data, cpu);
+	if (result)
+		goto err_free;
+
+	perf = data->acpi_data;
+	policy->shared_type = perf->shared_type;
+
+	/*
+	 * Will let policy->cpus know about dependency only when software
+	 * coordination is required.
+	 */
+	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
+	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
+		cpumask_copy(policy->cpus, perf->shared_cpu_map);
+	}
+	cpumask_copy(policy->related_cpus, perf->shared_cpu_map);
+
+#ifdef CONFIG_SMP
+	dmi_check_system(sw_any_bug_dmi_table);
+	if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) {
+		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+		cpumask_copy(policy->cpus, cpu_core_mask(cpu));
+	}
+#endif
+
+	/* capability check */
+	if (perf->state_count <= 1) {
+		pr_debug("No P-States\n");
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	if (perf->control_register.space_id != perf->status_register.space_id) {
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	switch (perf->control_register.space_id) {
+	case ACPI_ADR_SPACE_SYSTEM_IO:
+		pr_debug("SYSTEM IO addr space\n");
+		data->cpu_feature = SYSTEM_IO_CAPABLE;
+		break;
+	case ACPI_ADR_SPACE_FIXED_HARDWARE:
+		pr_debug("HARDWARE addr space\n");
+		if (!check_est_cpu(cpu)) {
+			result = -ENODEV;
+			goto err_unreg;
+		}
+		data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
+		break;
+	default:
+		pr_debug("Unknown addr space %d\n",
+			(u32) (perf->control_register.space_id));
+		result = -ENODEV;
+		goto err_unreg;
+	}
+
+	data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
+		    (perf->state_count+1), GFP_KERNEL);
+	if (!data->freq_table) {
+		result = -ENOMEM;
+		goto err_unreg;
+	}
+
+	/* detect transition latency */
+	policy->cpuinfo.transition_latency = 0;
+	for (i = 0; i < perf->state_count; i++) {
+		if ((perf->states[i].transition_latency * 1000) >
+		    policy->cpuinfo.transition_latency)
+			policy->cpuinfo.transition_latency =
+			    perf->states[i].transition_latency * 1000;
+	}
+
+	/* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
+	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
+	    policy->cpuinfo.transition_latency > 20 * 1000) {
+		policy->cpuinfo.transition_latency = 20 * 1000;
+		printk_once(KERN_INFO
+			    "P-state transition latency capped at 20 uS\n");
+	}
+
+	/* table init */
+	for (i = 0; i < perf->state_count; i++) {
+		if (i > 0 && perf->states[i].core_frequency >=
+		    data->freq_table[valid_states-1].frequency / 1000)
+			continue;
+
+		data->freq_table[valid_states].index = i;
+		data->freq_table[valid_states].frequency =
+		    perf->states[i].core_frequency * 1000;
+		valid_states++;
+	}
+	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
+	perf->state = 0;
+
+	result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
+	if (result)
+		goto err_freqfree;
+
+	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
+		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
+
+	switch (perf->control_register.space_id) {
+	case ACPI_ADR_SPACE_SYSTEM_IO:
+		/* Current speed is unknown and not detectable by IO port */
+		policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
+		break;
+	case ACPI_ADR_SPACE_FIXED_HARDWARE:
+		acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
+		policy->cur = get_cur_freq_on_cpu(cpu);
+		break;
+	default:
+		break;
+	}
+
+	/* notify BIOS that we exist */
+	acpi_processor_notify_smm(THIS_MODULE);
+
+	/* Check for APERF/MPERF support in hardware */
+	if (cpu_has(c, X86_FEATURE_APERFMPERF))
+		acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
+
+	pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
+	for (i = 0; i < perf->state_count; i++)
+		pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
+			(i == perf->state ? '*' : ' '), i,
+			(u32) perf->states[i].core_frequency,
+			(u32) perf->states[i].power,
+			(u32) perf->states[i].transition_latency);
+
+	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
+
+	/*
+	 * the first call to ->target() should result in us actually
+	 * writing something to the appropriate registers.
+	 */
+	data->resume = 1;
+
+	return result;
+
+err_freqfree:
+	kfree(data->freq_table);
+err_unreg:
+	acpi_processor_unregister_performance(perf, cpu);
+err_free:
+	kfree(data);
+	per_cpu(acfreq_data, cpu) = NULL;
+
+	return result;
+}
+
+static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
+
+	pr_debug("acpi_cpufreq_cpu_exit\n");
+
+	if (data) {
+		cpufreq_frequency_table_put_attr(policy->cpu);
+		per_cpu(acfreq_data, policy->cpu) = NULL;
+		acpi_processor_unregister_performance(data->acpi_data,
+						      policy->cpu);
+		kfree(data->freq_table);
+		kfree(data);
+	}
+
+	return 0;
+}
+
+static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
+{
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
+
+	pr_debug("acpi_cpufreq_resume\n");
+
+	data->resume = 1;
+
+	return 0;
+}
+
+static struct freq_attr *acpi_cpufreq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver acpi_cpufreq_driver = {
+	.verify		= acpi_cpufreq_verify,
+	.target		= acpi_cpufreq_target,
+	.bios_limit	= acpi_processor_get_bios_limit,
+	.init		= acpi_cpufreq_cpu_init,
+	.exit		= acpi_cpufreq_cpu_exit,
+	.resume		= acpi_cpufreq_resume,
+	.name		= "acpi-cpufreq",
+	.owner		= THIS_MODULE,
+	.attr		= acpi_cpufreq_attr,
+};
+
+static int __init acpi_cpufreq_init(void)
+{
+	int ret;
+
+	if (acpi_disabled)
+		return 0;
+
+	pr_debug("acpi_cpufreq_init\n");
+
+	ret = acpi_cpufreq_early_init();
+	if (ret)
+		return ret;
+
+	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
+	if (ret)
+		free_acpi_perf_data();
+
+	return ret;
+}
+
+static void __exit acpi_cpufreq_exit(void)
+{
+	pr_debug("acpi_cpufreq_exit\n");
+
+	cpufreq_unregister_driver(&acpi_cpufreq_driver);
+
+	free_percpu(acpi_perf_data);
+}
+
+module_param(acpi_pstate_strict, uint, 0644);
+MODULE_PARM_DESC(acpi_pstate_strict,
+	"value 0 or non-zero. non-zero -> strict ACPI checks are "
+	"performed during frequency changes.");
+
+late_initcall(acpi_cpufreq_init);
+module_exit(acpi_cpufreq_exit);
+
+MODULE_ALIAS("acpi");
diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
new file mode 100644
index 000000000000..7bac808804f3
--- /dev/null
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -0,0 +1,444 @@
+/*
+ * (C) 2004-2006  Sebastian Witt <se.witt@gmx.net>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon reverse engineered information
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#define NFORCE2_XTAL 25
+#define NFORCE2_BOOTFSB 0x48
+#define NFORCE2_PLLENABLE 0xa8
+#define NFORCE2_PLLREG 0xa4
+#define NFORCE2_PLLADR 0xa0
+#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div)
+
+#define NFORCE2_MIN_FSB 50
+#define NFORCE2_SAFE_DISTANCE 50
+
+/* Delay in ms between FSB changes */
+/* #define NFORCE2_DELAY 10 */
+
+/*
+ * nforce2_chipset:
+ * FSB is changed using the chipset
+ */
+static struct pci_dev *nforce2_dev;
+
+/* fid:
+ * multiplier * 10
+ */
+static int fid;
+
+/* min_fsb, max_fsb:
+ * minimum and maximum FSB (= FSB at boot time)
+ */
+static int min_fsb;
+static int max_fsb;
+
+MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>");
+MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver");
+MODULE_LICENSE("GPL");
+
+module_param(fid, int, 0444);
+module_param(min_fsb, int, 0444);
+
+MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
+MODULE_PARM_DESC(min_fsb,
+		"Minimum FSB to use, if not defined: current FSB - 50");
+
+#define PFX "cpufreq-nforce2: "
+
+/**
+ * nforce2_calc_fsb - calculate FSB
+ * @pll: PLL value
+ *
+ *   Calculates FSB from PLL value
+ */
+static int nforce2_calc_fsb(int pll)
+{
+	unsigned char mul, div;
+
+	mul = (pll >> 8) & 0xff;
+	div = pll & 0xff;
+
+	if (div > 0)
+		return NFORCE2_XTAL * mul / div;
+
+	return 0;
+}
+
+/**
+ * nforce2_calc_pll - calculate PLL value
+ * @fsb: FSB
+ *
+ *   Calculate PLL value for given FSB
+ */
+static int nforce2_calc_pll(unsigned int fsb)
+{
+	unsigned char xmul, xdiv;
+	unsigned char mul = 0, div = 0;
+	int tried = 0;
+
+	/* Try to calculate multiplier and divider up to 4 times */
+	while (((mul == 0) || (div == 0)) && (tried <= 3)) {
+		for (xdiv = 2; xdiv <= 0x80; xdiv++)
+			for (xmul = 1; xmul <= 0xfe; xmul++)
+				if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) ==
+				    fsb + tried) {
+					mul = xmul;
+					div = xdiv;
+				}
+		tried++;
+	}
+
+	if ((mul == 0) || (div == 0))
+		return -1;
+
+	return NFORCE2_PLL(mul, div);
+}
+
+/**
+ * nforce2_write_pll - write PLL value to chipset
+ * @pll: PLL value
+ *
+ *   Writes new FSB PLL value to chipset
+ */
+static void nforce2_write_pll(int pll)
+{
+	int temp;
+
+	/* Set the pll addr. to 0x00 */
+	pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0);
+
+	/* Now write the value in all 64 registers */
+	for (temp = 0; temp <= 0x3f; temp++)
+		pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll);
+
+	return;
+}
+
+/**
+ * nforce2_fsb_read - Read FSB
+ *
+ *   Read FSB from chipset
+ *   If bootfsb != 0, return FSB at boot-time
+ */
+static unsigned int nforce2_fsb_read(int bootfsb)
+{
+	struct pci_dev *nforce2_sub5;
+	u32 fsb, temp = 0;
+
+	/* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
+	nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF,
+				PCI_ANY_ID, PCI_ANY_ID, NULL);
+	if (!nforce2_sub5)
+		return 0;
+
+	pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb);
+	fsb /= 1000000;
+
+	/* Check if PLL register is already set */
+	pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
+
+	if (bootfsb || !temp)
+		return fsb;
+
+	/* Use PLL register FSB value */
+	pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp);
+	fsb = nforce2_calc_fsb(temp);
+
+	return fsb;
+}
+
+/**
+ * nforce2_set_fsb - set new FSB
+ * @fsb: New FSB
+ *
+ *   Sets new FSB
+ */
+static int nforce2_set_fsb(unsigned int fsb)
+{
+	u32 temp = 0;
+	unsigned int tfsb;
+	int diff;
+	int pll = 0;
+
+	if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
+		printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb);
+		return -EINVAL;
+	}
+
+	tfsb = nforce2_fsb_read(0);
+	if (!tfsb) {
+		printk(KERN_ERR PFX "Error while reading the FSB\n");
+		return -EINVAL;
+	}
+
+	/* First write? Then set actual value */
+	pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
+	if (!temp) {
+		pll = nforce2_calc_pll(tfsb);
+
+		if (pll < 0)
+			return -EINVAL;
+
+		nforce2_write_pll(pll);
+	}
+
+	/* Enable write access */
+	temp = 0x01;
+	pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp);
+
+	diff = tfsb - fsb;
+
+	if (!diff)
+		return 0;
+
+	while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) {
+		if (diff < 0)
+			tfsb++;
+		else
+			tfsb--;
+
+		/* Calculate the PLL reg. value */
+		pll = nforce2_calc_pll(tfsb);
+		if (pll == -1)
+			return -EINVAL;
+
+		nforce2_write_pll(pll);
+#ifdef NFORCE2_DELAY
+		mdelay(NFORCE2_DELAY);
+#endif
+	}
+
+	temp = 0x40;
+	pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp);
+
+	return 0;
+}
+
+/**
+ * nforce2_get - get the CPU frequency
+ * @cpu: CPU number
+ *
+ * Returns the CPU frequency
+ */
+static unsigned int nforce2_get(unsigned int cpu)
+{
+	if (cpu)
+		return 0;
+	return nforce2_fsb_read(0) * fid * 100;
+}
+
+/**
+ * nforce2_target - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency
+ *  (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * Sets a new CPUFreq policy.
+ */
+static int nforce2_target(struct cpufreq_policy *policy,
+			  unsigned int target_freq, unsigned int relation)
+{
+/*        unsigned long         flags; */
+	struct cpufreq_freqs freqs;
+	unsigned int target_fsb;
+
+	if ((target_freq > policy->max) || (target_freq < policy->min))
+		return -EINVAL;
+
+	target_fsb = target_freq / (fid * 100);
+
+	freqs.old = nforce2_get(policy->cpu);
+	freqs.new = target_fsb * fid * 100;
+	freqs.cpu = 0;		/* Only one CPU on nForce2 platforms */
+
+	if (freqs.old == freqs.new)
+		return 0;
+
+	pr_debug("Old CPU frequency %d kHz, new %d kHz\n",
+	       freqs.old, freqs.new);
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	/* Disable IRQs */
+	/* local_irq_save(flags); */
+
+	if (nforce2_set_fsb(target_fsb) < 0)
+		printk(KERN_ERR PFX "Changing FSB to %d failed\n",
+			target_fsb);
+	else
+		pr_debug("Changed FSB successfully to %d\n",
+			target_fsb);
+
+	/* Enable IRQs */
+	/* local_irq_restore(flags); */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	return 0;
+}
+
+/**
+ * nforce2_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ */
+static int nforce2_verify(struct cpufreq_policy *policy)
+{
+	unsigned int fsb_pol_max;
+
+	fsb_pol_max = policy->max / (fid * 100);
+
+	if (policy->min < (fsb_pol_max * fid * 100))
+		policy->max = (fsb_pol_max + 1) * fid * 100;
+
+	cpufreq_verify_within_limits(policy,
+				     policy->cpuinfo.min_freq,
+				     policy->cpuinfo.max_freq);
+	return 0;
+}
+
+static int nforce2_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int fsb;
+	unsigned int rfid;
+
+	/* capability check */
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	/* Get current FSB */
+	fsb = nforce2_fsb_read(0);
+
+	if (!fsb)
+		return -EIO;
+
+	/* FIX: Get FID from CPU */
+	if (!fid) {
+		if (!cpu_khz) {
+			printk(KERN_WARNING PFX
+			"cpu_khz not set, can't calculate multiplier!\n");
+			return -ENODEV;
+		}
+
+		fid = cpu_khz / (fsb * 100);
+		rfid = fid % 5;
+
+		if (rfid) {
+			if (rfid > 2)
+				fid += 5 - rfid;
+			else
+				fid -= rfid;
+		}
+	}
+
+	printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb,
+	       fid / 10, fid % 10);
+
+	/* Set maximum FSB to FSB at boot time */
+	max_fsb = nforce2_fsb_read(1);
+
+	if (!max_fsb)
+		return -EIO;
+
+	if (!min_fsb)
+		min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE;
+
+	if (min_fsb < NFORCE2_MIN_FSB)
+		min_fsb = NFORCE2_MIN_FSB;
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.min_freq = min_fsb * fid * 100;
+	policy->cpuinfo.max_freq = max_fsb * fid * 100;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = nforce2_get(policy->cpu);
+	policy->min = policy->cpuinfo.min_freq;
+	policy->max = policy->cpuinfo.max_freq;
+
+	return 0;
+}
+
+static int nforce2_cpu_exit(struct cpufreq_policy *policy)
+{
+	return 0;
+}
+
+static struct cpufreq_driver nforce2_driver = {
+	.name = "nforce2",
+	.verify = nforce2_verify,
+	.target = nforce2_target,
+	.get = nforce2_get,
+	.init = nforce2_cpu_init,
+	.exit = nforce2_cpu_exit,
+	.owner = THIS_MODULE,
+};
+
+/**
+ * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic
+ *
+ * Detects nForce2 A2 and C1 stepping
+ *
+ */
+static int nforce2_detect_chipset(void)
+{
+	nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
+					PCI_DEVICE_ID_NVIDIA_NFORCE2,
+					PCI_ANY_ID, PCI_ANY_ID, NULL);
+
+	if (nforce2_dev == NULL)
+		return -ENODEV;
+
+	printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n",
+	       nforce2_dev->revision);
+	printk(KERN_INFO PFX
+	       "FSB changing is maybe unstable and can lead to "
+	       "crashes and data loss.\n");
+
+	return 0;
+}
+
+/**
+ * nforce2_init - initializes the nForce2 CPUFreq driver
+ *
+ * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported
+ * devices, -EINVAL on problems during initiatization, and zero on
+ * success.
+ */
+static int __init nforce2_init(void)
+{
+	/* TODO: do we need to detect the processor? */
+
+	/* detect chipset */
+	if (nforce2_detect_chipset()) {
+		printk(KERN_INFO PFX "No nForce2 chipset.\n");
+		return -ENODEV;
+	}
+
+	return cpufreq_register_driver(&nforce2_driver);
+}
+
+/**
+ * nforce2_exit - unregisters cpufreq module
+ *
+ *   Unregisters nForce2 FSB change support.
+ */
+static void __exit nforce2_exit(void)
+{
+	cpufreq_unregister_driver(&nforce2_driver);
+}
+
+module_init(nforce2_init);
+module_exit(nforce2_exit);
+
diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c
new file mode 100644
index 000000000000..35a257dd4bb7
--- /dev/null
+++ b/drivers/cpufreq/e_powersaver.c
@@ -0,0 +1,367 @@
+/*
+ *  Based on documentation provided by Dave Jones. Thanks!
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+
+#include <asm/msr.h>
+#include <asm/tsc.h>
+
+#define EPS_BRAND_C7M	0
+#define EPS_BRAND_C7	1
+#define EPS_BRAND_EDEN	2
+#define EPS_BRAND_C3	3
+#define EPS_BRAND_C7D	4
+
+struct eps_cpu_data {
+	u32 fsb;
+	struct cpufreq_frequency_table freq_table[];
+};
+
+static struct eps_cpu_data *eps_cpu[NR_CPUS];
+
+
+static unsigned int eps_get(unsigned int cpu)
+{
+	struct eps_cpu_data *centaur;
+	u32 lo, hi;
+
+	if (cpu)
+		return 0;
+	centaur = eps_cpu[cpu];
+	if (centaur == NULL)
+		return 0;
+
+	/* Return current frequency */
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	return centaur->fsb * ((lo >> 8) & 0xff);
+}
+
+static int eps_set_state(struct eps_cpu_data *centaur,
+			 unsigned int cpu,
+			 u32 dest_state)
+{
+	struct cpufreq_freqs freqs;
+	u32 lo, hi;
+	int err = 0;
+	int i;
+
+	freqs.old = eps_get(cpu);
+	freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff);
+	freqs.cpu = cpu;
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	/* Wait while CPU is busy */
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	i = 0;
+	while (lo & ((1 << 16) | (1 << 17))) {
+		udelay(16);
+		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+		i++;
+		if (unlikely(i > 64)) {
+			err = -ENODEV;
+			goto postchange;
+		}
+	}
+	/* Set new multiplier and voltage */
+	wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0);
+	/* Wait until transition end */
+	i = 0;
+	do {
+		udelay(16);
+		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+		i++;
+		if (unlikely(i > 64)) {
+			err = -ENODEV;
+			goto postchange;
+		}
+	} while (lo & ((1 << 16) | (1 << 17)));
+
+	/* Return current frequency */
+postchange:
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	freqs.new = centaur->fsb * ((lo >> 8) & 0xff);
+
+#ifdef DEBUG
+	{
+	u8 current_multiplier, current_voltage;
+
+	/* Print voltage and multiplier */
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	current_voltage = lo & 0xff;
+	printk(KERN_INFO "eps: Current voltage = %dmV\n",
+		current_voltage * 16 + 700);
+	current_multiplier = (lo >> 8) & 0xff;
+	printk(KERN_INFO "eps: Current multiplier = %d\n",
+		current_multiplier);
+	}
+#endif
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	return err;
+}
+
+static int eps_target(struct cpufreq_policy *policy,
+			       unsigned int target_freq,
+			       unsigned int relation)
+{
+	struct eps_cpu_data *centaur;
+	unsigned int newstate = 0;
+	unsigned int cpu = policy->cpu;
+	unsigned int dest_state;
+	int ret;
+
+	if (unlikely(eps_cpu[cpu] == NULL))
+		return -ENODEV;
+	centaur = eps_cpu[cpu];
+
+	if (unlikely(cpufreq_frequency_table_target(policy,
+			&eps_cpu[cpu]->freq_table[0],
+			target_freq,
+			relation,
+			&newstate))) {
+		return -EINVAL;
+	}
+
+	/* Make frequency transition */
+	dest_state = centaur->freq_table[newstate].index & 0xffff;
+	ret = eps_set_state(centaur, cpu, dest_state);
+	if (ret)
+		printk(KERN_ERR "eps: Timeout!\n");
+	return ret;
+}
+
+static int eps_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy,
+			&eps_cpu[policy->cpu]->freq_table[0]);
+}
+
+static int eps_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int i;
+	u32 lo, hi;
+	u64 val;
+	u8 current_multiplier, current_voltage;
+	u8 max_multiplier, max_voltage;
+	u8 min_multiplier, min_voltage;
+	u8 brand = 0;
+	u32 fsb;
+	struct eps_cpu_data *centaur;
+	struct cpuinfo_x86 *c = &cpu_data(0);
+	struct cpufreq_frequency_table *f_table;
+	int k, step, voltage;
+	int ret;
+	int states;
+
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	/* Check brand */
+	printk(KERN_INFO "eps: Detected VIA ");
+
+	switch (c->x86_model) {
+	case 10:
+		rdmsr(0x1153, lo, hi);
+		brand = (((lo >> 2) ^ lo) >> 18) & 3;
+		printk(KERN_CONT "Model A ");
+		break;
+	case 13:
+		rdmsr(0x1154, lo, hi);
+		brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff;
+		printk(KERN_CONT "Model D ");
+		break;
+	}
+
+	switch (brand) {
+	case EPS_BRAND_C7M:
+		printk(KERN_CONT "C7-M\n");
+		break;
+	case EPS_BRAND_C7:
+		printk(KERN_CONT "C7\n");
+		break;
+	case EPS_BRAND_EDEN:
+		printk(KERN_CONT "Eden\n");
+		break;
+	case EPS_BRAND_C7D:
+		printk(KERN_CONT "C7-D\n");
+		break;
+	case EPS_BRAND_C3:
+		printk(KERN_CONT "C3\n");
+		return -ENODEV;
+		break;
+	}
+	/* Enable Enhanced PowerSaver */
+	rdmsrl(MSR_IA32_MISC_ENABLE, val);
+	if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+		val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
+		wrmsrl(MSR_IA32_MISC_ENABLE, val);
+		/* Can be locked at 0 */
+		rdmsrl(MSR_IA32_MISC_ENABLE, val);
+		if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+			printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
+			return -ENODEV;
+		}
+	}
+
+	/* Print voltage and multiplier */
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	current_voltage = lo & 0xff;
+	printk(KERN_INFO "eps: Current voltage = %dmV\n",
+			current_voltage * 16 + 700);
+	current_multiplier = (lo >> 8) & 0xff;
+	printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
+
+	/* Print limits */
+	max_voltage = hi & 0xff;
+	printk(KERN_INFO "eps: Highest voltage = %dmV\n",
+			max_voltage * 16 + 700);
+	max_multiplier = (hi >> 8) & 0xff;
+	printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
+	min_voltage = (hi >> 16) & 0xff;
+	printk(KERN_INFO "eps: Lowest voltage = %dmV\n",
+			min_voltage * 16 + 700);
+	min_multiplier = (hi >> 24) & 0xff;
+	printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
+
+	/* Sanity checks */
+	if (current_multiplier == 0 || max_multiplier == 0
+	    || min_multiplier == 0)
+		return -EINVAL;
+	if (current_multiplier > max_multiplier
+	    || max_multiplier <= min_multiplier)
+		return -EINVAL;
+	if (current_voltage > 0x1f || max_voltage > 0x1f)
+		return -EINVAL;
+	if (max_voltage < min_voltage)
+		return -EINVAL;
+
+	/* Calc FSB speed */
+	fsb = cpu_khz / current_multiplier;
+	/* Calc number of p-states supported */
+	if (brand == EPS_BRAND_C7M)
+		states = max_multiplier - min_multiplier + 1;
+	else
+		states = 2;
+
+	/* Allocate private data and frequency table for current cpu */
+	centaur = kzalloc(sizeof(struct eps_cpu_data)
+		    + (states + 1) * sizeof(struct cpufreq_frequency_table),
+		    GFP_KERNEL);
+	if (!centaur)
+		return -ENOMEM;
+	eps_cpu[0] = centaur;
+
+	/* Copy basic values */
+	centaur->fsb = fsb;
+
+	/* Fill frequency and MSR value table */
+	f_table = &centaur->freq_table[0];
+	if (brand != EPS_BRAND_C7M) {
+		f_table[0].frequency = fsb * min_multiplier;
+		f_table[0].index = (min_multiplier << 8) | min_voltage;
+		f_table[1].frequency = fsb * max_multiplier;
+		f_table[1].index = (max_multiplier << 8) | max_voltage;
+		f_table[2].frequency = CPUFREQ_TABLE_END;
+	} else {
+		k = 0;
+		step = ((max_voltage - min_voltage) * 256)
+			/ (max_multiplier - min_multiplier);
+		for (i = min_multiplier; i <= max_multiplier; i++) {
+			voltage = (k * step) / 256 + min_voltage;
+			f_table[k].frequency = fsb * i;
+			f_table[k].index = (i << 8) | voltage;
+			k++;
+		}
+		f_table[k].frequency = CPUFREQ_TABLE_END;
+	}
+
+	policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */
+	policy->cur = fsb * current_multiplier;
+
+	ret = cpufreq_frequency_table_cpuinfo(policy, &centaur->freq_table[0]);
+	if (ret) {
+		kfree(centaur);
+		return ret;
+	}
+
+	cpufreq_frequency_table_get_attr(&centaur->freq_table[0], policy->cpu);
+	return 0;
+}
+
+static int eps_cpu_exit(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+	struct eps_cpu_data *centaur;
+	u32 lo, hi;
+
+	if (eps_cpu[cpu] == NULL)
+		return -ENODEV;
+	centaur = eps_cpu[cpu];
+
+	/* Get max frequency */
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	/* Set max frequency */
+	eps_set_state(centaur, cpu, hi & 0xffff);
+	/* Bye */
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	kfree(eps_cpu[cpu]);
+	eps_cpu[cpu] = NULL;
+	return 0;
+}
+
+static struct freq_attr *eps_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver eps_driver = {
+	.verify		= eps_verify,
+	.target		= eps_target,
+	.init		= eps_cpu_init,
+	.exit		= eps_cpu_exit,
+	.get		= eps_get,
+	.name		= "e_powersaver",
+	.owner		= THIS_MODULE,
+	.attr		= eps_attr,
+};
+
+static int __init eps_init(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	/* This driver will work only on Centaur C7 processors with
+	 * Enhanced SpeedStep/PowerSaver registers */
+	if (c->x86_vendor != X86_VENDOR_CENTAUR
+	    || c->x86 != 6 || c->x86_model < 10)
+		return -ENODEV;
+	if (!cpu_has(c, X86_FEATURE_EST))
+		return -ENODEV;
+
+	if (cpufreq_register_driver(&eps_driver))
+		return -EINVAL;
+	return 0;
+}
+
+static void __exit eps_exit(void)
+{
+	cpufreq_unregister_driver(&eps_driver);
+}
+
+MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>");
+MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's.");
+MODULE_LICENSE("GPL");
+
+module_init(eps_init);
+module_exit(eps_exit);
diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c
new file mode 100644
index 000000000000..c587db472a75
--- /dev/null
+++ b/drivers/cpufreq/elanfreq.c
@@ -0,0 +1,309 @@
+/*
+ *	elanfreq:	cpufreq driver for the AMD ELAN family
+ *
+ *	(c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de>
+ *
+ *	Parts of this code are (c) Sven Geggus <sven@geggus.net>
+ *
+ *      All Rights Reserved.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/delay.h>
+#include <linux/cpufreq.h>
+
+#include <asm/msr.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+
+#define REG_CSCIR 0x22		/* Chip Setup and Control Index Register    */
+#define REG_CSCDR 0x23		/* Chip Setup and Control Data  Register    */
+
+/* Module parameter */
+static int max_freq;
+
+struct s_elan_multiplier {
+	int clock;		/* frequency in kHz                         */
+	int val40h;		/* PMU Force Mode register                  */
+	int val80h;		/* CPU Clock Speed Register                 */
+};
+
+/*
+ * It is important that the frequencies
+ * are listed in ascending order here!
+ */
+static struct s_elan_multiplier elan_multiplier[] = {
+	{1000,	0x02,	0x18},
+	{2000,	0x02,	0x10},
+	{4000,	0x02,	0x08},
+	{8000,	0x00,	0x00},
+	{16000,	0x00,	0x02},
+	{33000,	0x00,	0x04},
+	{66000,	0x01,	0x04},
+	{99000,	0x01,	0x05}
+};
+
+static struct cpufreq_frequency_table elanfreq_table[] = {
+	{0,	1000},
+	{1,	2000},
+	{2,	4000},
+	{3,	8000},
+	{4,	16000},
+	{5,	33000},
+	{6,	66000},
+	{7,	99000},
+	{0,	CPUFREQ_TABLE_END},
+};
+
+
+/**
+ *	elanfreq_get_cpu_frequency: determine current cpu speed
+ *
+ *	Finds out at which frequency the CPU of the Elan SOC runs
+ *	at the moment. Frequencies from 1 to 33 MHz are generated
+ *	the normal way, 66 and 99 MHz are called "Hyperspeed Mode"
+ *	and have the rest of the chip running with 33 MHz.
+ */
+
+static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu)
+{
+	u8 clockspeed_reg;    /* Clock Speed Register */
+
+	local_irq_disable();
+	outb_p(0x80, REG_CSCIR);
+	clockspeed_reg = inb_p(REG_CSCDR);
+	local_irq_enable();
+
+	if ((clockspeed_reg & 0xE0) == 0xE0)
+		return 0;
+
+	/* Are we in CPU clock multiplied mode (66/99 MHz)? */
+	if ((clockspeed_reg & 0xE0) == 0xC0) {
+		if ((clockspeed_reg & 0x01) == 0)
+			return 66000;
+		else
+			return 99000;
+	}
+
+	/* 33 MHz is not 32 MHz... */
+	if ((clockspeed_reg & 0xE0) == 0xA0)
+		return 33000;
+
+	return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000;
+}
+
+
+/**
+ *	elanfreq_set_cpu_frequency: Change the CPU core frequency
+ *	@cpu: cpu number
+ *	@freq: frequency in kHz
+ *
+ *	This function takes a frequency value and changes the CPU frequency
+ *	according to this. Note that the frequency has to be checked by
+ *	elanfreq_validatespeed() for correctness!
+ *
+ *	There is no return value.
+ */
+
+static void elanfreq_set_cpu_state(unsigned int state)
+{
+	struct cpufreq_freqs    freqs;
+
+	freqs.old = elanfreq_get_cpu_frequency(0);
+	freqs.new = elan_multiplier[state].clock;
+	freqs.cpu = 0; /* elanfreq.c is UP only driver */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n",
+			elan_multiplier[state].clock);
+
+
+	/*
+	 * Access to the Elan's internal registers is indexed via
+	 * 0x22: Chip Setup & Control Register Index Register (CSCI)
+	 * 0x23: Chip Setup & Control Register Data  Register (CSCD)
+	 *
+	 */
+
+	/*
+	 * 0x40 is the Power Management Unit's Force Mode Register.
+	 * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency)
+	 */
+
+	local_irq_disable();
+	outb_p(0x40, REG_CSCIR);		/* Disable hyperspeed mode */
+	outb_p(0x00, REG_CSCDR);
+	local_irq_enable();		/* wait till internal pipelines and */
+	udelay(1000);			/* buffers have cleaned up          */
+
+	local_irq_disable();
+
+	/* now, set the CPU clock speed register (0x80) */
+	outb_p(0x80, REG_CSCIR);
+	outb_p(elan_multiplier[state].val80h, REG_CSCDR);
+
+	/* now, the hyperspeed bit in PMU Force Mode Register (0x40) */
+	outb_p(0x40, REG_CSCIR);
+	outb_p(elan_multiplier[state].val40h, REG_CSCDR);
+	udelay(10000);
+	local_irq_enable();
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+};
+
+
+/**
+ *	elanfreq_validatespeed: test if frequency range is valid
+ *	@policy: the policy to validate
+ *
+ *	This function checks if a given frequency range in kHz is valid
+ *	for the hardware supported by the driver.
+ */
+
+static int elanfreq_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]);
+}
+
+static int elanfreq_target(struct cpufreq_policy *policy,
+			    unsigned int target_freq,
+			    unsigned int relation)
+{
+	unsigned int newstate = 0;
+
+	if (cpufreq_frequency_table_target(policy, &elanfreq_table[0],
+				target_freq, relation, &newstate))
+		return -EINVAL;
+
+	elanfreq_set_cpu_state(newstate);
+
+	return 0;
+}
+
+
+/*
+ *	Module init and exit code
+ */
+
+static int elanfreq_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+	unsigned int i;
+	int result;
+
+	/* capability check */
+	if ((c->x86_vendor != X86_VENDOR_AMD) ||
+	    (c->x86 != 4) || (c->x86_model != 10))
+		return -ENODEV;
+
+	/* max freq */
+	if (!max_freq)
+		max_freq = elanfreq_get_cpu_frequency(0);
+
+	/* table init */
+	for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+		if (elanfreq_table[i].frequency > max_freq)
+			elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+	}
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = elanfreq_get_cpu_frequency(0);
+
+	result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table);
+	if (result)
+		return result;
+
+	cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu);
+	return 0;
+}
+
+
+static int elanfreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+
+#ifndef MODULE
+/**
+ * elanfreq_setup - elanfreq command line parameter parsing
+ *
+ * elanfreq command line parameter.  Use:
+ *  elanfreq=66000
+ * to set the maximum CPU frequency to 66 MHz. Note that in
+ * case you do not give this boot parameter, the maximum
+ * frequency will fall back to _current_ CPU frequency which
+ * might be lower. If you build this as a module, use the
+ * max_freq module parameter instead.
+ */
+static int __init elanfreq_setup(char *str)
+{
+	max_freq = simple_strtoul(str, &str, 0);
+	printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
+	return 1;
+}
+__setup("elanfreq=", elanfreq_setup);
+#endif
+
+
+static struct freq_attr *elanfreq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+
+static struct cpufreq_driver elanfreq_driver = {
+	.get		= elanfreq_get_cpu_frequency,
+	.verify		= elanfreq_verify,
+	.target		= elanfreq_target,
+	.init		= elanfreq_cpu_init,
+	.exit		= elanfreq_cpu_exit,
+	.name		= "elanfreq",
+	.owner		= THIS_MODULE,
+	.attr		= elanfreq_attr,
+};
+
+
+static int __init elanfreq_init(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	/* Test if we have the right hardware */
+	if ((c->x86_vendor != X86_VENDOR_AMD) ||
+		(c->x86 != 4) || (c->x86_model != 10)) {
+		printk(KERN_INFO "elanfreq: error: no Elan processor found!\n");
+		return -ENODEV;
+	}
+	return cpufreq_register_driver(&elanfreq_driver);
+}
+
+
+static void __exit elanfreq_exit(void)
+{
+	cpufreq_unregister_driver(&elanfreq_driver);
+}
+
+
+module_param(max_freq, int, 0444);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, "
+		"Sven Geggus <sven@geggus.net>");
+MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
+
+module_init(elanfreq_init);
+module_exit(elanfreq_exit);
diff --git a/drivers/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c
new file mode 100644
index 000000000000..ffe1f2c92ed3
--- /dev/null
+++ b/drivers/cpufreq/gx-suspmod.c
@@ -0,0 +1,514 @@
+/*
+ *	Cyrix MediaGX and NatSemi Geode Suspend Modulation
+ *	(C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
+ *	(C) 2002 Hiroshi Miura   <miura@da-cha.org>
+ *	All Rights Reserved
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      version 2 as published by the Free Software Foundation
+ *
+ *      The author(s) of this software shall not be held liable for damages
+ *      of any nature resulting due to the use of this software. This
+ *      software is provided AS-IS with no warranties.
+ *
+ * Theoretical note:
+ *
+ *	(see Geode(tm) CS5530 manual (rev.4.1) page.56)
+ *
+ *	CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0
+ *	are based on Suspend Modulation.
+ *
+ *	Suspend Modulation works by asserting and de-asserting the SUSP# pin
+ *	to CPU(GX1/GXLV) for configurable durations. When asserting SUSP#
+ *	the CPU enters an idle state. GX1 stops its core clock when SUSP# is
+ *	asserted then power consumption is reduced.
+ *
+ *	Suspend Modulation's OFF/ON duration are configurable
+ *	with 'Suspend Modulation OFF Count Register'
+ *	and 'Suspend Modulation ON Count Register'.
+ *	These registers are 8bit counters that represent the number of
+ *	32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF)
+ *	to the processor.
+ *
+ *	These counters define a ratio which is the effective frequency
+ *	of operation of the system.
+ *
+ *			       OFF Count
+ *	F_eff = Fgx * ----------------------
+ *	                OFF Count + ON Count
+ *
+ *	0 <= On Count, Off Count <= 255
+ *
+ *	From these limits, we can get register values
+ *
+ *	off_duration + on_duration <= MAX_DURATION
+ *	on_duration = off_duration * (stock_freq - freq) / freq
+ *
+ *      off_duration  =  (freq * DURATION) / stock_freq
+ *      on_duration = DURATION - off_duration
+ *
+ *
+ *---------------------------------------------------------------------------
+ *
+ * ChangeLog:
+ *	Dec. 12, 2003	Hiroshi Miura <miura@da-cha.org>
+ *		- fix on/off register mistake
+ *		- fix cpu_khz calc when it stops cpu modulation.
+ *
+ *	Dec. 11, 2002	Hiroshi Miura <miura@da-cha.org>
+ *		- rewrite for Cyrix MediaGX Cx5510/5520 and
+ *		  NatSemi Geode Cs5530(A).
+ *
+ *	Jul. ??, 2002  Zwane Mwaikambo <zwane@commfireservices.com>
+ *		- cs5530_mod patch for 2.4.19-rc1.
+ *
+ *---------------------------------------------------------------------------
+ *
+ * Todo
+ *	Test on machines with 5510, 5530, 5530A
+ */
+
+/************************************************************************
+ *			Suspend Modulation - Definitions		*
+ ************************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+
+#include <asm/processor-cyrix.h>
+
+/* PCI config registers, all at F0 */
+#define PCI_PMER1	0x80	/* power management enable register 1 */
+#define PCI_PMER2	0x81	/* power management enable register 2 */
+#define PCI_PMER3	0x82	/* power management enable register 3 */
+#define PCI_IRQTC	0x8c	/* irq speedup timer counter register:typical 2 to 4ms */
+#define PCI_VIDTC	0x8d	/* video speedup timer counter register: typical 50 to 100ms */
+#define PCI_MODOFF	0x94	/* suspend modulation OFF counter register, 1 = 32us */
+#define PCI_MODON	0x95	/* suspend modulation ON counter register */
+#define PCI_SUSCFG	0x96	/* suspend configuration register */
+
+/* PMER1 bits */
+#define GPM		(1<<0)	/* global power management */
+#define GIT		(1<<1)	/* globally enable PM device idle timers */
+#define GTR		(1<<2)	/* globally enable IO traps */
+#define IRQ_SPDUP	(1<<3)	/* disable clock throttle during interrupt handling */
+#define VID_SPDUP	(1<<4)	/* disable clock throttle during vga video handling */
+
+/* SUSCFG bits */
+#define SUSMOD		(1<<0)	/* enable/disable suspend modulation */
+/* the below is supported only with cs5530 (after rev.1.2)/cs5530A */
+#define SMISPDUP	(1<<1)	/* select how SMI re-enable suspend modulation: */
+				/* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */
+#define SUSCFG		(1<<2)	/* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */
+/* the below is supported only with cs5530A */
+#define PWRSVE_ISA	(1<<3)	/* stop ISA clock  */
+#define PWRSVE		(1<<4)	/* active idle */
+
+struct gxfreq_params {
+	u8 on_duration;
+	u8 off_duration;
+	u8 pci_suscfg;
+	u8 pci_pmer1;
+	u8 pci_pmer2;
+	struct pci_dev *cs55x0;
+};
+
+static struct gxfreq_params *gx_params;
+static int stock_freq;
+
+/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
+static int pci_busclk;
+module_param(pci_busclk, int, 0444);
+
+/* maximum duration for which the cpu may be suspended
+ * (32us * MAX_DURATION). If no parameter is given, this defaults
+ * to 255.
+ * Note that this leads to a maximum of 8 ms(!) where the CPU clock
+ * is suspended -- processing power is just 0.39% of what it used to be,
+ * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
+static int max_duration = 255;
+module_param(max_duration, int, 0444);
+
+/* For the default policy, we want at least some processing power
+ * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
+ */
+#define POLICY_MIN_DIV 20
+
+
+/**
+ * we can detect a core multipiler from dir0_lsb
+ * from GX1 datasheet p.56,
+ *	MULT[3:0]:
+ *	0000 = SYSCLK multiplied by 4 (test only)
+ *	0001 = SYSCLK multiplied by 10
+ *	0010 = SYSCLK multiplied by 4
+ *	0011 = SYSCLK multiplied by 6
+ *	0100 = SYSCLK multiplied by 9
+ *	0101 = SYSCLK multiplied by 5
+ *	0110 = SYSCLK multiplied by 7
+ *	0111 = SYSCLK multiplied by 8
+ *              of 33.3MHz
+ **/
+static int gx_freq_mult[16] = {
+		4, 10, 4, 6, 9, 5, 7, 8,
+		0, 0, 0, 0, 0, 0, 0, 0
+};
+
+
+/****************************************************************
+ *	Low Level chipset interface				*
+ ****************************************************************/
+static struct pci_device_id gx_chipset_tbl[] __initdata = {
+	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), },
+	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), },
+	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), },
+	{ 0, },
+};
+
+static void gx_write_byte(int reg, int value)
+{
+	pci_write_config_byte(gx_params->cs55x0, reg, value);
+}
+
+/**
+ * gx_detect_chipset:
+ *
+ **/
+static __init struct pci_dev *gx_detect_chipset(void)
+{
+	struct pci_dev *gx_pci = NULL;
+
+	/* check if CPU is a MediaGX or a Geode. */
+	if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
+	    (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
+		pr_debug("error: no MediaGX/Geode processor found!\n");
+		return NULL;
+	}
+
+	/* detect which companion chip is used */
+	for_each_pci_dev(gx_pci) {
+		if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL)
+			return gx_pci;
+	}
+
+	pr_debug("error: no supported chipset found!\n");
+	return NULL;
+}
+
+/**
+ * gx_get_cpuspeed:
+ *
+ * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi
+ * Geode CPU runs.
+ */
+static unsigned int gx_get_cpuspeed(unsigned int cpu)
+{
+	if ((gx_params->pci_suscfg & SUSMOD) == 0)
+		return stock_freq;
+
+	return (stock_freq * gx_params->off_duration)
+		/ (gx_params->on_duration + gx_params->off_duration);
+}
+
+/**
+ *      gx_validate_speed:
+ *      determine current cpu speed
+ *
+ **/
+
+static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration,
+		u8 *off_duration)
+{
+	unsigned int i;
+	u8 tmp_on, tmp_off;
+	int old_tmp_freq = stock_freq;
+	int tmp_freq;
+
+	*off_duration = 1;
+	*on_duration = 0;
+
+	for (i = max_duration; i > 0; i--) {
+		tmp_off = ((khz * i) / stock_freq) & 0xff;
+		tmp_on = i - tmp_off;
+		tmp_freq = (stock_freq * tmp_off) / i;
+		/* if this relation is closer to khz, use this. If it's equal,
+		 * prefer it, too - lower latency */
+		if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) {
+			*on_duration = tmp_on;
+			*off_duration = tmp_off;
+			old_tmp_freq = tmp_freq;
+		}
+	}
+
+	return old_tmp_freq;
+}
+
+
+/**
+ * gx_set_cpuspeed:
+ * set cpu speed in khz.
+ **/
+
+static void gx_set_cpuspeed(unsigned int khz)
+{
+	u8 suscfg, pmer1;
+	unsigned int new_khz;
+	unsigned long flags;
+	struct cpufreq_freqs freqs;
+
+	freqs.cpu = 0;
+	freqs.old = gx_get_cpuspeed(0);
+
+	new_khz = gx_validate_speed(khz, &gx_params->on_duration,
+			&gx_params->off_duration);
+
+	freqs.new = new_khz;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	local_irq_save(flags);
+
+
+
+	if (new_khz != stock_freq) {
+		/* if new khz == 100% of CPU speed, it is special case */
+		switch (gx_params->cs55x0->device) {
+		case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
+			pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
+			/* FIXME: need to test other values -- Zwane,Miura */
+			/* typical 2 to 4ms */
+			gx_write_byte(PCI_IRQTC, 4);
+			/* typical 50 to 100ms */
+			gx_write_byte(PCI_VIDTC, 100);
+			gx_write_byte(PCI_PMER1, pmer1);
+
+			if (gx_params->cs55x0->revision < 0x10) {
+				/* CS5530(rev 1.2, 1.3) */
+				suscfg = gx_params->pci_suscfg|SUSMOD;
+			} else {
+				/* CS5530A,B.. */
+				suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE;
+			}
+			break;
+		case PCI_DEVICE_ID_CYRIX_5520:
+		case PCI_DEVICE_ID_CYRIX_5510:
+			suscfg = gx_params->pci_suscfg | SUSMOD;
+			break;
+		default:
+			local_irq_restore(flags);
+			pr_debug("fatal: try to set unknown chipset.\n");
+			return;
+		}
+	} else {
+		suscfg = gx_params->pci_suscfg & ~(SUSMOD);
+		gx_params->off_duration = 0;
+		gx_params->on_duration = 0;
+		pr_debug("suspend modulation disabled: cpu runs 100%% speed.\n");
+	}
+
+	gx_write_byte(PCI_MODOFF, gx_params->off_duration);
+	gx_write_byte(PCI_MODON, gx_params->on_duration);
+
+	gx_write_byte(PCI_SUSCFG, suscfg);
+	pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
+
+	local_irq_restore(flags);
+
+	gx_params->pci_suscfg = suscfg;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	pr_debug("suspend modulation w/ duration of ON:%d us, OFF:%d us\n",
+		gx_params->on_duration * 32, gx_params->off_duration * 32);
+	pr_debug("suspend modulation w/ clock speed: %d kHz.\n", freqs.new);
+}
+
+/****************************************************************
+ *             High level functions                             *
+ ****************************************************************/
+
+/*
+ *	cpufreq_gx_verify: test if frequency range is valid
+ *
+ *	This function checks if a given frequency range in kHz is valid
+ *      for the hardware supported by the driver.
+ */
+
+static int cpufreq_gx_verify(struct cpufreq_policy *policy)
+{
+	unsigned int tmp_freq = 0;
+	u8 tmp1, tmp2;
+
+	if (!stock_freq || !policy)
+		return -EINVAL;
+
+	policy->cpu = 0;
+	cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
+			stock_freq);
+
+	/* it needs to be assured that at least one supported frequency is
+	 * within policy->min and policy->max. If it is not, policy->max
+	 * needs to be increased until one freuqency is supported.
+	 * policy->min may not be decreased, though. This way we guarantee a
+	 * specific processing capacity.
+	 */
+	tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2);
+	if (tmp_freq < policy->min)
+		tmp_freq += stock_freq / max_duration;
+	policy->min = tmp_freq;
+	if (policy->min > policy->max)
+		policy->max = tmp_freq;
+	tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2);
+	if (tmp_freq > policy->max)
+		tmp_freq -= stock_freq / max_duration;
+	policy->max = tmp_freq;
+	if (policy->max < policy->min)
+		policy->max = policy->min;
+	cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
+			stock_freq);
+
+	return 0;
+}
+
+/*
+ *      cpufreq_gx_target:
+ *
+ */
+static int cpufreq_gx_target(struct cpufreq_policy *policy,
+			     unsigned int target_freq,
+			     unsigned int relation)
+{
+	u8 tmp1, tmp2;
+	unsigned int tmp_freq;
+
+	if (!stock_freq || !policy)
+		return -EINVAL;
+
+	policy->cpu = 0;
+
+	tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2);
+	while (tmp_freq < policy->min) {
+		tmp_freq += stock_freq / max_duration;
+		tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
+	}
+	while (tmp_freq > policy->max) {
+		tmp_freq -= stock_freq / max_duration;
+		tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2);
+	}
+
+	gx_set_cpuspeed(tmp_freq);
+
+	return 0;
+}
+
+static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int maxfreq, curfreq;
+
+	if (!policy || policy->cpu != 0)
+		return -ENODEV;
+
+	/* determine maximum frequency */
+	if (pci_busclk)
+		maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
+	else if (cpu_khz)
+		maxfreq = cpu_khz;
+	else
+		maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
+
+	stock_freq = maxfreq;
+	curfreq = gx_get_cpuspeed(0);
+
+	pr_debug("cpu max frequency is %d.\n", maxfreq);
+	pr_debug("cpu current frequency is %dkHz.\n", curfreq);
+
+	/* setup basic struct for cpufreq API */
+	policy->cpu = 0;
+
+	if (max_duration < POLICY_MIN_DIV)
+		policy->min = maxfreq / max_duration;
+	else
+		policy->min = maxfreq / POLICY_MIN_DIV;
+	policy->max = maxfreq;
+	policy->cur = curfreq;
+	policy->cpuinfo.min_freq = maxfreq / max_duration;
+	policy->cpuinfo.max_freq = maxfreq;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+
+	return 0;
+}
+
+/*
+ * cpufreq_gx_init:
+ *   MediaGX/Geode GX initialize cpufreq driver
+ */
+static struct cpufreq_driver gx_suspmod_driver = {
+	.get		= gx_get_cpuspeed,
+	.verify		= cpufreq_gx_verify,
+	.target		= cpufreq_gx_target,
+	.init		= cpufreq_gx_cpu_init,
+	.name		= "gx-suspmod",
+	.owner		= THIS_MODULE,
+};
+
+static int __init cpufreq_gx_init(void)
+{
+	int ret;
+	struct gxfreq_params *params;
+	struct pci_dev *gx_pci;
+
+	/* Test if we have the right hardware */
+	gx_pci = gx_detect_chipset();
+	if (gx_pci == NULL)
+		return -ENODEV;
+
+	/* check whether module parameters are sane */
+	if (max_duration > 0xff)
+		max_duration = 0xff;
+
+	pr_debug("geode suspend modulation available.\n");
+
+	params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL);
+	if (params == NULL)
+		return -ENOMEM;
+
+	params->cs55x0 = gx_pci;
+	gx_params = params;
+
+	/* keep cs55x0 configurations */
+	pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg));
+	pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
+	pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
+	pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
+	pci_read_config_byte(params->cs55x0, PCI_MODOFF,
+			&(params->off_duration));
+
+	ret = cpufreq_register_driver(&gx_suspmod_driver);
+	if (ret) {
+		kfree(params);
+		return ret;                   /* register error! */
+	}
+
+	return 0;
+}
+
+static void __exit cpufreq_gx_exit(void)
+{
+	cpufreq_unregister_driver(&gx_suspmod_driver);
+	pci_dev_put(gx_params->cs55x0);
+	kfree(gx_params);
+}
+
+MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>");
+MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
+MODULE_LICENSE("GPL");
+
+module_init(cpufreq_gx_init);
+module_exit(cpufreq_gx_exit);
+
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
new file mode 100644
index 000000000000..f47d26e2a135
--- /dev/null
+++ b/drivers/cpufreq/longhaul.c
@@ -0,0 +1,1024 @@
+/*
+ *  (C) 2001-2004  Dave Jones. <davej@redhat.com>
+ *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon datasheets & sample CPUs kindly provided by VIA.
+ *
+ *  VIA have currently 3 different versions of Longhaul.
+ *  Version 1 (Longhaul) uses the BCR2 MSR at 0x1147.
+ *   It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0.
+ *  Version 2 of longhaul is backward compatible with v1, but adds
+ *   LONGHAUL MSR for purpose of both frequency and voltage scaling.
+ *   Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C).
+ *  Version 3 of longhaul got renamed to Powersaver and redesigned
+ *   to use only the POWERSAVER MSR at 0x110a.
+ *   It is present in Ezra-T (C5M), Nehemiah (C5X) and above.
+ *   It's pretty much the same feature wise to longhaul v2, though
+ *   there is provision for scaling FSB too, but this doesn't work
+ *   too well in practice so we don't even try to use this.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+#include <linux/acpi.h>
+
+#include <asm/msr.h>
+#include <acpi/processor.h>
+
+#include "longhaul.h"
+
+#define PFX "longhaul: "
+
+#define TYPE_LONGHAUL_V1	1
+#define TYPE_LONGHAUL_V2	2
+#define TYPE_POWERSAVER		3
+
+#define	CPU_SAMUEL	1
+#define	CPU_SAMUEL2	2
+#define	CPU_EZRA	3
+#define	CPU_EZRA_T	4
+#define	CPU_NEHEMIAH	5
+#define	CPU_NEHEMIAH_C	6
+
+/* Flags */
+#define USE_ACPI_C3		(1 << 1)
+#define USE_NORTHBRIDGE		(1 << 2)
+
+static int cpu_model;
+static unsigned int numscales = 16;
+static unsigned int fsb;
+
+static const struct mV_pos *vrm_mV_table;
+static const unsigned char *mV_vrm_table;
+
+static unsigned int highest_speed, lowest_speed; /* kHz */
+static unsigned int minmult, maxmult;
+static int can_scale_voltage;
+static struct acpi_processor *pr;
+static struct acpi_processor_cx *cx;
+static u32 acpi_regs_addr;
+static u8 longhaul_flags;
+static unsigned int longhaul_index;
+
+/* Module parameters */
+static int scale_voltage;
+static int disable_acpi_c3;
+static int revid_errata;
+
+
+/* Clock ratios multiplied by 10 */
+static int mults[32];
+static int eblcr[32];
+static int longhaul_version;
+static struct cpufreq_frequency_table *longhaul_table;
+
+static char speedbuffer[8];
+
+static char *print_speed(int speed)
+{
+	if (speed < 1000) {
+		snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed);
+		return speedbuffer;
+	}
+
+	if (speed%1000 == 0)
+		snprintf(speedbuffer, sizeof(speedbuffer),
+			"%dGHz", speed/1000);
+	else
+		snprintf(speedbuffer, sizeof(speedbuffer),
+			"%d.%dGHz", speed/1000, (speed%1000)/100);
+
+	return speedbuffer;
+}
+
+
+static unsigned int calc_speed(int mult)
+{
+	int khz;
+	khz = (mult/10)*fsb;
+	if (mult%10)
+		khz += fsb/2;
+	khz *= 1000;
+	return khz;
+}
+
+
+static int longhaul_get_cpu_mult(void)
+{
+	unsigned long invalue = 0, lo, hi;
+
+	rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi);
+	invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22;
+	if (longhaul_version == TYPE_LONGHAUL_V2 ||
+	    longhaul_version == TYPE_POWERSAVER) {
+		if (lo & (1<<27))
+			invalue += 16;
+	}
+	return eblcr[invalue];
+}
+
+/* For processor with BCR2 MSR */
+
+static void do_longhaul1(unsigned int mults_index)
+{
+	union msr_bcr2 bcr2;
+
+	rdmsrl(MSR_VIA_BCR2, bcr2.val);
+	/* Enable software clock multiplier */
+	bcr2.bits.ESOFTBF = 1;
+	bcr2.bits.CLOCKMUL = mults_index & 0xff;
+
+	/* Sync to timer tick */
+	safe_halt();
+	/* Change frequency on next halt or sleep */
+	wrmsrl(MSR_VIA_BCR2, bcr2.val);
+	/* Invoke transition */
+	ACPI_FLUSH_CPU_CACHE();
+	halt();
+
+	/* Disable software clock multiplier */
+	local_irq_disable();
+	rdmsrl(MSR_VIA_BCR2, bcr2.val);
+	bcr2.bits.ESOFTBF = 0;
+	wrmsrl(MSR_VIA_BCR2, bcr2.val);
+}
+
+/* For processor with Longhaul MSR */
+
+static void do_powersaver(int cx_address, unsigned int mults_index,
+			  unsigned int dir)
+{
+	union msr_longhaul longhaul;
+	u32 t;
+
+	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	/* Setup new frequency */
+	if (!revid_errata)
+		longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
+	else
+		longhaul.bits.RevisionKey = 0;
+	longhaul.bits.SoftBusRatio = mults_index & 0xf;
+	longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4;
+	/* Setup new voltage */
+	if (can_scale_voltage)
+		longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f;
+	/* Sync to timer tick */
+	safe_halt();
+	/* Raise voltage if necessary */
+	if (can_scale_voltage && dir) {
+		longhaul.bits.EnableSoftVID = 1;
+		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+		/* Change voltage */
+		if (!cx_address) {
+			ACPI_FLUSH_CPU_CACHE();
+			halt();
+		} else {
+			ACPI_FLUSH_CPU_CACHE();
+			/* Invoke C3 */
+			inb(cx_address);
+			/* Dummy op - must do something useless after P_LVL3
+			 * read */
+			t = inl(acpi_gbl_FADT.xpm_timer_block.address);
+		}
+		longhaul.bits.EnableSoftVID = 0;
+		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	}
+
+	/* Change frequency on next halt or sleep */
+	longhaul.bits.EnableSoftBusRatio = 1;
+	wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	if (!cx_address) {
+		ACPI_FLUSH_CPU_CACHE();
+		halt();
+	} else {
+		ACPI_FLUSH_CPU_CACHE();
+		/* Invoke C3 */
+		inb(cx_address);
+		/* Dummy op - must do something useless after P_LVL3 read */
+		t = inl(acpi_gbl_FADT.xpm_timer_block.address);
+	}
+	/* Disable bus ratio bit */
+	longhaul.bits.EnableSoftBusRatio = 0;
+	wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+
+	/* Reduce voltage if necessary */
+	if (can_scale_voltage && !dir) {
+		longhaul.bits.EnableSoftVID = 1;
+		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+		/* Change voltage */
+		if (!cx_address) {
+			ACPI_FLUSH_CPU_CACHE();
+			halt();
+		} else {
+			ACPI_FLUSH_CPU_CACHE();
+			/* Invoke C3 */
+			inb(cx_address);
+			/* Dummy op - must do something useless after P_LVL3
+			 * read */
+			t = inl(acpi_gbl_FADT.xpm_timer_block.address);
+		}
+		longhaul.bits.EnableSoftVID = 0;
+		wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	}
+}
+
+/**
+ * longhaul_set_cpu_frequency()
+ * @mults_index : bitpattern of the new multiplier.
+ *
+ * Sets a new clock ratio.
+ */
+
+static void longhaul_setstate(unsigned int table_index)
+{
+	unsigned int mults_index;
+	int speed, mult;
+	struct cpufreq_freqs freqs;
+	unsigned long flags;
+	unsigned int pic1_mask, pic2_mask;
+	u16 bm_status = 0;
+	u32 bm_timeout = 1000;
+	unsigned int dir = 0;
+
+	mults_index = longhaul_table[table_index].index;
+	/* Safety precautions */
+	mult = mults[mults_index & 0x1f];
+	if (mult == -1)
+		return;
+	speed = calc_speed(mult);
+	if ((speed > highest_speed) || (speed < lowest_speed))
+		return;
+	/* Voltage transition before frequency transition? */
+	if (can_scale_voltage && longhaul_index < table_index)
+		dir = 1;
+
+	freqs.old = calc_speed(longhaul_get_cpu_mult());
+	freqs.new = speed;
+	freqs.cpu = 0; /* longhaul.c is UP only driver */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	pr_debug("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
+			fsb, mult/10, mult%10, print_speed(speed/1000));
+retry_loop:
+	preempt_disable();
+	local_irq_save(flags);
+
+	pic2_mask = inb(0xA1);
+	pic1_mask = inb(0x21);	/* works on C3. save mask. */
+	outb(0xFF, 0xA1);	/* Overkill */
+	outb(0xFE, 0x21);	/* TMR0 only */
+
+	/* Wait while PCI bus is busy. */
+	if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE
+	    || ((pr != NULL) && pr->flags.bm_control))) {
+		bm_status = inw(acpi_regs_addr);
+		bm_status &= 1 << 4;
+		while (bm_status && bm_timeout) {
+			outw(1 << 4, acpi_regs_addr);
+			bm_timeout--;
+			bm_status = inw(acpi_regs_addr);
+			bm_status &= 1 << 4;
+		}
+	}
+
+	if (longhaul_flags & USE_NORTHBRIDGE) {
+		/* Disable AGP and PCI arbiters */
+		outb(3, 0x22);
+	} else if ((pr != NULL) && pr->flags.bm_control) {
+		/* Disable bus master arbitration */
+		acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
+	}
+	switch (longhaul_version) {
+
+	/*
+	 * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B])
+	 * Software controlled multipliers only.
+	 */
+	case TYPE_LONGHAUL_V1:
+		do_longhaul1(mults_index);
+		break;
+
+	/*
+	 * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C]
+	 *
+	 * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N])
+	 * Nehemiah can do FSB scaling too, but this has never been proven
+	 * to work in practice.
+	 */
+	case TYPE_LONGHAUL_V2:
+	case TYPE_POWERSAVER:
+		if (longhaul_flags & USE_ACPI_C3) {
+			/* Don't allow wakeup */
+			acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+			do_powersaver(cx->address, mults_index, dir);
+		} else {
+			do_powersaver(0, mults_index, dir);
+		}
+		break;
+	}
+
+	if (longhaul_flags & USE_NORTHBRIDGE) {
+		/* Enable arbiters */
+		outb(0, 0x22);
+	} else if ((pr != NULL) && pr->flags.bm_control) {
+		/* Enable bus master arbitration */
+		acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
+	}
+	outb(pic2_mask, 0xA1);	/* restore mask */
+	outb(pic1_mask, 0x21);
+
+	local_irq_restore(flags);
+	preempt_enable();
+
+	freqs.new = calc_speed(longhaul_get_cpu_mult());
+	/* Check if requested frequency is set. */
+	if (unlikely(freqs.new != speed)) {
+		printk(KERN_INFO PFX "Failed to set requested frequency!\n");
+		/* Revision ID = 1 but processor is expecting revision key
+		 * equal to 0. Jumpers at the bottom of processor will change
+		 * multiplier and FSB, but will not change bits in Longhaul
+		 * MSR nor enable voltage scaling. */
+		if (!revid_errata) {
+			printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" "
+						"option.\n");
+			revid_errata = 1;
+			msleep(200);
+			goto retry_loop;
+		}
+		/* Why ACPI C3 sometimes doesn't work is a mystery for me.
+		 * But it does happen. Processor is entering ACPI C3 state,
+		 * but it doesn't change frequency. I tried poking various
+		 * bits in northbridge registers, but without success. */
+		if (longhaul_flags & USE_ACPI_C3) {
+			printk(KERN_INFO PFX "Disabling ACPI C3 support.\n");
+			longhaul_flags &= ~USE_ACPI_C3;
+			if (revid_errata) {
+				printk(KERN_INFO PFX "Disabling \"Ignore "
+						"Revision ID\" option.\n");
+				revid_errata = 0;
+			}
+			msleep(200);
+			goto retry_loop;
+		}
+		/* This shouldn't happen. Longhaul ver. 2 was reported not
+		 * working on processors without voltage scaling, but with
+		 * RevID = 1. RevID errata will make things right. Just
+		 * to be 100% sure. */
+		if (longhaul_version == TYPE_LONGHAUL_V2) {
+			printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n");
+			longhaul_version = TYPE_LONGHAUL_V1;
+			msleep(200);
+			goto retry_loop;
+		}
+	}
+	/* Report true CPU frequency */
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	if (!bm_timeout)
+		printk(KERN_INFO PFX "Warning: Timeout while waiting for "
+				"idle PCI bus.\n");
+}
+
+/*
+ * Centaur decided to make life a little more tricky.
+ * Only longhaul v1 is allowed to read EBLCR BSEL[0:1].
+ * Samuel2 and above have to try and guess what the FSB is.
+ * We do this by assuming we booted at maximum multiplier, and interpolate
+ * between that value multiplied by possible FSBs and cpu_mhz which
+ * was calculated at boot time. Really ugly, but no other way to do this.
+ */
+
+#define ROUNDING	0xf
+
+static int guess_fsb(int mult)
+{
+	int speed = cpu_khz / 1000;
+	int i;
+	int speeds[] = { 666, 1000, 1333, 2000 };
+	int f_max, f_min;
+
+	for (i = 0; i < 4; i++) {
+		f_max = ((speeds[i] * mult) + 50) / 100;
+		f_max += (ROUNDING / 2);
+		f_min = f_max - ROUNDING;
+		if ((speed <= f_max) && (speed >= f_min))
+			return speeds[i] / 10;
+	}
+	return 0;
+}
+
+
+static int __cpuinit longhaul_get_ranges(void)
+{
+	unsigned int i, j, k = 0;
+	unsigned int ratio;
+	int mult;
+
+	/* Get current frequency */
+	mult = longhaul_get_cpu_mult();
+	if (mult == -1) {
+		printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n");
+		return -EINVAL;
+	}
+	fsb = guess_fsb(mult);
+	if (fsb == 0) {
+		printk(KERN_INFO PFX "Invalid (reserved) FSB!\n");
+		return -EINVAL;
+	}
+	/* Get max multiplier - as we always did.
+	 * Longhaul MSR is useful only when voltage scaling is enabled.
+	 * C3 is booting at max anyway. */
+	maxmult = mult;
+	/* Get min multiplier */
+	switch (cpu_model) {
+	case CPU_NEHEMIAH:
+		minmult = 50;
+		break;
+	case CPU_NEHEMIAH_C:
+		minmult = 40;
+		break;
+	default:
+		minmult = 30;
+		break;
+	}
+
+	pr_debug("MinMult:%d.%dx MaxMult:%d.%dx\n",
+		 minmult/10, minmult%10, maxmult/10, maxmult%10);
+
+	highest_speed = calc_speed(maxmult);
+	lowest_speed = calc_speed(minmult);
+	pr_debug("FSB:%dMHz  Lowest speed: %s   Highest speed:%s\n", fsb,
+		 print_speed(lowest_speed/1000),
+		 print_speed(highest_speed/1000));
+
+	if (lowest_speed == highest_speed) {
+		printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n");
+		return -EINVAL;
+	}
+	if (lowest_speed > highest_speed) {
+		printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
+			lowest_speed, highest_speed);
+		return -EINVAL;
+	}
+
+	longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table),
+			GFP_KERNEL);
+	if (!longhaul_table)
+		return -ENOMEM;
+
+	for (j = 0; j < numscales; j++) {
+		ratio = mults[j];
+		if (ratio == -1)
+			continue;
+		if (ratio > maxmult || ratio < minmult)
+			continue;
+		longhaul_table[k].frequency = calc_speed(ratio);
+		longhaul_table[k].index	= j;
+		k++;
+	}
+	if (k <= 1) {
+		kfree(longhaul_table);
+		return -ENODEV;
+	}
+	/* Sort */
+	for (j = 0; j < k - 1; j++) {
+		unsigned int min_f, min_i;
+		min_f = longhaul_table[j].frequency;
+		min_i = j;
+		for (i = j + 1; i < k; i++) {
+			if (longhaul_table[i].frequency < min_f) {
+				min_f = longhaul_table[i].frequency;
+				min_i = i;
+			}
+		}
+		if (min_i != j) {
+			swap(longhaul_table[j].frequency,
+			     longhaul_table[min_i].frequency);
+			swap(longhaul_table[j].index,
+			     longhaul_table[min_i].index);
+		}
+	}
+
+	longhaul_table[k].frequency = CPUFREQ_TABLE_END;
+
+	/* Find index we are running on */
+	for (j = 0; j < k; j++) {
+		if (mults[longhaul_table[j].index & 0x1f] == mult) {
+			longhaul_index = j;
+			break;
+		}
+	}
+	return 0;
+}
+
+
+static void __cpuinit longhaul_setup_voltagescaling(void)
+{
+	union msr_longhaul longhaul;
+	struct mV_pos minvid, maxvid, vid;
+	unsigned int j, speed, pos, kHz_step, numvscales;
+	int min_vid_speed;
+
+	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	if (!(longhaul.bits.RevisionID & 1)) {
+		printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
+		return;
+	}
+
+	if (!longhaul.bits.VRMRev) {
+		printk(KERN_INFO PFX "VRM 8.5\n");
+		vrm_mV_table = &vrm85_mV[0];
+		mV_vrm_table = &mV_vrm85[0];
+	} else {
+		printk(KERN_INFO PFX "Mobile VRM\n");
+		if (cpu_model < CPU_NEHEMIAH)
+			return;
+		vrm_mV_table = &mobilevrm_mV[0];
+		mV_vrm_table = &mV_mobilevrm[0];
+	}
+
+	minvid = vrm_mV_table[longhaul.bits.MinimumVID];
+	maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
+
+	if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
+		printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
+					"Voltage scaling disabled.\n",
+					minvid.mV/1000, minvid.mV%1000,
+					maxvid.mV/1000, maxvid.mV%1000);
+		return;
+	}
+
+	if (minvid.mV == maxvid.mV) {
+		printk(KERN_INFO PFX "Claims to support voltage scaling but "
+				"min & max are both %d.%03d. "
+				"Voltage scaling disabled\n",
+				maxvid.mV/1000, maxvid.mV%1000);
+		return;
+	}
+
+	/* How many voltage steps*/
+	numvscales = maxvid.pos - minvid.pos + 1;
+	printk(KERN_INFO PFX
+		"Max VID=%d.%03d  "
+		"Min VID=%d.%03d, "
+		"%d possible voltage scales\n",
+		maxvid.mV/1000, maxvid.mV%1000,
+		minvid.mV/1000, minvid.mV%1000,
+		numvscales);
+
+	/* Calculate max frequency at min voltage */
+	j = longhaul.bits.MinMHzBR;
+	if (longhaul.bits.MinMHzBR4)
+		j += 16;
+	min_vid_speed = eblcr[j];
+	if (min_vid_speed == -1)
+		return;
+	switch (longhaul.bits.MinMHzFSB) {
+	case 0:
+		min_vid_speed *= 13333;
+		break;
+	case 1:
+		min_vid_speed *= 10000;
+		break;
+	case 3:
+		min_vid_speed *= 6666;
+		break;
+	default:
+		return;
+		break;
+	}
+	if (min_vid_speed >= highest_speed)
+		return;
+	/* Calculate kHz for one voltage step */
+	kHz_step = (highest_speed - min_vid_speed) / numvscales;
+
+	j = 0;
+	while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
+		speed = longhaul_table[j].frequency;
+		if (speed > min_vid_speed)
+			pos = (speed - min_vid_speed) / kHz_step + minvid.pos;
+		else
+			pos = minvid.pos;
+		longhaul_table[j].index |= mV_vrm_table[pos] << 8;
+		vid = vrm_mV_table[mV_vrm_table[pos]];
+		printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n",
+				speed, j, vid.mV);
+		j++;
+	}
+
+	can_scale_voltage = 1;
+	printk(KERN_INFO PFX "Voltage scaling enabled.\n");
+}
+
+
+static int longhaul_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, longhaul_table);
+}
+
+
+static int longhaul_target(struct cpufreq_policy *policy,
+			    unsigned int target_freq, unsigned int relation)
+{
+	unsigned int table_index = 0;
+	unsigned int i;
+	unsigned int dir = 0;
+	u8 vid, current_vid;
+
+	if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq,
+				relation, &table_index))
+		return -EINVAL;
+
+	/* Don't set same frequency again */
+	if (longhaul_index == table_index)
+		return 0;
+
+	if (!can_scale_voltage)
+		longhaul_setstate(table_index);
+	else {
+		/* On test system voltage transitions exceeding single
+		 * step up or down were turning motherboard off. Both
+		 * "ondemand" and "userspace" are unsafe. C7 is doing
+		 * this in hardware, C3 is old and we need to do this
+		 * in software. */
+		i = longhaul_index;
+		current_vid = (longhaul_table[longhaul_index].index >> 8);
+		current_vid &= 0x1f;
+		if (table_index > longhaul_index)
+			dir = 1;
+		while (i != table_index) {
+			vid = (longhaul_table[i].index >> 8) & 0x1f;
+			if (vid != current_vid) {
+				longhaul_setstate(i);
+				current_vid = vid;
+				msleep(200);
+			}
+			if (dir)
+				i++;
+			else
+				i--;
+		}
+		longhaul_setstate(table_index);
+	}
+	longhaul_index = table_index;
+	return 0;
+}
+
+
+static unsigned int longhaul_get(unsigned int cpu)
+{
+	if (cpu)
+		return 0;
+	return calc_speed(longhaul_get_cpu_mult());
+}
+
+static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
+					  u32 nesting_level,
+					  void *context, void **return_value)
+{
+	struct acpi_device *d;
+
+	if (acpi_bus_get_device(obj_handle, &d))
+		return 0;
+
+	*return_value = acpi_driver_data(d);
+	return 1;
+}
+
+/* VIA don't support PM2 reg, but have something similar */
+static int enable_arbiter_disable(void)
+{
+	struct pci_dev *dev;
+	int status = 1;
+	int reg;
+	u8 pci_cmd;
+
+	/* Find PLE133 host bridge */
+	reg = 0x78;
+	dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0,
+			     NULL);
+	/* Find PM133/VT8605 host bridge */
+	if (dev == NULL)
+		dev = pci_get_device(PCI_VENDOR_ID_VIA,
+				     PCI_DEVICE_ID_VIA_8605_0, NULL);
+	/* Find CLE266 host bridge */
+	if (dev == NULL) {
+		reg = 0x76;
+		dev = pci_get_device(PCI_VENDOR_ID_VIA,
+				     PCI_DEVICE_ID_VIA_862X_0, NULL);
+		/* Find CN400 V-Link host bridge */
+		if (dev == NULL)
+			dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL);
+	}
+	if (dev != NULL) {
+		/* Enable access to port 0x22 */
+		pci_read_config_byte(dev, reg, &pci_cmd);
+		if (!(pci_cmd & 1<<7)) {
+			pci_cmd |= 1<<7;
+			pci_write_config_byte(dev, reg, pci_cmd);
+			pci_read_config_byte(dev, reg, &pci_cmd);
+			if (!(pci_cmd & 1<<7)) {
+				printk(KERN_ERR PFX
+					"Can't enable access to port 0x22.\n");
+				status = 0;
+			}
+		}
+		pci_dev_put(dev);
+		return status;
+	}
+	return 0;
+}
+
+static int longhaul_setup_southbridge(void)
+{
+	struct pci_dev *dev;
+	u8 pci_cmd;
+
+	/* Find VT8235 southbridge */
+	dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
+	if (dev == NULL)
+		/* Find VT8237 southbridge */
+		dev = pci_get_device(PCI_VENDOR_ID_VIA,
+				     PCI_DEVICE_ID_VIA_8237, NULL);
+	if (dev != NULL) {
+		/* Set transition time to max */
+		pci_read_config_byte(dev, 0xec, &pci_cmd);
+		pci_cmd &= ~(1 << 2);
+		pci_write_config_byte(dev, 0xec, pci_cmd);
+		pci_read_config_byte(dev, 0xe4, &pci_cmd);
+		pci_cmd &= ~(1 << 7);
+		pci_write_config_byte(dev, 0xe4, pci_cmd);
+		pci_read_config_byte(dev, 0xe5, &pci_cmd);
+		pci_cmd |= 1 << 7;
+		pci_write_config_byte(dev, 0xe5, pci_cmd);
+		/* Get address of ACPI registers block*/
+		pci_read_config_byte(dev, 0x81, &pci_cmd);
+		if (pci_cmd & 1 << 7) {
+			pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
+			acpi_regs_addr &= 0xff00;
+			printk(KERN_INFO PFX "ACPI I/O at 0x%x\n",
+					acpi_regs_addr);
+		}
+
+		pci_dev_put(dev);
+		return 1;
+	}
+	return 0;
+}
+
+static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+	char *cpuname = NULL;
+	int ret;
+	u32 lo, hi;
+
+	/* Check what we have on this motherboard */
+	switch (c->x86_model) {
+	case 6:
+		cpu_model = CPU_SAMUEL;
+		cpuname = "C3 'Samuel' [C5A]";
+		longhaul_version = TYPE_LONGHAUL_V1;
+		memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
+		memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr));
+		break;
+
+	case 7:
+		switch (c->x86_mask) {
+		case 0:
+			longhaul_version = TYPE_LONGHAUL_V1;
+			cpu_model = CPU_SAMUEL2;
+			cpuname = "C3 'Samuel 2' [C5B]";
+			/* Note, this is not a typo, early Samuel2's had
+			 * Samuel1 ratios. */
+			memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
+			memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
+			break;
+		case 1 ... 15:
+			longhaul_version = TYPE_LONGHAUL_V2;
+			if (c->x86_mask < 8) {
+				cpu_model = CPU_SAMUEL2;
+				cpuname = "C3 'Samuel 2' [C5B]";
+			} else {
+				cpu_model = CPU_EZRA;
+				cpuname = "C3 'Ezra' [C5C]";
+			}
+			memcpy(mults, ezra_mults, sizeof(ezra_mults));
+			memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr));
+			break;
+		}
+		break;
+
+	case 8:
+		cpu_model = CPU_EZRA_T;
+		cpuname = "C3 'Ezra-T' [C5M]";
+		longhaul_version = TYPE_POWERSAVER;
+		numscales = 32;
+		memcpy(mults, ezrat_mults, sizeof(ezrat_mults));
+		memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr));
+		break;
+
+	case 9:
+		longhaul_version = TYPE_POWERSAVER;
+		numscales = 32;
+		memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
+		memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
+		switch (c->x86_mask) {
+		case 0 ... 1:
+			cpu_model = CPU_NEHEMIAH;
+			cpuname = "C3 'Nehemiah A' [C5XLOE]";
+			break;
+		case 2 ... 4:
+			cpu_model = CPU_NEHEMIAH;
+			cpuname = "C3 'Nehemiah B' [C5XLOH]";
+			break;
+		case 5 ... 15:
+			cpu_model = CPU_NEHEMIAH_C;
+			cpuname = "C3 'Nehemiah C' [C5P]";
+			break;
+		}
+		break;
+
+	default:
+		cpuname = "Unknown";
+		break;
+	}
+	/* Check Longhaul ver. 2 */
+	if (longhaul_version == TYPE_LONGHAUL_V2) {
+		rdmsr(MSR_VIA_LONGHAUL, lo, hi);
+		if (lo == 0 && hi == 0)
+			/* Looks like MSR isn't present */
+			longhaul_version = TYPE_LONGHAUL_V1;
+	}
+
+	printk(KERN_INFO PFX "VIA %s CPU detected.  ", cpuname);
+	switch (longhaul_version) {
+	case TYPE_LONGHAUL_V1:
+	case TYPE_LONGHAUL_V2:
+		printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version);
+		break;
+	case TYPE_POWERSAVER:
+		printk(KERN_CONT "Powersaver supported.\n");
+		break;
+	};
+
+	/* Doesn't hurt */
+	longhaul_setup_southbridge();
+
+	/* Find ACPI data for processor */
+	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+				ACPI_UINT32_MAX, &longhaul_walk_callback, NULL,
+				NULL, (void *)&pr);
+
+	/* Check ACPI support for C3 state */
+	if (pr != NULL && longhaul_version == TYPE_POWERSAVER) {
+		cx = &pr->power.states[ACPI_STATE_C3];
+		if (cx->address > 0 && cx->latency <= 1000)
+			longhaul_flags |= USE_ACPI_C3;
+	}
+	/* Disable if it isn't working */
+	if (disable_acpi_c3)
+		longhaul_flags &= ~USE_ACPI_C3;
+	/* Check if northbridge is friendly */
+	if (enable_arbiter_disable())
+		longhaul_flags |= USE_NORTHBRIDGE;
+
+	/* Check ACPI support for bus master arbiter disable */
+	if (!(longhaul_flags & USE_ACPI_C3
+	     || longhaul_flags & USE_NORTHBRIDGE)
+	    && ((pr == NULL) || !(pr->flags.bm_control))) {
+		printk(KERN_ERR PFX
+			"No ACPI support. Unsupported northbridge.\n");
+		return -ENODEV;
+	}
+
+	if (longhaul_flags & USE_NORTHBRIDGE)
+		printk(KERN_INFO PFX "Using northbridge support.\n");
+	if (longhaul_flags & USE_ACPI_C3)
+		printk(KERN_INFO PFX "Using ACPI support.\n");
+
+	ret = longhaul_get_ranges();
+	if (ret != 0)
+		return ret;
+
+	if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0))
+		longhaul_setup_voltagescaling();
+
+	policy->cpuinfo.transition_latency = 200000;	/* nsec */
+	policy->cur = calc_speed(longhaul_get_cpu_mult());
+
+	ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table);
+	if (ret)
+		return ret;
+
+	cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu);
+
+	return 0;
+}
+
+static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static struct freq_attr *longhaul_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver longhaul_driver = {
+	.verify	= longhaul_verify,
+	.target	= longhaul_target,
+	.get	= longhaul_get,
+	.init	= longhaul_cpu_init,
+	.exit	= __devexit_p(longhaul_cpu_exit),
+	.name	= "longhaul",
+	.owner	= THIS_MODULE,
+	.attr	= longhaul_attr,
+};
+
+
+static int __init longhaul_init(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6)
+		return -ENODEV;
+
+#ifdef CONFIG_SMP
+	if (num_online_cpus() > 1) {
+		printk(KERN_ERR PFX "More than 1 CPU detected, "
+				"longhaul disabled.\n");
+		return -ENODEV;
+	}
+#endif
+#ifdef CONFIG_X86_IO_APIC
+	if (cpu_has_apic) {
+		printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
+				"broken in this configuration.\n");
+		return -ENODEV;
+	}
+#endif
+	switch (c->x86_model) {
+	case 6 ... 9:
+		return cpufreq_register_driver(&longhaul_driver);
+	case 10:
+		printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
+	default:
+		;
+	}
+
+	return -ENODEV;
+}
+
+
+static void __exit longhaul_exit(void)
+{
+	int i;
+
+	for (i = 0; i < numscales; i++) {
+		if (mults[i] == maxmult) {
+			longhaul_setstate(i);
+			break;
+		}
+	}
+
+	cpufreq_unregister_driver(&longhaul_driver);
+	kfree(longhaul_table);
+}
+
+/* Even if BIOS is exporting ACPI C3 state, and it is used
+ * with success when CPU is idle, this state doesn't
+ * trigger frequency transition in some cases. */
+module_param(disable_acpi_c3, int, 0644);
+MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
+/* Change CPU voltage with frequency. Very useful to save
+ * power, but most VIA C3 processors aren't supporting it. */
+module_param(scale_voltage, int, 0644);
+MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
+/* Force revision key to 0 for processors which doesn't
+ * support voltage scaling, but are introducing itself as
+ * such. */
+module_param(revid_errata, int, 0644);
+MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
+
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors.");
+MODULE_LICENSE("GPL");
+
+late_initcall(longhaul_init);
+module_exit(longhaul_exit);
diff --git a/drivers/cpufreq/longhaul.h b/drivers/cpufreq/longhaul.h
new file mode 100644
index 000000000000..cbf48fbca881
--- /dev/null
+++ b/drivers/cpufreq/longhaul.h
@@ -0,0 +1,353 @@
+/*
+ *  longhaul.h
+ *  (C) 2003 Dave Jones.
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  VIA-specific information
+ */
+
+union msr_bcr2 {
+	struct {
+		unsigned Reseved:19,	// 18:0
+		ESOFTBF:1,		// 19
+		Reserved2:3,		// 22:20
+		CLOCKMUL:4,		// 26:23
+		Reserved3:5;		// 31:27
+	} bits;
+	unsigned long val;
+};
+
+union msr_longhaul {
+	struct {
+		unsigned RevisionID:4,	// 3:0
+		RevisionKey:4,		// 7:4
+		EnableSoftBusRatio:1,	// 8
+		EnableSoftVID:1,	// 9
+		EnableSoftBSEL:1,	// 10
+		Reserved:3,		// 11:13
+		SoftBusRatio4:1,	// 14
+		VRMRev:1,		// 15
+		SoftBusRatio:4,		// 19:16
+		SoftVID:5,		// 24:20
+		Reserved2:3,		// 27:25
+		SoftBSEL:2,		// 29:28
+		Reserved3:2,		// 31:30
+		MaxMHzBR:4,		// 35:32
+		MaximumVID:5,		// 40:36
+		MaxMHzFSB:2,		// 42:41
+		MaxMHzBR4:1,		// 43
+		Reserved4:4,		// 47:44
+		MinMHzBR:4,		// 51:48
+		MinimumVID:5,		// 56:52
+		MinMHzFSB:2,		// 58:57
+		MinMHzBR4:1,		// 59
+		Reserved5:4;		// 63:60
+	} bits;
+	unsigned long long val;
+};
+
+/*
+ * Clock ratio tables. Div/Mod by 10 to get ratio.
+ * The eblcr values specify the ratio read from the CPU.
+ * The mults values specify what to write to the CPU.
+ */
+
+/*
+ * VIA C3 Samuel 1  & Samuel 2 (stepping 0)
+ */
+static const int __cpuinitdata samuel1_mults[16] = {
+	-1, /* 0000 -> RESERVED */
+	30, /* 0001 ->  3.0x */
+	40, /* 0010 ->  4.0x */
+	-1, /* 0011 -> RESERVED */
+	-1, /* 0100 -> RESERVED */
+	35, /* 0101 ->  3.5x */
+	45, /* 0110 ->  4.5x */
+	55, /* 0111 ->  5.5x */
+	60, /* 1000 ->  6.0x */
+	70, /* 1001 ->  7.0x */
+	80, /* 1010 ->  8.0x */
+	50, /* 1011 ->  5.0x */
+	65, /* 1100 ->  6.5x */
+	75, /* 1101 ->  7.5x */
+	-1, /* 1110 -> RESERVED */
+	-1, /* 1111 -> RESERVED */
+};
+
+static const int __cpuinitdata samuel1_eblcr[16] = {
+	50, /* 0000 -> RESERVED */
+	30, /* 0001 ->  3.0x */
+	40, /* 0010 ->  4.0x */
+	-1, /* 0011 -> RESERVED */
+	55, /* 0100 ->  5.5x */
+	35, /* 0101 ->  3.5x */
+	45, /* 0110 ->  4.5x */
+	-1, /* 0111 -> RESERVED */
+	-1, /* 1000 -> RESERVED */
+	70, /* 1001 ->  7.0x */
+	80, /* 1010 ->  8.0x */
+	60, /* 1011 ->  6.0x */
+	-1, /* 1100 -> RESERVED */
+	75, /* 1101 ->  7.5x */
+	-1, /* 1110 -> RESERVED */
+	65, /* 1111 ->  6.5x */
+};
+
+/*
+ * VIA C3 Samuel2 Stepping 1->15
+ */
+static const int __cpuinitdata samuel2_eblcr[16] = {
+	50,  /* 0000 ->  5.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	110, /* 0111 -> 11.0x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	130, /* 1110 -> 13.0x */
+	65,  /* 1111 ->  6.5x */
+};
+
+/*
+ * VIA C3 Ezra
+ */
+static const int __cpuinitdata ezra_mults[16] = {
+	100, /* 0000 -> 10.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	90,  /* 0011 ->  9.0x */
+	95,  /* 0100 ->  9.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	55,  /* 0111 ->  5.5x */
+	60,  /* 1000 ->  6.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	50,  /* 1011 ->  5.0x */
+	65,  /* 1100 ->  6.5x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	120, /* 1111 -> 12.0x */
+};
+
+static const int __cpuinitdata ezra_eblcr[16] = {
+	50,  /* 0000 ->  5.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	95,  /* 0111 ->  9.5x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	65,  /* 1111 ->  6.5x */
+};
+
+/*
+ * VIA C3 (Ezra-T) [C5M].
+ */
+static const int __cpuinitdata ezrat_mults[32] = {
+	100, /* 0000 -> 10.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	90,  /* 0011 ->  9.0x */
+	95,  /* 0100 ->  9.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	55,  /* 0111 ->  5.5x */
+	60,  /* 1000 ->  6.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	50,  /* 1011 ->  5.0x */
+	65,  /* 1100 ->  6.5x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	120, /* 1111 ->  12.0x */
+
+	-1,  /* 0000 -> RESERVED (10.0x) */
+	110, /* 0001 -> 11.0x */
+	-1, /* 0010 -> 12.0x */
+	-1,  /* 0011 -> RESERVED (9.0x)*/
+	105, /* 0100 -> 10.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	135, /* 0111 -> 13.5x */
+	140, /* 1000 -> 14.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	130, /* 1011 -> 13.0x */
+	145, /* 1100 -> 14.5x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	-1,  /* 1111 -> RESERVED (12.0x) */
+};
+
+static const int __cpuinitdata ezrat_eblcr[32] = {
+	50,  /* 0000 ->  5.0x */
+	30,  /* 0001 ->  3.0x */
+	40,  /* 0010 ->  4.0x */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	35,  /* 0101 ->  3.5x */
+	45,  /* 0110 ->  4.5x */
+	95,  /* 0111 ->  9.5x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	65,  /* 1111 ->  6.5x */
+
+	-1,  /* 0000 -> RESERVED (9.0x) */
+	110, /* 0001 -> 11.0x */
+	120, /* 0010 -> 12.0x */
+	-1,  /* 0011 -> RESERVED (10.0x)*/
+	135, /* 0100 -> 13.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	105, /* 0111 -> 10.5x */
+	130, /* 1000 -> 13.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	140, /* 1011 -> 14.0x */
+	-1,  /* 1100 -> RESERVED (12.0x) */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	145, /* 1111 -> 14.5x */
+};
+
+/*
+ * VIA C3 Nehemiah */
+
+static const int __cpuinitdata nehemiah_mults[32] = {
+	100, /* 0000 -> 10.0x */
+	-1, /* 0001 -> 16.0x */
+	40,  /* 0010 ->  4.0x */
+	90,  /* 0011 ->  9.0x */
+	95,  /* 0100 ->  9.5x */
+	-1,  /* 0101 ->  RESERVED */
+	45,  /* 0110 ->  4.5x */
+	55,  /* 0111 ->  5.5x */
+	60,  /* 1000 ->  6.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	50,  /* 1011 ->  5.0x */
+	65,  /* 1100 ->  6.5x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	120, /* 1111 -> 12.0x */
+	-1, /* 0000 -> 10.0x */
+	110, /* 0001 -> 11.0x */
+	-1, /* 0010 -> 12.0x */
+	-1,  /* 0011 ->  9.0x */
+	105, /* 0100 -> 10.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	135, /* 0111 -> 13.5x */
+	140, /* 1000 -> 14.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	130, /* 1011 -> 13.0x */
+	145, /* 1100 -> 14.5x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	-1, /* 1111 -> 12.0x */
+};
+
+static const int __cpuinitdata nehemiah_eblcr[32] = {
+	50,  /* 0000 ->  5.0x */
+	160, /* 0001 -> 16.0x */
+	40,  /* 0010 ->  4.0x */
+	100, /* 0011 -> 10.0x */
+	55,  /* 0100 ->  5.5x */
+	-1,  /* 0101 ->  RESERVED */
+	45,  /* 0110 ->  4.5x */
+	95,  /* 0111 ->  9.5x */
+	90,  /* 1000 ->  9.0x */
+	70,  /* 1001 ->  7.0x */
+	80,  /* 1010 ->  8.0x */
+	60,  /* 1011 ->  6.0x */
+	120, /* 1100 -> 12.0x */
+	75,  /* 1101 ->  7.5x */
+	85,  /* 1110 ->  8.5x */
+	65,  /* 1111 ->  6.5x */
+	90,  /* 0000 ->  9.0x */
+	110, /* 0001 -> 11.0x */
+	120, /* 0010 -> 12.0x */
+	100, /* 0011 -> 10.0x */
+	135, /* 0100 -> 13.5x */
+	115, /* 0101 -> 11.5x */
+	125, /* 0110 -> 12.5x */
+	105, /* 0111 -> 10.5x */
+	130, /* 1000 -> 13.0x */
+	150, /* 1001 -> 15.0x */
+	160, /* 1010 -> 16.0x */
+	140, /* 1011 -> 14.0x */
+	120, /* 1100 -> 12.0x */
+	155, /* 1101 -> 15.5x */
+	-1,  /* 1110 -> RESERVED (13.0x) */
+	145 /* 1111 -> 14.5x */
+};
+
+/*
+ * Voltage scales. Div/Mod by 1000 to get actual voltage.
+ * Which scale to use depends on the VRM type in use.
+ */
+
+struct mV_pos {
+	unsigned short mV;
+	unsigned short pos;
+};
+
+static const struct mV_pos __cpuinitdata vrm85_mV[32] = {
+	{1250, 8},	{1200, 6},	{1150, 4},	{1100, 2},
+	{1050, 0},	{1800, 30},	{1750, 28},	{1700, 26},
+	{1650, 24},	{1600, 22},	{1550, 20},	{1500, 18},
+	{1450, 16},	{1400, 14},	{1350, 12},	{1300, 10},
+	{1275, 9},	{1225, 7},	{1175, 5},	{1125, 3},
+	{1075, 1},	{1825, 31},	{1775, 29},	{1725, 27},
+	{1675, 25},	{1625, 23},	{1575, 21},	{1525, 19},
+	{1475, 17},	{1425, 15},	{1375, 13},	{1325, 11}
+};
+
+static const unsigned char __cpuinitdata mV_vrm85[32] = {
+	0x04,	0x14,	0x03,	0x13,	0x02,	0x12,	0x01,	0x11,
+	0x00,	0x10,	0x0f,	0x1f,	0x0e,	0x1e,	0x0d,	0x1d,
+	0x0c,	0x1c,	0x0b,	0x1b,	0x0a,	0x1a,	0x09,	0x19,
+	0x08,	0x18,	0x07,	0x17,	0x06,	0x16,	0x05,	0x15
+};
+
+static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = {
+	{1750, 31},	{1700, 30},	{1650, 29},	{1600, 28},
+	{1550, 27},	{1500, 26},	{1450, 25},	{1400, 24},
+	{1350, 23},	{1300, 22},	{1250, 21},	{1200, 20},
+	{1150, 19},	{1100, 18},	{1050, 17},	{1000, 16},
+	{975, 15},	{950, 14},	{925, 13},	{900, 12},
+	{875, 11},	{850, 10},	{825, 9},	{800, 8},
+	{775, 7},	{750, 6},	{725, 5},	{700, 4},
+	{675, 3},	{650, 2},	{625, 1},	{600, 0}
+};
+
+static const unsigned char __cpuinitdata mV_mobilevrm[32] = {
+	0x1f,	0x1e,	0x1d,	0x1c,	0x1b,	0x1a,	0x19,	0x18,
+	0x17,	0x16,	0x15,	0x14,	0x13,	0x12,	0x11,	0x10,
+	0x0f,	0x0e,	0x0d,	0x0c,	0x0b,	0x0a,	0x09,	0x08,
+	0x07,	0x06,	0x05,	0x04,	0x03,	0x02,	0x01,	0x00
+};
+
diff --git a/drivers/cpufreq/longrun.c b/drivers/cpufreq/longrun.c
new file mode 100644
index 000000000000..34ea359b370e
--- /dev/null
+++ b/drivers/cpufreq/longrun.c
@@ -0,0 +1,324 @@
+/*
+ * (C) 2002 - 2003  Dominik Brodowski <linux@brodo.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/timex.h>
+
+#include <asm/msr.h>
+#include <asm/processor.h>
+
+static struct cpufreq_driver	longrun_driver;
+
+/**
+ * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz
+ * values into per cent values. In TMTA microcode, the following is valid:
+ * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
+ */
+static unsigned int longrun_low_freq, longrun_high_freq;
+
+
+/**
+ * longrun_get_policy - get the current LongRun policy
+ * @policy: struct cpufreq_policy where current policy is written into
+ *
+ * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS
+ * and MSR_TMTA_LONGRUN_CTRL
+ */
+static void __cpuinit longrun_get_policy(struct cpufreq_policy *policy)
+{
+	u32 msr_lo, msr_hi;
+
+	rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
+	pr_debug("longrun flags are %x - %x\n", msr_lo, msr_hi);
+	if (msr_lo & 0x01)
+		policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+	else
+		policy->policy = CPUFREQ_POLICY_POWERSAVE;
+
+	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+	pr_debug("longrun ctrl is %x - %x\n", msr_lo, msr_hi);
+	msr_lo &= 0x0000007F;
+	msr_hi &= 0x0000007F;
+
+	if (longrun_high_freq <= longrun_low_freq) {
+		/* Assume degenerate Longrun table */
+		policy->min = policy->max = longrun_high_freq;
+	} else {
+		policy->min = longrun_low_freq + msr_lo *
+			((longrun_high_freq - longrun_low_freq) / 100);
+		policy->max = longrun_low_freq + msr_hi *
+			((longrun_high_freq - longrun_low_freq) / 100);
+	}
+	policy->cpu = 0;
+}
+
+
+/**
+ * longrun_set_policy - sets a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Sets a new CPUFreq policy on LongRun-capable processors. This function
+ * has to be called with cpufreq_driver locked.
+ */
+static int longrun_set_policy(struct cpufreq_policy *policy)
+{
+	u32 msr_lo, msr_hi;
+	u32 pctg_lo, pctg_hi;
+
+	if (!policy)
+		return -EINVAL;
+
+	if (longrun_high_freq <= longrun_low_freq) {
+		/* Assume degenerate Longrun table */
+		pctg_lo = pctg_hi = 100;
+	} else {
+		pctg_lo = (policy->min - longrun_low_freq) /
+			((longrun_high_freq - longrun_low_freq) / 100);
+		pctg_hi = (policy->max - longrun_low_freq) /
+			((longrun_high_freq - longrun_low_freq) / 100);
+	}
+
+	if (pctg_hi > 100)
+		pctg_hi = 100;
+	if (pctg_lo > pctg_hi)
+		pctg_lo = pctg_hi;
+
+	/* performance or economy mode */
+	rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
+	msr_lo &= 0xFFFFFFFE;
+	switch (policy->policy) {
+	case CPUFREQ_POLICY_PERFORMANCE:
+		msr_lo |= 0x00000001;
+		break;
+	case CPUFREQ_POLICY_POWERSAVE:
+		break;
+	}
+	wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi);
+
+	/* lower and upper boundary */
+	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+	msr_lo &= 0xFFFFFF80;
+	msr_hi &= 0xFFFFFF80;
+	msr_lo |= pctg_lo;
+	msr_hi |= pctg_hi;
+	wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+
+	return 0;
+}
+
+
+/**
+ * longrun_verify_poliy - verifies a new CPUFreq policy
+ * @policy: the policy to verify
+ *
+ * Validates a new CPUFreq policy. This function has to be called with
+ * cpufreq_driver locked.
+ */
+static int longrun_verify_policy(struct cpufreq_policy *policy)
+{
+	if (!policy)
+		return -EINVAL;
+
+	policy->cpu = 0;
+	cpufreq_verify_within_limits(policy,
+		policy->cpuinfo.min_freq,
+		policy->cpuinfo.max_freq);
+
+	if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) &&
+	    (policy->policy != CPUFREQ_POLICY_PERFORMANCE))
+		return -EINVAL;
+
+	return 0;
+}
+
+static unsigned int longrun_get(unsigned int cpu)
+{
+	u32 eax, ebx, ecx, edx;
+
+	if (cpu)
+		return 0;
+
+	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
+	pr_debug("cpuid eax is %u\n", eax);
+
+	return eax * 1000;
+}
+
+/**
+ * longrun_determine_freqs - determines the lowest and highest possible core frequency
+ * @low_freq: an int to put the lowest frequency into
+ * @high_freq: an int to put the highest frequency into
+ *
+ * Determines the lowest and highest possible core frequencies on this CPU.
+ * This is necessary to calculate the performance percentage according to
+ * TMTA rules:
+ * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
+ */
+static int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
+						      unsigned int *high_freq)
+{
+	u32 msr_lo, msr_hi;
+	u32 save_lo, save_hi;
+	u32 eax, ebx, ecx, edx;
+	u32 try_hi;
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	if (!low_freq || !high_freq)
+		return -EINVAL;
+
+	if (cpu_has(c, X86_FEATURE_LRTI)) {
+		/* if the LongRun Table Interface is present, the
+		 * detection is a bit easier:
+		 * For minimum frequency, read out the maximum
+		 * level (msr_hi), write that into "currently
+		 * selected level", and read out the frequency.
+		 * For maximum frequency, read out level zero.
+		 */
+		/* minimum */
+		rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi);
+		wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi);
+		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
+		*low_freq = msr_lo * 1000; /* to kHz */
+
+		/* maximum */
+		wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi);
+		rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
+		*high_freq = msr_lo * 1000; /* to kHz */
+
+		pr_debug("longrun table interface told %u - %u kHz\n",
+				*low_freq, *high_freq);
+
+		if (*low_freq > *high_freq)
+			*low_freq = *high_freq;
+		return 0;
+	}
+
+	/* set the upper border to the value determined during TSC init */
+	*high_freq = (cpu_khz / 1000);
+	*high_freq = *high_freq * 1000;
+	pr_debug("high frequency is %u kHz\n", *high_freq);
+
+	/* get current borders */
+	rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+	save_lo = msr_lo & 0x0000007F;
+	save_hi = msr_hi & 0x0000007F;
+
+	/* if current perf_pctg is larger than 90%, we need to decrease the
+	 * upper limit to make the calculation more accurate.
+	 */
+	cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
+	/* try decreasing in 10% steps, some processors react only
+	 * on some barrier values */
+	for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) {
+		/* set to 0 to try_hi perf_pctg */
+		msr_lo &= 0xFFFFFF80;
+		msr_hi &= 0xFFFFFF80;
+		msr_hi |= try_hi;
+		wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
+
+		/* read out current core MHz and current perf_pctg */
+		cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
+
+		/* restore values */
+		wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi);
+	}
+	pr_debug("percentage is %u %%, freq is %u MHz\n", ecx, eax);
+
+	/* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
+	 * eqals
+	 * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
+	 *
+	 * high_freq * perf_pctg is stored tempoarily into "ebx".
+	 */
+	ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */
+
+	if ((ecx > 95) || (ecx == 0) || (eax < ebx))
+		return -EIO;
+
+	edx = ((eax - ebx) * 100) / (100 - ecx);
+	*low_freq = edx * 1000; /* back to kHz */
+
+	pr_debug("low frequency is %u kHz\n", *low_freq);
+
+	if (*low_freq > *high_freq)
+		*low_freq = *high_freq;
+
+	return 0;
+}
+
+
+static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy)
+{
+	int result = 0;
+
+	/* capability check */
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	/* detect low and high frequency */
+	result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq);
+	if (result)
+		return result;
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.min_freq = longrun_low_freq;
+	policy->cpuinfo.max_freq = longrun_high_freq;
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	longrun_get_policy(policy);
+
+	return 0;
+}
+
+
+static struct cpufreq_driver longrun_driver = {
+	.flags		= CPUFREQ_CONST_LOOPS,
+	.verify		= longrun_verify_policy,
+	.setpolicy	= longrun_set_policy,
+	.get		= longrun_get,
+	.init		= longrun_cpu_init,
+	.name		= "longrun",
+	.owner		= THIS_MODULE,
+};
+
+
+/**
+ * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver
+ *
+ * Initializes the LongRun support.
+ */
+static int __init longrun_init(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	if (c->x86_vendor != X86_VENDOR_TRANSMETA ||
+	    !cpu_has(c, X86_FEATURE_LONGRUN))
+		return -ENODEV;
+
+	return cpufreq_register_driver(&longrun_driver);
+}
+
+
+/**
+ * longrun_exit - unregisters LongRun support
+ */
+static void __exit longrun_exit(void)
+{
+	cpufreq_unregister_driver(&longrun_driver);
+}
+
+
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and "
+		"Efficeon processors.");
+MODULE_LICENSE("GPL");
+
+module_init(longrun_init);
+module_exit(longrun_exit);
diff --git a/drivers/cpufreq/mperf.c b/drivers/cpufreq/mperf.c
new file mode 100644
index 000000000000..911e193018ae
--- /dev/null
+++ b/drivers/cpufreq/mperf.c
@@ -0,0 +1,51 @@
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+
+#include "mperf.h"
+
+static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
+
+/* Called via smp_call_function_single(), on the target CPU */
+static void read_measured_perf_ctrs(void *_cur)
+{
+	struct aperfmperf *am = _cur;
+
+	get_aperfmperf(am);
+}
+
+/*
+ * Return the measured active (C0) frequency on this CPU since last call
+ * to this function.
+ * Input: cpu number
+ * Return: Average CPU frequency in terms of max frequency (zero on error)
+ *
+ * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
+ * over a period of time, while CPU is in C0 state.
+ * IA32_MPERF counts at the rate of max advertised frequency
+ * IA32_APERF counts at the rate of actual CPU frequency
+ * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
+ * no meaning should be associated with absolute values of these MSRs.
+ */
+unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
+					unsigned int cpu)
+{
+	struct aperfmperf perf;
+	unsigned long ratio;
+	unsigned int retval;
+
+	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
+		return 0;
+
+	ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
+	per_cpu(acfreq_old_perf, cpu) = perf;
+
+	retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
+
+	return retval;
+}
+EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf);
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/mperf.h b/drivers/cpufreq/mperf.h
new file mode 100644
index 000000000000..5dbf2950dc22
--- /dev/null
+++ b/drivers/cpufreq/mperf.h
@@ -0,0 +1,9 @@
+/*
+ *  (c) 2010 Advanced Micro Devices, Inc.
+ *  Your use of this code is subject to the terms and conditions of the
+ *  GNU general public license version 2. See "COPYING" or
+ *  http://www.gnu.org/licenses/gpl.html
+ */
+
+unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
+					unsigned int cpu);
diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c
new file mode 100644
index 000000000000..6be3e0760c26
--- /dev/null
+++ b/drivers/cpufreq/p4-clockmod.c
@@ -0,0 +1,329 @@
+/*
+ *	Pentium 4/Xeon CPU on demand clock modulation/speed scaling
+ *	(C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *	(C) 2002 Zwane Mwaikambo <zwane@commfireservices.com>
+ *	(C) 2002 Arjan van de Ven <arjanv@redhat.com>
+ *	(C) 2002 Tora T. Engstad
+ *	All Rights Reserved
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *      The author(s) of this software shall not be held liable for damages
+ *      of any nature resulting due to the use of this software. This
+ *      software is provided AS-IS with no warranties.
+ *
+ *	Date		Errata			Description
+ *	20020525	N44, O17	12.5% or 25% DC causes lockup
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/timex.h>
+
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/timer.h>
+
+#include "speedstep-lib.h"
+
+#define PFX	"p4-clockmod: "
+
+/*
+ * Duty Cycle (3bits), note DC_DISABLE is not specified in
+ * intel docs i just use it to mean disable
+ */
+enum {
+	DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT,
+	DC_64PT, DC_75PT, DC_88PT, DC_DISABLE
+};
+
+#define DC_ENTRIES	8
+
+
+static int has_N44_O17_errata[NR_CPUS];
+static unsigned int stock_freq;
+static struct cpufreq_driver p4clockmod_driver;
+static unsigned int cpufreq_p4_get(unsigned int cpu);
+
+static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
+{
+	u32 l, h;
+
+	if (!cpu_online(cpu) ||
+	    (newstate > DC_DISABLE) || (newstate == DC_RESV))
+		return -EINVAL;
+
+	rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
+
+	if (l & 0x01)
+		pr_debug("CPU#%d currently thermal throttled\n", cpu);
+
+	if (has_N44_O17_errata[cpu] &&
+	    (newstate == DC_25PT || newstate == DC_DFLT))
+		newstate = DC_38PT;
+
+	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
+	if (newstate == DC_DISABLE) {
+		pr_debug("CPU#%d disabling modulation\n", cpu);
+		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h);
+	} else {
+		pr_debug("CPU#%d setting duty cycle to %d%%\n",
+			cpu, ((125 * newstate) / 10));
+		/* bits 63 - 5	: reserved
+		 * bit  4	: enable/disable
+		 * bits 3-1	: duty cycle
+		 * bit  0	: reserved
+		 */
+		l = (l & ~14);
+		l = l | (1<<4) | ((newstate & 0x7)<<1);
+		wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h);
+	}
+
+	return 0;
+}
+
+
+static struct cpufreq_frequency_table p4clockmod_table[] = {
+	{DC_RESV, CPUFREQ_ENTRY_INVALID},
+	{DC_DFLT, 0},
+	{DC_25PT, 0},
+	{DC_38PT, 0},
+	{DC_50PT, 0},
+	{DC_64PT, 0},
+	{DC_75PT, 0},
+	{DC_88PT, 0},
+	{DC_DISABLE, 0},
+	{DC_RESV, CPUFREQ_TABLE_END},
+};
+
+
+static int cpufreq_p4_target(struct cpufreq_policy *policy,
+			     unsigned int target_freq,
+			     unsigned int relation)
+{
+	unsigned int    newstate = DC_RESV;
+	struct cpufreq_freqs freqs;
+	int i;
+
+	if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0],
+				target_freq, relation, &newstate))
+		return -EINVAL;
+
+	freqs.old = cpufreq_p4_get(policy->cpu);
+	freqs.new = stock_freq * p4clockmod_table[newstate].index / 8;
+
+	if (freqs.new == freqs.old)
+		return 0;
+
+	/* notifiers */
+	for_each_cpu(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	/* run on each logical CPU,
+	 * see section 13.15.3 of IA32 Intel Architecture Software
+	 * Developer's Manual, Volume 3
+	 */
+	for_each_cpu(i, policy->cpus)
+		cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
+
+	/* notifiers */
+	for_each_cpu(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+
+	return 0;
+}
+
+
+static int cpufreq_p4_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]);
+}
+
+
+static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
+{
+	if (c->x86 == 0x06) {
+		if (cpu_has(c, X86_FEATURE_EST))
+			printk_once(KERN_WARNING PFX "Warning: EST-capable "
+			       "CPU detected. The acpi-cpufreq module offers "
+			       "voltage scaling in addition to frequency "
+			       "scaling. You should use that instead of "
+			       "p4-clockmod, if possible.\n");
+		switch (c->x86_model) {
+		case 0x0E: /* Core */
+		case 0x0F: /* Core Duo */
+		case 0x16: /* Celeron Core */
+		case 0x1C: /* Atom */
+			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
+			return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
+		case 0x0D: /* Pentium M (Dothan) */
+			p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
+			/* fall through */
+		case 0x09: /* Pentium M (Banias) */
+			return speedstep_get_frequency(SPEEDSTEP_CPU_PM);
+		}
+	}
+
+	if (c->x86 != 0xF)
+		return 0;
+
+	/* on P-4s, the TSC runs with constant frequency independent whether
+	 * throttling is active or not. */
+	p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
+
+	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
+		printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
+		       "The speedstep-ich or acpi cpufreq modules offer "
+		       "voltage scaling in addition of frequency scaling. "
+		       "You should use either one instead of p4-clockmod, "
+		       "if possible.\n");
+		return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
+	}
+
+	return speedstep_get_frequency(SPEEDSTEP_CPU_P4D);
+}
+
+
+
+static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
+	int cpuid = 0;
+	unsigned int i;
+
+#ifdef CONFIG_SMP
+	cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
+#endif
+
+	/* Errata workaround */
+	cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask;
+	switch (cpuid) {
+	case 0x0f07:
+	case 0x0f0a:
+	case 0x0f11:
+	case 0x0f12:
+		has_N44_O17_errata[policy->cpu] = 1;
+		pr_debug("has errata -- disabling low frequencies\n");
+	}
+
+	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
+	    c->x86_model < 2) {
+		/* switch to maximum frequency and measure result */
+		cpufreq_p4_setdc(policy->cpu, DC_DISABLE);
+		recalibrate_cpu_khz();
+	}
+	/* get max frequency */
+	stock_freq = cpufreq_p4_get_frequency(c);
+	if (!stock_freq)
+		return -EINVAL;
+
+	/* table init */
+	for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
+		if ((i < 2) && (has_N44_O17_errata[policy->cpu]))
+			p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+		else
+			p4clockmod_table[i].frequency = (stock_freq * i)/8;
+	}
+	cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
+
+	/* cpuinfo and default policy values */
+
+	/* the transition latency is set to be 1 higher than the maximum
+	 * transition latency of the ondemand governor */
+	policy->cpuinfo.transition_latency = 10000001;
+	policy->cur = stock_freq;
+
+	return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
+}
+
+
+static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static unsigned int cpufreq_p4_get(unsigned int cpu)
+{
+	u32 l, h;
+
+	rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
+
+	if (l & 0x10) {
+		l = l >> 1;
+		l &= 0x7;
+	} else
+		l = DC_DISABLE;
+
+	if (l != DC_DISABLE)
+		return stock_freq * l / 8;
+
+	return stock_freq;
+}
+
+static struct freq_attr *p4clockmod_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver p4clockmod_driver = {
+	.verify		= cpufreq_p4_verify,
+	.target		= cpufreq_p4_target,
+	.init		= cpufreq_p4_cpu_init,
+	.exit		= cpufreq_p4_cpu_exit,
+	.get		= cpufreq_p4_get,
+	.name		= "p4-clockmod",
+	.owner		= THIS_MODULE,
+	.attr		= p4clockmod_attr,
+};
+
+
+static int __init cpufreq_p4_init(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+	int ret;
+
+	/*
+	 * THERM_CONTROL is architectural for IA32 now, so
+	 * we can rely on the capability checks
+	 */
+	if (c->x86_vendor != X86_VENDOR_INTEL)
+		return -ENODEV;
+
+	if (!test_cpu_cap(c, X86_FEATURE_ACPI) ||
+				!test_cpu_cap(c, X86_FEATURE_ACC))
+		return -ENODEV;
+
+	ret = cpufreq_register_driver(&p4clockmod_driver);
+	if (!ret)
+		printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
+				"Modulation available\n");
+
+	return ret;
+}
+
+
+static void __exit cpufreq_p4_exit(void)
+{
+	cpufreq_unregister_driver(&p4clockmod_driver);
+}
+
+
+MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>");
+MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
+MODULE_LICENSE("GPL");
+
+late_initcall(cpufreq_p4_init);
+module_exit(cpufreq_p4_exit);
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
new file mode 100644
index 000000000000..7b0603eb0129
--- /dev/null
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -0,0 +1,621 @@
+/*
+ *  pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface
+ *
+ *  Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
+ *  Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
+ *	Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
+ *  INFRINGEMENT. See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <linux/cpufreq.h>
+#include <linux/compiler.h>
+#include <linux/slab.h>
+
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+
+#include <acpi/processor.h>
+
+#define PCC_VERSION	"1.10.00"
+#define POLL_LOOPS 	300
+
+#define CMD_COMPLETE 	0x1
+#define CMD_GET_FREQ 	0x0
+#define CMD_SET_FREQ 	0x1
+
+#define BUF_SZ		4
+
+struct pcc_register_resource {
+	u8 descriptor;
+	u16 length;
+	u8 space_id;
+	u8 bit_width;
+	u8 bit_offset;
+	u8 access_size;
+	u64 address;
+} __attribute__ ((packed));
+
+struct pcc_memory_resource {
+	u8 descriptor;
+	u16 length;
+	u8 space_id;
+	u8 resource_usage;
+	u8 type_specific;
+	u64 granularity;
+	u64 minimum;
+	u64 maximum;
+	u64 translation_offset;
+	u64 address_length;
+} __attribute__ ((packed));
+
+static struct cpufreq_driver pcc_cpufreq_driver;
+
+struct pcc_header {
+	u32 signature;
+	u16 length;
+	u8 major;
+	u8 minor;
+	u32 features;
+	u16 command;
+	u16 status;
+	u32 latency;
+	u32 minimum_time;
+	u32 maximum_time;
+	u32 nominal;
+	u32 throttled_frequency;
+	u32 minimum_frequency;
+};
+
+static void __iomem *pcch_virt_addr;
+static struct pcc_header __iomem *pcch_hdr;
+
+static DEFINE_SPINLOCK(pcc_lock);
+
+static struct acpi_generic_address doorbell;
+
+static u64 doorbell_preserve;
+static u64 doorbell_write;
+
+static u8 OSC_UUID[16] = {0x9F, 0x2C, 0x9B, 0x63, 0x91, 0x70, 0x1f, 0x49,
+			  0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
+
+struct pcc_cpu {
+	u32 input_offset;
+	u32 output_offset;
+};
+
+static struct pcc_cpu __percpu *pcc_cpu_info;
+
+static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
+{
+	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+				     policy->cpuinfo.max_freq);
+	return 0;
+}
+
+static inline void pcc_cmd(void)
+{
+	u64 doorbell_value;
+	int i;
+
+	acpi_read(&doorbell_value, &doorbell);
+	acpi_write((doorbell_value & doorbell_preserve) | doorbell_write,
+		   &doorbell);
+
+	for (i = 0; i < POLL_LOOPS; i++) {
+		if (ioread16(&pcch_hdr->status) & CMD_COMPLETE)
+			break;
+	}
+}
+
+static inline void pcc_clear_mapping(void)
+{
+	if (pcch_virt_addr)
+		iounmap(pcch_virt_addr);
+	pcch_virt_addr = NULL;
+}
+
+static unsigned int pcc_get_freq(unsigned int cpu)
+{
+	struct pcc_cpu *pcc_cpu_data;
+	unsigned int curr_freq;
+	unsigned int freq_limit;
+	u16 status;
+	u32 input_buffer;
+	u32 output_buffer;
+
+	spin_lock(&pcc_lock);
+
+	pr_debug("get: get_freq for CPU %d\n", cpu);
+	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
+
+	input_buffer = 0x1;
+	iowrite32(input_buffer,
+			(pcch_virt_addr + pcc_cpu_data->input_offset));
+	iowrite16(CMD_GET_FREQ, &pcch_hdr->command);
+
+	pcc_cmd();
+
+	output_buffer =
+		ioread32(pcch_virt_addr + pcc_cpu_data->output_offset);
+
+	/* Clear the input buffer - we are done with the current command */
+	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
+
+	status = ioread16(&pcch_hdr->status);
+	if (status != CMD_COMPLETE) {
+		pr_debug("get: FAILED: for CPU %d, status is %d\n",
+			cpu, status);
+		goto cmd_incomplete;
+	}
+	iowrite16(0, &pcch_hdr->status);
+	curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
+			/ 100) * 1000);
+
+	pr_debug("get: SUCCESS: (virtual) output_offset for cpu %d is "
+		"0x%p, contains a value of: 0x%x. Speed is: %d MHz\n",
+		cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
+		output_buffer, curr_freq);
+
+	freq_limit = (output_buffer >> 8) & 0xff;
+	if (freq_limit != 0xff) {
+		pr_debug("get: frequency for cpu %d is being temporarily"
+			" capped at %d\n", cpu, curr_freq);
+	}
+
+	spin_unlock(&pcc_lock);
+	return curr_freq;
+
+cmd_incomplete:
+	iowrite16(0, &pcch_hdr->status);
+	spin_unlock(&pcc_lock);
+	return 0;
+}
+
+static int pcc_cpufreq_target(struct cpufreq_policy *policy,
+			      unsigned int target_freq,
+			      unsigned int relation)
+{
+	struct pcc_cpu *pcc_cpu_data;
+	struct cpufreq_freqs freqs;
+	u16 status;
+	u32 input_buffer;
+	int cpu;
+
+	spin_lock(&pcc_lock);
+	cpu = policy->cpu;
+	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
+
+	pr_debug("target: CPU %d should go to target freq: %d "
+		"(virtual) input_offset is 0x%p\n",
+		cpu, target_freq,
+		(pcch_virt_addr + pcc_cpu_data->input_offset));
+
+	freqs.new = target_freq;
+	freqs.cpu = cpu;
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	input_buffer = 0x1 | (((target_freq * 100)
+			       / (ioread32(&pcch_hdr->nominal) * 1000)) << 8);
+	iowrite32(input_buffer,
+			(pcch_virt_addr + pcc_cpu_data->input_offset));
+	iowrite16(CMD_SET_FREQ, &pcch_hdr->command);
+
+	pcc_cmd();
+
+	/* Clear the input buffer - we are done with the current command */
+	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
+
+	status = ioread16(&pcch_hdr->status);
+	if (status != CMD_COMPLETE) {
+		pr_debug("target: FAILED for cpu %d, with status: 0x%x\n",
+			cpu, status);
+		goto cmd_incomplete;
+	}
+	iowrite16(0, &pcch_hdr->status);
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	pr_debug("target: was SUCCESSFUL for cpu %d\n", cpu);
+	spin_unlock(&pcc_lock);
+
+	return 0;
+
+cmd_incomplete:
+	iowrite16(0, &pcch_hdr->status);
+	spin_unlock(&pcc_lock);
+	return -EINVAL;
+}
+
+static int pcc_get_offset(int cpu)
+{
+	acpi_status status;
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object *pccp, *offset;
+	struct pcc_cpu *pcc_cpu_data;
+	struct acpi_processor *pr;
+	int ret = 0;
+
+	pr = per_cpu(processors, cpu);
+	pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
+
+	status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	pccp = buffer.pointer;
+	if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) {
+		ret = -ENODEV;
+		goto out_free;
+	};
+
+	offset = &(pccp->package.elements[0]);
+	if (!offset || offset->type != ACPI_TYPE_INTEGER) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	pcc_cpu_data->input_offset = offset->integer.value;
+
+	offset = &(pccp->package.elements[1]);
+	if (!offset || offset->type != ACPI_TYPE_INTEGER) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	pcc_cpu_data->output_offset = offset->integer.value;
+
+	memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
+	memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
+
+	pr_debug("pcc_get_offset: for CPU %d: pcc_cpu_data "
+		"input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
+		cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
+out_free:
+	kfree(buffer.pointer);
+	return ret;
+}
+
+static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
+{
+	acpi_status status;
+	struct acpi_object_list input;
+	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
+	union acpi_object in_params[4];
+	union acpi_object *out_obj;
+	u32 capabilities[2];
+	u32 errors;
+	u32 supported;
+	int ret = 0;
+
+	input.count = 4;
+	input.pointer = in_params;
+	in_params[0].type               = ACPI_TYPE_BUFFER;
+	in_params[0].buffer.length      = 16;
+	in_params[0].buffer.pointer     = OSC_UUID;
+	in_params[1].type               = ACPI_TYPE_INTEGER;
+	in_params[1].integer.value      = 1;
+	in_params[2].type               = ACPI_TYPE_INTEGER;
+	in_params[2].integer.value      = 2;
+	in_params[3].type               = ACPI_TYPE_BUFFER;
+	in_params[3].buffer.length      = 8;
+	in_params[3].buffer.pointer     = (u8 *)&capabilities;
+
+	capabilities[0] = OSC_QUERY_ENABLE;
+	capabilities[1] = 0x1;
+
+	status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	if (!output.length)
+		return -ENODEV;
+
+	out_obj = output.pointer;
+	if (out_obj->type != ACPI_TYPE_BUFFER) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
+	if (errors) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	supported = *((u32 *)(out_obj->buffer.pointer + 4));
+	if (!(supported & 0x1)) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	kfree(output.pointer);
+	capabilities[0] = 0x0;
+	capabilities[1] = 0x1;
+
+	status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	if (!output.length)
+		return -ENODEV;
+
+	out_obj = output.pointer;
+	if (out_obj->type != ACPI_TYPE_BUFFER) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
+	if (errors) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	supported = *((u32 *)(out_obj->buffer.pointer + 4));
+	if (!(supported & 0x1)) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+out_free:
+	kfree(output.pointer);
+	return ret;
+}
+
+static int __init pcc_cpufreq_probe(void)
+{
+	acpi_status status;
+	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
+	struct pcc_memory_resource *mem_resource;
+	struct pcc_register_resource *reg_resource;
+	union acpi_object *out_obj, *member;
+	acpi_handle handle, osc_handle, pcch_handle;
+	int ret = 0;
+
+	status = acpi_get_handle(NULL, "\\_SB", &handle);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	status = acpi_get_handle(handle, "PCCH", &pcch_handle);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	status = acpi_get_handle(handle, "_OSC", &osc_handle);
+	if (ACPI_SUCCESS(status)) {
+		ret = pcc_cpufreq_do_osc(&osc_handle);
+		if (ret)
+			pr_debug("probe: _OSC evaluation did not succeed\n");
+		/* Firmware's use of _OSC is optional */
+		ret = 0;
+	}
+
+	status = acpi_evaluate_object(handle, "PCCH", NULL, &output);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	out_obj = output.pointer;
+	if (out_obj->type != ACPI_TYPE_PACKAGE) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	member = &out_obj->package.elements[0];
+	if (member->type != ACPI_TYPE_BUFFER) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
+
+	pr_debug("probe: mem_resource descriptor: 0x%x,"
+		" length: %d, space_id: %d, resource_usage: %d,"
+		" type_specific: %d, granularity: 0x%llx,"
+		" minimum: 0x%llx, maximum: 0x%llx,"
+		" translation_offset: 0x%llx, address_length: 0x%llx\n",
+		mem_resource->descriptor, mem_resource->length,
+		mem_resource->space_id, mem_resource->resource_usage,
+		mem_resource->type_specific, mem_resource->granularity,
+		mem_resource->minimum, mem_resource->maximum,
+		mem_resource->translation_offset,
+		mem_resource->address_length);
+
+	if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
+					mem_resource->address_length);
+	if (pcch_virt_addr == NULL) {
+		pr_debug("probe: could not map shared mem region\n");
+		goto out_free;
+	}
+	pcch_hdr = pcch_virt_addr;
+
+	pr_debug("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
+	pr_debug("probe: PCCH header is at physical address: 0x%llx,"
+		" signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
+		" supported features: 0x%x, command field: 0x%x,"
+		" status field: 0x%x, nominal latency: %d us\n",
+		mem_resource->minimum, ioread32(&pcch_hdr->signature),
+		ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major),
+		ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features),
+		ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
+		ioread32(&pcch_hdr->latency));
+
+	pr_debug("probe: min time between commands: %d us,"
+		" max time between commands: %d us,"
+		" nominal CPU frequency: %d MHz,"
+		" minimum CPU frequency: %d MHz,"
+		" minimum CPU frequency without throttling: %d MHz\n",
+		ioread32(&pcch_hdr->minimum_time),
+		ioread32(&pcch_hdr->maximum_time),
+		ioread32(&pcch_hdr->nominal),
+		ioread32(&pcch_hdr->throttled_frequency),
+		ioread32(&pcch_hdr->minimum_frequency));
+
+	member = &out_obj->package.elements[1];
+	if (member->type != ACPI_TYPE_BUFFER) {
+		ret = -ENODEV;
+		goto pcch_free;
+	}
+
+	reg_resource = (struct pcc_register_resource *)member->buffer.pointer;
+
+	doorbell.space_id = reg_resource->space_id;
+	doorbell.bit_width = reg_resource->bit_width;
+	doorbell.bit_offset = reg_resource->bit_offset;
+	doorbell.access_width = 64;
+	doorbell.address = reg_resource->address;
+
+	pr_debug("probe: doorbell: space_id is %d, bit_width is %d, "
+		"bit_offset is %d, access_width is %d, address is 0x%llx\n",
+		doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
+		doorbell.access_width, reg_resource->address);
+
+	member = &out_obj->package.elements[2];
+	if (member->type != ACPI_TYPE_INTEGER) {
+		ret = -ENODEV;
+		goto pcch_free;
+	}
+
+	doorbell_preserve = member->integer.value;
+
+	member = &out_obj->package.elements[3];
+	if (member->type != ACPI_TYPE_INTEGER) {
+		ret = -ENODEV;
+		goto pcch_free;
+	}
+
+	doorbell_write = member->integer.value;
+
+	pr_debug("probe: doorbell_preserve: 0x%llx,"
+		" doorbell_write: 0x%llx\n",
+		doorbell_preserve, doorbell_write);
+
+	pcc_cpu_info = alloc_percpu(struct pcc_cpu);
+	if (!pcc_cpu_info) {
+		ret = -ENOMEM;
+		goto pcch_free;
+	}
+
+	printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency"
+	       " limits: %d MHz, %d MHz\n", PCC_VERSION,
+	       ioread32(&pcch_hdr->minimum_frequency),
+	       ioread32(&pcch_hdr->nominal));
+	kfree(output.pointer);
+	return ret;
+pcch_free:
+	pcc_clear_mapping();
+out_free:
+	kfree(output.pointer);
+	return ret;
+}
+
+static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+	unsigned int result = 0;
+
+	if (!pcch_virt_addr) {
+		result = -1;
+		goto out;
+	}
+
+	result = pcc_get_offset(cpu);
+	if (result) {
+		pr_debug("init: PCCP evaluation failed\n");
+		goto out;
+	}
+
+	policy->max = policy->cpuinfo.max_freq =
+		ioread32(&pcch_hdr->nominal) * 1000;
+	policy->min = policy->cpuinfo.min_freq =
+		ioread32(&pcch_hdr->minimum_frequency) * 1000;
+	policy->cur = pcc_get_freq(cpu);
+
+	if (!policy->cur) {
+		pr_debug("init: Unable to get current CPU frequency\n");
+		result = -EINVAL;
+		goto out;
+	}
+
+	pr_debug("init: policy->max is %d, policy->min is %d\n",
+		policy->max, policy->min);
+out:
+	return result;
+}
+
+static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	return 0;
+}
+
+static struct cpufreq_driver pcc_cpufreq_driver = {
+	.flags = CPUFREQ_CONST_LOOPS,
+	.get = pcc_get_freq,
+	.verify = pcc_cpufreq_verify,
+	.target = pcc_cpufreq_target,
+	.init = pcc_cpufreq_cpu_init,
+	.exit = pcc_cpufreq_cpu_exit,
+	.name = "pcc-cpufreq",
+	.owner = THIS_MODULE,
+};
+
+static int __init pcc_cpufreq_init(void)
+{
+	int ret;
+
+	if (acpi_disabled)
+		return 0;
+
+	ret = pcc_cpufreq_probe();
+	if (ret) {
+		pr_debug("pcc_cpufreq_init: PCCH evaluation failed\n");
+		return ret;
+	}
+
+	ret = cpufreq_register_driver(&pcc_cpufreq_driver);
+
+	return ret;
+}
+
+static void __exit pcc_cpufreq_exit(void)
+{
+	cpufreq_unregister_driver(&pcc_cpufreq_driver);
+
+	pcc_clear_mapping();
+
+	free_percpu(pcc_cpu_info);
+}
+
+MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar");
+MODULE_VERSION(PCC_VERSION);
+MODULE_DESCRIPTION("Processor Clocking Control interface driver");
+MODULE_LICENSE("GPL");
+
+late_initcall(pcc_cpufreq_init);
+module_exit(pcc_cpufreq_exit);
diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c
new file mode 100644
index 000000000000..b3379d6a5c57
--- /dev/null
+++ b/drivers/cpufreq/powernow-k6.c
@@ -0,0 +1,261 @@
+/*
+ *  This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
+ *  (C) 2000-2003  Dave Jones, Arjan van de Ven, Janne Pänkälä,
+ *                 Dominik Brodowski.
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/ioport.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+
+#include <asm/msr.h>
+
+#define POWERNOW_IOPORT 0xfff0          /* it doesn't matter where, as long
+					   as it is unused */
+
+#define PFX "powernow-k6: "
+static unsigned int                     busfreq;   /* FSB, in 10 kHz */
+static unsigned int                     max_multiplier;
+
+
+/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */
+static struct cpufreq_frequency_table clock_ratio[] = {
+	{45,  /* 000 -> 4.5x */ 0},
+	{50,  /* 001 -> 5.0x */ 0},
+	{40,  /* 010 -> 4.0x */ 0},
+	{55,  /* 011 -> 5.5x */ 0},
+	{20,  /* 100 -> 2.0x */ 0},
+	{30,  /* 101 -> 3.0x */ 0},
+	{60,  /* 110 -> 6.0x */ 0},
+	{35,  /* 111 -> 3.5x */ 0},
+	{0, CPUFREQ_TABLE_END}
+};
+
+
+/**
+ * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier
+ *
+ *   Returns the current setting of the frequency multiplier. Core clock
+ * speed is frequency of the Front-Side Bus multiplied with this value.
+ */
+static int powernow_k6_get_cpu_multiplier(void)
+{
+	u64 invalue = 0;
+	u32 msrval;
+
+	msrval = POWERNOW_IOPORT + 0x1;
+	wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
+	invalue = inl(POWERNOW_IOPORT + 0x8);
+	msrval = POWERNOW_IOPORT + 0x0;
+	wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
+
+	return clock_ratio[(invalue >> 5)&7].index;
+}
+
+
+/**
+ * powernow_k6_set_state - set the PowerNow! multiplier
+ * @best_i: clock_ratio[best_i] is the target multiplier
+ *
+ *   Tries to change the PowerNow! multiplier
+ */
+static void powernow_k6_set_state(unsigned int best_i)
+{
+	unsigned long outvalue = 0, invalue = 0;
+	unsigned long msrval;
+	struct cpufreq_freqs freqs;
+
+	if (clock_ratio[best_i].index > max_multiplier) {
+		printk(KERN_ERR PFX "invalid target frequency\n");
+		return;
+	}
+
+	freqs.old = busfreq * powernow_k6_get_cpu_multiplier();
+	freqs.new = busfreq * clock_ratio[best_i].index;
+	freqs.cpu = 0; /* powernow-k6.c is UP only driver */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	/* we now need to transform best_i to the BVC format, see AMD#23446 */
+
+	outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5);
+
+	msrval = POWERNOW_IOPORT + 0x1;
+	wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
+	invalue = inl(POWERNOW_IOPORT + 0x8);
+	invalue = invalue & 0xf;
+	outvalue = outvalue | invalue;
+	outl(outvalue , (POWERNOW_IOPORT + 0x8));
+	msrval = POWERNOW_IOPORT + 0x0;
+	wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	return;
+}
+
+
+/**
+ * powernow_k6_verify - verifies a new CPUfreq policy
+ * @policy: new policy
+ *
+ * Policy must be within lowest and highest possible CPU Frequency,
+ * and at least one possible state must be within min and max.
+ */
+static int powernow_k6_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &clock_ratio[0]);
+}
+
+
+/**
+ * powernow_k6_setpolicy - sets a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency
+ *  (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * sets a new CPUFreq policy
+ */
+static int powernow_k6_target(struct cpufreq_policy *policy,
+			       unsigned int target_freq,
+			       unsigned int relation)
+{
+	unsigned int newstate = 0;
+
+	if (cpufreq_frequency_table_target(policy, &clock_ratio[0],
+				target_freq, relation, &newstate))
+		return -EINVAL;
+
+	powernow_k6_set_state(newstate);
+
+	return 0;
+}
+
+
+static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
+{
+	unsigned int i, f;
+	int result;
+
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	/* get frequencies */
+	max_multiplier = powernow_k6_get_cpu_multiplier();
+	busfreq = cpu_khz / max_multiplier;
+
+	/* table init */
+	for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
+		f = clock_ratio[i].index;
+		if (f > max_multiplier)
+			clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
+		else
+			clock_ratio[i].frequency = busfreq * f;
+	}
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.transition_latency = 200000;
+	policy->cur = busfreq * max_multiplier;
+
+	result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
+	if (result)
+		return result;
+
+	cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu);
+
+	return 0;
+}
+
+
+static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
+{
+	unsigned int i;
+	for (i = 0; i < 8; i++) {
+		if (i == max_multiplier)
+			powernow_k6_set_state(i);
+	}
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static unsigned int powernow_k6_get(unsigned int cpu)
+{
+	unsigned int ret;
+	ret = (busfreq * powernow_k6_get_cpu_multiplier());
+	return ret;
+}
+
+static struct freq_attr *powernow_k6_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver powernow_k6_driver = {
+	.verify		= powernow_k6_verify,
+	.target		= powernow_k6_target,
+	.init		= powernow_k6_cpu_init,
+	.exit		= powernow_k6_cpu_exit,
+	.get		= powernow_k6_get,
+	.name		= "powernow-k6",
+	.owner		= THIS_MODULE,
+	.attr		= powernow_k6_attr,
+};
+
+
+/**
+ * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver
+ *
+ *   Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported
+ * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero
+ * on success.
+ */
+static int __init powernow_k6_init(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) ||
+		((c->x86_model != 12) && (c->x86_model != 13)))
+		return -ENODEV;
+
+	if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
+		printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n");
+		return -EIO;
+	}
+
+	if (cpufreq_register_driver(&powernow_k6_driver)) {
+		release_region(POWERNOW_IOPORT, 16);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+/**
+ * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support
+ *
+ *   Unregisters AMD K6-2+ / K6-3+ PowerNow! support.
+ */
+static void __exit powernow_k6_exit(void)
+{
+	cpufreq_unregister_driver(&powernow_k6_driver);
+	release_region(POWERNOW_IOPORT, 16);
+}
+
+
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, "
+		"Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
+MODULE_LICENSE("GPL");
+
+module_init(powernow_k6_init);
+module_exit(powernow_k6_exit);
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
new file mode 100644
index 000000000000..d71d9f372359
--- /dev/null
+++ b/drivers/cpufreq/powernow-k7.c
@@ -0,0 +1,747 @@
+/*
+ *  AMD K7 Powernow driver.
+ *  (C) 2003 Dave Jones on behalf of SuSE Labs.
+ *  (C) 2003-2004 Dave Jones <davej@redhat.com>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon datasheets & sample CPUs kindly provided by AMD.
+ *
+ * Errata 5:
+ *  CPU may fail to execute a FID/VID change in presence of interrupt.
+ *  - We cli/sti on stepping A0 CPUs around the FID/VID transition.
+ * Errata 15:
+ *  CPU with half frequency multipliers may hang upon wakeup from disconnect.
+ *  - We disable half multipliers if ACPI is used on A0 stepping CPUs.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/dmi.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+
+#include <asm/timer.h>		/* Needed for recalibrate_cpu_khz() */
+#include <asm/msr.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+#include <linux/acpi.h>
+#include <acpi/processor.h>
+#endif
+
+#include "powernow-k7.h"
+
+#define PFX "powernow: "
+
+
+struct psb_s {
+	u8 signature[10];
+	u8 tableversion;
+	u8 flags;
+	u16 settlingtime;
+	u8 reserved1;
+	u8 numpst;
+};
+
+struct pst_s {
+	u32 cpuid;
+	u8 fsbspeed;
+	u8 maxfid;
+	u8 startvid;
+	u8 numpstates;
+};
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+union powernow_acpi_control_t {
+	struct {
+		unsigned long fid:5,
+			vid:5,
+			sgtc:20,
+			res1:2;
+	} bits;
+	unsigned long val;
+};
+#endif
+
+/* divide by 1000 to get VCore voltage in V. */
+static const int mobile_vid_table[32] = {
+    2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
+    1600, 1550, 1500, 1450, 1400, 1350, 1300, 0,
+    1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
+    1075, 1050, 1025, 1000, 975, 950, 925, 0,
+};
+
+/* divide by 10 to get FID. */
+static const int fid_codes[32] = {
+    110, 115, 120, 125, 50, 55, 60, 65,
+    70, 75, 80, 85, 90, 95, 100, 105,
+    30, 190, 40, 200, 130, 135, 140, 210,
+    150, 225, 160, 165, 170, 180, -1, -1,
+};
+
+/* This parameter is used in order to force ACPI instead of legacy method for
+ * configuration purpose.
+ */
+
+static int acpi_force;
+
+static struct cpufreq_frequency_table *powernow_table;
+
+static unsigned int can_scale_bus;
+static unsigned int can_scale_vid;
+static unsigned int minimum_speed = -1;
+static unsigned int maximum_speed;
+static unsigned int number_scales;
+static unsigned int fsb;
+static unsigned int latency;
+static char have_a0;
+
+static int check_fsb(unsigned int fsbspeed)
+{
+	int delta;
+	unsigned int f = fsb / 1000;
+
+	delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
+	return delta < 5;
+}
+
+static int check_powernow(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+	unsigned int maxei, eax, ebx, ecx, edx;
+
+	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) {
+#ifdef MODULE
+		printk(KERN_INFO PFX "This module only works with "
+				"AMD K7 CPUs\n");
+#endif
+		return 0;
+	}
+
+	/* Get maximum capabilities */
+	maxei = cpuid_eax(0x80000000);
+	if (maxei < 0x80000007) {	/* Any powernow info ? */
+#ifdef MODULE
+		printk(KERN_INFO PFX "No powernow capabilities detected\n");
+#endif
+		return 0;
+	}
+
+	if ((c->x86_model == 6) && (c->x86_mask == 0)) {
+		printk(KERN_INFO PFX "K7 660[A0] core detected, "
+				"enabling errata workarounds\n");
+		have_a0 = 1;
+	}
+
+	cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+
+	/* Check we can actually do something before we say anything.*/
+	if (!(edx & (1 << 1 | 1 << 2)))
+		return 0;
+
+	printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
+
+	if (edx & 1 << 1) {
+		printk("frequency");
+		can_scale_bus = 1;
+	}
+
+	if ((edx & (1 << 1 | 1 << 2)) == 0x6)
+		printk(" and ");
+
+	if (edx & 1 << 2) {
+		printk("voltage");
+		can_scale_vid = 1;
+	}
+
+	printk(".\n");
+	return 1;
+}
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+static void invalidate_entry(unsigned int entry)
+{
+	powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
+}
+#endif
+
+static int get_ranges(unsigned char *pst)
+{
+	unsigned int j;
+	unsigned int speed;
+	u8 fid, vid;
+
+	powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
+				(number_scales + 1)), GFP_KERNEL);
+	if (!powernow_table)
+		return -ENOMEM;
+
+	for (j = 0 ; j < number_scales; j++) {
+		fid = *pst++;
+
+		powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
+		powernow_table[j].index = fid; /* lower 8 bits */
+
+		speed = powernow_table[j].frequency;
+
+		if ((fid_codes[fid] % 10) == 5) {
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+			if (have_a0 == 1)
+				invalidate_entry(j);
+#endif
+		}
+
+		if (speed < minimum_speed)
+			minimum_speed = speed;
+		if (speed > maximum_speed)
+			maximum_speed = speed;
+
+		vid = *pst++;
+		powernow_table[j].index |= (vid << 8); /* upper 8 bits */
+
+		pr_debug("   FID: 0x%x (%d.%dx [%dMHz])  "
+			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
+			 fid_codes[fid] % 10, speed/1000, vid,
+			 mobile_vid_table[vid]/1000,
+			 mobile_vid_table[vid]%1000);
+	}
+	powernow_table[number_scales].frequency = CPUFREQ_TABLE_END;
+	powernow_table[number_scales].index = 0;
+
+	return 0;
+}
+
+
+static void change_FID(int fid)
+{
+	union msr_fidvidctl fidvidctl;
+
+	rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
+	if (fidvidctl.bits.FID != fid) {
+		fidvidctl.bits.SGTC = latency;
+		fidvidctl.bits.FID = fid;
+		fidvidctl.bits.VIDC = 0;
+		fidvidctl.bits.FIDC = 1;
+		wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
+	}
+}
+
+
+static void change_VID(int vid)
+{
+	union msr_fidvidctl fidvidctl;
+
+	rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
+	if (fidvidctl.bits.VID != vid) {
+		fidvidctl.bits.SGTC = latency;
+		fidvidctl.bits.VID = vid;
+		fidvidctl.bits.FIDC = 0;
+		fidvidctl.bits.VIDC = 1;
+		wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
+	}
+}
+
+
+static void change_speed(unsigned int index)
+{
+	u8 fid, vid;
+	struct cpufreq_freqs freqs;
+	union msr_fidvidstatus fidvidstatus;
+	int cfid;
+
+	/* fid are the lower 8 bits of the index we stored into
+	 * the cpufreq frequency table in powernow_decode_bios,
+	 * vid are the upper 8 bits.
+	 */
+
+	fid = powernow_table[index].index & 0xFF;
+	vid = (powernow_table[index].index & 0xFF00) >> 8;
+
+	freqs.cpu = 0;
+
+	rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+	cfid = fidvidstatus.bits.CFID;
+	freqs.old = fsb * fid_codes[cfid] / 10;
+
+	freqs.new = powernow_table[index].frequency;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	/* Now do the magic poking into the MSRs.  */
+
+	if (have_a0 == 1)	/* A0 errata 5 */
+		local_irq_disable();
+
+	if (freqs.old > freqs.new) {
+		/* Going down, so change FID first */
+		change_FID(fid);
+		change_VID(vid);
+	} else {
+		/* Going up, so change VID first */
+		change_VID(vid);
+		change_FID(fid);
+	}
+
+
+	if (have_a0 == 1)
+		local_irq_enable();
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+}
+
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+
+static struct acpi_processor_performance *acpi_processor_perf;
+
+static int powernow_acpi_init(void)
+{
+	int i;
+	int retval = 0;
+	union powernow_acpi_control_t pc;
+
+	if (acpi_processor_perf != NULL && powernow_table != NULL) {
+		retval = -EINVAL;
+		goto err0;
+	}
+
+	acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance),
+				      GFP_KERNEL);
+	if (!acpi_processor_perf) {
+		retval = -ENOMEM;
+		goto err0;
+	}
+
+	if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
+								GFP_KERNEL)) {
+		retval = -ENOMEM;
+		goto err05;
+	}
+
+	if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
+		retval = -EIO;
+		goto err1;
+	}
+
+	if (acpi_processor_perf->control_register.space_id !=
+			ACPI_ADR_SPACE_FIXED_HARDWARE) {
+		retval = -ENODEV;
+		goto err2;
+	}
+
+	if (acpi_processor_perf->status_register.space_id !=
+			ACPI_ADR_SPACE_FIXED_HARDWARE) {
+		retval = -ENODEV;
+		goto err2;
+	}
+
+	number_scales = acpi_processor_perf->state_count;
+
+	if (number_scales < 2) {
+		retval = -ENODEV;
+		goto err2;
+	}
+
+	powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
+				(number_scales + 1)), GFP_KERNEL);
+	if (!powernow_table) {
+		retval = -ENOMEM;
+		goto err2;
+	}
+
+	pc.val = (unsigned long) acpi_processor_perf->states[0].control;
+	for (i = 0; i < number_scales; i++) {
+		u8 fid, vid;
+		struct acpi_processor_px *state =
+			&acpi_processor_perf->states[i];
+		unsigned int speed, speed_mhz;
+
+		pc.val = (unsigned long) state->control;
+		pr_debug("acpi:  P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
+			 i,
+			 (u32) state->core_frequency,
+			 (u32) state->power,
+			 (u32) state->transition_latency,
+			 (u32) state->control,
+			 pc.bits.sgtc);
+
+		vid = pc.bits.vid;
+		fid = pc.bits.fid;
+
+		powernow_table[i].frequency = fsb * fid_codes[fid] / 10;
+		powernow_table[i].index = fid; /* lower 8 bits */
+		powernow_table[i].index |= (vid << 8); /* upper 8 bits */
+
+		speed = powernow_table[i].frequency;
+		speed_mhz = speed / 1000;
+
+		/* processor_perflib will multiply the MHz value by 1000 to
+		 * get a KHz value (e.g. 1266000). However, powernow-k7 works
+		 * with true KHz values (e.g. 1266768). To ensure that all
+		 * powernow frequencies are available, we must ensure that
+		 * ACPI doesn't restrict them, so we round up the MHz value
+		 * to ensure that perflib's computed KHz value is greater than
+		 * or equal to powernow's KHz value.
+		 */
+		if (speed % 1000 > 0)
+			speed_mhz++;
+
+		if ((fid_codes[fid] % 10) == 5) {
+			if (have_a0 == 1)
+				invalidate_entry(i);
+		}
+
+		pr_debug("   FID: 0x%x (%d.%dx [%dMHz])  "
+			 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
+			 fid_codes[fid] % 10, speed_mhz, vid,
+			 mobile_vid_table[vid]/1000,
+			 mobile_vid_table[vid]%1000);
+
+		if (state->core_frequency != speed_mhz) {
+			state->core_frequency = speed_mhz;
+			pr_debug("   Corrected ACPI frequency to %d\n",
+				speed_mhz);
+		}
+
+		if (latency < pc.bits.sgtc)
+			latency = pc.bits.sgtc;
+
+		if (speed < minimum_speed)
+			minimum_speed = speed;
+		if (speed > maximum_speed)
+			maximum_speed = speed;
+	}
+
+	powernow_table[i].frequency = CPUFREQ_TABLE_END;
+	powernow_table[i].index = 0;
+
+	/* notify BIOS that we exist */
+	acpi_processor_notify_smm(THIS_MODULE);
+
+	return 0;
+
+err2:
+	acpi_processor_unregister_performance(acpi_processor_perf, 0);
+err1:
+	free_cpumask_var(acpi_processor_perf->shared_cpu_map);
+err05:
+	kfree(acpi_processor_perf);
+err0:
+	printk(KERN_WARNING PFX "ACPI perflib can not be used on "
+			"this platform\n");
+	acpi_processor_perf = NULL;
+	return retval;
+}
+#else
+static int powernow_acpi_init(void)
+{
+	printk(KERN_INFO PFX "no support for ACPI processor found."
+	       "  Please recompile your kernel with ACPI processor\n");
+	return -EINVAL;
+}
+#endif
+
+static void print_pst_entry(struct pst_s *pst, unsigned int j)
+{
+	pr_debug("PST:%d (@%p)\n", j, pst);
+	pr_debug(" cpuid: 0x%x  fsb: %d  maxFID: 0x%x  startvid: 0x%x\n",
+		pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
+}
+
+static int powernow_decode_bios(int maxfid, int startvid)
+{
+	struct psb_s *psb;
+	struct pst_s *pst;
+	unsigned int i, j;
+	unsigned char *p;
+	unsigned int etuple;
+	unsigned int ret;
+
+	etuple = cpuid_eax(0x80000001);
+
+	for (i = 0xC0000; i < 0xffff0 ; i += 16) {
+
+		p = phys_to_virt(i);
+
+		if (memcmp(p, "AMDK7PNOW!",  10) == 0) {
+			pr_debug("Found PSB header at %p\n", p);
+			psb = (struct psb_s *) p;
+			pr_debug("Table version: 0x%x\n", psb->tableversion);
+			if (psb->tableversion != 0x12) {
+				printk(KERN_INFO PFX "Sorry, only v1.2 tables"
+						" supported right now\n");
+				return -ENODEV;
+			}
+
+			pr_debug("Flags: 0x%x\n", psb->flags);
+			if ((psb->flags & 1) == 0)
+				pr_debug("Mobile voltage regulator\n");
+			else
+				pr_debug("Desktop voltage regulator\n");
+
+			latency = psb->settlingtime;
+			if (latency < 100) {
+				printk(KERN_INFO PFX "BIOS set settling time "
+						"to %d microseconds. "
+						"Should be at least 100. "
+						"Correcting.\n", latency);
+				latency = 100;
+			}
+			pr_debug("Settling Time: %d microseconds.\n",
+					psb->settlingtime);
+			pr_debug("Has %d PST tables. (Only dumping ones "
+					"relevant to this CPU).\n",
+					psb->numpst);
+
+			p += sizeof(struct psb_s);
+
+			pst = (struct pst_s *) p;
+
+			for (j = 0; j < psb->numpst; j++) {
+				pst = (struct pst_s *) p;
+				number_scales = pst->numpstates;
+
+				if ((etuple == pst->cpuid) &&
+				    check_fsb(pst->fsbspeed) &&
+				    (maxfid == pst->maxfid) &&
+				    (startvid == pst->startvid)) {
+					print_pst_entry(pst, j);
+					p = (char *)pst + sizeof(struct pst_s);
+					ret = get_ranges(p);
+					return ret;
+				} else {
+					unsigned int k;
+					p = (char *)pst + sizeof(struct pst_s);
+					for (k = 0; k < number_scales; k++)
+						p += 2;
+				}
+			}
+			printk(KERN_INFO PFX "No PST tables match this cpuid "
+					"(0x%x)\n", etuple);
+			printk(KERN_INFO PFX "This is indicative of a broken "
+					"BIOS.\n");
+
+			return -EINVAL;
+		}
+		p++;
+	}
+
+	return -ENODEV;
+}
+
+
+static int powernow_target(struct cpufreq_policy *policy,
+			    unsigned int target_freq,
+			    unsigned int relation)
+{
+	unsigned int newstate;
+
+	if (cpufreq_frequency_table_target(policy, powernow_table, target_freq,
+				relation, &newstate))
+		return -EINVAL;
+
+	change_speed(newstate);
+
+	return 0;
+}
+
+
+static int powernow_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, powernow_table);
+}
+
+/*
+ * We use the fact that the bus frequency is somehow
+ * a multiple of 100000/3 khz, then we compute sgtc according
+ * to this multiple.
+ * That way, we match more how AMD thinks all of that work.
+ * We will then get the same kind of behaviour already tested under
+ * the "well-known" other OS.
+ */
+static int __cpuinit fixup_sgtc(void)
+{
+	unsigned int sgtc;
+	unsigned int m;
+
+	m = fsb / 3333;
+	if ((m % 10) >= 5)
+		m += 5;
+
+	m /= 10;
+
+	sgtc = 100 * m * latency;
+	sgtc = sgtc / 3;
+	if (sgtc > 0xfffff) {
+		printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
+		sgtc = 0xfffff;
+	}
+	return sgtc;
+}
+
+static unsigned int powernow_get(unsigned int cpu)
+{
+	union msr_fidvidstatus fidvidstatus;
+	unsigned int cfid;
+
+	if (cpu)
+		return 0;
+	rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+	cfid = fidvidstatus.bits.CFID;
+
+	return fsb * fid_codes[cfid] / 10;
+}
+
+
+static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d)
+{
+	printk(KERN_WARNING PFX
+		"%s laptop with broken PST tables in BIOS detected.\n",
+		d->ident);
+	printk(KERN_WARNING PFX
+		"You need to downgrade to 3A21 (09/09/2002), or try a newer "
+		"BIOS than 3A71 (01/20/2003)\n");
+	printk(KERN_WARNING PFX
+		"cpufreq scaling has been disabled as a result of this.\n");
+	return 0;
+}
+
+/*
+ * Some Athlon laptops have really fucked PST tables.
+ * A BIOS update is all that can save them.
+ * Mention this, and disable cpufreq.
+ */
+static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = {
+	{
+		.callback = acer_cpufreq_pst,
+		.ident = "Acer Aspire",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"),
+			DMI_MATCH(DMI_BIOS_VERSION, "3A71"),
+		},
+	},
+	{ }
+};
+
+static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy)
+{
+	union msr_fidvidstatus fidvidstatus;
+	int result;
+
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
+
+	recalibrate_cpu_khz();
+
+	fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
+	if (!fsb) {
+		printk(KERN_WARNING PFX "can not determine bus frequency\n");
+		return -EINVAL;
+	}
+	pr_debug("FSB: %3dMHz\n", fsb/1000);
+
+	if (dmi_check_system(powernow_dmi_table) || acpi_force) {
+		printk(KERN_INFO PFX "PSB/PST known to be broken.  "
+				"Trying ACPI instead\n");
+		result = powernow_acpi_init();
+	} else {
+		result = powernow_decode_bios(fidvidstatus.bits.MFID,
+				fidvidstatus.bits.SVID);
+		if (result) {
+			printk(KERN_INFO PFX "Trying ACPI perflib\n");
+			maximum_speed = 0;
+			minimum_speed = -1;
+			latency = 0;
+			result = powernow_acpi_init();
+			if (result) {
+				printk(KERN_INFO PFX
+					"ACPI and legacy methods failed\n");
+			}
+		} else {
+			/* SGTC use the bus clock as timer */
+			latency = fixup_sgtc();
+			printk(KERN_INFO PFX "SGTC: %d\n", latency);
+		}
+	}
+
+	if (result)
+		return result;
+
+	printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
+				minimum_speed/1000, maximum_speed/1000);
+
+	policy->cpuinfo.transition_latency =
+		cpufreq_scale(2000000UL, fsb, latency);
+
+	policy->cur = powernow_get(0);
+
+	cpufreq_frequency_table_get_attr(powernow_table, policy->cpu);
+
+	return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
+}
+
+static int powernow_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+	if (acpi_processor_perf) {
+		acpi_processor_unregister_performance(acpi_processor_perf, 0);
+		free_cpumask_var(acpi_processor_perf->shared_cpu_map);
+		kfree(acpi_processor_perf);
+	}
+#endif
+
+	kfree(powernow_table);
+	return 0;
+}
+
+static struct freq_attr *powernow_table_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver powernow_driver = {
+	.verify		= powernow_verify,
+	.target		= powernow_target,
+	.get		= powernow_get,
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+	.bios_limit	= acpi_processor_get_bios_limit,
+#endif
+	.init		= powernow_cpu_init,
+	.exit		= powernow_cpu_exit,
+	.name		= "powernow-k7",
+	.owner		= THIS_MODULE,
+	.attr		= powernow_table_attr,
+};
+
+static int __init powernow_init(void)
+{
+	if (check_powernow() == 0)
+		return -ENODEV;
+	return cpufreq_register_driver(&powernow_driver);
+}
+
+
+static void __exit powernow_exit(void)
+{
+	cpufreq_unregister_driver(&powernow_driver);
+}
+
+module_param(acpi_force,  int, 0444);
+MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
+
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
+MODULE_LICENSE("GPL");
+
+late_initcall(powernow_init);
+module_exit(powernow_exit);
+
diff --git a/drivers/cpufreq/powernow-k7.h b/drivers/cpufreq/powernow-k7.h
new file mode 100644
index 000000000000..35fb4eaf6e1c
--- /dev/null
+++ b/drivers/cpufreq/powernow-k7.h
@@ -0,0 +1,43 @@
+/*
+ *  (C) 2003 Dave Jones.
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  AMD-specific information
+ *
+ */
+
+union msr_fidvidctl {
+	struct {
+		unsigned FID:5,			// 4:0
+		reserved1:3,	// 7:5
+		VID:5,			// 12:8
+		reserved2:3,	// 15:13
+		FIDC:1,			// 16
+		VIDC:1,			// 17
+		reserved3:2,	// 19:18
+		FIDCHGRATIO:1,	// 20
+		reserved4:11,	// 31-21
+		SGTC:20,		// 32:51
+		reserved5:12;	// 63:52
+	} bits;
+	unsigned long long val;
+};
+
+union msr_fidvidstatus {
+	struct {
+		unsigned CFID:5,			// 4:0
+		reserved1:3,	// 7:5
+		SFID:5,			// 12:8
+		reserved2:3,	// 15:13
+		MFID:5,			// 20:16
+		reserved3:11,	// 31:21
+		CVID:5,			// 36:32
+		reserved4:3,	// 39:37
+		SVID:5,			// 44:40
+		reserved5:3,	// 47:45
+		MVID:5,			// 52:48
+		reserved6:11;	// 63:53
+	} bits;
+	unsigned long long val;
+};
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
new file mode 100644
index 000000000000..83479b6fb9a1
--- /dev/null
+++ b/drivers/cpufreq/powernow-k8.c
@@ -0,0 +1,1607 @@
+/*
+ *   (c) 2003-2010 Advanced Micro Devices, Inc.
+ *  Your use of this code is subject to the terms and conditions of the
+ *  GNU general public license version 2. See "COPYING" or
+ *  http://www.gnu.org/licenses/gpl.html
+ *
+ *  Support : mark.langsdorf@amd.com
+ *
+ *  Based on the powernow-k7.c module written by Dave Jones.
+ *  (C) 2003 Dave Jones on behalf of SuSE Labs
+ *  (C) 2004 Dominik Brodowski <linux@brodo.de>
+ *  (C) 2004 Pavel Machek <pavel@ucw.cz>
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon datasheets & sample CPUs kindly provided by AMD.
+ *
+ *  Valuable input gratefully received from Dave Jones, Pavel Machek,
+ *  Dominik Brodowski, Jacob Shin, and others.
+ *  Originally developed by Paul Devriendt.
+ *  Processor information obtained from Chapter 9 (Power and Thermal Management)
+ *  of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
+ *  Opteron Processors" available for download from www.amd.com
+ *
+ *  Tables for specific CPUs can be inferred from
+ *     http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/cpumask.h>
+#include <linux/sched.h>	/* for current / set_cpus_allowed() */
+#include <linux/io.h>
+#include <linux/delay.h>
+
+#include <asm/msr.h>
+
+#include <linux/acpi.h>
+#include <linux/mutex.h>
+#include <acpi/processor.h>
+
+#define PFX "powernow-k8: "
+#define VERSION "version 2.20.00"
+#include "powernow-k8.h"
+#include "mperf.h"
+
+/* serialize freq changes  */
+static DEFINE_MUTEX(fidvid_mutex);
+
+static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
+
+static int cpu_family = CPU_OPTERON;
+
+/* core performance boost */
+static bool cpb_capable, cpb_enabled;
+static struct msr __percpu *msrs;
+
+static struct cpufreq_driver cpufreq_amd64_driver;
+
+#ifndef CONFIG_SMP
+static inline const struct cpumask *cpu_core_mask(int cpu)
+{
+	return cpumask_of(0);
+}
+#endif
+
+/* Return a frequency in MHz, given an input fid */
+static u32 find_freq_from_fid(u32 fid)
+{
+	return 800 + (fid * 100);
+}
+
+/* Return a frequency in KHz, given an input fid */
+static u32 find_khz_freq_from_fid(u32 fid)
+{
+	return 1000 * find_freq_from_fid(fid);
+}
+
+static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
+		u32 pstate)
+{
+	return data[pstate].frequency;
+}
+
+/* Return the vco fid for an input fid
+ *
+ * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
+ * only from corresponding high fids. This returns "high" fid corresponding to
+ * "low" one.
+ */
+static u32 convert_fid_to_vco_fid(u32 fid)
+{
+	if (fid < HI_FID_TABLE_BOTTOM)
+		return 8 + (2 * fid);
+	else
+		return fid;
+}
+
+/*
+ * Return 1 if the pending bit is set. Unless we just instructed the processor
+ * to transition to a new state, seeing this bit set is really bad news.
+ */
+static int pending_bit_stuck(void)
+{
+	u32 lo, hi;
+
+	if (cpu_family == CPU_HW_PSTATE)
+		return 0;
+
+	rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
+}
+
+/*
+ * Update the global current fid / vid values from the status msr.
+ * Returns 1 on error.
+ */
+static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
+{
+	u32 lo, hi;
+	u32 i = 0;
+
+	if (cpu_family == CPU_HW_PSTATE) {
+		rdmsr(MSR_PSTATE_STATUS, lo, hi);
+		i = lo & HW_PSTATE_MASK;
+		data->currpstate = i;
+
+		/*
+		 * a workaround for family 11h erratum 311 might cause
+		 * an "out-of-range Pstate if the core is in Pstate-0
+		 */
+		if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps))
+			data->currpstate = HW_PSTATE_0;
+
+		return 0;
+	}
+	do {
+		if (i++ > 10000) {
+			pr_debug("detected change pending stuck\n");
+			return 1;
+		}
+		rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	} while (lo & MSR_S_LO_CHANGE_PENDING);
+
+	data->currvid = hi & MSR_S_HI_CURRENT_VID;
+	data->currfid = lo & MSR_S_LO_CURRENT_FID;
+
+	return 0;
+}
+
+/* the isochronous relief time */
+static void count_off_irt(struct powernow_k8_data *data)
+{
+	udelay((1 << data->irt) * 10);
+	return;
+}
+
+/* the voltage stabilization time */
+static void count_off_vst(struct powernow_k8_data *data)
+{
+	udelay(data->vstable * VST_UNITS_20US);
+	return;
+}
+
+/* need to init the control msr to a safe value (for each cpu) */
+static void fidvid_msr_init(void)
+{
+	u32 lo, hi;
+	u8 fid, vid;
+
+	rdmsr(MSR_FIDVID_STATUS, lo, hi);
+	vid = hi & MSR_S_HI_CURRENT_VID;
+	fid = lo & MSR_S_LO_CURRENT_FID;
+	lo = fid | (vid << MSR_C_LO_VID_SHIFT);
+	hi = MSR_C_HI_STP_GNT_BENIGN;
+	pr_debug("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
+	wrmsr(MSR_FIDVID_CTL, lo, hi);
+}
+
+/* write the new fid value along with the other control fields to the msr */
+static int write_new_fid(struct powernow_k8_data *data, u32 fid)
+{
+	u32 lo;
+	u32 savevid = data->currvid;
+	u32 i = 0;
+
+	if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
+		printk(KERN_ERR PFX "internal error - overflow on fid write\n");
+		return 1;
+	}
+
+	lo = fid;
+	lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
+	lo |= MSR_C_LO_INIT_FID_VID;
+
+	pr_debug("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
+		fid, lo, data->plllock * PLL_LOCK_CONVERSION);
+
+	do {
+		wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
+		if (i++ > 100) {
+			printk(KERN_ERR PFX
+				"Hardware error - pending bit very stuck - "
+				"no further pstate changes possible\n");
+			return 1;
+		}
+	} while (query_current_values_with_pending_wait(data));
+
+	count_off_irt(data);
+
+	if (savevid != data->currvid) {
+		printk(KERN_ERR PFX
+			"vid change on fid trans, old 0x%x, new 0x%x\n",
+			savevid, data->currvid);
+		return 1;
+	}
+
+	if (fid != data->currfid) {
+		printk(KERN_ERR PFX
+			"fid trans failed, fid 0x%x, curr 0x%x\n", fid,
+			data->currfid);
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Write a new vid to the hardware */
+static int write_new_vid(struct powernow_k8_data *data, u32 vid)
+{
+	u32 lo;
+	u32 savefid = data->currfid;
+	int i = 0;
+
+	if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
+		printk(KERN_ERR PFX "internal error - overflow on vid write\n");
+		return 1;
+	}
+
+	lo = data->currfid;
+	lo |= (vid << MSR_C_LO_VID_SHIFT);
+	lo |= MSR_C_LO_INIT_FID_VID;
+
+	pr_debug("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
+		vid, lo, STOP_GRANT_5NS);
+
+	do {
+		wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
+		if (i++ > 100) {
+			printk(KERN_ERR PFX "internal error - pending bit "
+					"very stuck - no further pstate "
+					"changes possible\n");
+			return 1;
+		}
+	} while (query_current_values_with_pending_wait(data));
+
+	if (savefid != data->currfid) {
+		printk(KERN_ERR PFX "fid changed on vid trans, old "
+			"0x%x new 0x%x\n",
+		       savefid, data->currfid);
+		return 1;
+	}
+
+	if (vid != data->currvid) {
+		printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
+				"curr 0x%x\n",
+				vid, data->currvid);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Reduce the vid by the max of step or reqvid.
+ * Decreasing vid codes represent increasing voltages:
+ * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
+ */
+static int decrease_vid_code_by_step(struct powernow_k8_data *data,
+		u32 reqvid, u32 step)
+{
+	if ((data->currvid - reqvid) > step)
+		reqvid = data->currvid - step;
+
+	if (write_new_vid(data, reqvid))
+		return 1;
+
+	count_off_vst(data);
+
+	return 0;
+}
+
+/* Change hardware pstate by single MSR write */
+static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
+{
+	wrmsr(MSR_PSTATE_CTRL, pstate, 0);
+	data->currpstate = pstate;
+	return 0;
+}
+
+/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
+static int transition_fid_vid(struct powernow_k8_data *data,
+		u32 reqfid, u32 reqvid)
+{
+	if (core_voltage_pre_transition(data, reqvid, reqfid))
+		return 1;
+
+	if (core_frequency_transition(data, reqfid))
+		return 1;
+
+	if (core_voltage_post_transition(data, reqvid))
+		return 1;
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
+		printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
+				"curr 0x%x 0x%x\n",
+				smp_processor_id(),
+				reqfid, reqvid, data->currfid, data->currvid);
+		return 1;
+	}
+
+	pr_debug("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n",
+		smp_processor_id(), data->currfid, data->currvid);
+
+	return 0;
+}
+
+/* Phase 1 - core voltage transition ... setup voltage */
+static int core_voltage_pre_transition(struct powernow_k8_data *data,
+		u32 reqvid, u32 reqfid)
+{
+	u32 rvosteps = data->rvo;
+	u32 savefid = data->currfid;
+	u32 maxvid, lo, rvomult = 1;
+
+	pr_debug("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
+		"reqvid 0x%x, rvo 0x%x\n",
+		smp_processor_id(),
+		data->currfid, data->currvid, reqvid, data->rvo);
+
+	if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP))
+		rvomult = 2;
+	rvosteps *= rvomult;
+	rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
+	maxvid = 0x1f & (maxvid >> 16);
+	pr_debug("ph1 maxvid=0x%x\n", maxvid);
+	if (reqvid < maxvid) /* lower numbers are higher voltages */
+		reqvid = maxvid;
+
+	while (data->currvid > reqvid) {
+		pr_debug("ph1: curr 0x%x, req vid 0x%x\n",
+			data->currvid, reqvid);
+		if (decrease_vid_code_by_step(data, reqvid, data->vidmvs))
+			return 1;
+	}
+
+	while ((rvosteps > 0) &&
+			((rvomult * data->rvo + data->currvid) > reqvid)) {
+		if (data->currvid == maxvid) {
+			rvosteps = 0;
+		} else {
+			pr_debug("ph1: changing vid for rvo, req 0x%x\n",
+				data->currvid - 1);
+			if (decrease_vid_code_by_step(data, data->currvid-1, 1))
+				return 1;
+			rvosteps--;
+		}
+	}
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if (savefid != data->currfid) {
+		printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
+				data->currfid);
+		return 1;
+	}
+
+	pr_debug("ph1 complete, currfid 0x%x, currvid 0x%x\n",
+		data->currfid, data->currvid);
+
+	return 0;
+}
+
+/* Phase 2 - core frequency transition */
+static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
+{
+	u32 vcoreqfid, vcocurrfid, vcofiddiff;
+	u32 fid_interval, savevid = data->currvid;
+
+	if (data->currfid == reqfid) {
+		printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
+				data->currfid);
+		return 0;
+	}
+
+	pr_debug("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
+		"reqfid 0x%x\n",
+		smp_processor_id(),
+		data->currfid, data->currvid, reqfid);
+
+	vcoreqfid = convert_fid_to_vco_fid(reqfid);
+	vcocurrfid = convert_fid_to_vco_fid(data->currfid);
+	vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
+	    : vcoreqfid - vcocurrfid;
+
+	if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP))
+		vcofiddiff = 0;
+
+	while (vcofiddiff > 2) {
+		(data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
+
+		if (reqfid > data->currfid) {
+			if (data->currfid > LO_FID_TABLE_TOP) {
+				if (write_new_fid(data,
+						data->currfid + fid_interval))
+					return 1;
+			} else {
+				if (write_new_fid
+				    (data,
+				     2 + convert_fid_to_vco_fid(data->currfid)))
+					return 1;
+			}
+		} else {
+			if (write_new_fid(data, data->currfid - fid_interval))
+				return 1;
+		}
+
+		vcocurrfid = convert_fid_to_vco_fid(data->currfid);
+		vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
+		    : vcoreqfid - vcocurrfid;
+	}
+
+	if (write_new_fid(data, reqfid))
+		return 1;
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if (data->currfid != reqfid) {
+		printk(KERN_ERR PFX
+			"ph2: mismatch, failed fid transition, "
+			"curr 0x%x, req 0x%x\n",
+			data->currfid, reqfid);
+		return 1;
+	}
+
+	if (savevid != data->currvid) {
+		printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n",
+			savevid, data->currvid);
+		return 1;
+	}
+
+	pr_debug("ph2 complete, currfid 0x%x, currvid 0x%x\n",
+		data->currfid, data->currvid);
+
+	return 0;
+}
+
+/* Phase 3 - core voltage transition flow ... jump to the final vid. */
+static int core_voltage_post_transition(struct powernow_k8_data *data,
+		u32 reqvid)
+{
+	u32 savefid = data->currfid;
+	u32 savereqvid = reqvid;
+
+	pr_debug("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n",
+		smp_processor_id(),
+		data->currfid, data->currvid);
+
+	if (reqvid != data->currvid) {
+		if (write_new_vid(data, reqvid))
+			return 1;
+
+		if (savefid != data->currfid) {
+			printk(KERN_ERR PFX
+			       "ph3: bad fid change, save 0x%x, curr 0x%x\n",
+			       savefid, data->currfid);
+			return 1;
+		}
+
+		if (data->currvid != reqvid) {
+			printk(KERN_ERR PFX
+			       "ph3: failed vid transition\n, "
+			       "req 0x%x, curr 0x%x",
+			       reqvid, data->currvid);
+			return 1;
+		}
+	}
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if (savereqvid != data->currvid) {
+		pr_debug("ph3 failed, currvid 0x%x\n", data->currvid);
+		return 1;
+	}
+
+	if (savefid != data->currfid) {
+		pr_debug("ph3 failed, currfid changed 0x%x\n",
+			data->currfid);
+		return 1;
+	}
+
+	pr_debug("ph3 complete, currfid 0x%x, currvid 0x%x\n",
+		data->currfid, data->currvid);
+
+	return 0;
+}
+
+static void check_supported_cpu(void *_rc)
+{
+	u32 eax, ebx, ecx, edx;
+	int *rc = _rc;
+
+	*rc = -ENODEV;
+
+	if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD)
+		return;
+
+	eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+	if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
+	    ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
+		return;
+
+	if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
+		if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
+		    ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
+			printk(KERN_INFO PFX
+				"Processor cpuid %x not supported\n", eax);
+			return;
+		}
+
+		eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
+		if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
+			printk(KERN_INFO PFX
+			       "No frequency change capabilities detected\n");
+			return;
+		}
+
+		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+		if ((edx & P_STATE_TRANSITION_CAPABLE)
+			!= P_STATE_TRANSITION_CAPABLE) {
+			printk(KERN_INFO PFX
+				"Power state transitions not supported\n");
+			return;
+		}
+	} else { /* must be a HW Pstate capable processor */
+		cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+		if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
+			cpu_family = CPU_HW_PSTATE;
+		else
+			return;
+	}
+
+	*rc = 0;
+}
+
+static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
+		u8 maxvid)
+{
+	unsigned int j;
+	u8 lastfid = 0xff;
+
+	for (j = 0; j < data->numps; j++) {
+		if (pst[j].vid > LEAST_VID) {
+			printk(KERN_ERR FW_BUG PFX "vid %d invalid : 0x%x\n",
+			       j, pst[j].vid);
+			return -EINVAL;
+		}
+		if (pst[j].vid < data->rvo) {
+			/* vid + rvo >= 0 */
+			printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
+			       " %d\n", j);
+			return -ENODEV;
+		}
+		if (pst[j].vid < maxvid + data->rvo) {
+			/* vid + rvo >= maxvid */
+			printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
+			       " %d\n", j);
+			return -ENODEV;
+		}
+		if (pst[j].fid > MAX_FID) {
+			printk(KERN_ERR FW_BUG PFX "maxfid exceeded with pstate"
+			       " %d\n", j);
+			return -ENODEV;
+		}
+		if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) {
+			/* Only first fid is allowed to be in "low" range */
+			printk(KERN_ERR FW_BUG PFX "two low fids - %d : "
+			       "0x%x\n", j, pst[j].fid);
+			return -EINVAL;
+		}
+		if (pst[j].fid < lastfid)
+			lastfid = pst[j].fid;
+	}
+	if (lastfid & 1) {
+		printk(KERN_ERR FW_BUG PFX "lastfid invalid\n");
+		return -EINVAL;
+	}
+	if (lastfid > LO_FID_TABLE_TOP)
+		printk(KERN_INFO FW_BUG PFX
+			"first fid not from lo freq table\n");
+
+	return 0;
+}
+
+static void invalidate_entry(struct cpufreq_frequency_table *powernow_table,
+		unsigned int entry)
+{
+	powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
+}
+
+static void print_basics(struct powernow_k8_data *data)
+{
+	int j;
+	for (j = 0; j < data->numps; j++) {
+		if (data->powernow_table[j].frequency !=
+				CPUFREQ_ENTRY_INVALID) {
+			if (cpu_family == CPU_HW_PSTATE) {
+				printk(KERN_INFO PFX
+					"   %d : pstate %d (%d MHz)\n", j,
+					data->powernow_table[j].index,
+					data->powernow_table[j].frequency/1000);
+			} else {
+				printk(KERN_INFO PFX
+					"fid 0x%x (%d MHz), vid 0x%x\n",
+					data->powernow_table[j].index & 0xff,
+					data->powernow_table[j].frequency/1000,
+					data->powernow_table[j].index >> 8);
+			}
+		}
+	}
+	if (data->batps)
+		printk(KERN_INFO PFX "Only %d pstates on battery\n",
+				data->batps);
+}
+
+static u32 freq_from_fid_did(u32 fid, u32 did)
+{
+	u32 mhz = 0;
+
+	if (boot_cpu_data.x86 == 0x10)
+		mhz = (100 * (fid + 0x10)) >> did;
+	else if (boot_cpu_data.x86 == 0x11)
+		mhz = (100 * (fid + 8)) >> did;
+	else
+		BUG();
+
+	return mhz * 1000;
+}
+
+static int fill_powernow_table(struct powernow_k8_data *data,
+		struct pst_s *pst, u8 maxvid)
+{
+	struct cpufreq_frequency_table *powernow_table;
+	unsigned int j;
+
+	if (data->batps) {
+		/* use ACPI support to get full speed on mains power */
+		printk(KERN_WARNING PFX
+			"Only %d pstates usable (use ACPI driver for full "
+			"range\n", data->batps);
+		data->numps = data->batps;
+	}
+
+	for (j = 1; j < data->numps; j++) {
+		if (pst[j-1].fid >= pst[j].fid) {
+			printk(KERN_ERR PFX "PST out of sequence\n");
+			return -EINVAL;
+		}
+	}
+
+	if (data->numps < 2) {
+		printk(KERN_ERR PFX "no p states to transition\n");
+		return -ENODEV;
+	}
+
+	if (check_pst_table(data, pst, maxvid))
+		return -EINVAL;
+
+	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
+		* (data->numps + 1)), GFP_KERNEL);
+	if (!powernow_table) {
+		printk(KERN_ERR PFX "powernow_table memory alloc failure\n");
+		return -ENOMEM;
+	}
+
+	for (j = 0; j < data->numps; j++) {
+		int freq;
+		powernow_table[j].index = pst[j].fid; /* lower 8 bits */
+		powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
+		freq = find_khz_freq_from_fid(pst[j].fid);
+		powernow_table[j].frequency = freq;
+	}
+	powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
+	powernow_table[data->numps].index = 0;
+
+	if (query_current_values_with_pending_wait(data)) {
+		kfree(powernow_table);
+		return -EIO;
+	}
+
+	pr_debug("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
+	data->powernow_table = powernow_table;
+	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
+		print_basics(data);
+
+	for (j = 0; j < data->numps; j++)
+		if ((pst[j].fid == data->currfid) &&
+		    (pst[j].vid == data->currvid))
+			return 0;
+
+	pr_debug("currfid/vid do not match PST, ignoring\n");
+	return 0;
+}
+
+/* Find and validate the PSB/PST table in BIOS. */
+static int find_psb_table(struct powernow_k8_data *data)
+{
+	struct psb_s *psb;
+	unsigned int i;
+	u32 mvs;
+	u8 maxvid;
+	u32 cpst = 0;
+	u32 thiscpuid;
+
+	for (i = 0xc0000; i < 0xffff0; i += 0x10) {
+		/* Scan BIOS looking for the signature. */
+		/* It can not be at ffff0 - it is too big. */
+
+		psb = phys_to_virt(i);
+		if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0)
+			continue;
+
+		pr_debug("found PSB header at 0x%p\n", psb);
+
+		pr_debug("table vers: 0x%x\n", psb->tableversion);
+		if (psb->tableversion != PSB_VERSION_1_4) {
+			printk(KERN_ERR FW_BUG PFX "PSB table is not v1.4\n");
+			return -ENODEV;
+		}
+
+		pr_debug("flags: 0x%x\n", psb->flags1);
+		if (psb->flags1) {
+			printk(KERN_ERR FW_BUG PFX "unknown flags\n");
+			return -ENODEV;
+		}
+
+		data->vstable = psb->vstable;
+		pr_debug("voltage stabilization time: %d(*20us)\n",
+				data->vstable);
+
+		pr_debug("flags2: 0x%x\n", psb->flags2);
+		data->rvo = psb->flags2 & 3;
+		data->irt = ((psb->flags2) >> 2) & 3;
+		mvs = ((psb->flags2) >> 4) & 3;
+		data->vidmvs = 1 << mvs;
+		data->batps = ((psb->flags2) >> 6) & 3;
+
+		pr_debug("ramp voltage offset: %d\n", data->rvo);
+		pr_debug("isochronous relief time: %d\n", data->irt);
+		pr_debug("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs);
+
+		pr_debug("numpst: 0x%x\n", psb->num_tables);
+		cpst = psb->num_tables;
+		if ((psb->cpuid == 0x00000fc0) ||
+		    (psb->cpuid == 0x00000fe0)) {
+			thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
+			if ((thiscpuid == 0x00000fc0) ||
+			    (thiscpuid == 0x00000fe0))
+				cpst = 1;
+		}
+		if (cpst != 1) {
+			printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
+			return -ENODEV;
+		}
+
+		data->plllock = psb->plllocktime;
+		pr_debug("plllocktime: 0x%x (units 1us)\n", psb->plllocktime);
+		pr_debug("maxfid: 0x%x\n", psb->maxfid);
+		pr_debug("maxvid: 0x%x\n", psb->maxvid);
+		maxvid = psb->maxvid;
+
+		data->numps = psb->numps;
+		pr_debug("numpstates: 0x%x\n", data->numps);
+		return fill_powernow_table(data,
+				(struct pst_s *)(psb+1), maxvid);
+	}
+	/*
+	 * If you see this message, complain to BIOS manufacturer. If
+	 * he tells you "we do not support Linux" or some similar
+	 * nonsense, remember that Windows 2000 uses the same legacy
+	 * mechanism that the old Linux PSB driver uses. Tell them it
+	 * is broken with Windows 2000.
+	 *
+	 * The reference to the AMD documentation is chapter 9 in the
+	 * BIOS and Kernel Developer's Guide, which is available on
+	 * www.amd.com
+	 */
+	printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
+	printk(KERN_ERR PFX "Make sure that your BIOS is up to date"
+		" and Cool'N'Quiet support is enabled in BIOS setup\n");
+	return -ENODEV;
+}
+
+static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
+		unsigned int index)
+{
+	u64 control;
+
+	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
+		return;
+
+	control = data->acpi_data.states[index].control;
+	data->irt = (control >> IRT_SHIFT) & IRT_MASK;
+	data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
+	data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+	data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
+	data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
+	data->vstable = (control >> VST_SHIFT) & VST_MASK;
+}
+
+static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
+{
+	struct cpufreq_frequency_table *powernow_table;
+	int ret_val = -ENODEV;
+	u64 control, status;
+
+	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
+		pr_debug("register performance failed: bad ACPI data\n");
+		return -EIO;
+	}
+
+	/* verify the data contained in the ACPI structures */
+	if (data->acpi_data.state_count <= 1) {
+		pr_debug("No ACPI P-States\n");
+		goto err_out;
+	}
+
+	control = data->acpi_data.control_register.space_id;
+	status = data->acpi_data.status_register.space_id;
+
+	if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+	    (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+		pr_debug("Invalid control/status registers (%llx - %llx)\n",
+			control, status);
+		goto err_out;
+	}
+
+	/* fill in data->powernow_table */
+	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
+		* (data->acpi_data.state_count + 1)), GFP_KERNEL);
+	if (!powernow_table) {
+		pr_debug("powernow_table memory alloc failure\n");
+		goto err_out;
+	}
+
+	/* fill in data */
+	data->numps = data->acpi_data.state_count;
+	powernow_k8_acpi_pst_values(data, 0);
+
+	if (cpu_family == CPU_HW_PSTATE)
+		ret_val = fill_powernow_table_pstate(data, powernow_table);
+	else
+		ret_val = fill_powernow_table_fidvid(data, powernow_table);
+	if (ret_val)
+		goto err_out_mem;
+
+	powernow_table[data->acpi_data.state_count].frequency =
+		CPUFREQ_TABLE_END;
+	powernow_table[data->acpi_data.state_count].index = 0;
+	data->powernow_table = powernow_table;
+
+	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
+		print_basics(data);
+
+	/* notify BIOS that we exist */
+	acpi_processor_notify_smm(THIS_MODULE);
+
+	if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
+		printk(KERN_ERR PFX
+				"unable to alloc powernow_k8_data cpumask\n");
+		ret_val = -ENOMEM;
+		goto err_out_mem;
+	}
+
+	return 0;
+
+err_out_mem:
+	kfree(powernow_table);
+
+err_out:
+	acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+
+	/* data->acpi_data.state_count informs us at ->exit()
+	 * whether ACPI was used */
+	data->acpi_data.state_count = 0;
+
+	return ret_val;
+}
+
+static int fill_powernow_table_pstate(struct powernow_k8_data *data,
+		struct cpufreq_frequency_table *powernow_table)
+{
+	int i;
+	u32 hi = 0, lo = 0;
+	rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
+	data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
+
+	for (i = 0; i < data->acpi_data.state_count; i++) {
+		u32 index;
+
+		index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
+		if (index > data->max_hw_pstate) {
+			printk(KERN_ERR PFX "invalid pstate %d - "
+					"bad value %d.\n", i, index);
+			printk(KERN_ERR PFX "Please report to BIOS "
+					"manufacturer\n");
+			invalidate_entry(powernow_table, i);
+			continue;
+		}
+		rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
+		if (!(hi & HW_PSTATE_VALID_MASK)) {
+			pr_debug("invalid pstate %d, ignoring\n", index);
+			invalidate_entry(powernow_table, i);
+			continue;
+		}
+
+		powernow_table[i].index = index;
+
+		/* Frequency may be rounded for these */
+		if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
+				 || boot_cpu_data.x86 == 0x11) {
+			powernow_table[i].frequency =
+				freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
+		} else
+			powernow_table[i].frequency =
+				data->acpi_data.states[i].core_frequency * 1000;
+	}
+	return 0;
+}
+
+static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
+		struct cpufreq_frequency_table *powernow_table)
+{
+	int i;
+
+	for (i = 0; i < data->acpi_data.state_count; i++) {
+		u32 fid;
+		u32 vid;
+		u32 freq, index;
+		u64 status, control;
+
+		if (data->exttype) {
+			status =  data->acpi_data.states[i].status;
+			fid = status & EXT_FID_MASK;
+			vid = (status >> VID_SHIFT) & EXT_VID_MASK;
+		} else {
+			control =  data->acpi_data.states[i].control;
+			fid = control & FID_MASK;
+			vid = (control >> VID_SHIFT) & VID_MASK;
+		}
+
+		pr_debug("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
+
+		index = fid | (vid<<8);
+		powernow_table[i].index = index;
+
+		freq = find_khz_freq_from_fid(fid);
+		powernow_table[i].frequency = freq;
+
+		/* verify frequency is OK */
+		if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
+			pr_debug("invalid freq %u kHz, ignoring\n", freq);
+			invalidate_entry(powernow_table, i);
+			continue;
+		}
+
+		/* verify voltage is OK -
+		 * BIOSs are using "off" to indicate invalid */
+		if (vid == VID_OFF) {
+			pr_debug("invalid vid %u, ignoring\n", vid);
+			invalidate_entry(powernow_table, i);
+			continue;
+		}
+
+		if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
+			printk(KERN_INFO PFX "invalid freq entries "
+				"%u kHz vs. %u kHz\n", freq,
+				(unsigned int)
+				(data->acpi_data.states[i].core_frequency
+				 * 1000));
+			invalidate_entry(powernow_table, i);
+			continue;
+		}
+	}
+	return 0;
+}
+
+static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
+{
+	if (data->acpi_data.state_count)
+		acpi_processor_unregister_performance(&data->acpi_data,
+				data->cpu);
+	free_cpumask_var(data->acpi_data.shared_cpu_map);
+}
+
+static int get_transition_latency(struct powernow_k8_data *data)
+{
+	int max_latency = 0;
+	int i;
+	for (i = 0; i < data->acpi_data.state_count; i++) {
+		int cur_latency = data->acpi_data.states[i].transition_latency
+			+ data->acpi_data.states[i].bus_master_latency;
+		if (cur_latency > max_latency)
+			max_latency = cur_latency;
+	}
+	if (max_latency == 0) {
+		/*
+		 * Fam 11h and later may return 0 as transition latency. This
+		 * is intended and means "very fast". While cpufreq core and
+		 * governors currently can handle that gracefully, better set it
+		 * to 1 to avoid problems in the future.
+		 */
+		if (boot_cpu_data.x86 < 0x11)
+			printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
+				"latency\n");
+		max_latency = 1;
+	}
+	/* value in usecs, needs to be in nanoseconds */
+	return 1000 * max_latency;
+}
+
+/* Take a frequency, and issue the fid/vid transition command */
+static int transition_frequency_fidvid(struct powernow_k8_data *data,
+		unsigned int index)
+{
+	u32 fid = 0;
+	u32 vid = 0;
+	int res, i;
+	struct cpufreq_freqs freqs;
+
+	pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
+
+	/* fid/vid correctness check for k8 */
+	/* fid are the lower 8 bits of the index we stored into
+	 * the cpufreq frequency table in find_psb_table, vid
+	 * are the upper 8 bits.
+	 */
+	fid = data->powernow_table[index].index & 0xFF;
+	vid = (data->powernow_table[index].index & 0xFF00) >> 8;
+
+	pr_debug("table matched fid 0x%x, giving vid 0x%x\n", fid, vid);
+
+	if (query_current_values_with_pending_wait(data))
+		return 1;
+
+	if ((data->currvid == vid) && (data->currfid == fid)) {
+		pr_debug("target matches current values (fid 0x%x, vid 0x%x)\n",
+			fid, vid);
+		return 0;
+	}
+
+	pr_debug("cpu %d, changing to fid 0x%x, vid 0x%x\n",
+		smp_processor_id(), fid, vid);
+	freqs.old = find_khz_freq_from_fid(data->currfid);
+	freqs.new = find_khz_freq_from_fid(fid);
+
+	for_each_cpu(i, data->available_cores) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	res = transition_fid_vid(data, fid, vid);
+	freqs.new = find_khz_freq_from_fid(data->currfid);
+
+	for_each_cpu(i, data->available_cores) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+	return res;
+}
+
+/* Take a frequency, and issue the hardware pstate transition command */
+static int transition_frequency_pstate(struct powernow_k8_data *data,
+		unsigned int index)
+{
+	u32 pstate = 0;
+	int res, i;
+	struct cpufreq_freqs freqs;
+
+	pr_debug("cpu %d transition to index %u\n", smp_processor_id(), index);
+
+	/* get MSR index for hardware pstate transition */
+	pstate = index & HW_PSTATE_MASK;
+	if (pstate > data->max_hw_pstate)
+		return 0;
+	freqs.old = find_khz_freq_from_pstate(data->powernow_table,
+			data->currpstate);
+	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
+
+	for_each_cpu(i, data->available_cores) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	res = transition_pstate(data, pstate);
+	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
+
+	for_each_cpu(i, data->available_cores) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+	return res;
+}
+
+/* Driver entry point to switch to the target frequency */
+static int powernowk8_target(struct cpufreq_policy *pol,
+		unsigned targfreq, unsigned relation)
+{
+	cpumask_var_t oldmask;
+	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
+	u32 checkfid;
+	u32 checkvid;
+	unsigned int newstate;
+	int ret = -EIO;
+
+	if (!data)
+		return -EINVAL;
+
+	checkfid = data->currfid;
+	checkvid = data->currvid;
+
+	/* only run on specific CPU from here on. */
+	/* This is poor form: use a workqueue or smp_call_function_single */
+	if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_copy(oldmask, tsk_cpus_allowed(current));
+	set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
+
+	if (smp_processor_id() != pol->cpu) {
+		printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
+		goto err_out;
+	}
+
+	if (pending_bit_stuck()) {
+		printk(KERN_ERR PFX "failing targ, change pending bit set\n");
+		goto err_out;
+	}
+
+	pr_debug("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
+		pol->cpu, targfreq, pol->min, pol->max, relation);
+
+	if (query_current_values_with_pending_wait(data))
+		goto err_out;
+
+	if (cpu_family != CPU_HW_PSTATE) {
+		pr_debug("targ: curr fid 0x%x, vid 0x%x\n",
+		data->currfid, data->currvid);
+
+		if ((checkvid != data->currvid) ||
+		    (checkfid != data->currfid)) {
+			printk(KERN_INFO PFX
+				"error - out of sync, fix 0x%x 0x%x, "
+				"vid 0x%x 0x%x\n",
+				checkfid, data->currfid,
+				checkvid, data->currvid);
+		}
+	}
+
+	if (cpufreq_frequency_table_target(pol, data->powernow_table,
+				targfreq, relation, &newstate))
+		goto err_out;
+
+	mutex_lock(&fidvid_mutex);
+
+	powernow_k8_acpi_pst_values(data, newstate);
+
+	if (cpu_family == CPU_HW_PSTATE)
+		ret = transition_frequency_pstate(data, newstate);
+	else
+		ret = transition_frequency_fidvid(data, newstate);
+	if (ret) {
+		printk(KERN_ERR PFX "transition frequency failed\n");
+		ret = 1;
+		mutex_unlock(&fidvid_mutex);
+		goto err_out;
+	}
+	mutex_unlock(&fidvid_mutex);
+
+	if (cpu_family == CPU_HW_PSTATE)
+		pol->cur = find_khz_freq_from_pstate(data->powernow_table,
+				newstate);
+	else
+		pol->cur = find_khz_freq_from_fid(data->currfid);
+	ret = 0;
+
+err_out:
+	set_cpus_allowed_ptr(current, oldmask);
+	free_cpumask_var(oldmask);
+	return ret;
+}
+
+/* Driver entry point to verify the policy and range of frequencies */
+static int powernowk8_verify(struct cpufreq_policy *pol)
+{
+	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
+
+	if (!data)
+		return -EINVAL;
+
+	return cpufreq_frequency_table_verify(pol, data->powernow_table);
+}
+
+struct init_on_cpu {
+	struct powernow_k8_data *data;
+	int rc;
+};
+
+static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu)
+{
+	struct init_on_cpu *init_on_cpu = _init_on_cpu;
+
+	if (pending_bit_stuck()) {
+		printk(KERN_ERR PFX "failing init, change pending bit set\n");
+		init_on_cpu->rc = -ENODEV;
+		return;
+	}
+
+	if (query_current_values_with_pending_wait(init_on_cpu->data)) {
+		init_on_cpu->rc = -ENODEV;
+		return;
+	}
+
+	if (cpu_family == CPU_OPTERON)
+		fidvid_msr_init();
+
+	init_on_cpu->rc = 0;
+}
+
+/* per CPU init entry point to the driver */
+static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
+{
+	static const char ACPI_PSS_BIOS_BUG_MSG[] =
+		KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n"
+		FW_BUG PFX "Try again with latest BIOS.\n";
+	struct powernow_k8_data *data;
+	struct init_on_cpu init_on_cpu;
+	int rc;
+	struct cpuinfo_x86 *c = &cpu_data(pol->cpu);
+
+	if (!cpu_online(pol->cpu))
+		return -ENODEV;
+
+	smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1);
+	if (rc)
+		return -ENODEV;
+
+	data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
+	if (!data) {
+		printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
+		return -ENOMEM;
+	}
+
+	data->cpu = pol->cpu;
+	data->currpstate = HW_PSTATE_INVALID;
+
+	if (powernow_k8_cpu_init_acpi(data)) {
+		/*
+		 * Use the PSB BIOS structure. This is only available on
+		 * an UP version, and is deprecated by AMD.
+		 */
+		if (num_online_cpus() != 1) {
+			printk_once(ACPI_PSS_BIOS_BUG_MSG);
+			goto err_out;
+		}
+		if (pol->cpu != 0) {
+			printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
+			       "CPU other than CPU0. Complain to your BIOS "
+			       "vendor.\n");
+			goto err_out;
+		}
+		rc = find_psb_table(data);
+		if (rc)
+			goto err_out;
+
+		/* Take a crude guess here.
+		 * That guess was in microseconds, so multiply with 1000 */
+		pol->cpuinfo.transition_latency = (
+			 ((data->rvo + 8) * data->vstable * VST_UNITS_20US) +
+			 ((1 << data->irt) * 30)) * 1000;
+	} else /* ACPI _PSS objects available */
+		pol->cpuinfo.transition_latency = get_transition_latency(data);
+
+	/* only run on specific CPU from here on */
+	init_on_cpu.data = data;
+	smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu,
+				 &init_on_cpu, 1);
+	rc = init_on_cpu.rc;
+	if (rc != 0)
+		goto err_out_exit_acpi;
+
+	if (cpu_family == CPU_HW_PSTATE)
+		cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
+	else
+		cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
+	data->available_cores = pol->cpus;
+
+	if (cpu_family == CPU_HW_PSTATE)
+		pol->cur = find_khz_freq_from_pstate(data->powernow_table,
+				data->currpstate);
+	else
+		pol->cur = find_khz_freq_from_fid(data->currfid);
+	pr_debug("policy current frequency %d kHz\n", pol->cur);
+
+	/* min/max the cpu is capable of */
+	if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) {
+		printk(KERN_ERR FW_BUG PFX "invalid powernow_table\n");
+		powernow_k8_cpu_exit_acpi(data);
+		kfree(data->powernow_table);
+		kfree(data);
+		return -EINVAL;
+	}
+
+	/* Check for APERF/MPERF support in hardware */
+	if (cpu_has(c, X86_FEATURE_APERFMPERF))
+		cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf;
+
+	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
+
+	if (cpu_family == CPU_HW_PSTATE)
+		pr_debug("cpu_init done, current pstate 0x%x\n",
+				data->currpstate);
+	else
+		pr_debug("cpu_init done, current fid 0x%x, vid 0x%x\n",
+			data->currfid, data->currvid);
+
+	per_cpu(powernow_data, pol->cpu) = data;
+
+	return 0;
+
+err_out_exit_acpi:
+	powernow_k8_cpu_exit_acpi(data);
+
+err_out:
+	kfree(data);
+	return -ENODEV;
+}
+
+static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
+{
+	struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
+
+	if (!data)
+		return -EINVAL;
+
+	powernow_k8_cpu_exit_acpi(data);
+
+	cpufreq_frequency_table_put_attr(pol->cpu);
+
+	kfree(data->powernow_table);
+	kfree(data);
+	per_cpu(powernow_data, pol->cpu) = NULL;
+
+	return 0;
+}
+
+static void query_values_on_cpu(void *_err)
+{
+	int *err = _err;
+	struct powernow_k8_data *data = __this_cpu_read(powernow_data);
+
+	*err = query_current_values_with_pending_wait(data);
+}
+
+static unsigned int powernowk8_get(unsigned int cpu)
+{
+	struct powernow_k8_data *data = per_cpu(powernow_data, cpu);
+	unsigned int khz = 0;
+	int err;
+
+	if (!data)
+		return 0;
+
+	smp_call_function_single(cpu, query_values_on_cpu, &err, true);
+	if (err)
+		goto out;
+
+	if (cpu_family == CPU_HW_PSTATE)
+		khz = find_khz_freq_from_pstate(data->powernow_table,
+						data->currpstate);
+	else
+		khz = find_khz_freq_from_fid(data->currfid);
+
+
+out:
+	return khz;
+}
+
+static void _cpb_toggle_msrs(bool t)
+{
+	int cpu;
+
+	get_online_cpus();
+
+	rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+	for_each_cpu(cpu, cpu_online_mask) {
+		struct msr *reg = per_cpu_ptr(msrs, cpu);
+		if (t)
+			reg->l &= ~BIT(25);
+		else
+			reg->l |= BIT(25);
+	}
+	wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+	put_online_cpus();
+}
+
+/*
+ * Switch on/off core performance boosting.
+ *
+ * 0=disable
+ * 1=enable.
+ */
+static void cpb_toggle(bool t)
+{
+	if (!cpb_capable)
+		return;
+
+	if (t && !cpb_enabled) {
+		cpb_enabled = true;
+		_cpb_toggle_msrs(t);
+		printk(KERN_INFO PFX "Core Boosting enabled.\n");
+	} else if (!t && cpb_enabled) {
+		cpb_enabled = false;
+		_cpb_toggle_msrs(t);
+		printk(KERN_INFO PFX "Core Boosting disabled.\n");
+	}
+}
+
+static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
+				 size_t count)
+{
+	int ret = -EINVAL;
+	unsigned long val = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (!ret && (val == 0 || val == 1) && cpb_capable)
+		cpb_toggle(val);
+	else
+		return -EINVAL;
+
+	return count;
+}
+
+static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
+{
+	return sprintf(buf, "%u\n", cpb_enabled);
+}
+
+#define define_one_rw(_name) \
+static struct freq_attr _name = \
+__ATTR(_name, 0644, show_##_name, store_##_name)
+
+define_one_rw(cpb);
+
+static struct freq_attr *powernow_k8_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	&cpb,
+	NULL,
+};
+
+static struct cpufreq_driver cpufreq_amd64_driver = {
+	.verify		= powernowk8_verify,
+	.target		= powernowk8_target,
+	.bios_limit	= acpi_processor_get_bios_limit,
+	.init		= powernowk8_cpu_init,
+	.exit		= __devexit_p(powernowk8_cpu_exit),
+	.get		= powernowk8_get,
+	.name		= "powernow-k8",
+	.owner		= THIS_MODULE,
+	.attr		= powernow_k8_attr,
+};
+
+/*
+ * Clear the boost-disable flag on the CPU_DOWN path so that this cpu
+ * cannot block the remaining ones from boosting. On the CPU_UP path we
+ * simply keep the boost-disable flag in sync with the current global
+ * state.
+ */
+static int cpb_notify(struct notifier_block *nb, unsigned long action,
+		      void *hcpu)
+{
+	unsigned cpu = (long)hcpu;
+	u32 lo, hi;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+
+		if (!cpb_enabled) {
+			rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
+			lo |= BIT(25);
+			wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
+		}
+		break;
+
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
+		lo &= ~BIT(25);
+		wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpb_nb = {
+	.notifier_call		= cpb_notify,
+};
+
+/* driver entry point for init */
+static int __cpuinit powernowk8_init(void)
+{
+	unsigned int i, supported_cpus = 0, cpu;
+	int rv;
+
+	for_each_online_cpu(i) {
+		int rc;
+		smp_call_function_single(i, check_supported_cpu, &rc, 1);
+		if (rc == 0)
+			supported_cpus++;
+	}
+
+	if (supported_cpus != num_online_cpus())
+		return -ENODEV;
+
+	printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
+		num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);
+
+	if (boot_cpu_has(X86_FEATURE_CPB)) {
+
+		cpb_capable = true;
+
+		msrs = msrs_alloc();
+		if (!msrs) {
+			printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
+			return -ENOMEM;
+		}
+
+		register_cpu_notifier(&cpb_nb);
+
+		rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+		for_each_cpu(cpu, cpu_online_mask) {
+			struct msr *reg = per_cpu_ptr(msrs, cpu);
+			cpb_enabled |= !(!!(reg->l & BIT(25)));
+		}
+
+		printk(KERN_INFO PFX "Core Performance Boosting: %s.\n",
+			(cpb_enabled ? "on" : "off"));
+	}
+
+	rv = cpufreq_register_driver(&cpufreq_amd64_driver);
+	if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
+		unregister_cpu_notifier(&cpb_nb);
+		msrs_free(msrs);
+		msrs = NULL;
+	}
+	return rv;
+}
+
+/* driver entry point for term */
+static void __exit powernowk8_exit(void)
+{
+	pr_debug("exit\n");
+
+	if (boot_cpu_has(X86_FEATURE_CPB)) {
+		msrs_free(msrs);
+		msrs = NULL;
+
+		unregister_cpu_notifier(&cpb_nb);
+	}
+
+	cpufreq_unregister_driver(&cpufreq_amd64_driver);
+}
+
+MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
+		"Mark Langsdorf <mark.langsdorf@amd.com>");
+MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
+MODULE_LICENSE("GPL");
+
+late_initcall(powernowk8_init);
+module_exit(powernowk8_exit);
diff --git a/drivers/cpufreq/powernow-k8.h b/drivers/cpufreq/powernow-k8.h
new file mode 100644
index 000000000000..3744d26cdc2b
--- /dev/null
+++ b/drivers/cpufreq/powernow-k8.h
@@ -0,0 +1,222 @@
+/*
+ *  (c) 2003-2006 Advanced Micro Devices, Inc.
+ *  Your use of this code is subject to the terms and conditions of the
+ *  GNU general public license version 2. See "COPYING" or
+ *  http://www.gnu.org/licenses/gpl.html
+ */
+
+enum pstate {
+	HW_PSTATE_INVALID = 0xff,
+	HW_PSTATE_0 = 0,
+	HW_PSTATE_1 = 1,
+	HW_PSTATE_2 = 2,
+	HW_PSTATE_3 = 3,
+	HW_PSTATE_4 = 4,
+	HW_PSTATE_5 = 5,
+	HW_PSTATE_6 = 6,
+	HW_PSTATE_7 = 7,
+};
+
+struct powernow_k8_data {
+	unsigned int cpu;
+
+	u32 numps;  /* number of p-states */
+	u32 batps;  /* number of p-states supported on battery */
+	u32 max_hw_pstate; /* maximum legal hardware pstate */
+
+	/* these values are constant when the PSB is used to determine
+	 * vid/fid pairings, but are modified during the ->target() call
+	 * when ACPI is used */
+	u32 rvo;     /* ramp voltage offset */
+	u32 irt;     /* isochronous relief time */
+	u32 vidmvs;  /* usable value calculated from mvs */
+	u32 vstable; /* voltage stabilization time, units 20 us */
+	u32 plllock; /* pll lock time, units 1 us */
+        u32 exttype; /* extended interface = 1 */
+
+	/* keep track of the current fid / vid or pstate */
+	u32 currvid;
+	u32 currfid;
+	enum pstate currpstate;
+
+	/* the powernow_table includes all frequency and vid/fid pairings:
+	 * fid are the lower 8 bits of the index, vid are the upper 8 bits.
+	 * frequency is in kHz */
+	struct cpufreq_frequency_table  *powernow_table;
+
+	/* the acpi table needs to be kept. it's only available if ACPI was
+	 * used to determine valid frequency/vid/fid states */
+	struct acpi_processor_performance acpi_data;
+
+	/* we need to keep track of associated cores, but let cpufreq
+	 * handle hotplug events - so just point at cpufreq pol->cpus
+	 * structure */
+	struct cpumask *available_cores;
+};
+
+/* processor's cpuid instruction support */
+#define CPUID_PROCESSOR_SIGNATURE	1	/* function 1 */
+#define CPUID_XFAM			0x0ff00000	/* extended family */
+#define CPUID_XFAM_K8			0
+#define CPUID_XMOD			0x000f0000	/* extended model */
+#define CPUID_XMOD_REV_MASK		0x000c0000
+#define CPUID_XFAM_10H			0x00100000	/* family 0x10 */
+#define CPUID_USE_XFAM_XMOD		0x00000f00
+#define CPUID_GET_MAX_CAPABILITIES	0x80000000
+#define CPUID_FREQ_VOLT_CAPABILITIES	0x80000007
+#define P_STATE_TRANSITION_CAPABLE	6
+
+/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For     */
+/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and   */
+/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */
+/* the register number is placed in ecx, and the data is returned in edx:eax. */
+
+#define MSR_FIDVID_CTL      0xc0010041
+#define MSR_FIDVID_STATUS   0xc0010042
+
+/* Field definitions within the FID VID Low Control MSR : */
+#define MSR_C_LO_INIT_FID_VID     0x00010000
+#define MSR_C_LO_NEW_VID          0x00003f00
+#define MSR_C_LO_NEW_FID          0x0000003f
+#define MSR_C_LO_VID_SHIFT        8
+
+/* Field definitions within the FID VID High Control MSR : */
+#define MSR_C_HI_STP_GNT_TO	  0x000fffff
+
+/* Field definitions within the FID VID Low Status MSR : */
+#define MSR_S_LO_CHANGE_PENDING   0x80000000   /* cleared when completed */
+#define MSR_S_LO_MAX_RAMP_VID     0x3f000000
+#define MSR_S_LO_MAX_FID          0x003f0000
+#define MSR_S_LO_START_FID        0x00003f00
+#define MSR_S_LO_CURRENT_FID      0x0000003f
+
+/* Field definitions within the FID VID High Status MSR : */
+#define MSR_S_HI_MIN_WORKING_VID  0x3f000000
+#define MSR_S_HI_MAX_WORKING_VID  0x003f0000
+#define MSR_S_HI_START_VID        0x00003f00
+#define MSR_S_HI_CURRENT_VID      0x0000003f
+#define MSR_C_HI_STP_GNT_BENIGN	  0x00000001
+
+
+/* Hardware Pstate _PSS and MSR definitions */
+#define USE_HW_PSTATE		0x00000080
+#define HW_PSTATE_MASK 		0x00000007
+#define HW_PSTATE_VALID_MASK 	0x80000000
+#define HW_PSTATE_MAX_MASK	0x000000f0
+#define HW_PSTATE_MAX_SHIFT	4
+#define MSR_PSTATE_DEF_BASE 	0xc0010064 /* base of Pstate MSRs */
+#define MSR_PSTATE_STATUS 	0xc0010063 /* Pstate Status MSR */
+#define MSR_PSTATE_CTRL 	0xc0010062 /* Pstate control MSR */
+#define MSR_PSTATE_CUR_LIMIT	0xc0010061 /* pstate current limit MSR */
+
+/* define the two driver architectures */
+#define CPU_OPTERON 0
+#define CPU_HW_PSTATE 1
+
+
+/*
+ * There are restrictions frequencies have to follow:
+ * - only 1 entry in the low fid table ( <=1.4GHz )
+ * - lowest entry in the high fid table must be >= 2 * the entry in the
+ *   low fid table
+ * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry
+ *   in the low fid table
+ * - the parts can only step at <= 200 MHz intervals, odd fid values are
+ *   supported in revision G and later revisions.
+ * - lowest frequency must be >= interprocessor hypertransport link speed
+ *   (only applies to MP systems obviously)
+ */
+
+/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */
+#define LO_FID_TABLE_TOP     7	/* fid values marking the boundary    */
+#define HI_FID_TABLE_BOTTOM  8	/* between the low and high tables    */
+
+#define LO_VCOFREQ_TABLE_TOP    1400	/* corresponding vco frequency values */
+#define HI_VCOFREQ_TABLE_BOTTOM 1600
+
+#define MIN_FREQ_RESOLUTION  200 /* fids jump by 2 matching freq jumps by 200 */
+
+#define MAX_FID 0x2a	/* Spec only gives FID values as far as 5 GHz */
+#define LEAST_VID 0x3e	/* Lowest (numerically highest) useful vid value */
+
+#define MIN_FREQ 800	/* Min and max freqs, per spec */
+#define MAX_FREQ 5000
+
+#define INVALID_FID_MASK 0xffffffc0  /* not a valid fid if these bits are set */
+#define INVALID_VID_MASK 0xffffffc0  /* not a valid vid if these bits are set */
+
+#define VID_OFF 0x3f
+
+#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */
+
+#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */
+
+#define MAXIMUM_VID_STEPS 1  /* Current cpus only allow a single step of 25mV */
+#define VST_UNITS_20US 20   /* Voltage Stabilization Time is in units of 20us */
+
+/*
+ * Most values of interest are encoded in a single field of the _PSS
+ * entries: the "control" value.
+ */
+
+#define IRT_SHIFT      30
+#define RVO_SHIFT      28
+#define EXT_TYPE_SHIFT 27
+#define PLL_L_SHIFT    20
+#define MVS_SHIFT      18
+#define VST_SHIFT      11
+#define VID_SHIFT       6
+#define IRT_MASK        3
+#define RVO_MASK        3
+#define EXT_TYPE_MASK   1
+#define PLL_L_MASK   0x7f
+#define MVS_MASK        3
+#define VST_MASK     0x7f
+#define VID_MASK     0x1f
+#define FID_MASK     0x1f
+#define EXT_VID_MASK 0x3f
+#define EXT_FID_MASK 0x3f
+
+
+/*
+ * Version 1.4 of the PSB table. This table is constructed by BIOS and is
+ * to tell the OS's power management driver which VIDs and FIDs are
+ * supported by this particular processor.
+ * If the data in the PSB / PST is wrong, then this driver will program the
+ * wrong values into hardware, which is very likely to lead to a crash.
+ */
+
+#define PSB_ID_STRING      "AMDK7PNOW!"
+#define PSB_ID_STRING_LEN  10
+
+#define PSB_VERSION_1_4  0x14
+
+struct psb_s {
+	u8 signature[10];
+	u8 tableversion;
+	u8 flags1;
+	u16 vstable;
+	u8 flags2;
+	u8 num_tables;
+	u32 cpuid;
+	u8 plllocktime;
+	u8 maxfid;
+	u8 maxvid;
+	u8 numps;
+};
+
+/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */
+struct pst_s {
+	u8 fid;
+	u8 vid;
+};
+
+static int core_voltage_pre_transition(struct powernow_k8_data *data,
+	u32 reqvid, u32 regfid);
+static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
+static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
+
+static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
+
+static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
+static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c
new file mode 100644
index 000000000000..1e205e6b1727
--- /dev/null
+++ b/drivers/cpufreq/sc520_freq.c
@@ -0,0 +1,192 @@
+/*
+ *	sc520_freq.c: cpufreq driver for the AMD Elan sc520
+ *
+ *	Copyright (C) 2005 Sean Young <sean@mess.org>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ *	Based on elanfreq.c
+ *
+ *	2005-03-30: - initial revision
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/delay.h>
+#include <linux/cpufreq.h>
+#include <linux/timex.h>
+#include <linux/io.h>
+
+#include <asm/msr.h>
+
+#define MMCR_BASE	0xfffef000	/* The default base address */
+#define OFFS_CPUCTL	0x2   /* CPU Control Register */
+
+static __u8 __iomem *cpuctl;
+
+#define PFX "sc520_freq: "
+
+static struct cpufreq_frequency_table sc520_freq_table[] = {
+	{0x01,	100000},
+	{0x02,	133000},
+	{0,	CPUFREQ_TABLE_END},
+};
+
+static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
+{
+	u8 clockspeed_reg = *cpuctl;
+
+	switch (clockspeed_reg & 0x03) {
+	default:
+		printk(KERN_ERR PFX "error: cpuctl register has unexpected "
+				"value %02x\n", clockspeed_reg);
+	case 0x01:
+		return 100000;
+	case 0x02:
+		return 133000;
+	}
+}
+
+static void sc520_freq_set_cpu_state(unsigned int state)
+{
+
+	struct cpufreq_freqs	freqs;
+	u8 clockspeed_reg;
+
+	freqs.old = sc520_freq_get_cpu_frequency(0);
+	freqs.new = sc520_freq_table[state].frequency;
+	freqs.cpu = 0; /* AMD Elan is UP */
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+
+	pr_debug("attempting to set frequency to %i kHz\n",
+			sc520_freq_table[state].frequency);
+
+	local_irq_disable();
+
+	clockspeed_reg = *cpuctl & ~0x03;
+	*cpuctl = clockspeed_reg | sc520_freq_table[state].index;
+
+	local_irq_enable();
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+};
+
+static int sc520_freq_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]);
+}
+
+static int sc520_freq_target(struct cpufreq_policy *policy,
+			    unsigned int target_freq,
+			    unsigned int relation)
+{
+	unsigned int newstate = 0;
+
+	if (cpufreq_frequency_table_target(policy, sc520_freq_table,
+				target_freq, relation, &newstate))
+		return -EINVAL;
+
+	sc520_freq_set_cpu_state(newstate);
+
+	return 0;
+}
+
+
+/*
+ *	Module init and exit code
+ */
+
+static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+	int result;
+
+	/* capability check */
+	if (c->x86_vendor != X86_VENDOR_AMD ||
+	    c->x86 != 4 || c->x86_model != 9)
+		return -ENODEV;
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.transition_latency = 1000000; /* 1ms */
+	policy->cur = sc520_freq_get_cpu_frequency(0);
+
+	result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table);
+	if (result)
+		return result;
+
+	cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu);
+
+	return 0;
+}
+
+
+static int sc520_freq_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+
+static struct freq_attr *sc520_freq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+
+static struct cpufreq_driver sc520_freq_driver = {
+	.get	= sc520_freq_get_cpu_frequency,
+	.verify	= sc520_freq_verify,
+	.target	= sc520_freq_target,
+	.init	= sc520_freq_cpu_init,
+	.exit	= sc520_freq_cpu_exit,
+	.name	= "sc520_freq",
+	.owner	= THIS_MODULE,
+	.attr	= sc520_freq_attr,
+};
+
+
+static int __init sc520_freq_init(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+	int err;
+
+	/* Test if we have the right hardware */
+	if (c->x86_vendor != X86_VENDOR_AMD ||
+	    c->x86 != 4 || c->x86_model != 9) {
+		pr_debug("no Elan SC520 processor found!\n");
+		return -ENODEV;
+	}
+	cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
+	if (!cpuctl) {
+		printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
+		return -ENOMEM;
+	}
+
+	err = cpufreq_register_driver(&sc520_freq_driver);
+	if (err)
+		iounmap(cpuctl);
+
+	return err;
+}
+
+
+static void __exit sc520_freq_exit(void)
+{
+	cpufreq_unregister_driver(&sc520_freq_driver);
+	iounmap(cpuctl);
+}
+
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sean Young <sean@mess.org>");
+MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU");
+
+module_init(sc520_freq_init);
+module_exit(sc520_freq_exit);
+
diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c
new file mode 100644
index 000000000000..6ea3455def21
--- /dev/null
+++ b/drivers/cpufreq/speedstep-centrino.c
@@ -0,0 +1,633 @@
+/*
+ * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium
+ * M (part of the Centrino chipset).
+ *
+ * Since the original Pentium M, most new Intel CPUs support Enhanced
+ * SpeedStep.
+ *
+ * Despite the "SpeedStep" in the name, this is almost entirely unlike
+ * traditional SpeedStep.
+ *
+ * Modelled on speedstep.c
+ *
+ * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/sched.h>	/* current */
+#include <linux/delay.h>
+#include <linux/compiler.h>
+#include <linux/gfp.h>
+
+#include <asm/msr.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+
+#define PFX		"speedstep-centrino: "
+#define MAINTAINER	"cpufreq@vger.kernel.org"
+
+#define INTEL_MSR_RANGE	(0xffff)
+
+struct cpu_id
+{
+	__u8	x86;            /* CPU family */
+	__u8	x86_model;	/* model */
+	__u8	x86_mask;	/* stepping */
+};
+
+enum {
+	CPU_BANIAS,
+	CPU_DOTHAN_A1,
+	CPU_DOTHAN_A2,
+	CPU_DOTHAN_B0,
+	CPU_MP4HT_D0,
+	CPU_MP4HT_E0,
+};
+
+static const struct cpu_id cpu_ids[] = {
+	[CPU_BANIAS]	= { 6,  9, 5 },
+	[CPU_DOTHAN_A1]	= { 6, 13, 1 },
+	[CPU_DOTHAN_A2]	= { 6, 13, 2 },
+	[CPU_DOTHAN_B0]	= { 6, 13, 6 },
+	[CPU_MP4HT_D0]	= {15,  3, 4 },
+	[CPU_MP4HT_E0]	= {15,  4, 1 },
+};
+#define N_IDS	ARRAY_SIZE(cpu_ids)
+
+struct cpu_model
+{
+	const struct cpu_id *cpu_id;
+	const char	*model_name;
+	unsigned	max_freq; /* max clock in kHz */
+
+	struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
+};
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
+				  const struct cpu_id *x);
+
+/* Operating points for current CPU */
+static DEFINE_PER_CPU(struct cpu_model *, centrino_model);
+static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu);
+
+static struct cpufreq_driver centrino_driver;
+
+#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE
+
+/* Computes the correct form for IA32_PERF_CTL MSR for a particular
+   frequency/voltage operating point; frequency in MHz, volts in mV.
+   This is stored as "index" in the structure. */
+#define OP(mhz, mv)							\
+	{								\
+		.frequency = (mhz) * 1000,				\
+		.index = (((mhz)/100) << 8) | ((mv - 700) / 16)		\
+	}
+
+/*
+ * These voltage tables were derived from the Intel Pentium M
+ * datasheet, document 25261202.pdf, Table 5.  I have verified they
+ * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium
+ * M.
+ */
+
+/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */
+static struct cpufreq_frequency_table banias_900[] =
+{
+	OP(600,  844),
+	OP(800,  988),
+	OP(900, 1004),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */
+static struct cpufreq_frequency_table banias_1000[] =
+{
+	OP(600,   844),
+	OP(800,   972),
+	OP(900,   988),
+	OP(1000, 1004),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */
+static struct cpufreq_frequency_table banias_1100[] =
+{
+	OP( 600,  956),
+	OP( 800, 1020),
+	OP( 900, 1100),
+	OP(1000, 1164),
+	OP(1100, 1180),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+
+/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */
+static struct cpufreq_frequency_table banias_1200[] =
+{
+	OP( 600,  956),
+	OP( 800, 1004),
+	OP( 900, 1020),
+	OP(1000, 1100),
+	OP(1100, 1164),
+	OP(1200, 1180),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.30GHz (Banias) */
+static struct cpufreq_frequency_table banias_1300[] =
+{
+	OP( 600,  956),
+	OP( 800, 1260),
+	OP(1000, 1292),
+	OP(1200, 1356),
+	OP(1300, 1388),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.40GHz (Banias) */
+static struct cpufreq_frequency_table banias_1400[] =
+{
+	OP( 600,  956),
+	OP( 800, 1180),
+	OP(1000, 1308),
+	OP(1200, 1436),
+	OP(1400, 1484),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.50GHz (Banias) */
+static struct cpufreq_frequency_table banias_1500[] =
+{
+	OP( 600,  956),
+	OP( 800, 1116),
+	OP(1000, 1228),
+	OP(1200, 1356),
+	OP(1400, 1452),
+	OP(1500, 1484),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.60GHz (Banias) */
+static struct cpufreq_frequency_table banias_1600[] =
+{
+	OP( 600,  956),
+	OP( 800, 1036),
+	OP(1000, 1164),
+	OP(1200, 1276),
+	OP(1400, 1420),
+	OP(1600, 1484),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+
+/* Intel Pentium M processor 1.70GHz (Banias) */
+static struct cpufreq_frequency_table banias_1700[] =
+{
+	OP( 600,  956),
+	OP( 800, 1004),
+	OP(1000, 1116),
+	OP(1200, 1228),
+	OP(1400, 1308),
+	OP(1700, 1484),
+	{ .frequency = CPUFREQ_TABLE_END }
+};
+#undef OP
+
+#define _BANIAS(cpuid, max, name)	\
+{	.cpu_id		= cpuid,	\
+	.model_name	= "Intel(R) Pentium(R) M processor " name "MHz", \
+	.max_freq	= (max)*1000,	\
+	.op_points	= banias_##max,	\
+}
+#define BANIAS(max)	_BANIAS(&cpu_ids[CPU_BANIAS], max, #max)
+
+/* CPU models, their operating frequency range, and freq/voltage
+   operating points */
+static struct cpu_model models[] =
+{
+	_BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"),
+	BANIAS(1000),
+	BANIAS(1100),
+	BANIAS(1200),
+	BANIAS(1300),
+	BANIAS(1400),
+	BANIAS(1500),
+	BANIAS(1600),
+	BANIAS(1700),
+
+	/* NULL model_name is a wildcard */
+	{ &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL },
+	{ &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL },
+	{ &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL },
+	{ &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL },
+	{ &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL },
+
+	{ NULL, }
+};
+#undef _BANIAS
+#undef BANIAS
+
+static int centrino_cpu_init_table(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
+	struct cpu_model *model;
+
+	for(model = models; model->cpu_id != NULL; model++)
+		if (centrino_verify_cpu_id(cpu, model->cpu_id) &&
+		    (model->model_name == NULL ||
+		     strcmp(cpu->x86_model_id, model->model_name) == 0))
+			break;
+
+	if (model->cpu_id == NULL) {
+		/* No match at all */
+		pr_debug("no support for CPU model \"%s\": "
+		       "send /proc/cpuinfo to " MAINTAINER "\n",
+		       cpu->x86_model_id);
+		return -ENOENT;
+	}
+
+	if (model->op_points == NULL) {
+		/* Matched a non-match */
+		pr_debug("no table support for CPU model \"%s\"\n",
+		       cpu->x86_model_id);
+		pr_debug("try using the acpi-cpufreq driver\n");
+		return -ENOENT;
+	}
+
+	per_cpu(centrino_model, policy->cpu) = model;
+
+	pr_debug("found \"%s\": max frequency: %dkHz\n",
+	       model->model_name, model->max_freq);
+
+	return 0;
+}
+
+#else
+static inline int centrino_cpu_init_table(struct cpufreq_policy *policy)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
+
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
+				  const struct cpu_id *x)
+{
+	if ((c->x86 == x->x86) &&
+	    (c->x86_model == x->x86_model) &&
+	    (c->x86_mask == x->x86_mask))
+		return 1;
+	return 0;
+}
+
+/* To be called only after centrino_model is initialized */
+static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
+{
+	int i;
+
+	/*
+	 * Extract clock in kHz from PERF_CTL value
+	 * for centrino, as some DSDTs are buggy.
+	 * Ideally, this can be done using the acpi_data structure.
+	 */
+	if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) ||
+	    (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) ||
+	    (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) {
+		msr = (msr >> 8) & 0xff;
+		return msr * 100000;
+	}
+
+	if ((!per_cpu(centrino_model, cpu)) ||
+	    (!per_cpu(centrino_model, cpu)->op_points))
+		return 0;
+
+	msr &= 0xffff;
+	for (i = 0;
+		per_cpu(centrino_model, cpu)->op_points[i].frequency
+							!= CPUFREQ_TABLE_END;
+	     i++) {
+		if (msr == per_cpu(centrino_model, cpu)->op_points[i].index)
+			return per_cpu(centrino_model, cpu)->
+							op_points[i].frequency;
+	}
+	if (failsafe)
+		return per_cpu(centrino_model, cpu)->op_points[i-1].frequency;
+	else
+		return 0;
+}
+
+/* Return the current CPU frequency in kHz */
+static unsigned int get_cur_freq(unsigned int cpu)
+{
+	unsigned l, h;
+	unsigned clock_freq;
+
+	rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h);
+	clock_freq = extract_clock(l, cpu, 0);
+
+	if (unlikely(clock_freq == 0)) {
+		/*
+		 * On some CPUs, we can see transient MSR values (which are
+		 * not present in _PSS), while CPU is doing some automatic
+		 * P-state transition (like TM2). Get the last freq set 
+		 * in PERF_CTL.
+		 */
+		rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h);
+		clock_freq = extract_clock(l, cpu, 1);
+	}
+	return clock_freq;
+}
+
+
+static int centrino_cpu_init(struct cpufreq_policy *policy)
+{
+	struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
+	unsigned freq;
+	unsigned l, h;
+	int ret;
+	int i;
+
+	/* Only Intel makes Enhanced Speedstep-capable CPUs */
+	if (cpu->x86_vendor != X86_VENDOR_INTEL ||
+	    !cpu_has(cpu, X86_FEATURE_EST))
+		return -ENODEV;
+
+	if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC))
+		centrino_driver.flags |= CPUFREQ_CONST_LOOPS;
+
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	for (i = 0; i < N_IDS; i++)
+		if (centrino_verify_cpu_id(cpu, &cpu_ids[i]))
+			break;
+
+	if (i != N_IDS)
+		per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i];
+
+	if (!per_cpu(centrino_cpu, policy->cpu)) {
+		pr_debug("found unsupported CPU with "
+		"Enhanced SpeedStep: send /proc/cpuinfo to "
+		MAINTAINER "\n");
+		return -ENODEV;
+	}
+
+	if (centrino_cpu_init_table(policy)) {
+		return -ENODEV;
+	}
+
+	/* Check to see if Enhanced SpeedStep is enabled, and try to
+	   enable it if not. */
+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+
+	if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+		l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
+		pr_debug("trying to enable Enhanced SpeedStep (%x)\n", l);
+		wrmsr(MSR_IA32_MISC_ENABLE, l, h);
+
+		/* check to see if it stuck */
+		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+		if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+			printk(KERN_INFO PFX
+				"couldn't enable Enhanced SpeedStep\n");
+			return -ENODEV;
+		}
+	}
+
+	freq = get_cur_freq(policy->cpu);
+	policy->cpuinfo.transition_latency = 10000;
+						/* 10uS transition latency */
+	policy->cur = freq;
+
+	pr_debug("centrino_cpu_init: cur=%dkHz\n", policy->cur);
+
+	ret = cpufreq_frequency_table_cpuinfo(policy,
+		per_cpu(centrino_model, policy->cpu)->op_points);
+	if (ret)
+		return (ret);
+
+	cpufreq_frequency_table_get_attr(
+		per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu);
+
+	return 0;
+}
+
+static int centrino_cpu_exit(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+
+	if (!per_cpu(centrino_model, cpu))
+		return -ENODEV;
+
+	cpufreq_frequency_table_put_attr(cpu);
+
+	per_cpu(centrino_model, cpu) = NULL;
+
+	return 0;
+}
+
+/**
+ * centrino_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Limit must be within this model's frequency range at least one
+ * border included.
+ */
+static int centrino_verify (struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy,
+			per_cpu(centrino_model, policy->cpu)->op_points);
+}
+
+/**
+ * centrino_setpolicy - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency
+ *	(CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * Sets a new CPUFreq policy.
+ */
+static int centrino_target (struct cpufreq_policy *policy,
+			    unsigned int target_freq,
+			    unsigned int relation)
+{
+	unsigned int    newstate = 0;
+	unsigned int	msr, oldmsr = 0, h = 0, cpu = policy->cpu;
+	struct cpufreq_freqs	freqs;
+	int			retval = 0;
+	unsigned int		j, k, first_cpu, tmp;
+	cpumask_var_t covered_cpus;
+
+	if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)))
+		return -ENOMEM;
+
+	if (unlikely(per_cpu(centrino_model, cpu) == NULL)) {
+		retval = -ENODEV;
+		goto out;
+	}
+
+	if (unlikely(cpufreq_frequency_table_target(policy,
+			per_cpu(centrino_model, cpu)->op_points,
+			target_freq,
+			relation,
+			&newstate))) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	first_cpu = 1;
+	for_each_cpu(j, policy->cpus) {
+		int good_cpu;
+
+		/* cpufreq holds the hotplug lock, so we are safe here */
+		if (!cpu_online(j))
+			continue;
+
+		/*
+		 * Support for SMP systems.
+		 * Make sure we are running on CPU that wants to change freq
+		 */
+		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+			good_cpu = cpumask_any_and(policy->cpus,
+						   cpu_online_mask);
+		else
+			good_cpu = j;
+
+		if (good_cpu >= nr_cpu_ids) {
+			pr_debug("couldn't limit to CPUs in this domain\n");
+			retval = -EAGAIN;
+			if (first_cpu) {
+				/* We haven't started the transition yet. */
+				goto out;
+			}
+			break;
+		}
+
+		msr = per_cpu(centrino_model, cpu)->op_points[newstate].index;
+
+		if (first_cpu) {
+			rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h);
+			if (msr == (oldmsr & 0xffff)) {
+				pr_debug("no change needed - msr was and needs "
+					"to be %x\n", oldmsr);
+				retval = 0;
+				goto out;
+			}
+
+			freqs.old = extract_clock(oldmsr, cpu, 0);
+			freqs.new = extract_clock(msr, cpu, 0);
+
+			pr_debug("target=%dkHz old=%d new=%d msr=%04x\n",
+				target_freq, freqs.old, freqs.new, msr);
+
+			for_each_cpu(k, policy->cpus) {
+				if (!cpu_online(k))
+					continue;
+				freqs.cpu = k;
+				cpufreq_notify_transition(&freqs,
+					CPUFREQ_PRECHANGE);
+			}
+
+			first_cpu = 0;
+			/* all but 16 LSB are reserved, treat them with care */
+			oldmsr &= ~0xffff;
+			msr &= 0xffff;
+			oldmsr |= msr;
+		}
+
+		wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h);
+		if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+			break;
+
+		cpumask_set_cpu(j, covered_cpus);
+	}
+
+	for_each_cpu(k, policy->cpus) {
+		if (!cpu_online(k))
+			continue;
+		freqs.cpu = k;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+
+	if (unlikely(retval)) {
+		/*
+		 * We have failed halfway through the frequency change.
+		 * We have sent callbacks to policy->cpus and
+		 * MSRs have already been written on coverd_cpus.
+		 * Best effort undo..
+		 */
+
+		for_each_cpu(j, covered_cpus)
+			wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h);
+
+		tmp = freqs.new;
+		freqs.new = freqs.old;
+		freqs.old = tmp;
+		for_each_cpu(j, policy->cpus) {
+			if (!cpu_online(j))
+				continue;
+			cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+			cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+		}
+	}
+	retval = 0;
+
+out:
+	free_cpumask_var(covered_cpus);
+	return retval;
+}
+
+static struct freq_attr* centrino_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver centrino_driver = {
+	.name		= "centrino", /* should be speedstep-centrino,
+					 but there's a 16 char limit */
+	.init		= centrino_cpu_init,
+	.exit		= centrino_cpu_exit,
+	.verify		= centrino_verify,
+	.target		= centrino_target,
+	.get		= get_cur_freq,
+	.attr           = centrino_attr,
+	.owner		= THIS_MODULE,
+};
+
+
+/**
+ * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver
+ *
+ * Initializes the Enhanced SpeedStep support. Returns -ENODEV on
+ * unsupported devices, -ENOENT if there's no voltage table for this
+ * particular CPU model, -EINVAL on problems during initiatization,
+ * and zero on success.
+ *
+ * This is quite picky.  Not only does the CPU have to advertise the
+ * "est" flag in the cpuid capability flags, we look for a specific
+ * CPU model and stepping, and we need to have the exact model name in
+ * our voltage tables.  That is, be paranoid about not releasing
+ * someone's valuable magic smoke.
+ */
+static int __init centrino_init(void)
+{
+	struct cpuinfo_x86 *cpu = &cpu_data(0);
+
+	if (!cpu_has(cpu, X86_FEATURE_EST))
+		return -ENODEV;
+
+	return cpufreq_register_driver(&centrino_driver);
+}
+
+static void __exit centrino_exit(void)
+{
+	cpufreq_unregister_driver(&centrino_driver);
+}
+
+MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
+MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors.");
+MODULE_LICENSE ("GPL");
+
+late_initcall(centrino_init);
+module_exit(centrino_exit);
diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c
new file mode 100644
index 000000000000..a748ce782fee
--- /dev/null
+++ b/drivers/cpufreq/speedstep-ich.c
@@ -0,0 +1,448 @@
+/*
+ * (C) 2001  Dave Jones, Arjan van de ven.
+ * (C) 2002 - 2003  Dominik Brodowski <linux@brodo.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *  Based upon reverse engineered information, and on Intel documentation
+ *  for chipsets ICH2-M and ICH3-M.
+ *
+ *  Many thanks to Ducrot Bruno for finding and fixing the last
+ *  "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler
+ *  for extensive testing.
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+
+/*********************************************************************
+ *                        SPEEDSTEP - DEFINITIONS                    *
+ *********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+
+#include "speedstep-lib.h"
+
+
+/* speedstep_chipset:
+ *   It is necessary to know which chipset is used. As accesses to
+ * this device occur at various places in this module, we need a
+ * static struct pci_dev * pointing to that device.
+ */
+static struct pci_dev *speedstep_chipset_dev;
+
+
+/* speedstep_processor
+ */
+static enum speedstep_processor speedstep_processor;
+
+static u32 pmbase;
+
+/*
+ *   There are only two frequency states for each processor. Values
+ * are in kHz for the time being.
+ */
+static struct cpufreq_frequency_table speedstep_freqs[] = {
+	{SPEEDSTEP_HIGH,	0},
+	{SPEEDSTEP_LOW,		0},
+	{0,			CPUFREQ_TABLE_END},
+};
+
+
+/**
+ * speedstep_find_register - read the PMBASE address
+ *
+ * Returns: -ENODEV if no register could be found
+ */
+static int speedstep_find_register(void)
+{
+	if (!speedstep_chipset_dev)
+		return -ENODEV;
+
+	/* get PMBASE */
+	pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
+	if (!(pmbase & 0x01)) {
+		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+		return -ENODEV;
+	}
+
+	pmbase &= 0xFFFFFFFE;
+	if (!pmbase) {
+		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+		return -ENODEV;
+	}
+
+	pr_debug("pmbase is 0x%x\n", pmbase);
+	return 0;
+}
+
+/**
+ * speedstep_set_state - set the SpeedStep state
+ * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ *   Tries to change the SpeedStep state.  Can be called from
+ *   smp_call_function_single.
+ */
+static void speedstep_set_state(unsigned int state)
+{
+	u8 pm2_blk;
+	u8 value;
+	unsigned long flags;
+
+	if (state > 0x1)
+		return;
+
+	/* Disable IRQs */
+	local_irq_save(flags);
+
+	/* read state */
+	value = inb(pmbase + 0x50);
+
+	pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
+
+	/* write new state */
+	value &= 0xFE;
+	value |= state;
+
+	pr_debug("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase);
+
+	/* Disable bus master arbitration */
+	pm2_blk = inb(pmbase + 0x20);
+	pm2_blk |= 0x01;
+	outb(pm2_blk, (pmbase + 0x20));
+
+	/* Actual transition */
+	outb(value, (pmbase + 0x50));
+
+	/* Restore bus master arbitration */
+	pm2_blk &= 0xfe;
+	outb(pm2_blk, (pmbase + 0x20));
+
+	/* check if transition was successful */
+	value = inb(pmbase + 0x50);
+
+	/* Enable IRQs */
+	local_irq_restore(flags);
+
+	pr_debug("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
+
+	if (state == (value & 0x1))
+		pr_debug("change to %u MHz succeeded\n",
+			speedstep_get_frequency(speedstep_processor) / 1000);
+	else
+		printk(KERN_ERR "cpufreq: change failed - I/O error\n");
+
+	return;
+}
+
+/* Wrapper for smp_call_function_single. */
+static void _speedstep_set_state(void *_state)
+{
+	speedstep_set_state(*(unsigned int *)_state);
+}
+
+/**
+ * speedstep_activate - activate SpeedStep control in the chipset
+ *
+ *   Tries to activate the SpeedStep status and control registers.
+ * Returns -EINVAL on an unsupported chipset, and zero on success.
+ */
+static int speedstep_activate(void)
+{
+	u16 value = 0;
+
+	if (!speedstep_chipset_dev)
+		return -EINVAL;
+
+	pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value);
+	if (!(value & 0x08)) {
+		value |= 0x08;
+		pr_debug("activating SpeedStep (TM) registers\n");
+		pci_write_config_word(speedstep_chipset_dev, 0x00A0, value);
+	}
+
+	return 0;
+}
+
+
+/**
+ * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic
+ *
+ *   Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to
+ * the LPC bridge / PM module which contains all power-management
+ * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
+ * chipset, or zero on failure.
+ */
+static unsigned int speedstep_detect_chipset(void)
+{
+	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			      PCI_DEVICE_ID_INTEL_82801DB_12,
+			      PCI_ANY_ID, PCI_ANY_ID,
+			      NULL);
+	if (speedstep_chipset_dev)
+		return 4; /* 4-M */
+
+	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			      PCI_DEVICE_ID_INTEL_82801CA_12,
+			      PCI_ANY_ID, PCI_ANY_ID,
+			      NULL);
+	if (speedstep_chipset_dev)
+		return 3; /* 3-M */
+
+
+	speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			      PCI_DEVICE_ID_INTEL_82801BA_10,
+			      PCI_ANY_ID, PCI_ANY_ID,
+			      NULL);
+	if (speedstep_chipset_dev) {
+		/* speedstep.c causes lockups on Dell Inspirons 8000 and
+		 * 8100 which use a pretty old revision of the 82815
+		 * host brige. Abort on these systems.
+		 */
+		static struct pci_dev *hostbridge;
+
+		hostbridge  = pci_get_subsys(PCI_VENDOR_ID_INTEL,
+			      PCI_DEVICE_ID_INTEL_82815_MC,
+			      PCI_ANY_ID, PCI_ANY_ID,
+			      NULL);
+
+		if (!hostbridge)
+			return 2; /* 2-M */
+
+		if (hostbridge->revision < 5) {
+			pr_debug("hostbridge does not support speedstep\n");
+			speedstep_chipset_dev = NULL;
+			pci_dev_put(hostbridge);
+			return 0;
+		}
+
+		pci_dev_put(hostbridge);
+		return 2; /* 2-M */
+	}
+
+	return 0;
+}
+
+static void get_freq_data(void *_speed)
+{
+	unsigned int *speed = _speed;
+
+	*speed = speedstep_get_frequency(speedstep_processor);
+}
+
+static unsigned int speedstep_get(unsigned int cpu)
+{
+	unsigned int speed;
+
+	/* You're supposed to ensure CPU is online. */
+	if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0)
+		BUG();
+
+	pr_debug("detected %u kHz as current frequency\n", speed);
+	return speed;
+}
+
+/**
+ * speedstep_target - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: the target frequency
+ * @relation: how that frequency relates to achieved frequency
+ *	(CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
+ *
+ * Sets a new CPUFreq policy.
+ */
+static int speedstep_target(struct cpufreq_policy *policy,
+			     unsigned int target_freq,
+			     unsigned int relation)
+{
+	unsigned int newstate = 0, policy_cpu;
+	struct cpufreq_freqs freqs;
+	int i;
+
+	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
+				target_freq, relation, &newstate))
+		return -EINVAL;
+
+	policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
+	freqs.old = speedstep_get(policy_cpu);
+	freqs.new = speedstep_freqs[newstate].frequency;
+	freqs.cpu = policy->cpu;
+
+	pr_debug("transiting from %u to %u kHz\n", freqs.old, freqs.new);
+
+	/* no transition necessary */
+	if (freqs.old == freqs.new)
+		return 0;
+
+	for_each_cpu(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate,
+				 true);
+
+	for_each_cpu(i, policy->cpus) {
+		freqs.cpu = i;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+
+	return 0;
+}
+
+
+/**
+ * speedstep_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Limit must be within speedstep_low_freq and speedstep_high_freq, with
+ * at least one border included.
+ */
+static int speedstep_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
+}
+
+struct get_freqs {
+	struct cpufreq_policy *policy;
+	int ret;
+};
+
+static void get_freqs_on_cpu(void *_get_freqs)
+{
+	struct get_freqs *get_freqs = _get_freqs;
+
+	get_freqs->ret =
+		speedstep_get_freqs(speedstep_processor,
+			    &speedstep_freqs[SPEEDSTEP_LOW].frequency,
+			    &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
+			    &get_freqs->policy->cpuinfo.transition_latency,
+			    &speedstep_set_state);
+}
+
+static int speedstep_cpu_init(struct cpufreq_policy *policy)
+{
+	int result;
+	unsigned int policy_cpu, speed;
+	struct get_freqs gf;
+
+	/* only run on CPU to be set, or on its sibling */
+#ifdef CONFIG_SMP
+	cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
+#endif
+	policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
+
+	/* detect low and high frequency and transition latency */
+	gf.policy = policy;
+	smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1);
+	if (gf.ret)
+		return gf.ret;
+
+	/* get current speed setting */
+	speed = speedstep_get(policy_cpu);
+	if (!speed)
+		return -EIO;
+
+	pr_debug("currently at %s speed setting - %i MHz\n",
+		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
+		? "low" : "high",
+		(speed / 1000));
+
+	/* cpuinfo and default policy values */
+	policy->cur = speed;
+
+	result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
+	if (result)
+		return result;
+
+	cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
+
+	return 0;
+}
+
+
+static int speedstep_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static struct freq_attr *speedstep_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+
+static struct cpufreq_driver speedstep_driver = {
+	.name	= "speedstep-ich",
+	.verify	= speedstep_verify,
+	.target	= speedstep_target,
+	.init	= speedstep_cpu_init,
+	.exit	= speedstep_cpu_exit,
+	.get	= speedstep_get,
+	.owner	= THIS_MODULE,
+	.attr	= speedstep_attr,
+};
+
+
+/**
+ * speedstep_init - initializes the SpeedStep CPUFreq driver
+ *
+ *   Initializes the SpeedStep support. Returns -ENODEV on unsupported
+ * devices, -EINVAL on problems during initiatization, and zero on
+ * success.
+ */
+static int __init speedstep_init(void)
+{
+	/* detect processor */
+	speedstep_processor = speedstep_detect_processor();
+	if (!speedstep_processor) {
+		pr_debug("Intel(R) SpeedStep(TM) capable processor "
+				"not found\n");
+		return -ENODEV;
+	}
+
+	/* detect chipset */
+	if (!speedstep_detect_chipset()) {
+		pr_debug("Intel(R) SpeedStep(TM) for this chipset not "
+				"(yet) available.\n");
+		return -ENODEV;
+	}
+
+	/* activate speedstep support */
+	if (speedstep_activate()) {
+		pci_dev_put(speedstep_chipset_dev);
+		return -EINVAL;
+	}
+
+	if (speedstep_find_register())
+		return -ENODEV;
+
+	return cpufreq_register_driver(&speedstep_driver);
+}
+
+
+/**
+ * speedstep_exit - unregisters SpeedStep support
+ *
+ *   Unregisters SpeedStep support.
+ */
+static void __exit speedstep_exit(void)
+{
+	pci_dev_put(speedstep_chipset_dev);
+	cpufreq_unregister_driver(&speedstep_driver);
+}
+
+
+MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
+		"Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
+		"with ICH-M southbridges.");
+MODULE_LICENSE("GPL");
+
+module_init(speedstep_init);
+module_exit(speedstep_exit);
diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c
new file mode 100644
index 000000000000..8af2d2fd9d51
--- /dev/null
+++ b/drivers/cpufreq/speedstep-lib.c
@@ -0,0 +1,478 @@
+/*
+ * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  Library for common functions for Intel SpeedStep v.1 and v.2 support
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+
+#include <asm/msr.h>
+#include <asm/tsc.h>
+#include "speedstep-lib.h"
+
+#define PFX "speedstep-lib: "
+
+#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
+static int relaxed_check;
+#else
+#define relaxed_check 0
+#endif
+
+/*********************************************************************
+ *                   GET PROCESSOR CORE SPEED IN KHZ                 *
+ *********************************************************************/
+
+static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
+{
+	/* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
+	struct {
+		unsigned int ratio;	/* Frequency Multiplier (x10) */
+		u8 bitmap;		/* power on configuration bits
+					[27, 25:22] (in MSR 0x2a) */
+	} msr_decode_mult[] = {
+		{ 30, 0x01 },
+		{ 35, 0x05 },
+		{ 40, 0x02 },
+		{ 45, 0x06 },
+		{ 50, 0x00 },
+		{ 55, 0x04 },
+		{ 60, 0x0b },
+		{ 65, 0x0f },
+		{ 70, 0x09 },
+		{ 75, 0x0d },
+		{ 80, 0x0a },
+		{ 85, 0x26 },
+		{ 90, 0x20 },
+		{ 100, 0x2b },
+		{ 0, 0xff }	/* error or unknown value */
+	};
+
+	/* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
+	struct {
+		unsigned int value;	/* Front Side Bus speed in MHz */
+		u8 bitmap;		/* power on configuration bits [18: 19]
+					(in MSR 0x2a) */
+	} msr_decode_fsb[] = {
+		{  66, 0x0 },
+		{ 100, 0x2 },
+		{ 133, 0x1 },
+		{   0, 0xff}
+	};
+
+	u32 msr_lo, msr_tmp;
+	int i = 0, j = 0;
+
+	/* read MSR 0x2a - we only need the low 32 bits */
+	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
+	pr_debug("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+	msr_tmp = msr_lo;
+
+	/* decode the FSB */
+	msr_tmp &= 0x00c0000;
+	msr_tmp >>= 18;
+	while (msr_tmp != msr_decode_fsb[i].bitmap) {
+		if (msr_decode_fsb[i].bitmap == 0xff)
+			return 0;
+		i++;
+	}
+
+	/* decode the multiplier */
+	if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) {
+		pr_debug("workaround for early PIIIs\n");
+		msr_lo &= 0x03c00000;
+	} else
+		msr_lo &= 0x0bc00000;
+	msr_lo >>= 22;
+	while (msr_lo != msr_decode_mult[j].bitmap) {
+		if (msr_decode_mult[j].bitmap == 0xff)
+			return 0;
+		j++;
+	}
+
+	pr_debug("speed is %u\n",
+		(msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
+
+	return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100;
+}
+
+
+static unsigned int pentiumM_get_frequency(void)
+{
+	u32 msr_lo, msr_tmp;
+
+	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
+	pr_debug("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp);
+
+	/* see table B-2 of 24547212.pdf */
+	if (msr_lo & 0x00040000) {
+		printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n",
+				msr_lo, msr_tmp);
+		return 0;
+	}
+
+	msr_tmp = (msr_lo >> 22) & 0x1f;
+	pr_debug("bits 22-26 are 0x%x, speed is %u\n",
+			msr_tmp, (msr_tmp * 100 * 1000));
+
+	return msr_tmp * 100 * 1000;
+}
+
+static unsigned int pentium_core_get_frequency(void)
+{
+	u32 fsb = 0;
+	u32 msr_lo, msr_tmp;
+	int ret;
+
+	rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp);
+	/* see table B-2 of 25366920.pdf */
+	switch (msr_lo & 0x07) {
+	case 5:
+		fsb = 100000;
+		break;
+	case 1:
+		fsb = 133333;
+		break;
+	case 3:
+		fsb = 166667;
+		break;
+	case 2:
+		fsb = 200000;
+		break;
+	case 0:
+		fsb = 266667;
+		break;
+	case 4:
+		fsb = 333333;
+		break;
+	default:
+		printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
+	}
+
+	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
+	pr_debug("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
+			msr_lo, msr_tmp);
+
+	msr_tmp = (msr_lo >> 22) & 0x1f;
+	pr_debug("bits 22-26 are 0x%x, speed is %u\n",
+			msr_tmp, (msr_tmp * fsb));
+
+	ret = (msr_tmp * fsb);
+	return ret;
+}
+
+
+static unsigned int pentium4_get_frequency(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+	u32 msr_lo, msr_hi, mult;
+	unsigned int fsb = 0;
+	unsigned int ret;
+	u8 fsb_code;
+
+	/* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency
+	 * to System Bus Frequency Ratio Field in the Processor Frequency
+	 * Configuration Register of the MSR. Therefore the current
+	 * frequency cannot be calculated and has to be measured.
+	 */
+	if (c->x86_model < 2)
+		return cpu_khz;
+
+	rdmsr(0x2c, msr_lo, msr_hi);
+
+	pr_debug("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi);
+
+	/* decode the FSB: see IA-32 Intel (C) Architecture Software
+	 * Developer's Manual, Volume 3: System Prgramming Guide,
+	 * revision #12 in Table B-1: MSRs in the Pentium 4 and
+	 * Intel Xeon Processors, on page B-4 and B-5.
+	 */
+	fsb_code = (msr_lo >> 16) & 0x7;
+	switch (fsb_code) {
+	case 0:
+		fsb = 100 * 1000;
+		break;
+	case 1:
+		fsb = 13333 * 10;
+		break;
+	case 2:
+		fsb = 200 * 1000;
+		break;
+	}
+
+	if (!fsb)
+		printk(KERN_DEBUG PFX "couldn't detect FSB speed. "
+				"Please send an e-mail to <linux@brodo.de>\n");
+
+	/* Multiplier. */
+	mult = msr_lo >> 24;
+
+	pr_debug("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
+			fsb, mult, (fsb * mult));
+
+	ret = (fsb * mult);
+	return ret;
+}
+
+
+/* Warning: may get called from smp_call_function_single. */
+unsigned int speedstep_get_frequency(enum speedstep_processor processor)
+{
+	switch (processor) {
+	case SPEEDSTEP_CPU_PCORE:
+		return pentium_core_get_frequency();
+	case SPEEDSTEP_CPU_PM:
+		return pentiumM_get_frequency();
+	case SPEEDSTEP_CPU_P4D:
+	case SPEEDSTEP_CPU_P4M:
+		return pentium4_get_frequency();
+	case SPEEDSTEP_CPU_PIII_T:
+	case SPEEDSTEP_CPU_PIII_C:
+	case SPEEDSTEP_CPU_PIII_C_EARLY:
+		return pentium3_get_frequency(processor);
+	default:
+		return 0;
+	};
+	return 0;
+}
+EXPORT_SYMBOL_GPL(speedstep_get_frequency);
+
+
+/*********************************************************************
+ *                 DETECT SPEEDSTEP-CAPABLE PROCESSOR                *
+ *********************************************************************/
+
+unsigned int speedstep_detect_processor(void)
+{
+	struct cpuinfo_x86 *c = &cpu_data(0);
+	u32 ebx, msr_lo, msr_hi;
+
+	pr_debug("x86: %x, model: %x\n", c->x86, c->x86_model);
+
+	if ((c->x86_vendor != X86_VENDOR_INTEL) ||
+	    ((c->x86 != 6) && (c->x86 != 0xF)))
+		return 0;
+
+	if (c->x86 == 0xF) {
+		/* Intel Mobile Pentium 4-M
+		 * or Intel Mobile Pentium 4 with 533 MHz FSB */
+		if (c->x86_model != 2)
+			return 0;
+
+		ebx = cpuid_ebx(0x00000001);
+		ebx &= 0x000000FF;
+
+		pr_debug("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask);
+
+		switch (c->x86_mask) {
+		case 4:
+			/*
+			 * B-stepping [M-P4-M]
+			 * sample has ebx = 0x0f, production has 0x0e.
+			 */
+			if ((ebx == 0x0e) || (ebx == 0x0f))
+				return SPEEDSTEP_CPU_P4M;
+			break;
+		case 7:
+			/*
+			 * C-stepping [M-P4-M]
+			 * needs to have ebx=0x0e, else it's a celeron:
+			 * cf. 25130917.pdf / page 7, footnote 5 even
+			 * though 25072120.pdf / page 7 doesn't say
+			 * samples are only of B-stepping...
+			 */
+			if (ebx == 0x0e)
+				return SPEEDSTEP_CPU_P4M;
+			break;
+		case 9:
+			/*
+			 * D-stepping [M-P4-M or M-P4/533]
+			 *
+			 * this is totally strange: CPUID 0x0F29 is
+			 * used by M-P4-M, M-P4/533 and(!) Celeron CPUs.
+			 * The latter need to be sorted out as they don't
+			 * support speedstep.
+			 * Celerons with CPUID 0x0F29 may have either
+			 * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything
+			 * specific.
+			 * M-P4-Ms may have either ebx=0xe or 0xf [see above]
+			 * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
+			 * also, M-P4M HTs have ebx=0x8, too
+			 * For now, they are distinguished by the model_id
+			 * string
+			 */
+			if ((ebx == 0x0e) ||
+				(strstr(c->x86_model_id,
+				    "Mobile Intel(R) Pentium(R) 4") != NULL))
+				return SPEEDSTEP_CPU_P4M;
+			break;
+		default:
+			break;
+		}
+		return 0;
+	}
+
+	switch (c->x86_model) {
+	case 0x0B: /* Intel PIII [Tualatin] */
+		/* cpuid_ebx(1) is 0x04 for desktop PIII,
+		 * 0x06 for mobile PIII-M */
+		ebx = cpuid_ebx(0x00000001);
+		pr_debug("ebx is %x\n", ebx);
+
+		ebx &= 0x000000FF;
+
+		if (ebx != 0x06)
+			return 0;
+
+		/* So far all PIII-M processors support SpeedStep. See
+		 * Intel's 24540640.pdf of June 2003
+		 */
+		return SPEEDSTEP_CPU_PIII_T;
+
+	case 0x08: /* Intel PIII [Coppermine] */
+
+		/* all mobile PIII Coppermines have FSB 100 MHz
+		 * ==> sort out a few desktop PIIIs. */
+		rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
+		pr_debug("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
+				msr_lo, msr_hi);
+		msr_lo &= 0x00c0000;
+		if (msr_lo != 0x0080000)
+			return 0;
+
+		/*
+		 * If the processor is a mobile version,
+		 * platform ID has bit 50 set
+		 * it has SpeedStep technology if either
+		 * bit 56 or 57 is set
+		 */
+		rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
+		pr_debug("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
+				msr_lo, msr_hi);
+		if ((msr_hi & (1<<18)) &&
+		    (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
+			if (c->x86_mask == 0x01) {
+				pr_debug("early PIII version\n");
+				return SPEEDSTEP_CPU_PIII_C_EARLY;
+			} else
+				return SPEEDSTEP_CPU_PIII_C;
+		}
+
+	default:
+		return 0;
+	}
+}
+EXPORT_SYMBOL_GPL(speedstep_detect_processor);
+
+
+/*********************************************************************
+ *                     DETECT SPEEDSTEP SPEEDS                       *
+ *********************************************************************/
+
+unsigned int speedstep_get_freqs(enum speedstep_processor processor,
+				  unsigned int *low_speed,
+				  unsigned int *high_speed,
+				  unsigned int *transition_latency,
+				  void (*set_state) (unsigned int state))
+{
+	unsigned int prev_speed;
+	unsigned int ret = 0;
+	unsigned long flags;
+	struct timeval tv1, tv2;
+
+	if ((!processor) || (!low_speed) || (!high_speed) || (!set_state))
+		return -EINVAL;
+
+	pr_debug("trying to determine both speeds\n");
+
+	/* get current speed */
+	prev_speed = speedstep_get_frequency(processor);
+	if (!prev_speed)
+		return -EIO;
+
+	pr_debug("previous speed is %u\n", prev_speed);
+
+	local_irq_save(flags);
+
+	/* switch to low state */
+	set_state(SPEEDSTEP_LOW);
+	*low_speed = speedstep_get_frequency(processor);
+	if (!*low_speed) {
+		ret = -EIO;
+		goto out;
+	}
+
+	pr_debug("low speed is %u\n", *low_speed);
+
+	/* start latency measurement */
+	if (transition_latency)
+		do_gettimeofday(&tv1);
+
+	/* switch to high state */
+	set_state(SPEEDSTEP_HIGH);
+
+	/* end latency measurement */
+	if (transition_latency)
+		do_gettimeofday(&tv2);
+
+	*high_speed = speedstep_get_frequency(processor);
+	if (!*high_speed) {
+		ret = -EIO;
+		goto out;
+	}
+
+	pr_debug("high speed is %u\n", *high_speed);
+
+	if (*low_speed == *high_speed) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/* switch to previous state, if necessary */
+	if (*high_speed != prev_speed)
+		set_state(SPEEDSTEP_LOW);
+
+	if (transition_latency) {
+		*transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC +
+			tv2.tv_usec - tv1.tv_usec;
+		pr_debug("transition latency is %u uSec\n", *transition_latency);
+
+		/* convert uSec to nSec and add 20% for safety reasons */
+		*transition_latency *= 1200;
+
+		/* check if the latency measurement is too high or too low
+		 * and set it to a safe value (500uSec) in that case
+		 */
+		if (*transition_latency > 10000000 ||
+		    *transition_latency < 50000) {
+			printk(KERN_WARNING PFX "frequency transition "
+					"measured seems out of range (%u "
+					"nSec), falling back to a safe one of"
+					"%u nSec.\n",
+					*transition_latency, 500000);
+			*transition_latency = 500000;
+		}
+	}
+
+out:
+	local_irq_restore(flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(speedstep_get_freqs);
+
+#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
+module_param(relaxed_check, int, 0444);
+MODULE_PARM_DESC(relaxed_check,
+		"Don't do all checks for speedstep capability.");
+#endif
+
+MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
+MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/speedstep-lib.h b/drivers/cpufreq/speedstep-lib.h
new file mode 100644
index 000000000000..70d9cea1219d
--- /dev/null
+++ b/drivers/cpufreq/speedstep-lib.h
@@ -0,0 +1,49 @@
+/*
+ * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ *  Library for common functions for Intel SpeedStep v.1 and v.2 support
+ *
+ *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
+ */
+
+
+
+/* processors */
+enum speedstep_processor {
+	SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001,  /* Coppermine core */
+	SPEEDSTEP_CPU_PIII_C	   = 0x00000002,  /* Coppermine core */
+	SPEEDSTEP_CPU_PIII_T	   = 0x00000003,  /* Tualatin core */
+	SPEEDSTEP_CPU_P4M	   = 0x00000004,  /* P4-M  */
+/* the following processors are not speedstep-capable and are not auto-detected
+ * in speedstep_detect_processor(). However, their speed can be detected using
+ * the speedstep_get_frequency() call. */
+	SPEEDSTEP_CPU_PM	   = 0xFFFFFF03,  /* Pentium M  */
+	SPEEDSTEP_CPU_P4D	   = 0xFFFFFF04,  /* desktop P4  */
+	SPEEDSTEP_CPU_PCORE	   = 0xFFFFFF05,  /* Core */
+};
+
+/* speedstep states -- only two of them */
+
+#define SPEEDSTEP_HIGH	0x00000000
+#define SPEEDSTEP_LOW	0x00000001
+
+
+/* detect a speedstep-capable processor */
+extern enum speedstep_processor speedstep_detect_processor(void);
+
+/* detect the current speed (in khz) of the processor */
+extern unsigned int speedstep_get_frequency(enum speedstep_processor processor);
+
+
+/* detect the low and high speeds of the processor. The callback
+ * set_state"'s first argument is either SPEEDSTEP_HIGH or
+ * SPEEDSTEP_LOW; the second argument is zero so that no
+ * cpufreq_notify_transition calls are initiated.
+ */
+extern unsigned int speedstep_get_freqs(enum speedstep_processor processor,
+	unsigned int *low_speed,
+	unsigned int *high_speed,
+	unsigned int *transition_latency,
+	void (*set_state) (unsigned int state));
diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c
new file mode 100644
index 000000000000..c76ead3490bf
--- /dev/null
+++ b/drivers/cpufreq/speedstep-smi.c
@@ -0,0 +1,464 @@
+/*
+ * Intel SpeedStep SMI driver.
+ *
+ * (C) 2003  Hiroshi Miura <miura@da-cha.org>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ *
+ */
+
+
+/*********************************************************************
+ *                        SPEEDSTEP - DEFINITIONS                    *
+ *********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <asm/ist.h>
+
+#include "speedstep-lib.h"
+
+/* speedstep system management interface port/command.
+ *
+ * These parameters are got from IST-SMI BIOS call.
+ * If user gives it, these are used.
+ *
+ */
+static int smi_port;
+static int smi_cmd;
+static unsigned int smi_sig;
+
+/* info about the processor */
+static enum speedstep_processor speedstep_processor;
+
+/*
+ * There are only two frequency states for each processor. Values
+ * are in kHz for the time being.
+ */
+static struct cpufreq_frequency_table speedstep_freqs[] = {
+	{SPEEDSTEP_HIGH,	0},
+	{SPEEDSTEP_LOW,		0},
+	{0,			CPUFREQ_TABLE_END},
+};
+
+#define GET_SPEEDSTEP_OWNER 0
+#define GET_SPEEDSTEP_STATE 1
+#define SET_SPEEDSTEP_STATE 2
+#define GET_SPEEDSTEP_FREQS 4
+
+/* how often shall the SMI call be tried if it failed, e.g. because
+ * of DMA activity going on? */
+#define SMI_TRIES 5
+
+/**
+ * speedstep_smi_ownership
+ */
+static int speedstep_smi_ownership(void)
+{
+	u32 command, result, magic, dummy;
+	u32 function = GET_SPEEDSTEP_OWNER;
+	unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
+
+	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+	magic = virt_to_phys(magic_data);
+
+	pr_debug("trying to obtain ownership with command %x at port %x\n",
+			command, smi_port);
+
+	__asm__ __volatile__(
+		"push %%ebp\n"
+		"out %%al, (%%dx)\n"
+		"pop %%ebp\n"
+		: "=D" (result),
+		  "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
+		  "=S" (dummy)
+		: "a" (command), "b" (function), "c" (0), "d" (smi_port),
+		  "D" (0), "S" (magic)
+		: "memory"
+	);
+
+	pr_debug("result is %x\n", result);
+
+	return result;
+}
+
+/**
+ * speedstep_smi_get_freqs - get SpeedStep preferred & current freq.
+ * @low: the low frequency value is placed here
+ * @high: the high frequency value is placed here
+ *
+ * Only available on later SpeedStep-enabled systems, returns false results or
+ * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
+ * shows that the latter occurs if !(ist_info.event & 0xFFFF).
+ */
+static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
+{
+	u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
+	u32 state = 0;
+	u32 function = GET_SPEEDSTEP_FREQS;
+
+	if (!(ist_info.event & 0xFFFF)) {
+		pr_debug("bug #1422 -- can't read freqs from BIOS\n");
+		return -ENODEV;
+	}
+
+	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+
+	pr_debug("trying to determine frequencies with command %x at port %x\n",
+			command, smi_port);
+
+	__asm__ __volatile__(
+		"push %%ebp\n"
+		"out %%al, (%%dx)\n"
+		"pop %%ebp"
+		: "=a" (result),
+		  "=b" (high_mhz),
+		  "=c" (low_mhz),
+		  "=d" (state), "=D" (edi), "=S" (dummy)
+		: "a" (command),
+		  "b" (function),
+		  "c" (state),
+		  "d" (smi_port), "S" (0), "D" (0)
+	);
+
+	pr_debug("result %x, low_freq %u, high_freq %u\n",
+			result, low_mhz, high_mhz);
+
+	/* abort if results are obviously incorrect... */
+	if ((high_mhz + low_mhz) < 600)
+		return -EINVAL;
+
+	*high = high_mhz * 1000;
+	*low  = low_mhz  * 1000;
+
+	return result;
+}
+
+/**
+ * speedstep_get_state - set the SpeedStep state
+ * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ */
+static int speedstep_get_state(void)
+{
+	u32 function = GET_SPEEDSTEP_STATE;
+	u32 result, state, edi, command, dummy;
+
+	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+
+	pr_debug("trying to determine current setting with command %x "
+		"at port %x\n", command, smi_port);
+
+	__asm__ __volatile__(
+		"push %%ebp\n"
+		"out %%al, (%%dx)\n"
+		"pop %%ebp\n"
+		: "=a" (result),
+		  "=b" (state), "=D" (edi),
+		  "=c" (dummy), "=d" (dummy), "=S" (dummy)
+		: "a" (command), "b" (function), "c" (0),
+		  "d" (smi_port), "S" (0), "D" (0)
+	);
+
+	pr_debug("state is %x, result is %x\n", state, result);
+
+	return state & 1;
+}
+
+
+/**
+ * speedstep_set_state - set the SpeedStep state
+ * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
+ *
+ */
+static void speedstep_set_state(unsigned int state)
+{
+	unsigned int result = 0, command, new_state, dummy;
+	unsigned long flags;
+	unsigned int function = SET_SPEEDSTEP_STATE;
+	unsigned int retry = 0;
+
+	if (state > 0x1)
+		return;
+
+	/* Disable IRQs */
+	local_irq_save(flags);
+
+	command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
+
+	pr_debug("trying to set frequency to state %u "
+		"with command %x at port %x\n",
+		state, command, smi_port);
+
+	do {
+		if (retry) {
+			pr_debug("retry %u, previous result %u, waiting...\n",
+					retry, result);
+			mdelay(retry * 50);
+		}
+		retry++;
+		__asm__ __volatile__(
+			"push %%ebp\n"
+			"out %%al, (%%dx)\n"
+			"pop %%ebp"
+			: "=b" (new_state), "=D" (result),
+			  "=c" (dummy), "=a" (dummy),
+			  "=d" (dummy), "=S" (dummy)
+			: "a" (command), "b" (function), "c" (state),
+			  "d" (smi_port), "S" (0), "D" (0)
+			);
+	} while ((new_state != state) && (retry <= SMI_TRIES));
+
+	/* enable IRQs */
+	local_irq_restore(flags);
+
+	if (new_state == state)
+		pr_debug("change to %u MHz succeeded after %u tries "
+			"with result %u\n",
+			(speedstep_freqs[new_state].frequency / 1000),
+			retry, result);
+	else
+		printk(KERN_ERR "cpufreq: change to state %u "
+			"failed with new_state %u and result %u\n",
+			state, new_state, result);
+
+	return;
+}
+
+
+/**
+ * speedstep_target - set a new CPUFreq policy
+ * @policy: new policy
+ * @target_freq: new freq
+ * @relation:
+ *
+ * Sets a new CPUFreq policy/freq.
+ */
+static int speedstep_target(struct cpufreq_policy *policy,
+			unsigned int target_freq, unsigned int relation)
+{
+	unsigned int newstate = 0;
+	struct cpufreq_freqs freqs;
+
+	if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
+				target_freq, relation, &newstate))
+		return -EINVAL;
+
+	freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
+	freqs.new = speedstep_freqs[newstate].frequency;
+	freqs.cpu = 0; /* speedstep.c is UP only driver */
+
+	if (freqs.old == freqs.new)
+		return 0;
+
+	cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	speedstep_set_state(newstate);
+	cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+
+	return 0;
+}
+
+
+/**
+ * speedstep_verify - verifies a new CPUFreq policy
+ * @policy: new policy
+ *
+ * Limit must be within speedstep_low_freq and speedstep_high_freq, with
+ * at least one border included.
+ */
+static int speedstep_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
+}
+
+
+static int speedstep_cpu_init(struct cpufreq_policy *policy)
+{
+	int result;
+	unsigned int speed, state;
+	unsigned int *low, *high;
+
+	/* capability check */
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	result = speedstep_smi_ownership();
+	if (result) {
+		pr_debug("fails in acquiring ownership of a SMI interface.\n");
+		return -EINVAL;
+	}
+
+	/* detect low and high frequency */
+	low = &speedstep_freqs[SPEEDSTEP_LOW].frequency;
+	high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency;
+
+	result = speedstep_smi_get_freqs(low, high);
+	if (result) {
+		/* fall back to speedstep_lib.c dection mechanism:
+		 * try both states out */
+		pr_debug("could not detect low and high frequencies "
+				"by SMI call.\n");
+		result = speedstep_get_freqs(speedstep_processor,
+				low, high,
+				NULL,
+				&speedstep_set_state);
+
+		if (result) {
+			pr_debug("could not detect two different speeds"
+					" -- aborting.\n");
+			return result;
+		} else
+			pr_debug("workaround worked.\n");
+	}
+
+	/* get current speed setting */
+	state = speedstep_get_state();
+	speed = speedstep_freqs[state].frequency;
+
+	pr_debug("currently at %s speed setting - %i MHz\n",
+		(speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
+		? "low" : "high",
+		(speed / 1000));
+
+	/* cpuinfo and default policy values */
+	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+	policy->cur = speed;
+
+	result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
+	if (result)
+		return result;
+
+	cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
+
+	return 0;
+}
+
+static int speedstep_cpu_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	return 0;
+}
+
+static unsigned int speedstep_get(unsigned int cpu)
+{
+	if (cpu)
+		return -ENODEV;
+	return speedstep_get_frequency(speedstep_processor);
+}
+
+
+static int speedstep_resume(struct cpufreq_policy *policy)
+{
+	int result = speedstep_smi_ownership();
+
+	if (result)
+		pr_debug("fails in re-acquiring ownership of a SMI interface.\n");
+
+	return result;
+}
+
+static struct freq_attr *speedstep_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver speedstep_driver = {
+	.name		= "speedstep-smi",
+	.verify		= speedstep_verify,
+	.target		= speedstep_target,
+	.init		= speedstep_cpu_init,
+	.exit		= speedstep_cpu_exit,
+	.get		= speedstep_get,
+	.resume		= speedstep_resume,
+	.owner		= THIS_MODULE,
+	.attr		= speedstep_attr,
+};
+
+/**
+ * speedstep_init - initializes the SpeedStep CPUFreq driver
+ *
+ *   Initializes the SpeedStep support. Returns -ENODEV on unsupported
+ * BIOS, -EINVAL on problems during initiatization, and zero on
+ * success.
+ */
+static int __init speedstep_init(void)
+{
+	speedstep_processor = speedstep_detect_processor();
+
+	switch (speedstep_processor) {
+	case SPEEDSTEP_CPU_PIII_T:
+	case SPEEDSTEP_CPU_PIII_C:
+	case SPEEDSTEP_CPU_PIII_C_EARLY:
+		break;
+	default:
+		speedstep_processor = 0;
+	}
+
+	if (!speedstep_processor) {
+		pr_debug("No supported Intel CPU detected.\n");
+		return -ENODEV;
+	}
+
+	pr_debug("signature:0x%.8ulx, command:0x%.8ulx, "
+		"event:0x%.8ulx, perf_level:0x%.8ulx.\n",
+		ist_info.signature, ist_info.command,
+		ist_info.event, ist_info.perf_level);
+
+	/* Error if no IST-SMI BIOS or no PARM
+		 sig= 'ISGE' aka 'Intel Speedstep Gate E' */
+	if ((ist_info.signature !=  0x47534943) && (
+	    (smi_port == 0) || (smi_cmd == 0)))
+		return -ENODEV;
+
+	if (smi_sig == 1)
+		smi_sig = 0x47534943;
+	else
+		smi_sig = ist_info.signature;
+
+	/* setup smi_port from MODLULE_PARM or BIOS */
+	if ((smi_port > 0xff) || (smi_port < 0))
+		return -EINVAL;
+	else if (smi_port == 0)
+		smi_port = ist_info.command & 0xff;
+
+	if ((smi_cmd > 0xff) || (smi_cmd < 0))
+		return -EINVAL;
+	else if (smi_cmd == 0)
+		smi_cmd = (ist_info.command >> 16) & 0xff;
+
+	return cpufreq_register_driver(&speedstep_driver);
+}
+
+
+/**
+ * speedstep_exit - unregisters SpeedStep support
+ *
+ *   Unregisters SpeedStep support.
+ */
+static void __exit speedstep_exit(void)
+{
+	cpufreq_unregister_driver(&speedstep_driver);
+}
+
+module_param(smi_port, int, 0444);
+module_param(smi_cmd,  int, 0444);
+module_param(smi_sig, uint, 0444);
+
+MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value "
+		"-- Intel's default setting is 0xb2");
+MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value "
+		"-- Intel's default setting is 0x82");
+MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the "
+		"SMI interface.");
+
+MODULE_AUTHOR("Hiroshi Miura");
+MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface.");
+MODULE_LICENSE("GPL");
+
+module_init(speedstep_init);
+module_exit(speedstep_exit);
-- 
cgit v1.2.3


From 44075d95e2567ce7b454bc1a4cf264ff6afebe65 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 11 May 2011 00:38:50 +0000
Subject: powerpc/kvm: Fix kvmppc_core_pending_dec

The vcpu->arch.pending_exceptions field is a bitfield indexed by
interrupt priority number as returned by kvmppc_book3s_vec2irqprio.
However, kvmppc_core_pending_dec was using an interrupt vector shifted
by 7 as the bit index.  Fix it to use the irqprio value for the
decrementer interrupt instead.  This problem was found by code
inspection.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kvm/book3s.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index c961de40c676..0f95b5cce033 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -236,7 +236,7 @@ void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
 
 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
 {
-	return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
+	return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
 }
 
 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3


From 593adf317cf165f7c66facf2285db9d4befbd1c0 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 11 May 2011 00:39:50 +0000
Subject: powerpc/kvm: Fix the build for 32-bit Book 3S (classic) processors

Commits a5d4f3ad3a ("powerpc: Base support for exceptions using
HSRR0/1") and 673b189a2e ("powerpc: Always use SPRN_SPRG_HSCRATCH0
when running in HV mode") cause compile and link errors for 32-bit
classic Book 3S processors when KVM is enabled.  This fixes these
errors.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/reg.h       | 5 +++++
 arch/powerpc/kvm/book3s_rmhandlers.S | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fdec59333053..c5cae0dd176c 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -824,6 +824,11 @@
 	FTR_SECTION_ELSE_NESTED(66);			\
 	mtspr	SPRN_SPRG_HSCRATCH0,rX;			\
 	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66)
+
+#else /* CONFIG_PPC_BOOK3S_64 */
+#define GET_SCRATCH0(rX)	mfspr	rX,SPRN_SPRG_SCRATCH0
+#define SET_SCRATCH0(rX)	mtspr	SPRN_SPRG_SCRATCH0,rX
+
 #endif
 
 #ifdef CONFIG_PPC_BOOK3E_64
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index ae99af66ca34..1a1b34487e71 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -112,7 +112,9 @@ INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_MACHINE_CHECK
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_DATA_STORAGE
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_INST_STORAGE
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_EXTERNAL
+#ifdef CONFIG_PPC_BOOK3S_64
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_EXTERNAL_HV
+#endif
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_ALIGNMENT
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_PROGRAM
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_FP_UNAVAIL
-- 
cgit v1.2.3


From a2d063ac216c1618bfc2b4d40b7176adffa63511 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 19 May 2011 21:34:58 -0400
Subject: extable, core_kernel_data(): Make sure all archs define _sdata

A new utility function (core_kernel_data()) is used to determine if a
passed in address is part of core kernel data or not. It may or may not
return true for RO data, but this utility must work for RW data.

Thus both _sdata and _edata must be defined and continuous,
without .init sections that may later be freed and replaced by
volatile memory (memory that can be freed).

This utility function is used to determine if data is safe from
ever being freed. Thus it should return true for all RW global
data that is not in a module or has been allocated, or false
otherwise.

Also change core_kernel_data() back to the more precise _sdata condition
and document the function.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Acked-by: Hirokazu Takata <takata@linux-m32r.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: linux-m68k@lists.linux-m68k.org
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Helge Deller <deller@gmx.de>
Cc: JamesE.J.Bottomley <jejb@parisc-linux.org>
Link: http://lkml.kernel.org/r/1305855298.1465.19.camel@gandalf.stny.rr.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
----
 arch/alpha/kernel/vmlinux.lds.S   |    1 +
 arch/m32r/kernel/vmlinux.lds.S    |    1 +
 arch/m68k/kernel/vmlinux-std.lds  |    2 ++
 arch/m68k/kernel/vmlinux-sun3.lds |    1 +
 arch/mips/kernel/vmlinux.lds.S    |    1 +
 arch/parisc/kernel/vmlinux.lds.S  |    3 +++
 kernel/extable.c                  |   12 +++++++++++-
 7 files changed, 20 insertions(+), 1 deletion(-)
---
 arch/alpha/kernel/vmlinux.lds.S   |  1 +
 arch/m32r/kernel/vmlinux.lds.S    |  1 +
 arch/m68k/kernel/vmlinux-std.lds  |  2 ++
 arch/m68k/kernel/vmlinux-sun3.lds |  1 +
 arch/mips/kernel/vmlinux.lds.S    |  1 +
 arch/parisc/kernel/vmlinux.lds.S  |  3 +++
 kernel/extable.c                  | 12 +++++++++++-
 7 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index 433be2a24f31..3d890a98a08b 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -46,6 +46,7 @@ SECTIONS
 	__init_end = .;
 	/* Freed after init ends here */
 
+	_sdata = .;	/* Start of rw data section */
 	_data = .;
 	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 
diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S
index c194d64cdbb9..cf95aec77460 100644
--- a/arch/m32r/kernel/vmlinux.lds.S
+++ b/arch/m32r/kernel/vmlinux.lds.S
@@ -44,6 +44,7 @@ SECTIONS
   EXCEPTION_TABLE(16)
   NOTES
 
+  _sdata = .;			/* Start of data section */
   RODATA
   RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
   _edata = .;			/* End of data section */
diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds
index 878be5f38cad..d0993594f558 100644
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -25,6 +25,8 @@ SECTIONS
 
   EXCEPTION_TABLE(16)
 
+  _sdata = .;			/* Start of data section */
+
   RODATA
 
   RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE)
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds
index 1ad6b7ad2c17..8080469ee6c1 100644
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -25,6 +25,7 @@ SECTIONS
   _etext = .;			/* End of text section */
 
   EXCEPTION_TABLE(16) :data
+  _sdata = .;			/* Start of rw data section */
   RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE) :data
   /* End of data goes *here* so that freeing init code works properly. */
   _edata = .;
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index cd2ca544454b..01af3876cf90 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -65,6 +65,7 @@ SECTIONS
 	NOTES :text :note
 	.dummy : { *(.dummy) } :text
 
+	_sdata = .;			/* Start of data section */
 	RODATA
 
 	/* writeable */
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 8f1e4efd143e..2d9a5c7c76f5 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -69,6 +69,9 @@ SECTIONS
 	/* End of text section */
 	_etext = .;
 
+	/* Start of data section */
+	_sdata = .;
+
 	RODATA
 
 	/* writeable */
diff --git a/kernel/extable.c b/kernel/extable.c
index d44aac0c3faa..5339705b8241 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -72,9 +72,19 @@ int core_kernel_text(unsigned long addr)
 	return 0;
 }
 
+/**
+ * core_kernel_data - tell if addr points to kernel data
+ * @addr: address to test
+ *
+ * Returns true if @addr passed in is from the core kernel data
+ * section.
+ *
+ * Note: On some archs it may return true for core RODATA, and false
+ *  for others. But will always be true for core RW data.
+ */
 int core_kernel_data(unsigned long addr)
 {
-	if (addr >= (unsigned long)_stext &&
+	if (addr >= (unsigned long)_sdata &&
 	    addr < (unsigned long)_edata)
 		return 1;
 	return 0;
-- 
cgit v1.2.3


From 208b3a4c196e733b9cec006dc132cfc149b2810a Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 20 May 2011 17:50:18 +1000
Subject: powerpc: Fix hard CPU IDs detection

commit 9d07bc841c9779b4d7902e417f4e509996ce805d
"powerpc: Properly handshake CPUs going out of boot spin loop"

Would cause a miscalculation of the hard CPU ID. It removes breaking
out of the loop when finding a match with a processor, thus the "i"
used as an index in the intserv array is always incorrect

This broke interrupt on my PowerMac laptop.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/prom.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 5311a26dcf46..48aeb55faae9 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -278,6 +278,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	int i, nthreads;
 	unsigned long len;
 	int found = -1;
+	int found_thread = 0;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
@@ -301,9 +302,11 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 		 * version 2 of the kexec param format adds the phys cpuid of
 		 * booted proc.
 		 */
-		if (initial_boot_params && initial_boot_params->version >= 2) {
-			if (intserv[i] == initial_boot_params->boot_cpuid_phys)
+		if (initial_boot_params->version >= 2) {
+			if (intserv[i] == initial_boot_params->boot_cpuid_phys) {
 				found = boot_cpu_count;
+				found_thread = i;
+			}
 		} else {
 			/*
 			 * Check if it's the boot-cpu, set it's hw index now,
@@ -322,9 +325,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 
 	if (found >= 0) {
 		DBG("boot cpu: logical %d physical %d\n", found,
-			intserv[i]);
+			intserv[found_thread]);
 		boot_cpuid = found;
-		set_hard_smp_processor_id(found, intserv[i]);
+		set_hard_smp_processor_id(found, intserv[found_thread]);
 
 		/*
 		 * PAPR defines "logical" PVR values for cpus that
-- 
cgit v1.2.3


From 30f7276cb35a22743d709a460ae3639aad50366a Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 20 May 2011 10:38:53 -0700
Subject: [IA64] define "_sdata" symbol

core_kernel_data() wants to know if an address looks like kernel
data. IA64 has had _edata forever, but never needed _sdata until
now.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/vmlinux.lds.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 787de4a77d82..53c0ba004e9e 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -209,6 +209,7 @@ SECTIONS {
 	data : {
 	} :data
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		_sdata  =  .;
 		INIT_TASK_DATA(PAGE_SIZE)
 		CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
 		READ_MOSTLY_DATA(SMP_CACHE_BYTES)
-- 
cgit v1.2.3


From 268bb0ce3e87872cb9290c322b0d35bce230d88f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 20 May 2011 12:50:29 -0700
Subject: sanitize <linux/prefetch.h> usage

Commit e66eed651fd1 ("list: remove prefetching from regular list
iterators") removed the include of prefetch.h from list.h, which
uncovered several cases that had apparently relied on that rather
obscure header file dependency.

So this fixes things up a bit, using

   grep -L linux/prefetch.h $(git grep -l '[^a-z_]prefetchw*(' -- '*.[ch]')
   grep -L 'prefetchw*(' $(git grep -l 'linux/prefetch.h' -- '*.[ch]')

to guide us in finding files that either need <linux/prefetch.h>
inclusion, or have it despite not needing it.

There are more of them around (mostly network drivers), but this gets
many core ones.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/hp/common/sba_iommu.c      | 1 +
 arch/ia64/mm/fault.c                 | 1 +
 arch/powerpc/lib/sstep.c             | 1 +
 arch/sh/kernel/cpu/sh4/sq.c          | 1 +
 arch/x86/include/asm/uaccess.h       | 1 -
 arch/x86/include/asm/uaccess_32.h    | 1 -
 arch/x86/include/asm/uaccess_64.h    | 1 -
 arch/x86/mm/fault.c                  | 1 +
 drivers/misc/sgi-gru/grufault.c      | 1 +
 drivers/misc/sgi-gru/grumain.c       | 1 +
 drivers/net/acenic.c                 | 1 +
 drivers/net/ehea/ehea_main.c         | 1 +
 drivers/staging/pohmelfs/inode.c     | 1 +
 drivers/usb/gadget/goku_udc.c        | 1 +
 drivers/usb/gadget/imx_udc.c         | 1 +
 drivers/usb/gadget/omap_udc.c        | 1 +
 drivers/usb/gadget/pxa25x_udc.c      | 1 +
 drivers/usb/gadget/pxa27x_udc.c      | 1 +
 drivers/usb/host/isp1362-hcd.c       | 1 +
 drivers/usb/host/sl811-hcd.c         | 1 +
 drivers/video/udlfb.c                | 1 +
 fs/btrfs/extent_io.c                 | 1 +
 fs/dcache.c                          | 1 +
 fs/logfs/dev_bdev.c                  | 1 +
 include/asm-generic/xor.h            | 2 +-
 kernel/rcutiny.c                     | 1 +
 kernel/rcutree.c                     | 1 +
 mm/page_alloc.c                      | 1 +
 mm/prio_tree.c                       | 1 +
 mm/slab.c                            | 1 +
 mm/vmscan.c                          | 1 +
 net/core/dst.c                       | 1 +
 net/core/pktgen.c                    | 1 +
 net/core/skbuff.c                    | 1 +
 tools/perf/util/include/linux/list.h | 2 +-
 35 files changed, 32 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 4ce8d1358fee..c04dd576f333 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -37,6 +37,7 @@
 #include <linux/crash_dump.h>
 #include <linux/iommu-helper.h>
 #include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
 
 #include <asm/delay.h>		/* ia64_get_itc() */
 #include <asm/io.h>
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 0799fea4c588..20b359376128 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -10,6 +10,7 @@
 #include <linux/interrupt.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
+#include <linux/prefetch.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index ae5189ab0049..f73daa6f3970 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
+#include <linux/prefetch.h>
 #include <asm/sstep.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c
index 14726eef1ce0..f0907995b4c9 100644
--- a/arch/sh/kernel/cpu/sh4/sq.c
+++ b/arch/sh/kernel/cpu/sh4/sq.c
@@ -20,6 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <linux/io.h>
+#include <linux/prefetch.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
 #include <cpu/sq.h>
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 99f0ad753f32..99ddd148a760 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -6,7 +6,6 @@
 #include <linux/errno.h>
 #include <linux/compiler.h>
 #include <linux/thread_info.h>
-#include <linux/prefetch.h>
 #include <linux/string.h>
 #include <asm/asm.h>
 #include <asm/page.h>
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 088d09fb1615..566e803cc602 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -6,7 +6,6 @@
  */
 #include <linux/errno.h>
 #include <linux/thread_info.h>
-#include <linux/prefetch.h>
 #include <linux/string.h>
 #include <asm/asm.h>
 #include <asm/page.h>
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 316708d5af92..1c66d30971ad 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -6,7 +6,6 @@
  */
 #include <linux/compiler.h>
 #include <linux/errno.h>
-#include <linux/prefetch.h>
 #include <linux/lockdep.h>
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 20e3f8702d1e..bcb394dfbb35 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -12,6 +12,7 @@
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
 #include <linux/perf_event.h>		/* perf_sw_event		*/
 #include <linux/hugetlb.h>		/* hstate_index_to_shift	*/
+#include <linux/prefetch.h>		/* prefetchw			*/
 
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index 38657cdaf54d..c4acac74725c 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -33,6 +33,7 @@
 #include <linux/io.h>
 #include <linux/uaccess.h>
 #include <linux/security.h>
+#include <linux/prefetch.h>
 #include <asm/pgtable.h>
 #include "gru.h"
 #include "grutables.h"
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index f8538bbd0bfa..ae16c8cb4f3e 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -28,6 +28,7 @@
 #include <linux/device.h>
 #include <linux/list.h>
 #include <linux/err.h>
+#include <linux/prefetch.h>
 #include <asm/uv/uv_hub.h>
 #include "gru.h"
 #include "grutables.h"
diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c
index ee648fe5d96f..01560bb67a7a 100644
--- a/drivers/net/acenic.c
+++ b/drivers/net/acenic.c
@@ -68,6 +68,7 @@
 #include <linux/sockios.h>
 #include <linux/firmware.h>
 #include <linux/slab.h>
+#include <linux/prefetch.h>
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 #include <linux/if_vlan.h>
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index cf79cf759e13..2c60435f2beb 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -41,6 +41,7 @@
 #include <linux/memory.h>
 #include <asm/kexec.h>
 #include <linux/mutex.h>
+#include <linux/prefetch.h>
 
 #include <net/ip.h>
 
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index c93ef207b0b4..c0f0ac7c1cdb 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/statfs.h>
 #include <linux/writeback.h>
+#include <linux/prefetch.h>
 
 #include "netfs.h"
 
diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c
index 48a760220baf..bf6e11c758d5 100644
--- a/drivers/usb/gadget/goku_udc.c
+++ b/drivers/usb/gadget/goku_udc.c
@@ -38,6 +38,7 @@
 #include <linux/device.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
+#include <linux/prefetch.h>
 
 #include <asm/byteorder.h>
 #include <asm/io.h>
diff --git a/drivers/usb/gadget/imx_udc.c b/drivers/usb/gadget/imx_udc.c
index 5408186afc35..ade40066decf 100644
--- a/drivers/usb/gadget/imx_udc.c
+++ b/drivers/usb/gadget/imx_udc.c
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 #include <linux/timer.h>
 #include <linux/slab.h>
+#include <linux/prefetch.h>
 
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index cb5cd422f3f5..82fd24935332 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -44,6 +44,7 @@
 #include <linux/usb/otg.h>
 #include <linux/dma-mapping.h>
 #include <linux/clk.h>
+#include <linux/prefetch.h>
 
 #include <asm/byteorder.h>
 #include <asm/io.h>
diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c
index 444b60aa15e9..365c02fc25fc 100644
--- a/drivers/usb/gadget/pxa25x_udc.c
+++ b/drivers/usb/gadget/pxa25x_udc.c
@@ -46,6 +46,7 @@
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
 #include <linux/io.h>
+#include <linux/prefetch.h>
 
 #include <asm/byteorder.h>
 #include <asm/dma.h>
diff --git a/drivers/usb/gadget/pxa27x_udc.c b/drivers/usb/gadget/pxa27x_udc.c
index 78a39a41547d..57607696735c 100644
--- a/drivers/usb/gadget/pxa27x_udc.c
+++ b/drivers/usb/gadget/pxa27x_udc.c
@@ -32,6 +32,7 @@
 #include <linux/irq.h>
 #include <linux/gpio.h>
 #include <linux/slab.h>
+#include <linux/prefetch.h>
 
 #include <asm/byteorder.h>
 #include <mach/hardware.h>
diff --git a/drivers/usb/host/isp1362-hcd.c b/drivers/usb/host/isp1362-hcd.c
index f97570a847ca..9c37dad3e816 100644
--- a/drivers/usb/host/isp1362-hcd.c
+++ b/drivers/usb/host/isp1362-hcd.c
@@ -81,6 +81,7 @@
 #include <linux/pm.h>
 #include <linux/io.h>
 #include <linux/bitmap.h>
+#include <linux/prefetch.h>
 
 #include <asm/irq.h>
 #include <asm/system.h>
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index 18b7099a8125..fafccc2fd331 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -47,6 +47,7 @@
 #include <linux/usb/sl811.h>
 #include <linux/usb/hcd.h>
 #include <linux/platform_device.h>
+#include <linux/prefetch.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c
index 68041d9dc260..695066b5b2e6 100644
--- a/drivers/video/udlfb.c
+++ b/drivers/video/udlfb.c
@@ -27,6 +27,7 @@
 #include <linux/fb.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/prefetch.h>
 #include <linux/delay.h>
 #include <video/udlfb.h>
 #include "edid.h"
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ba41da59e31b..96fcfa522dab 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -10,6 +10,7 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
+#include <linux/prefetch.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "compat.h"
diff --git a/fs/dcache.c b/fs/dcache.c
index 22a0ef41bad1..18b2a1f10ed8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,6 +35,7 @@
 #include <linux/hardirq.h>
 #include <linux/bit_spinlock.h>
 #include <linux/rculist_bl.h>
+#include <linux/prefetch.h>
 #include "internal.h"
 
 /*
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 1adc8d455f0e..df0de27c2733 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -10,6 +10,7 @@
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/gfp.h>
+#include <linux/prefetch.h>
 
 #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
 
diff --git a/include/asm-generic/xor.h b/include/asm-generic/xor.h
index aaab875e1a35..6028fb862254 100644
--- a/include/asm-generic/xor.h
+++ b/include/asm-generic/xor.h
@@ -13,7 +13,7 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <asm/processor.h>
+#include <linux/prefetch.h>
 
 static void
 xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 421abfd3641d..7bbac7d0f5ab 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/time.h>
 #include <linux/cpu.h>
+#include <linux/prefetch.h>
 
 /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */
 static struct task_struct *rcu_kthread_task;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e486f7c3ffb8..f07d2f03181a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -49,6 +49,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/wait.h>
 #include <linux/kthread.h>
+#include <linux/prefetch.h>
 
 #include "rcutree.h"
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3f8bce264df6..d49df7840541 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -54,6 +54,7 @@
 #include <trace/events/kmem.h>
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
+#include <linux/prefetch.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
diff --git a/mm/prio_tree.c b/mm/prio_tree.c
index 603ae98d9694..799dcfd7cd8c 100644
--- a/mm/prio_tree.c
+++ b/mm/prio_tree.c
@@ -13,6 +13,7 @@
 
 #include <linux/mm.h>
 #include <linux/prio_tree.h>
+#include <linux/prefetch.h>
 
 /*
  * See lib/prio_tree.c for details on the general radix priority search tree
diff --git a/mm/slab.c b/mm/slab.c
index 46a9c163a92f..bcfa4987c8ae 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -115,6 +115,7 @@
 #include	<linux/debugobjects.h>
 #include	<linux/kmemcheck.h>
 #include	<linux/memory.h>
+#include	<linux/prefetch.h>
 
 #include	<asm/cacheflush.h>
 #include	<asm/tlbflush.h>
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8bfd45050a61..c9177202c8ce 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -42,6 +42,7 @@
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
 #include <linux/oom.h>
+#include <linux/prefetch.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
diff --git a/net/core/dst.c b/net/core/dst.c
index 91104d35de7d..0a3920bf3613 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -19,6 +19,7 @@
 #include <linux/types.h>
 #include <net/net_namespace.h>
 #include <linux/sched.h>
+#include <linux/prefetch.h>
 
 #include <net/dst.h>
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index aeeece72b72f..6ed9e27d8202 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -156,6 +156,7 @@
 #include <linux/wait.h>
 #include <linux/etherdevice.h>
 #include <linux/kthread.h>
+#include <linux/prefetch.h>
 #include <net/net_namespace.h>
 #include <net/checksum.h>
 #include <net/ipv6.h>
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7ebeed0a877c..960ea899c864 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -57,6 +57,7 @@
 #include <linux/init.h>
 #include <linux/scatterlist.h>
 #include <linux/errqueue.h>
+#include <linux/prefetch.h>
 
 #include <net/protocol.h>
 #include <net/dst.h>
diff --git a/tools/perf/util/include/linux/list.h b/tools/perf/util/include/linux/list.h
index 356c7e467b83..99358d61e9a5 100644
--- a/tools/perf/util/include/linux/list.h
+++ b/tools/perf/util/include/linux/list.h
@@ -23,5 +23,5 @@ static inline void list_del_range(struct list_head *begin,
  * @head: the head for your list.
  */
 #define list_for_each_from(pos, head) \
-	for (; prefetch(pos->next), pos != (head); pos = pos->next)
+	for (; pos != (head); pos = pos->next)
 #endif
-- 
cgit v1.2.3


From 6de06f313a65d0ecabf055e708d082002b568866 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@linux.vnet.ibm.com>
Date: Fri, 20 May 2011 16:22:25 -0400
Subject: powerpc: Fix 32-bit SMP build

Commit 69e3cea8d5fd526 ("powerpc/smp: Make start_secondary_resume
available to all CPU variants") introduced start_secondary_resume to
misc_32.S, however it uses a 64-bit instruction which is not valid on
32-bit platforms.  Use 'stw' instead.

Reported-by: Richard Cochran <richardcochran@gmail.com>
Tested-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kernel/misc_32.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 402560e957bd..998a10028608 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -700,7 +700,7 @@ _GLOBAL(start_secondary_resume)
 	rlwinm	r1,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
 	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
 	li	r3,0
-	std	r3,0(r1)		/* Zero the stack frame pointer	*/
+	stw	r3,0(r1)		/* Zero the stack frame pointer	*/
 	bl	start_secondary
 	b	.
 #endif /* CONFIG_SMP */
-- 
cgit v1.2.3