summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-04-03 00:47:29 +0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-03 00:47:29 +0400
commite6d9bfc63813882c896bf7ea6f6b14ca7b50b755 (patch)
tree68decf00726f6f415cee04a62e68cc60b37f380b /arch/powerpc/kernel
parent235c7b9feb8779c7c289ed614324baebf3651bf9 (diff)
parent0888839c5b62c44a55ac9d28acc273ba663c65ea (diff)
downloadlinux-e6d9bfc63813882c896bf7ea6f6b14ca7b50b755.tar.xz
Merge branch 'powernv-cpuidle' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc non-virtualized cpuidle from Ben Herrenschmidt: "This is the branch I mentioned in my other pull request which contains our improved cpuidle support for the "powernv" platform (non-virtualized). It adds support for the "fast sleep" feature of the processor which provides higher power savings than our usual "nap" mode but at the cost of losing the timers while asleep, and thus exploits the new timer broadcast framework to work around that limitation. It's based on a tip timer tree that you seem to have already merged" * 'powernv-cpuidle' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: cpuidle/powernv: Parse device tree to setup idle states cpuidle/powernv: Add "Fast-Sleep" CPU idle state powerpc/powernv: Add OPAL call to resync timebase on wakeup powerpc/powernv: Add context management for Fast Sleep powerpc: Split timer_interrupt() into timer handling and interrupt handling routines powerpc: Implement tick broadcast IPI as a fixed IPI message powerpc: Free up the slot of PPC_MSG_CALL_FUNC_SINGLE IPI message
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S10
-rw-r--r--arch/powerpc/kernel/idle_power7.S90
-rw-r--r--arch/powerpc/kernel/smp.c25
-rw-r--r--arch/powerpc/kernel/time.c90
4 files changed, 151 insertions, 64 deletions
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4c34c3c827ad..d9c650ec7dac 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -121,9 +121,10 @@ BEGIN_FTR_SECTION
cmpwi cr1,r13,2
/* Total loss of HV state is fatal, we could try to use the
* PIR to locate a PACA, then use an emergency stack etc...
- * but for now, let's just stay stuck here
+ * OPAL v3 based powernv platforms have new idle states
+ * which fall in this catagory.
*/
- bgt cr1,.
+ bgt cr1,8f
GET_PACA(r13)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -141,6 +142,11 @@ BEGIN_FTR_SECTION
beq cr1,2f
b .power7_wakeup_noloss
2: b .power7_wakeup_loss
+
+ /* Fast Sleep wakeup on PowerNV */
+8: GET_PACA(r13)
+ b .power7_wakeup_tb_loss
+
9:
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif /* CONFIG_PPC_P7_NAP */
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 3fdef0f0c67f..c3ab86975614 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -17,20 +17,31 @@
#include <asm/ppc-opcode.h>
#include <asm/hw_irq.h>
#include <asm/kvm_book3s_asm.h>
+#include <asm/opal.h>
#undef DEBUG
- .text
+/* Idle state entry routines */
-_GLOBAL(power7_idle)
- /* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDRBASE(r3,powersave_nap)
- lwz r4,ADDROFF(powersave_nap)(r3)
- cmpwi 0,r4,0
- beqlr
- /* fall through */
+#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
+ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
+ std r0,0(r1); \
+ ptesync; \
+ ld r0,0(r1); \
+1: cmp cr0,r0,r0; \
+ bne 1b; \
+ IDLE_INST; \
+ b .
-_GLOBAL(power7_nap)
+ .text
+
+/*
+ * Pass requested state in r3:
+ * 0 - nap
+ * 1 - sleep
+ */
+_GLOBAL(power7_powersave_common)
+ /* Use r3 to pass state nap/sleep/winkle */
/* NAP is a state loss, we create a regs frame on the
* stack, fill it up with the state we care about and
* stick a pointer to it in PACAR1. We really only
@@ -79,8 +90,8 @@ _GLOBAL(power7_nap)
/* Continue saving state */
SAVE_GPR(2, r1)
SAVE_NVGPRS(r1)
- mfcr r3
- std r3,_CCR(r1)
+ mfcr r4
+ std r4,_CCR(r1)
std r9,_MSR(r1)
std r1,PACAR1(r13)
@@ -90,15 +101,56 @@ _GLOBAL(power7_enter_nap_mode)
li r4,KVM_HWTHREAD_IN_NAP
stb r4,HSTATE_HWTHREAD_STATE(r13)
#endif
+ cmpwi cr0,r3,1
+ beq 2f
+ IDLE_STATE_ENTER_SEQ(PPC_NAP)
+ /* No return */
+2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+ /* No return */
- /* Magic NAP mode enter sequence */
- std r0,0(r1)
- ptesync
- ld r0,0(r1)
-1: cmp cr0,r0,r0
- bne 1b
- PPC_NAP
- b .
+_GLOBAL(power7_idle)
+ /* Now check if user or arch enabled NAP mode */
+ LOAD_REG_ADDRBASE(r3,powersave_nap)
+ lwz r4,ADDROFF(powersave_nap)(r3)
+ cmpwi 0,r4,0
+ beqlr
+ /* fall through */
+
+_GLOBAL(power7_nap)
+ li r3,0
+ b power7_powersave_common
+ /* No return */
+
+_GLOBAL(power7_sleep)
+ li r3,1
+ b power7_powersave_common
+ /* No return */
+
+_GLOBAL(power7_wakeup_tb_loss)
+ ld r2,PACATOC(r13);
+ ld r1,PACAR1(r13)
+
+ /* Time base re-sync */
+ li r0,OPAL_RESYNC_TIMEBASE
+ LOAD_REG_ADDR(r11,opal);
+ ld r12,8(r11);
+ ld r2,0(r11);
+ mtctr r12
+ bctrl
+
+ /* TODO: Check r3 for failure */
+
+ REST_NVGPRS(r1)
+ REST_GPR(2, r1)
+ ld r3,_CCR(r1)
+ ld r4,_MSR(r1)
+ ld r5,_NIP(r1)
+ addi r1,r1,INT_FRAME_SIZE
+ mtcr r3
+ mfspr r3,SPRN_SRR1 /* Return SRR1 */
+ mtspr SPRN_SRR1,r4
+ mtspr SPRN_SRR0,r5
+ rfid
_GLOBAL(power7_wakeup_loss)
ld r1,PACAR1(r13)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ac2621af3154..e2a4232c5871 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -35,6 +35,7 @@
#include <asm/ptrace.h>
#include <linux/atomic.h>
#include <asm/irq.h>
+#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prom.h>
@@ -145,9 +146,9 @@ static irqreturn_t reschedule_action(int irq, void *data)
return IRQ_HANDLED;
}
-static irqreturn_t call_function_single_action(int irq, void *data)
+static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
{
- generic_smp_call_function_single_interrupt();
+ tick_broadcast_ipi_handler();
return IRQ_HANDLED;
}
@@ -168,14 +169,14 @@ static irqreturn_t debug_ipi_action(int irq, void *data)
static irq_handler_t smp_ipi_action[] = {
[PPC_MSG_CALL_FUNCTION] = call_function_action,
[PPC_MSG_RESCHEDULE] = reschedule_action,
- [PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
+ [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
[PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
};
const char *smp_ipi_name[] = {
[PPC_MSG_CALL_FUNCTION] = "ipi call function",
[PPC_MSG_RESCHEDULE] = "ipi reschedule",
- [PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
+ [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
[PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
};
@@ -251,8 +252,8 @@ irqreturn_t smp_ipi_demux(void)
generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
scheduler_ipi();
- if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNC_SINGLE))
- generic_smp_call_function_single_interrupt();
+ if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
+ tick_broadcast_ipi_handler();
if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK))
debug_ipi_action(0, NULL);
} while (info->messages);
@@ -280,7 +281,7 @@ EXPORT_SYMBOL_GPL(smp_send_reschedule);
void arch_send_call_function_single_ipi(int cpu)
{
- do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
+ do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -291,6 +292,16 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+void tick_broadcast(const struct cpumask *mask)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, mask)
+ do_message_pass(cpu, PPC_MSG_TICK_BROADCAST);
+}
+#endif
+
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
void smp_send_debugger_break(void)
{
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b3dab20acf34..122a580f7322 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -42,6 +42,7 @@
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
+#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
@@ -106,7 +107,7 @@ struct clock_event_device decrementer_clockevent = {
.irq = 0,
.set_next_event = decrementer_set_next_event,
.set_mode = decrementer_set_mode,
- .features = CLOCK_EVT_FEAT_ONESHOT,
+ .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP,
};
EXPORT_SYMBOL(decrementer_clockevent);
@@ -478,6 +479,47 @@ void arch_irq_work_raise(void)
#endif /* CONFIG_IRQ_WORK */
+void __timer_interrupt(void)
+{
+ struct pt_regs *regs = get_irq_regs();
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+ struct clock_event_device *evt = &__get_cpu_var(decrementers);
+ u64 now;
+
+ trace_timer_interrupt_entry(regs);
+
+ if (test_irq_work_pending()) {
+ clear_irq_work_pending();
+ irq_work_run();
+ }
+
+ now = get_tb_or_rtc();
+ if (now >= *next_tb) {
+ *next_tb = ~(u64)0;
+ if (evt->event_handler)
+ evt->event_handler(evt);
+ __get_cpu_var(irq_stat).timer_irqs_event++;
+ } else {
+ now = *next_tb - now;
+ if (now <= DECREMENTER_MAX)
+ set_dec((int)now);
+ /* We may have raced with new irq work */
+ if (test_irq_work_pending())
+ set_dec(1);
+ __get_cpu_var(irq_stat).timer_irqs_others++;
+ }
+
+#ifdef CONFIG_PPC64
+ /* collect purr register values often, for accurate calculations */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
+ cu->current_tb = mfspr(SPRN_PURR);
+ }
+#endif
+
+ trace_timer_interrupt_exit(regs);
+}
+
/*
* timer_interrupt - gets called when the decrementer overflows,
* with interrupts disabled.
@@ -486,8 +528,6 @@ void timer_interrupt(struct pt_regs * regs)
{
struct pt_regs *old_regs;
u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
- struct clock_event_device *evt = &__get_cpu_var(decrementers);
- u64 now;
/* Ensure a positive value is written to the decrementer, or else
* some CPUs will continue to take decrementer exceptions.
@@ -519,39 +559,7 @@ void timer_interrupt(struct pt_regs * regs)
old_regs = set_irq_regs(regs);
irq_enter();
- trace_timer_interrupt_entry(regs);
-
- if (test_irq_work_pending()) {
- clear_irq_work_pending();
- irq_work_run();
- }
-
- now = get_tb_or_rtc();
- if (now >= *next_tb) {
- *next_tb = ~(u64)0;
- if (evt->event_handler)
- evt->event_handler(evt);
- __get_cpu_var(irq_stat).timer_irqs_event++;
- } else {
- now = *next_tb - now;
- if (now <= DECREMENTER_MAX)
- set_dec((int)now);
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
- __get_cpu_var(irq_stat).timer_irqs_others++;
- }
-
-#ifdef CONFIG_PPC64
- /* collect purr register values often, for accurate calculations */
- if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
- struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
- cu->current_tb = mfspr(SPRN_PURR);
- }
-#endif
-
- trace_timer_interrupt_exit(regs);
-
+ __timer_interrupt();
irq_exit();
set_irq_regs(old_regs);
}
@@ -825,6 +833,15 @@ static void decrementer_set_mode(enum clock_event_mode mode,
decrementer_set_next_event(DECREMENTER_MAX, dev);
}
+/* Interrupt handler for the timer broadcast IPI */
+void tick_broadcast_ipi_handler(void)
+{
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+ *next_tb = get_tb_or_rtc();
+ __timer_interrupt();
+}
+
static void register_decrementer_clockevent(int cpu)
{
struct clock_event_device *dec = &per_cpu(decrementers, cpu);
@@ -928,6 +945,7 @@ void __init time_init(void)
clocksource_init();
init_decrementer_clockevent();
+ tick_setup_hrtimer_broadcast();
}