summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig15
-rw-r--r--arch/powerpc/kernel/asm-offsets.c3
-rw-r--r--arch/powerpc/kernel/entry_64.S7
-rw-r--r--arch/powerpc/kernel/head_64.S9
-rw-r--r--arch/powerpc/kernel/irq.c30
-rw-r--r--arch/powerpc/kernel/process.c7
-rw-r--r--arch/powerpc/kernel/smp.c4
-rw-r--r--arch/powerpc/kernel/time.c236
8 files changed, 297 insertions, 14 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index fb0dcb994b84..d112aed2999b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -250,6 +250,21 @@ config PPC_STD_MMU_32
def_bool y
depends on PPC_STD_MMU && PPC32
+config VIRT_CPU_ACCOUNTING
+ bool "Deterministic task and CPU time accounting"
+ depends on PPC64
+ default y
+ help
+ Select this option to enable more accurate task and CPU time
+ accounting. This is done by reading a CPU counter on each
+ kernel entry and exit and on transitions within the kernel
+ between system, softirq and hardirq state, so there is a
+ small performance impact. This also enables accounting of
+ stolen time on logically-partitioned systems running on
+ IBM POWER5-based machines.
+
+ If in doubt, say Y here.
+
config SMP
depends on PPC_STD_MMU
bool "Symmetric multi-processing support"
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 840aad43a98b..18810ac55bcc 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -137,6 +137,9 @@ int main(void)
DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr));
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
+ DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
+ DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
+ DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 79a0c910f0d8..8f606c1889fa 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -61,6 +61,7 @@ system_call_common:
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
+ ACCOUNT_CPU_USER_ENTRY(r10, r11)
std r2,GPR2(r1)
std r3,GPR3(r1)
std r4,GPR4(r1)
@@ -168,8 +169,9 @@ syscall_error_cont:
stdcx. r0,0,r1 /* to clear the reservation */
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
- beq- 1f /* only restore r13 if */
- ld r13,GPR13(r1) /* returning to usermode */
+ beq- 1f
+ ACCOUNT_CPU_USER_EXIT(r11, r12)
+ ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
1: ld r2,GPR2(r1)
li r12,MSR_RI
andc r11,r10,r12
@@ -536,6 +538,7 @@ restore:
* userspace
*/
beq 1f
+ ACCOUNT_CPU_USER_EXIT(r3, r4)
REST_GPR(13, r1)
1:
ld r3,_CTR(r1)
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 2b21ec499285..be3ae7733577 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -277,6 +277,7 @@ exception_marker:
std r10,0(r1); /* make stack chain pointer */ \
std r0,GPR0(r1); /* save r0 in stackframe */ \
std r10,GPR1(r1); /* save r1 in stackframe */ \
+ ACCOUNT_CPU_USER_ENTRY(r9, r10); \
std r2,GPR2(r1); /* save r2 in stackframe */ \
SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
@@ -844,6 +845,14 @@ fast_exception_return:
ld r11,_NIP(r1)
andi. r3,r12,MSR_RI /* check if RI is set */
beq- unrecov_fer
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ andi. r3,r12,MSR_PR
+ beq 2f
+ ACCOUNT_CPU_USER_EXIT(r3, r4)
+2:
+#endif
+
ld r3,_CCR(r1)
ld r4,_LINK(r1)
ld r5,_CTR(r1)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index edb2b00edbd2..24dc8117b822 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -369,6 +369,7 @@ unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
return NO_IRQ;
}
+#endif /* CONFIG_PPC64 */
#ifdef CONFIG_IRQSTACKS
struct thread_info *softirq_ctx[NR_CPUS];
@@ -392,10 +393,24 @@ void irq_ctx_init(void)
}
}
+static inline void do_softirq_onstack(void)
+{
+ struct thread_info *curtp, *irqtp;
+
+ curtp = current_thread_info();
+ irqtp = softirq_ctx[smp_processor_id()];
+ irqtp->task = curtp->task;
+ call_do_softirq(irqtp);
+ irqtp->task = NULL;
+}
+
+#else
+#define do_softirq_onstack() __do_softirq()
+#endif /* CONFIG_IRQSTACKS */
+
void do_softirq(void)
{
unsigned long flags;
- struct thread_info *curtp, *irqtp;
if (in_interrupt())
return;
@@ -403,19 +418,18 @@ void do_softirq(void)
local_irq_save(flags);
if (local_softirq_pending()) {
- curtp = current_thread_info();
- irqtp = softirq_ctx[smp_processor_id()];
- irqtp->task = curtp->task;
- call_do_softirq(irqtp);
- irqtp->task = NULL;
+ account_system_vtime(current);
+ local_bh_disable();
+ do_softirq_onstack();
+ account_system_vtime(current);
+ __local_bh_enable();
}
local_irq_restore(flags);
}
EXPORT_SYMBOL(do_softirq);
-#endif /* CONFIG_IRQSTACKS */
-
+#ifdef CONFIG_PPC64
static int __init setup_noirqdistrib(char *str)
{
distribute_irqs = 0;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dd774c3c9302..1770a066c217 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -45,9 +45,9 @@
#include <asm/mmu.h>
#include <asm/prom.h>
#include <asm/machdep.h>
+#include <asm/time.h>
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
-#include <asm/time.h>
#endif
extern unsigned long _get_SP(void);
@@ -328,6 +328,11 @@ struct task_struct *__switch_to(struct task_struct *prev,
#endif
local_irq_save(flags);
+
+ account_system_vtime(current);
+ account_process_vtime(current);
+ calculate_steal_time();
+
last = _switch(old_thread, new_thread);
local_irq_restore(flags);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 13595a64f013..805eaedbc308 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -541,7 +541,7 @@ int __devinit start_secondary(void *unused)
smp_ops->take_timebase();
if (system_state > SYSTEM_BOOTING)
- per_cpu(last_jiffy, cpu) = get_tb();
+ snapshot_timebase();
spin_lock(&call_lock);
cpu_set(cpu, cpu_online_map);
@@ -573,6 +573,8 @@ void __init smp_cpus_done(unsigned int max_cpus)
set_cpus_allowed(current, old_mask);
+ snapshot_timebases();
+
dump_numa_cpu_topology();
}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 2a7ddc579379..0b34db28916f 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -51,6 +51,7 @@
#include <linux/percpu.h>
#include <linux/rtc.h>
#include <linux/jiffies.h>
+#include <linux/posix-timers.h>
#include <asm/io.h>
#include <asm/processor.h>
@@ -135,6 +136,220 @@ unsigned long tb_last_stamp;
*/
DEFINE_PER_CPU(unsigned long, last_jiffy);
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/*
+ * Factors for converting from cputime_t (timebase ticks) to
+ * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds).
+ * These are all stored as 0.64 fixed-point binary fractions.
+ */
+u64 __cputime_jiffies_factor;
+u64 __cputime_msec_factor;
+u64 __cputime_sec_factor;
+u64 __cputime_clockt_factor;
+
+static void calc_cputime_factors(void)
+{
+ struct div_result res;
+
+ div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
+ __cputime_jiffies_factor = res.result_low;
+ div128_by_32(1000, 0, tb_ticks_per_sec, &res);
+ __cputime_msec_factor = res.result_low;
+ div128_by_32(1, 0, tb_ticks_per_sec, &res);
+ __cputime_sec_factor = res.result_low;
+ div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
+ __cputime_clockt_factor = res.result_low;
+}
+
+/*
+ * Read the PURR on systems that have it, otherwise the timebase.
+ */
+static u64 read_purr(void)
+{
+ if (cpu_has_feature(CPU_FTR_PURR))
+ return mfspr(SPRN_PURR);
+ return mftb();
+}
+
+/*
+ * Account time for a transition between system, hard irq
+ * or soft irq state.
+ */
+void account_system_vtime(struct task_struct *tsk)
+{
+ u64 now, delta;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ now = read_purr();
+ delta = now - get_paca()->startpurr;
+ get_paca()->startpurr = now;
+ if (!in_interrupt()) {
+ delta += get_paca()->system_time;
+ get_paca()->system_time = 0;
+ }
+ account_system_time(tsk, 0, delta);
+ local_irq_restore(flags);
+}
+
+/*
+ * Transfer the user and system times accumulated in the paca
+ * by the exception entry and exit code to the generic process
+ * user and system time records.
+ * Must be called with interrupts disabled.
+ */
+void account_process_vtime(struct task_struct *tsk)
+{
+ cputime_t utime;
+
+ utime = get_paca()->user_time;
+ get_paca()->user_time = 0;
+ account_user_time(tsk, utime);
+}
+
+static void account_process_time(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ account_process_vtime(current);
+ run_local_timers();
+ if (rcu_pending(cpu))
+ rcu_check_callbacks(cpu, user_mode(regs));
+ scheduler_tick();
+ run_posix_cpu_timers(current);
+}
+
+#ifdef CONFIG_PPC_SPLPAR
+/*
+ * Stuff for accounting stolen time.
+ */
+struct cpu_purr_data {
+ int initialized; /* thread is running */
+ u64 tb0; /* timebase at origin time */
+ u64 purr0; /* PURR at origin time */
+ u64 tb; /* last TB value read */
+ u64 purr; /* last PURR value read */
+ u64 stolen; /* stolen time so far */
+ spinlock_t lock;
+};
+
+static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data);
+
+static void snapshot_tb_and_purr(void *data)
+{
+ struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data);
+
+ p->tb0 = mftb();
+ p->purr0 = mfspr(SPRN_PURR);
+ p->tb = p->tb0;
+ p->purr = 0;
+ wmb();
+ p->initialized = 1;
+}
+
+/*
+ * Called during boot when all cpus have come up.
+ */
+void snapshot_timebases(void)
+{
+ int cpu;
+
+ if (!cpu_has_feature(CPU_FTR_PURR))
+ return;
+ for_each_cpu(cpu)
+ spin_lock_init(&per_cpu(cpu_purr_data, cpu).lock);
+ on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1);
+}
+
+void calculate_steal_time(void)
+{
+ u64 tb, purr, t0;
+ s64 stolen;
+ struct cpu_purr_data *p0, *pme, *phim;
+ int cpu;
+
+ if (!cpu_has_feature(CPU_FTR_PURR))
+ return;
+ cpu = smp_processor_id();
+ pme = &per_cpu(cpu_purr_data, cpu);
+ if (!pme->initialized)
+ return; /* this can happen in early boot */
+ p0 = &per_cpu(cpu_purr_data, cpu & ~1);
+ phim = &per_cpu(cpu_purr_data, cpu ^ 1);
+ spin_lock(&p0->lock);
+ tb = mftb();
+ purr = mfspr(SPRN_PURR) - pme->purr0;
+ if (!phim->initialized || !cpu_online(cpu ^ 1)) {
+ stolen = (tb - pme->tb) - (purr - pme->purr);
+ } else {
+ t0 = pme->tb0;
+ if (phim->tb0 < t0)
+ t0 = phim->tb0;
+ stolen = phim->tb - t0 - phim->purr - purr - p0->stolen;
+ }
+ if (stolen > 0) {
+ account_steal_time(current, stolen);
+ p0->stolen += stolen;
+ }
+ pme->tb = tb;
+ pme->purr = purr;
+ spin_unlock(&p0->lock);
+}
+
+/*
+ * Must be called before the cpu is added to the online map when
+ * a cpu is being brought up at runtime.
+ */
+static void snapshot_purr(void)
+{
+ int cpu;
+ u64 purr;
+ struct cpu_purr_data *p0, *pme, *phim;
+ unsigned long flags;
+
+ if (!cpu_has_feature(CPU_FTR_PURR))
+ return;
+ cpu = smp_processor_id();
+ pme = &per_cpu(cpu_purr_data, cpu);
+ p0 = &per_cpu(cpu_purr_data, cpu & ~1);
+ phim = &per_cpu(cpu_purr_data, cpu ^ 1);
+ spin_lock_irqsave(&p0->lock, flags);
+ pme->tb = pme->tb0 = mftb();
+ purr = mfspr(SPRN_PURR);
+ if (!phim->initialized) {
+ pme->purr = 0;
+ pme->purr0 = purr;
+ } else {
+ /* set p->purr and p->purr0 for no change in p0->stolen */
+ pme->purr = phim->tb - phim->tb0 - phim->purr - p0->stolen;
+ pme->purr0 = purr - pme->purr;
+ }
+ pme->initialized = 1;
+ spin_unlock_irqrestore(&p0->lock, flags);
+}
+
+#endif /* CONFIG_PPC_SPLPAR */
+
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
+#define calc_cputime_factors()
+#define account_process_time(regs) update_process_times(user_mode(regs))
+#define calculate_steal_time() do { } while (0)
+#endif
+
+#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR))
+#define snapshot_purr() do { } while (0)
+#endif
+
+/*
+ * Called when a cpu comes up after the system has finished booting,
+ * i.e. as a result of a hotplug cpu action.
+ */
+void snapshot_timebase(void)
+{
+ __get_cpu_var(last_jiffy) = get_tb();
+ snapshot_purr();
+}
+
void __delay(unsigned long loops)
{
unsigned long start;
@@ -382,6 +597,7 @@ static void iSeries_tb_recal(void)
new_tb_ticks_per_jiffy, sign, tick_diff );
tb_ticks_per_jiffy = new_tb_ticks_per_jiffy;
tb_ticks_per_sec = new_tb_ticks_per_sec;
+ calc_cputime_factors();
div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres );
do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
tb_to_xs = divres.result_low;
@@ -430,6 +646,7 @@ void timer_interrupt(struct pt_regs * regs)
irq_enter();
profile_tick(CPU_PROFILING, regs);
+ calculate_steal_time();
#ifdef CONFIG_PPC_ISERIES
get_lppaca()->int_dword.fields.decr_int = 0;
@@ -451,7 +668,7 @@ void timer_interrupt(struct pt_regs * regs)
* is the case.
*/
if (!cpu_is_offline(cpu))
- update_process_times(user_mode(regs));
+ account_process_time(regs);
/*
* No need to check whether cpu is offline here; boot_cpuid
@@ -508,13 +725,27 @@ void wakeup_decrementer(void)
void __init smp_space_timers(unsigned int max_cpus)
{
int i;
+ unsigned long half = tb_ticks_per_jiffy / 2;
unsigned long offset = tb_ticks_per_jiffy / max_cpus;
unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid);
/* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */
previous_tb -= tb_ticks_per_jiffy;
+ /*
+ * The stolen time calculation for POWER5 shared-processor LPAR
+ * systems works better if the two threads' timebase interrupts
+ * are staggered by half a jiffy with respect to each other.
+ */
for_each_cpu(i) {
- if (i != boot_cpuid) {
+ if (i == boot_cpuid)
+ continue;
+ if (i == (boot_cpuid ^ 1))
+ per_cpu(last_jiffy, i) =
+ per_cpu(last_jiffy, boot_cpuid) - half;
+ else if (i & 1)
+ per_cpu(last_jiffy, i) =
+ per_cpu(last_jiffy, i ^ 1) + half;
+ else {
previous_tb += offset;
per_cpu(last_jiffy, i) = previous_tb;
}
@@ -706,6 +937,7 @@ void __init time_init(void)
tb_ticks_per_sec = ppc_tb_freq;
tb_ticks_per_usec = ppc_tb_freq / 1000000;
tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
+ calc_cputime_factors();
/*
* Calculate the length of each tick in ns. It will not be