diff options
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- | arch/powerpc/Kconfig | 15 | ||||
-rw-r--r-- | arch/powerpc/kernel/asm-offsets.c | 3 | ||||
-rw-r--r-- | arch/powerpc/kernel/entry_64.S | 7 | ||||
-rw-r--r-- | arch/powerpc/kernel/head_64.S | 9 | ||||
-rw-r--r-- | arch/powerpc/kernel/irq.c | 30 | ||||
-rw-r--r-- | arch/powerpc/kernel/process.c | 7 | ||||
-rw-r--r-- | arch/powerpc/kernel/smp.c | 4 | ||||
-rw-r--r-- | arch/powerpc/kernel/time.c | 236 |
8 files changed, 297 insertions, 14 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fb0dcb994b84..d112aed2999b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -250,6 +250,21 @@ config PPC_STD_MMU_32 def_bool y depends on PPC_STD_MMU && PPC32 +config VIRT_CPU_ACCOUNTING + bool "Deterministic task and CPU time accounting" + depends on PPC64 + default y + help + Select this option to enable more accurate task and CPU time + accounting. This is done by reading a CPU counter on each + kernel entry and exit and on transitions within the kernel + between system, softirq and hardirq state, so there is a + small performance impact. This also enables accounting of + stolen time on logically-partitioned systems running on + IBM POWER5-based machines. + + If in doubt, say Y here. + config SMP depends on PPC_STD_MMU bool "Symmetric multi-processing support" diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 840aad43a98b..18810ac55bcc 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -137,6 +137,9 @@ int main(void) DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr)); DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); + DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr)); + DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); + DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 79a0c910f0d8..8f606c1889fa 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -61,6 +61,7 @@ system_call_common: std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) + ACCOUNT_CPU_USER_ENTRY(r10, r11) std r2,GPR2(r1) std r3,GPR3(r1) std r4,GPR4(r1) @@ -168,8 +169,9 @@ syscall_error_cont: stdcx. r0,0,r1 /* to clear the reservation */ andi. r6,r8,MSR_PR ld r4,_LINK(r1) - beq- 1f /* only restore r13 if */ - ld r13,GPR13(r1) /* returning to usermode */ + beq- 1f + ACCOUNT_CPU_USER_EXIT(r11, r12) + ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ 1: ld r2,GPR2(r1) li r12,MSR_RI andc r11,r10,r12 @@ -536,6 +538,7 @@ restore: * userspace */ beq 1f + ACCOUNT_CPU_USER_EXIT(r3, r4) REST_GPR(13, r1) 1: ld r3,_CTR(r1) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 2b21ec499285..be3ae7733577 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -277,6 +277,7 @@ exception_marker: std r10,0(r1); /* make stack chain pointer */ \ std r0,GPR0(r1); /* save r0 in stackframe */ \ std r10,GPR1(r1); /* save r1 in stackframe */ \ + ACCOUNT_CPU_USER_ENTRY(r9, r10); \ std r2,GPR2(r1); /* save r2 in stackframe */ \ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ @@ -844,6 +845,14 @@ fast_exception_return: ld r11,_NIP(r1) andi. r3,r12,MSR_RI /* check if RI is set */ beq- unrecov_fer + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + andi. r3,r12,MSR_PR + beq 2f + ACCOUNT_CPU_USER_EXIT(r3, r4) +2: +#endif + ld r3,_CCR(r1) ld r4,_LINK(r1) ld r5,_CTR(r1) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index edb2b00edbd2..24dc8117b822 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -369,6 +369,7 @@ unsigned int real_irq_to_virt_slowpath(unsigned int real_irq) return NO_IRQ; } +#endif /* CONFIG_PPC64 */ #ifdef CONFIG_IRQSTACKS struct thread_info *softirq_ctx[NR_CPUS]; @@ -392,10 +393,24 @@ void irq_ctx_init(void) } } +static inline void do_softirq_onstack(void) +{ + struct thread_info *curtp, *irqtp; + + curtp = current_thread_info(); + irqtp = softirq_ctx[smp_processor_id()]; + irqtp->task = curtp->task; + call_do_softirq(irqtp); + irqtp->task = NULL; +} + +#else +#define do_softirq_onstack() __do_softirq() +#endif /* CONFIG_IRQSTACKS */ + void do_softirq(void) { unsigned long flags; - struct thread_info *curtp, *irqtp; if (in_interrupt()) return; @@ -403,19 +418,18 @@ void do_softirq(void) local_irq_save(flags); if (local_softirq_pending()) { - curtp = current_thread_info(); - irqtp = softirq_ctx[smp_processor_id()]; - irqtp->task = curtp->task; - call_do_softirq(irqtp); - irqtp->task = NULL; + account_system_vtime(current); + local_bh_disable(); + do_softirq_onstack(); + account_system_vtime(current); + __local_bh_enable(); } local_irq_restore(flags); } EXPORT_SYMBOL(do_softirq); -#endif /* CONFIG_IRQSTACKS */ - +#ifdef CONFIG_PPC64 static int __init setup_noirqdistrib(char *str) { distribute_irqs = 0; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index dd774c3c9302..1770a066c217 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -45,9 +45,9 @@ #include <asm/mmu.h> #include <asm/prom.h> #include <asm/machdep.h> +#include <asm/time.h> #ifdef CONFIG_PPC64 #include <asm/firmware.h> -#include <asm/time.h> #endif extern unsigned long _get_SP(void); @@ -328,6 +328,11 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif local_irq_save(flags); + + account_system_vtime(current); + account_process_vtime(current); + calculate_steal_time(); + last = _switch(old_thread, new_thread); local_irq_restore(flags); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 13595a64f013..805eaedbc308 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -541,7 +541,7 @@ int __devinit start_secondary(void *unused) smp_ops->take_timebase(); if (system_state > SYSTEM_BOOTING) - per_cpu(last_jiffy, cpu) = get_tb(); + snapshot_timebase(); spin_lock(&call_lock); cpu_set(cpu, cpu_online_map); @@ -573,6 +573,8 @@ void __init smp_cpus_done(unsigned int max_cpus) set_cpus_allowed(current, old_mask); + snapshot_timebases(); + dump_numa_cpu_topology(); } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 2a7ddc579379..0b34db28916f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -51,6 +51,7 @@ #include <linux/percpu.h> #include <linux/rtc.h> #include <linux/jiffies.h> +#include <linux/posix-timers.h> #include <asm/io.h> #include <asm/processor.h> @@ -135,6 +136,220 @@ unsigned long tb_last_stamp; */ DEFINE_PER_CPU(unsigned long, last_jiffy); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +/* + * Factors for converting from cputime_t (timebase ticks) to + * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds). + * These are all stored as 0.64 fixed-point binary fractions. + */ +u64 __cputime_jiffies_factor; +u64 __cputime_msec_factor; +u64 __cputime_sec_factor; +u64 __cputime_clockt_factor; + +static void calc_cputime_factors(void) +{ + struct div_result res; + + div128_by_32(HZ, 0, tb_ticks_per_sec, &res); + __cputime_jiffies_factor = res.result_low; + div128_by_32(1000, 0, tb_ticks_per_sec, &res); + __cputime_msec_factor = res.result_low; + div128_by_32(1, 0, tb_ticks_per_sec, &res); + __cputime_sec_factor = res.result_low; + div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); + __cputime_clockt_factor = res.result_low; +} + +/* + * Read the PURR on systems that have it, otherwise the timebase. + */ +static u64 read_purr(void) +{ + if (cpu_has_feature(CPU_FTR_PURR)) + return mfspr(SPRN_PURR); + return mftb(); +} + +/* + * Account time for a transition between system, hard irq + * or soft irq state. + */ +void account_system_vtime(struct task_struct *tsk) +{ + u64 now, delta; + unsigned long flags; + + local_irq_save(flags); + now = read_purr(); + delta = now - get_paca()->startpurr; + get_paca()->startpurr = now; + if (!in_interrupt()) { + delta += get_paca()->system_time; + get_paca()->system_time = 0; + } + account_system_time(tsk, 0, delta); + local_irq_restore(flags); +} + +/* + * Transfer the user and system times accumulated in the paca + * by the exception entry and exit code to the generic process + * user and system time records. + * Must be called with interrupts disabled. + */ +void account_process_vtime(struct task_struct *tsk) +{ + cputime_t utime; + + utime = get_paca()->user_time; + get_paca()->user_time = 0; + account_user_time(tsk, utime); +} + +static void account_process_time(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + account_process_vtime(current); + run_local_timers(); + if (rcu_pending(cpu)) + rcu_check_callbacks(cpu, user_mode(regs)); + scheduler_tick(); + run_posix_cpu_timers(current); +} + +#ifdef CONFIG_PPC_SPLPAR +/* + * Stuff for accounting stolen time. + */ +struct cpu_purr_data { + int initialized; /* thread is running */ + u64 tb0; /* timebase at origin time */ + u64 purr0; /* PURR at origin time */ + u64 tb; /* last TB value read */ + u64 purr; /* last PURR value read */ + u64 stolen; /* stolen time so far */ + spinlock_t lock; +}; + +static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data); + +static void snapshot_tb_and_purr(void *data) +{ + struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data); + + p->tb0 = mftb(); + p->purr0 = mfspr(SPRN_PURR); + p->tb = p->tb0; + p->purr = 0; + wmb(); + p->initialized = 1; +} + +/* + * Called during boot when all cpus have come up. + */ +void snapshot_timebases(void) +{ + int cpu; + + if (!cpu_has_feature(CPU_FTR_PURR)) + return; + for_each_cpu(cpu) + spin_lock_init(&per_cpu(cpu_purr_data, cpu).lock); + on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1); +} + +void calculate_steal_time(void) +{ + u64 tb, purr, t0; + s64 stolen; + struct cpu_purr_data *p0, *pme, *phim; + int cpu; + + if (!cpu_has_feature(CPU_FTR_PURR)) + return; + cpu = smp_processor_id(); + pme = &per_cpu(cpu_purr_data, cpu); + if (!pme->initialized) + return; /* this can happen in early boot */ + p0 = &per_cpu(cpu_purr_data, cpu & ~1); + phim = &per_cpu(cpu_purr_data, cpu ^ 1); + spin_lock(&p0->lock); + tb = mftb(); + purr = mfspr(SPRN_PURR) - pme->purr0; + if (!phim->initialized || !cpu_online(cpu ^ 1)) { + stolen = (tb - pme->tb) - (purr - pme->purr); + } else { + t0 = pme->tb0; + if (phim->tb0 < t0) + t0 = phim->tb0; + stolen = phim->tb - t0 - phim->purr - purr - p0->stolen; + } + if (stolen > 0) { + account_steal_time(current, stolen); + p0->stolen += stolen; + } + pme->tb = tb; + pme->purr = purr; + spin_unlock(&p0->lock); +} + +/* + * Must be called before the cpu is added to the online map when + * a cpu is being brought up at runtime. + */ +static void snapshot_purr(void) +{ + int cpu; + u64 purr; + struct cpu_purr_data *p0, *pme, *phim; + unsigned long flags; + + if (!cpu_has_feature(CPU_FTR_PURR)) + return; + cpu = smp_processor_id(); + pme = &per_cpu(cpu_purr_data, cpu); + p0 = &per_cpu(cpu_purr_data, cpu & ~1); + phim = &per_cpu(cpu_purr_data, cpu ^ 1); + spin_lock_irqsave(&p0->lock, flags); + pme->tb = pme->tb0 = mftb(); + purr = mfspr(SPRN_PURR); + if (!phim->initialized) { + pme->purr = 0; + pme->purr0 = purr; + } else { + /* set p->purr and p->purr0 for no change in p0->stolen */ + pme->purr = phim->tb - phim->tb0 - phim->purr - p0->stolen; + pme->purr0 = purr - pme->purr; + } + pme->initialized = 1; + spin_unlock_irqrestore(&p0->lock, flags); +} + +#endif /* CONFIG_PPC_SPLPAR */ + +#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ +#define calc_cputime_factors() +#define account_process_time(regs) update_process_times(user_mode(regs)) +#define calculate_steal_time() do { } while (0) +#endif + +#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)) +#define snapshot_purr() do { } while (0) +#endif + +/* + * Called when a cpu comes up after the system has finished booting, + * i.e. as a result of a hotplug cpu action. + */ +void snapshot_timebase(void) +{ + __get_cpu_var(last_jiffy) = get_tb(); + snapshot_purr(); +} + void __delay(unsigned long loops) { unsigned long start; @@ -382,6 +597,7 @@ static void iSeries_tb_recal(void) new_tb_ticks_per_jiffy, sign, tick_diff ); tb_ticks_per_jiffy = new_tb_ticks_per_jiffy; tb_ticks_per_sec = new_tb_ticks_per_sec; + calc_cputime_factors(); div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres ); do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; tb_to_xs = divres.result_low; @@ -430,6 +646,7 @@ void timer_interrupt(struct pt_regs * regs) irq_enter(); profile_tick(CPU_PROFILING, regs); + calculate_steal_time(); #ifdef CONFIG_PPC_ISERIES get_lppaca()->int_dword.fields.decr_int = 0; @@ -451,7 +668,7 @@ void timer_interrupt(struct pt_regs * regs) * is the case. */ if (!cpu_is_offline(cpu)) - update_process_times(user_mode(regs)); + account_process_time(regs); /* * No need to check whether cpu is offline here; boot_cpuid @@ -508,13 +725,27 @@ void wakeup_decrementer(void) void __init smp_space_timers(unsigned int max_cpus) { int i; + unsigned long half = tb_ticks_per_jiffy / 2; unsigned long offset = tb_ticks_per_jiffy / max_cpus; unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid); /* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */ previous_tb -= tb_ticks_per_jiffy; + /* + * The stolen time calculation for POWER5 shared-processor LPAR + * systems works better if the two threads' timebase interrupts + * are staggered by half a jiffy with respect to each other. + */ for_each_cpu(i) { - if (i != boot_cpuid) { + if (i == boot_cpuid) + continue; + if (i == (boot_cpuid ^ 1)) + per_cpu(last_jiffy, i) = + per_cpu(last_jiffy, boot_cpuid) - half; + else if (i & 1) + per_cpu(last_jiffy, i) = + per_cpu(last_jiffy, i ^ 1) + half; + else { previous_tb += offset; per_cpu(last_jiffy, i) = previous_tb; } @@ -706,6 +937,7 @@ void __init time_init(void) tb_ticks_per_sec = ppc_tb_freq; tb_ticks_per_usec = ppc_tb_freq / 1000000; tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); + calc_cputime_factors(); /* * Calculate the length of each tick in ns. It will not be |