From 976d7d3f79a997b223f2ed8eabef7e12e469b5cf Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 16 May 2013 10:32:09 +0100 Subject: arm64: kernel: build MPIDR_EL1 hash function data structure On ARM64 SMP systems, cores are identified by their MPIDR_EL1 register. The MPIDR_EL1 guidelines in the ARM ARM do not provide strict enforcement of MPIDR_EL1 layout, only recommendations that, if followed, split the MPIDR_EL1 on ARM 64 bit platforms in four affinity levels. In multi-cluster systems like big.LITTLE, if the affinity guidelines are followed, the MPIDR_EL1 can not be considered a linear index. This means that the association between logical CPU in the kernel and the HW CPU identifier becomes somewhat more complicated requiring methods like hashing to associate a given MPIDR_EL1 to a CPU logical index, in order for the look-up to be carried out in an efficient and scalable way. This patch provides a function in the kernel that starting from the cpu_logical_map, implement collision-free hashing of MPIDR_EL1 values by checking all significative bits of MPIDR_EL1 affinity level bitfields. The hashing can then be carried out through bits shifting and ORing; the resulting hash algorithm is a collision-free though not minimal hash that can be executed with few assembly instructions. The mpidr_el1 is filtered through a mpidr mask that is built by checking all bits that toggle in the set of MPIDR_EL1s corresponding to possible CPUs. Bits that do not toggle do not carry information so they do not contribute to the resulting hash. Pseudo code: /* check all bits that toggle, so they are required */ for (i = 1, mpidr_el1_mask = 0; i < num_possible_cpus(); i++) mpidr_el1_mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); /* * Build shifts to be applied to aff0, aff1, aff2, aff3 values to hash the * mpidr_el1 * fls() returns the last bit set in a word, 0 if none * ffs() returns the first bit set in a word, 0 if none */ fs0 = mpidr_el1_mask[7:0] ? ffs(mpidr_el1_mask[7:0]) - 1 : 0; fs1 = mpidr_el1_mask[15:8] ? ffs(mpidr_el1_mask[15:8]) - 1 : 0; fs2 = mpidr_el1_mask[23:16] ? ffs(mpidr_el1_mask[23:16]) - 1 : 0; fs3 = mpidr_el1_mask[39:32] ? ffs(mpidr_el1_mask[39:32]) - 1 : 0; ls0 = fls(mpidr_el1_mask[7:0]); ls1 = fls(mpidr_el1_mask[15:8]); ls2 = fls(mpidr_el1_mask[23:16]); ls3 = fls(mpidr_el1_mask[39:32]); bits0 = ls0 - fs0; bits1 = ls1 - fs1; bits2 = ls2 - fs2; bits3 = ls3 - fs3; aff0_shift = fs0; aff1_shift = 8 + fs1 - bits0; aff2_shift = 16 + fs2 - (bits0 + bits1); aff3_shift = 32 + fs3 - (bits0 + bits1 + bits2); u32 hash(u64 mpidr_el1) { u32 l[4]; u64 mpidr_el1_masked = mpidr_el1 & mpidr_el1_mask; l[0] = mpidr_el1_masked & 0xff; l[1] = mpidr_el1_masked & 0xff00; l[2] = mpidr_el1_masked & 0xff0000; l[3] = mpidr_el1_masked & 0xff00000000; return (l[0] >> aff0_shift | l[1] >> aff1_shift | l[2] >> aff2_shift | l[3] >> aff3_shift); } The hashing algorithm relies on the inherent properties set in the ARM ARM recommendations for the MPIDR_EL1. Exotic configurations, where for instance the MPIDR_EL1 values at a given affinity level have large holes, can end up requiring big hash tables since the compression of values that can be achieved through shifting is somewhat crippled when holes are present. Kernel warns if the number of buckets of the resulting hash table exceeds the number of possible CPUs by a factor of 4, which is a symptom of a very sparse HW MPIDR_EL1 configuration. The hash algorithm is quite simple and can easily be implemented in assembly code, to be used in code paths where the kernel virtual address space is not set-up (ie cpu_resume) and instruction and data fetches are strongly ordered so code must be compact and must carry out few data accesses. Signed-off-by: Lorenzo Pieralisi --- arch/arm64/kernel/setup.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index bd9bbd0e44ed..87ddfce35cb5 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -113,6 +113,75 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) return phys_id == cpu_logical_map(cpu); } +struct mpidr_hash mpidr_hash; +#ifdef CONFIG_SMP +/** + * smp_build_mpidr_hash - Pre-compute shifts required at each affinity + * level in order to build a linear index from an + * MPIDR value. Resulting algorithm is a collision + * free hash carried out through shifting and ORing + */ +static void __init smp_build_mpidr_hash(void) +{ + u32 i, affinity, fs[4], bits[4], ls; + u64 mask = 0; + /* + * Pre-scan the list of MPIDRS and filter out bits that do + * not contribute to affinity levels, ie they never toggle. + */ + for_each_possible_cpu(i) + mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); + pr_debug("mask of set bits %#llx\n", mask); + /* + * Find and stash the last and first bit set at all affinity levels to + * check how many bits are required to represent them. + */ + for (i = 0; i < 4; i++) { + affinity = MPIDR_AFFINITY_LEVEL(mask, i); + /* + * Find the MSB bit and LSB bits position + * to determine how many bits are required + * to express the affinity level. + */ + ls = fls(affinity); + fs[i] = affinity ? ffs(affinity) - 1 : 0; + bits[i] = ls - fs[i]; + } + /* + * An index can be created from the MPIDR_EL1 by isolating the + * significant bits at each affinity level and by shifting + * them in order to compress the 32 bits values space to a + * compressed set of values. This is equivalent to hashing + * the MPIDR_EL1 through shifting and ORing. It is a collision free + * hash though not minimal since some levels might contain a number + * of CPUs that is not an exact power of 2 and their bit + * representation might contain holes, eg MPIDR_EL1[7:0] = {0x2, 0x80}. + */ + mpidr_hash.shift_aff[0] = MPIDR_LEVEL_SHIFT(0) + fs[0]; + mpidr_hash.shift_aff[1] = MPIDR_LEVEL_SHIFT(1) + fs[1] - bits[0]; + mpidr_hash.shift_aff[2] = MPIDR_LEVEL_SHIFT(2) + fs[2] - + (bits[1] + bits[0]); + mpidr_hash.shift_aff[3] = MPIDR_LEVEL_SHIFT(3) + + fs[3] - (bits[2] + bits[1] + bits[0]); + mpidr_hash.mask = mask; + mpidr_hash.bits = bits[3] + bits[2] + bits[1] + bits[0]; + pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] aff3[%u] mask[%#llx] bits[%u]\n", + mpidr_hash.shift_aff[0], + mpidr_hash.shift_aff[1], + mpidr_hash.shift_aff[2], + mpidr_hash.shift_aff[3], + mpidr_hash.mask, + mpidr_hash.bits); + /* + * 4x is an arbitrary value used to warn on a hash table much bigger + * than expected on most systems. + */ + if (mpidr_hash_size() > 4 * num_possible_cpus()) + pr_warn("Large number of MPIDR hash buckets detected\n"); + __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); +} +#endif + static void __init setup_processor(void) { struct cpu_info *cpu_info; @@ -236,6 +305,7 @@ void __init setup_arch(char **cmdline_p) cpu_read_bootcpu_ops(); #ifdef CONFIG_SMP smp_init_cpus(); + smp_build_mpidr_hash(); #endif #ifdef CONFIG_VT -- cgit v1.2.3 From 95322526ef62b84adb469c27535ab0252a369a85 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 22 Jul 2013 12:22:13 +0100 Subject: arm64: kernel: cpu_{suspend/resume} implementation Kernel subsystems like CPU idle and suspend to RAM require a generic mechanism to suspend a processor, save its context and put it into a quiescent state. The cpu_{suspend}/{resume} implementation provides such a framework through a kernel interface allowing to save/restore registers, flush the context to DRAM and suspend/resume to/from low-power states where processor context may be lost. The CPU suspend implementation relies on the suspend protocol registered in CPU operations to carry out a suspend request after context is saved and flushed to DRAM. The cpu_suspend interface: int cpu_suspend(unsigned long arg); allows to pass an opaque parameter that is handed over to the suspend CPU operations back-end so that it can take action according to the semantics attached to it. The arg parameter allows suspend to RAM and CPU idle drivers to communicate to suspend protocol back-ends; it requires standardization so that the interface can be reused seamlessly across systems, paving the way for generic drivers. Context memory is allocated on the stack, whose address is stashed in a per-cpu variable to keep track of it and passed to core functions that save/restore the registers required by the architecture. Even though, upon successful execution, the cpu_suspend function shuts down the suspending processor, the warm boot resume mechanism, based on the cpu_resume function, makes the resume path operate as a cpu_suspend function return, so that cpu_suspend can be treated as a C function by the caller, which simplifies coding the PM drivers that rely on the cpu_suspend API. Upon context save, the minimal amount of memory is flushed to DRAM so that it can be retrieved when the MMU is off and caches are not searched. The suspend CPU operation, depending on the required operations (eg CPU vs Cluster shutdown) is in charge of flushing the cache hierarchy either implicitly (by calling firmware implementations like PSCI) or explicitly by executing the required cache maintainance functions. Debug exceptions are disabled during cpu_{suspend}/{resume} operations so that debug registers can be saved and restored properly preventing preemption from debug agents enabled in the kernel. Signed-off-by: Lorenzo Pieralisi --- arch/arm64/include/asm/cpu_ops.h | 6 ++ arch/arm64/include/asm/suspend.h | 9 ++ arch/arm64/kernel/asm-offsets.c | 11 +++ arch/arm64/kernel/sleep.S | 184 +++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/suspend.c | 109 +++++++++++++++++++++++ 5 files changed, 319 insertions(+) create mode 100644 arch/arm64/kernel/sleep.S create mode 100644 arch/arm64/kernel/suspend.c (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index c4cdb5e5b73d..152413076503 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -39,6 +39,9 @@ struct device_node; * from the cpu to be killed. * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the * cpu being killed. + * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing + * to wrong parameters or error conditions. Called from the + * CPU being suspended. Must be called with IRQs disabled. */ struct cpu_operations { const char *name; @@ -50,6 +53,9 @@ struct cpu_operations { int (*cpu_disable)(unsigned int cpu); void (*cpu_die)(unsigned int cpu); #endif +#ifdef CONFIG_ARM64_CPU_SUSPEND + int (*cpu_suspend)(unsigned long); +#endif }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index a88558e223da..e9c149c042e0 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -15,4 +15,13 @@ struct cpu_suspend_ctx { u64 ctx_regs[NR_CTX_REGS]; u64 sp; } __aligned(16); + +struct sleep_save_sp { + phys_addr_t *save_ptr_stash; + phys_addr_t save_ptr_stash_phys; +}; + +extern void cpu_resume(void); +extern int cpu_suspend(unsigned long); + #endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 666e231d410b..646f888387cd 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -137,6 +139,15 @@ int main(void) DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); +#endif +#ifdef CONFIG_ARM64_CPU_SUSPEND + DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); + DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); + DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); + DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff)); + DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); + DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); + DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); #endif return 0; } diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S new file mode 100644 index 000000000000..b1925729c692 --- /dev/null +++ b/arch/arm64/kernel/sleep.S @@ -0,0 +1,184 @@ +#include +#include +#include +#include + + .text +/* + * Implementation of MPIDR_EL1 hash algorithm through shifting + * and OR'ing. + * + * @dst: register containing hash result + * @rs0: register containing affinity level 0 bit shift + * @rs1: register containing affinity level 1 bit shift + * @rs2: register containing affinity level 2 bit shift + * @rs3: register containing affinity level 3 bit shift + * @mpidr: register containing MPIDR_EL1 value + * @mask: register containing MPIDR mask + * + * Pseudo C-code: + * + *u32 dst; + * + *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) { + * u32 aff0, aff1, aff2, aff3; + * u64 mpidr_masked = mpidr & mask; + * aff0 = mpidr_masked & 0xff; + * aff1 = mpidr_masked & 0xff00; + * aff2 = mpidr_masked & 0xff0000; + * aff2 = mpidr_masked & 0xff00000000; + * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); + *} + * Input registers: rs0, rs1, rs2, rs3, mpidr, mask + * Output register: dst + * Note: input and output registers must be disjoint register sets + (eg: a macro instance with mpidr = x1 and dst = x1 is invalid) + */ + .macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask + and \mpidr, \mpidr, \mask // mask out MPIDR bits + and \dst, \mpidr, #0xff // mask=aff0 + lsr \dst ,\dst, \rs0 // dst=aff0>>rs0 + and \mask, \mpidr, #0xff00 // mask = aff1 + lsr \mask ,\mask, \rs1 + orr \dst, \dst, \mask // dst|=(aff1>>rs1) + and \mask, \mpidr, #0xff0000 // mask = aff2 + lsr \mask ,\mask, \rs2 + orr \dst, \dst, \mask // dst|=(aff2>>rs2) + and \mask, \mpidr, #0xff00000000 // mask = aff3 + lsr \mask ,\mask, \rs3 + orr \dst, \dst, \mask // dst|=(aff3>>rs3) + .endm +/* + * Save CPU state for a suspend. This saves callee registers, and allocates + * space on the kernel stack to save the CPU specific registers + some + * other data for resume. + * + * x0 = suspend finisher argument + */ +ENTRY(__cpu_suspend) + stp x29, lr, [sp, #-96]! + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + mov x2, sp + sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx + mov x1, sp + /* + * x1 now points to struct cpu_suspend_ctx allocated on the stack + */ + str x2, [x1, #CPU_CTX_SP] + ldr x2, =sleep_save_sp + ldr x2, [x2, #SLEEP_SAVE_SP_VIRT] +#ifdef CONFIG_SMP + mrs x7, mpidr_el1 + ldr x9, =mpidr_hash + ldr x10, [x9, #MPIDR_HASH_MASK] + /* + * Following code relies on the struct mpidr_hash + * members size. + */ + ldp w3, w4, [x9, #MPIDR_HASH_SHIFTS] + ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] + compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 + add x2, x2, x8, lsl #3 +#endif + bl __cpu_suspend_finisher + /* + * Never gets here, unless suspend fails. + * Successful cpu_suspend should return from cpu_resume, returning + * through this code path is considered an error + * If the return value is set to 0 force x0 = -EOPNOTSUPP + * to make sure a proper error condition is propagated + */ + cmp x0, #0 + mov x3, #-EOPNOTSUPP + csel x0, x3, x0, eq + add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp x29, lr, [sp], #96 + ret +ENDPROC(__cpu_suspend) + .ltorg + +/* + * x0 must contain the sctlr value retrieved from restored context + */ +ENTRY(cpu_resume_mmu) + ldr x3, =cpu_resume_after_mmu + msr sctlr_el1, x0 // restore sctlr_el1 + isb + br x3 // global jump to virtual address +ENDPROC(cpu_resume_mmu) +cpu_resume_after_mmu: + mov x0, #0 // return zero on success + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp x29, lr, [sp], #96 + ret +ENDPROC(cpu_resume_after_mmu) + + .data +ENTRY(cpu_resume) + bl el2_setup // if in EL2 drop to EL1 cleanly +#ifdef CONFIG_SMP + mrs x1, mpidr_el1 + adr x4, mpidr_hash_ptr + ldr x5, [x4] + add x8, x4, x5 // x8 = struct mpidr_hash phys address + /* retrieve mpidr_hash members to compute the hash */ + ldr x2, [x8, #MPIDR_HASH_MASK] + ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS] + ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] + compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 + /* x7 contains hash index, let's use it to grab context pointer */ +#else + mov x7, xzr +#endif + adr x0, sleep_save_sp + ldr x0, [x0, #SLEEP_SAVE_SP_PHYS] + ldr x0, [x0, x7, lsl #3] + /* load sp from context */ + ldr x2, [x0, #CPU_CTX_SP] + adr x1, sleep_idmap_phys + /* load physical address of identity map page table in x1 */ + ldr x1, [x1] + mov sp, x2 + /* + * cpu_do_resume expects x0 to contain context physical address + * pointer and x1 to contain physical address of 1:1 page tables + */ + bl cpu_do_resume // PC relative jump, MMU off + b cpu_resume_mmu // Resume MMU, never returns +ENDPROC(cpu_resume) + + .align 3 +mpidr_hash_ptr: + /* + * offset of mpidr_hash symbol from current location + * used to obtain run-time mpidr_hash address with MMU off + */ + .quad mpidr_hash - . +/* + * physical address of identity mapped page tables + */ + .type sleep_idmap_phys, #object +ENTRY(sleep_idmap_phys) + .quad 0 +/* + * struct sleep_save_sp { + * phys_addr_t *save_ptr_stash; + * phys_addr_t save_ptr_stash_phys; + * }; + */ + .type sleep_save_sp, #object +ENTRY(sleep_save_sp) + .space SLEEP_SAVE_SP_SZ // struct sleep_save_sp diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c new file mode 100644 index 000000000000..e074b1c32723 --- /dev/null +++ b/arch/arm64/kernel/suspend.c @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern int __cpu_suspend(unsigned long); +/* + * This is called by __cpu_suspend() to save the state, and do whatever + * flushing is required to ensure that when the CPU goes to sleep we have + * the necessary data available when the caches are not searched. + * + * @arg: Argument to pass to suspend operations + * @ptr: CPU context virtual address + * @save_ptr: address of the location where the context physical address + * must be saved + */ +int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr, + phys_addr_t *save_ptr) +{ + int cpu = smp_processor_id(); + + *save_ptr = virt_to_phys(ptr); + + cpu_do_suspend(ptr); + /* + * Only flush the context that must be retrieved with the MMU + * off. VA primitives ensure the flush is applied to all + * cache levels so context is pushed to DRAM. + */ + __flush_dcache_area(ptr, sizeof(*ptr)); + __flush_dcache_area(save_ptr, sizeof(*save_ptr)); + + return cpu_ops[cpu]->cpu_suspend(arg); +} + +/** + * cpu_suspend + * + * @arg: argument to pass to the finisher function + */ +int cpu_suspend(unsigned long arg) +{ + struct mm_struct *mm = current->active_mm; + int ret, cpu = smp_processor_id(); + unsigned long flags; + + /* + * If cpu_ops have not been registered or suspend + * has not been initialized, cpu_suspend call fails early. + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) + return -EOPNOTSUPP; + + /* + * From this point debug exceptions are disabled to prevent + * updates to mdscr register (saved and restored along with + * general purpose registers) from kernel debuggers. + */ + local_dbg_save(flags); + + /* + * mm context saved on the stack, it will be restored when + * the cpu comes out of reset through the identity mapped + * page tables, so that the thread address space is properly + * set-up on function return. + */ + ret = __cpu_suspend(arg); + if (ret == 0) { + cpu_switch_mm(mm->pgd, mm); + flush_tlb_all(); + } + + /* + * Restore pstate flags. OS lock and mdscr have been already + * restored, so from this point onwards, debugging is fully + * renabled if it was enabled when core started shutdown. + */ + local_dbg_restore(flags); + + return ret; +} + +extern struct sleep_save_sp sleep_save_sp; +extern phys_addr_t sleep_idmap_phys; + +static int cpu_suspend_init(void) +{ + void *ctx_ptr; + + /* ctx_ptr is an array of physical addresses */ + ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + + if (WARN_ON(!ctx_ptr)) + return -ENOMEM; + + sleep_save_sp.save_ptr_stash = ctx_ptr; + sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); + sleep_idmap_phys = virt_to_phys(idmap_pg_dir); + __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); + __flush_dcache_area(&sleep_idmap_phys, sizeof(sleep_idmap_phys)); + + return 0; +} +early_initcall(cpu_suspend_init); -- cgit v1.2.3 From fb1ab1ab3889fc23ed90e452502662311ebdf229 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 19 Jul 2013 17:48:08 +0100 Subject: arm64: kernel: implement fpsimd CPU PM notifier When a CPU enters a low power state, its FP register content is lost. This patch adds a notifier to save the FP context on CPU shutdown and restore it on CPU resume. The context is saved and restored only if the suspending thread is not a kernel thread, mirroring the current context switch behaviour. Signed-off-by: Lorenzo Pieralisi --- arch/arm64/kernel/fpsimd.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index bb785d23dbde..4aef42a04bdc 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -17,6 +17,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -113,6 +114,39 @@ EXPORT_SYMBOL(kernel_neon_end); #endif /* CONFIG_KERNEL_MODE_NEON */ +#ifdef CONFIG_CPU_PM +static int fpsimd_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + switch (cmd) { + case CPU_PM_ENTER: + if (current->mm) + fpsimd_save_state(¤t->thread.fpsimd_state); + break; + case CPU_PM_EXIT: + if (current->mm) + fpsimd_load_state(¤t->thread.fpsimd_state); + break; + case CPU_PM_ENTER_FAILED: + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static struct notifier_block fpsimd_cpu_pm_notifier_block = { + .notifier_call = fpsimd_cpu_pm_notifier, +}; + +static void fpsimd_pm_init(void) +{ + cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); +} + +#else +static inline void fpsimd_pm_init(void) { } +#endif /* CONFIG_CPU_PM */ + /* * FP/SIMD support code initialisation. */ @@ -131,6 +165,8 @@ static int __init fpsimd_init(void) else elf_hwcap |= HWCAP_ASIMD; + fpsimd_pm_init(); + return 0; } late_initcall(fpsimd_init); -- cgit v1.2.3 From 2f04304587544dd14277413ebff12fa0f4fc932c Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 13 Aug 2013 10:45:19 +0100 Subject: arm64: kernel: refactor code to install/uninstall breakpoints Most of the code executed to install and uninstall breakpoints is common and can be factored out in a function that through a runtime operations type provides the requested implementation. This patch creates a common function that can be used to install/uninstall breakpoints and defines the set of operations that can be carried out through it. Reviewed-by: Will Deacon Signed-off-by: Lorenzo Pieralisi --- arch/arm64/kernel/hw_breakpoint.c | 142 +++++++++++++++++++++++--------------- 1 file changed, 88 insertions(+), 54 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index ff516f6691e4..e89459123fa3 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -169,15 +169,63 @@ static enum debug_el debug_exception_level(int privilege) } } -/* - * Install a perf counter breakpoint. +enum hw_breakpoint_ops { + HW_BREAKPOINT_INSTALL, + HW_BREAKPOINT_UNINSTALL +}; + +/** + * hw_breakpoint_slot_setup - Find and setup a perf slot according to + * operations + * + * @slots: pointer to array of slots + * @max_slots: max number of slots + * @bp: perf_event to setup + * @ops: operation to be carried out on the slot + * + * Return: + * slot index on success + * -ENOSPC if no slot is available/matches + * -EINVAL on wrong operations parameter */ -int arch_install_hw_breakpoint(struct perf_event *bp) +static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots, + struct perf_event *bp, + enum hw_breakpoint_ops ops) +{ + int i; + struct perf_event **slot; + + for (i = 0; i < max_slots; ++i) { + slot = &slots[i]; + switch (ops) { + case HW_BREAKPOINT_INSTALL: + if (!*slot) { + *slot = bp; + return i; + } + break; + case HW_BREAKPOINT_UNINSTALL: + if (*slot == bp) { + *slot = NULL; + return i; + } + break; + default: + pr_warn_once("Unhandled hw breakpoint ops %d\n", ops); + return -EINVAL; + } + } + return -ENOSPC; +} + +static int hw_breakpoint_control(struct perf_event *bp, + enum hw_breakpoint_ops ops) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot, **slots; + struct perf_event **slots; struct debug_info *debug_info = ¤t->thread.debug; int i, max_slots, ctrl_reg, val_reg, reg_enable; + enum debug_el dbg_el = debug_exception_level(info->ctrl.privilege); u32 ctrl; if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { @@ -196,67 +244,53 @@ int arch_install_hw_breakpoint(struct perf_event *bp) reg_enable = !debug_info->wps_disabled; } - for (i = 0; i < max_slots; ++i) { - slot = &slots[i]; + i = hw_breakpoint_slot_setup(slots, max_slots, bp, ops); - if (!*slot) { - *slot = bp; - break; - } - } + if (WARN_ONCE(i < 0, "Can't find any breakpoint slot")) + return i; - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) - return -ENOSPC; + switch (ops) { + case HW_BREAKPOINT_INSTALL: + /* + * Ensure debug monitors are enabled at the correct exception + * level. + */ + enable_debug_monitors(dbg_el); - /* Ensure debug monitors are enabled at the correct exception level. */ - enable_debug_monitors(debug_exception_level(info->ctrl.privilege)); + /* Setup the address register. */ + write_wb_reg(val_reg, i, info->address); - /* Setup the address register. */ - write_wb_reg(val_reg, i, info->address); + /* Setup the control register. */ + ctrl = encode_ctrl_reg(info->ctrl); + write_wb_reg(ctrl_reg, i, + reg_enable ? ctrl | 0x1 : ctrl & ~0x1); + break; + case HW_BREAKPOINT_UNINSTALL: + /* Reset the control register. */ + write_wb_reg(ctrl_reg, i, 0); - /* Setup the control register. */ - ctrl = encode_ctrl_reg(info->ctrl); - write_wb_reg(ctrl_reg, i, reg_enable ? ctrl | 0x1 : ctrl & ~0x1); + /* + * Release the debug monitors for the correct exception + * level. + */ + disable_debug_monitors(dbg_el); + break; + } return 0; } -void arch_uninstall_hw_breakpoint(struct perf_event *bp) +/* + * Install a perf counter breakpoint. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot, **slots; - int i, max_slots, base; - - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { - /* Breakpoint */ - base = AARCH64_DBG_REG_BCR; - slots = this_cpu_ptr(bp_on_reg); - max_slots = core_num_brps; - } else { - /* Watchpoint */ - base = AARCH64_DBG_REG_WCR; - slots = this_cpu_ptr(wp_on_reg); - max_slots = core_num_wrps; - } - - /* Remove the breakpoint. */ - for (i = 0; i < max_slots; ++i) { - slot = &slots[i]; - - if (*slot == bp) { - *slot = NULL; - break; - } - } - - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) - return; - - /* Reset the control register. */ - write_wb_reg(base, i, 0); + return hw_breakpoint_control(bp, HW_BREAKPOINT_INSTALL); +} - /* Release the debug monitors for the correct exception level. */ - disable_debug_monitors(debug_exception_level(info->ctrl.privilege)); +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + hw_breakpoint_control(bp, HW_BREAKPOINT_UNINSTALL); } static int get_hbp_len(u8 hbp_len) -- cgit v1.2.3 From 60fc6942f6ac124503ed7f8506736a8f56b4ca7e Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 5 Aug 2013 15:20:35 +0100 Subject: arm64: kernel: implement HW breakpoints CPU PM notifier When a CPU is shutdown either through CPU idle or suspend to RAM, the content of HW breakpoint registers must be reset or restored to proper values when CPU resume from low power states. This patch adds debug register restore operations to the HW breakpoint control function and implements a CPU PM notifier that allows to restore the content of HW breakpoint registers to allow proper suspend/resume operations. Signed-off-by: Lorenzo Pieralisi --- arch/arm64/kernel/hw_breakpoint.c | 79 ++++++++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 13 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index e89459123fa3..bcaaac9e14d6 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -20,6 +20,7 @@ #define pr_fmt(fmt) "hw-breakpoint: " fmt +#include #include #include #include @@ -171,7 +172,8 @@ static enum debug_el debug_exception_level(int privilege) enum hw_breakpoint_ops { HW_BREAKPOINT_INSTALL, - HW_BREAKPOINT_UNINSTALL + HW_BREAKPOINT_UNINSTALL, + HW_BREAKPOINT_RESTORE }; /** @@ -210,6 +212,10 @@ static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots, return i; } break; + case HW_BREAKPOINT_RESTORE: + if (*slot == bp) + return i; + break; default: pr_warn_once("Unhandled hw breakpoint ops %d\n", ops); return -EINVAL; @@ -256,7 +262,8 @@ static int hw_breakpoint_control(struct perf_event *bp, * level. */ enable_debug_monitors(dbg_el); - + /* Fall through */ + case HW_BREAKPOINT_RESTORE: /* Setup the address register. */ write_wb_reg(val_reg, i, info->address); @@ -840,18 +847,36 @@ void hw_breakpoint_thread_switch(struct task_struct *next) /* * CPU initialisation. */ -static void reset_ctrl_regs(void *unused) +static void hw_breakpoint_reset(void *unused) { int i; - - for (i = 0; i < core_num_brps; ++i) { - write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); - write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); + struct perf_event **slots; + /* + * When a CPU goes through cold-boot, it does not have any installed + * slot, so it is safe to share the same function for restoring and + * resetting breakpoints; when a CPU is hotplugged in, it goes + * through the slots, which are all empty, hence it just resets control + * and value for debug registers. + * When this function is triggered on warm-boot through a CPU PM + * notifier some slots might be initialized; if so they are + * reprogrammed according to the debug slots content. + */ + for (slots = this_cpu_ptr(bp_on_reg), i = 0; i < core_num_brps; ++i) { + if (slots[i]) { + hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); + } else { + write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); + } } - for (i = 0; i < core_num_wrps; ++i) { - write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); - write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); + for (slots = this_cpu_ptr(wp_on_reg), i = 0; i < core_num_wrps; ++i) { + if (slots[i]) { + hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); + } else { + write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); + } } } @@ -861,7 +886,7 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self, { int cpu = (long)hcpu; if (action == CPU_ONLINE) - smp_call_function_single(cpu, reset_ctrl_regs, NULL, 1); + smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); return NOTIFY_OK; } @@ -869,6 +894,33 @@ static struct notifier_block hw_breakpoint_reset_nb = { .notifier_call = hw_breakpoint_reset_notify, }; +#ifdef CONFIG_CPU_PM +static int hw_breakpoint_cpu_pm_notify(struct notifier_block *self, + unsigned long action, + void *v) +{ + if (action == CPU_PM_EXIT) { + hw_breakpoint_reset(NULL); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static struct notifier_block hw_breakpoint_cpu_pm_nb = { + .notifier_call = hw_breakpoint_cpu_pm_notify, +}; + +static void __init hw_breakpoint_pm_init(void) +{ + cpu_pm_register_notifier(&hw_breakpoint_cpu_pm_nb); +} +#else +static inline void hw_breakpoint_pm_init(void) +{ +} +#endif + /* * One-time initialisation. */ @@ -884,8 +936,8 @@ static int __init arch_hw_breakpoint_init(void) * Reset the breakpoint resources. We assume that a halting * debugger will leave the world in a nice state for us. */ - smp_call_function(reset_ctrl_regs, NULL, 1); - reset_ctrl_regs(NULL); + smp_call_function(hw_breakpoint_reset, NULL, 1); + hw_breakpoint_reset(NULL); /* Register debug fault handlers. */ hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, @@ -895,6 +947,7 @@ static int __init arch_hw_breakpoint_init(void) /* Register hotplug notifier. */ register_cpu_notifier(&hw_breakpoint_reset_nb); + hw_breakpoint_pm_init(); return 0; } -- cgit v1.2.3 From 1f85008e74768a88e1ddb96cc1fe45bb2378166c Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 4 Sep 2013 10:55:17 +0100 Subject: arm64: enable generic clockevent broadcast On platforms with power management capabilities, timers that are shut down when a CPU enters deep C-states must be emulated using an always-on timer and a timer IPI to relay the timer IRQ to target CPUs on an SMP system. This patch enables the generic clockevents broadcast infrastructure for arm64, by providing the required Kconfig entries and adding the timer IPI infrastructure. Acked-by: Daniel Lezcano Signed-off-by: Lorenzo Pieralisi --- arch/arm64/Kconfig | 2 ++ arch/arm64/include/asm/hardirq.h | 2 +- arch/arm64/kernel/smp.c | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6d4dd22ee4b7..baab4eb8bfe4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2,6 +2,7 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS @@ -12,6 +13,7 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_IOMAP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 990c051e7829..ae4801d77514 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -20,7 +20,7 @@ #include #include -#define NR_IPI 4 +#define NR_IPI 5 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index a0c2ca602cf8..0b8c859e744f 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -61,6 +61,7 @@ enum ipi_msg_type { IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, + IPI_TIMER, }; /* @@ -447,6 +448,7 @@ static const char *ipi_types[NR_IPI] = { S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_TIMER, "Timer broadcast interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -532,6 +534,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST + case IPI_TIMER: + irq_enter(); + tick_receive_broadcast(); + irq_exit(); + break; +#endif + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; @@ -544,6 +554,13 @@ void smp_send_reschedule(int cpu) smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) +{ + smp_cross_call(mask, IPI_TIMER); +} +#endif + void smp_send_stop(void) { unsigned long timeout; -- cgit v1.2.3 From b8824dfe1b3fe1d4ef3d7ee78a071b8290d3b153 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 17 Jul 2013 10:12:24 +0100 Subject: arm64: kernel: add CPU idle call When CPU idle is enabled, the architectural idle call should go through the idle subsystem to allow CPUs to enter idle states defined by the platform CPU idle back-end operations. This patch, mirroring other archs behaviour, adds the CPU idle call to the architectural arch_cpu_idle implementation for arm64. Acked-by: Daniel Lezcano Signed-off-by: Lorenzo Pieralisi --- arch/arm64/kernel/process.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index de17c89985db..50491ec4de34 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -98,8 +99,10 @@ void arch_cpu_idle(void) * This should do all the clock switching and wait for interrupt * tricks */ - cpu_do_idle(); - local_irq_enable(); + if (cpuidle_idle_call()) { + cpu_do_idle(); + local_irq_enable(); + } } #ifdef CONFIG_HOTPLUG_CPU -- cgit v1.2.3 From 166936bace056dfc11452d794209f39a5e9b0fb4 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 7 Nov 2013 18:37:14 +0000 Subject: arm64: kernel: add PM build infrastructure This patch adds the required makefile and kconfig entries to enable PM for arm64 systems. The kernel relies on the cpu_{suspend}/{resume} infrastructure to properly save the context for a CPU and put it to sleep, hence this patch adds the config option required to enable cpu_{suspend}/{resume} API. In order to rely on the CPU PM implementation for saving and restoring of CPU subsystems like GIC and PMU, the arch Kconfig must be also augmented to select the CONFIG_CPU_PM option when SUSPEND or CPU_IDLE kernel implementations are selected. Signed-off-by: Lorenzo Pieralisi --- arch/arm64/Kconfig | 13 +++++++++++++ arch/arm64/kernel/Makefile | 1 + 2 files changed, 14 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index baab4eb8bfe4..a1c8f39cb009 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -12,6 +12,7 @@ config ARM64 select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK + select CPU_PM if (SUSPEND || CPU_IDLE) select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_IOMAP @@ -277,6 +278,18 @@ config SYSVIPC_COMPAT endmenu +menu "Power management options" + +source "kernel/power/Kconfig" + +config ARCH_SUSPEND_POSSIBLE + def_bool y + +config ARM64_CPU_SUSPEND + def_bool PM_SLEEP + +endmenu + source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 5ba2fd43a75b..1cd339d5037b 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) -- cgit v1.2.3