Diffstat (limited to 'arch/tile/lib/atomic_asm_32.S')
-rw-r--r-- | arch/tile/lib/atomic_asm_32.S | 205
1 file changed, 0 insertions, 205 deletions
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
deleted file mode 100644
index 94709ab41ed8..000000000000
--- a/arch/tile/lib/atomic_asm_32.S
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- *   This program is free software; you can redistribute it and/or
- *   modify it under the terms of the GNU General Public License
- *   as published by the Free Software Foundation, version 2.
- *
- *   This program is distributed in the hope that it will be useful, but
- *   WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- *   NON INFRINGEMENT. See the GNU General Public License for
- *   more details.
- *
- * Support routines for atomic operations. Each function takes:
- *
- * r0: address to manipulate
- * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
- * r2: new value to write, or for cmpxchg/add_unless, value to compare against
- * r3: (cmpxchg/xchg_add_unless) new value to write or add;
- *     (atomic64 ops) high word of value to write
- * r4/r5: (cmpxchg64/add_unless64) new value to write or add
- *
- * The 32-bit routines return a "struct __get_user" so that the futex code
- * has an opportunity to return -EFAULT to the user if needed.
- * The 64-bit routines just return a "long long" with the value,
- * since they are only used from kernel space and don't expect to fault.
- * Support for 16-bit ops is included in the framework but we don't provide any.
- *
- * Note that the caller is advised to issue a suitable L1 or L2
- * prefetch on the address being manipulated to avoid extra stalls.
- * In addition, the hot path is on two icache lines, and we start with
- * a jump to the second line to make sure they are both in cache so
- * that we never stall waiting on icache fill while holding the lock.
- * (This doesn't work out with most 64-bit ops, since they consume
- * too many bundles, so may take an extra i-cache stall.)
- *
- * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
- * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
- * the code, just page faults.
- *
- * If the load or store faults in a way that can be directly fixed in
- * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
- * directly, return to the instruction that faulted, and retry it.
- *
- * If the load or store faults in a way that potentially requires us
- * to release the atomic lock, then retry (e.g. a migrating PTE), we
- * reset the PC in do_page_fault_ics() to the "tns" instruction so
- * that on return we will reacquire the lock and restart the op. We
- * are somewhat overloading the exception_table_entry notion by doing
- * this, since those entries are not normally used for migrating PTEs.
- *
- * If the main page fault handler discovers a bad address, it will see
- * the PC pointing to the "tns" instruction (due to the earlier
- * exception_table_entry processing in do_page_fault_ics), and
- * re-reset the PC to the fault handler, atomic_bad_address(), which
- * effectively takes over from the atomic op and can either return a
- * bad "struct __get_user" (for user addresses) or can just panic (for
- * bad kernel addresses).
- *
- * Note that if the value we would store is the same as what we
- * loaded, we bypass the store. Other platforms with true atomics can
- * make the guarantee that a non-atomic __clear_bit(), for example,
- * can safely race with an atomic test_and_set_bit(); this example is
- * from bit_spinlock.h in slub_lock() / slub_unlock(). We can't do
- * that on Tile since the "atomic" op is really just a
- * read/modify/write, and can race with the non-atomic
- * read/modify/write. However, if we can short-circuit the write when
- * it is not needed, in the atomic case, we avoid the race.
- */
-
-#include <linux/linkage.h>
-#include <asm/atomic_32.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-
-        .section .text.atomic,"ax"
-ENTRY(__start_atomic_asm_code)
-
-        .macro  atomic_op, name, bitwidth, body
-        .align  64
-STD_ENTRY_SECTION(__atomic\name, .text.atomic)
-        {
-         movei  r24, 1
-         j      4f              /* branch to second cache line */
-        }
-1:      {
-         .ifc \bitwidth,16
-         lh     r22, r0
-         .else
-         lw     r22, r0
-         addi   r28, r0, 4
-         .endif
-        }
-        .ifc \bitwidth,64
-        lw      r23, r28
-        .endif
-        \body /* set r24, and r25 if 64-bit */
-        {
-         seq    r26, r22, r24
-         seq    r27, r23, r25
-        }
-        .ifc \bitwidth,64
-        bbnst   r27, 2f
-        .endif
-        bbs     r26, 3f         /* skip write-back if it's the same value */
-2:      {
-         .ifc \bitwidth,16
-         sh     r0, r24
-         .else
-         sw     r0, r24
-         .endif
-        }
-        .ifc \bitwidth,64
-        sw      r28, r25
-        .endif
-        mf
-3:      {
-         move   r0, r22
-         .ifc \bitwidth,64
-         move   r1, r23
-         .else
-         move   r1, zero
-         .endif
-         sw     ATOMIC_LOCK_REG_NAME, zero
-        }
-        mtspr   INTERRUPT_CRITICAL_SECTION, zero
-        jrp     lr
-4:      {
-         move   ATOMIC_LOCK_REG_NAME, r1
-         mtspr  INTERRUPT_CRITICAL_SECTION, r24
-        }
-#ifndef CONFIG_SMP
-        j       1b              /* no atomic locks */
-#else
-        {
-         tns    r21, ATOMIC_LOCK_REG_NAME
-         moveli r23, 2048       /* maximum backoff time in cycles */
-        }
-        {
-         bzt    r21, 1b         /* branch if lock acquired */
-         moveli r25, 32         /* starting backoff time in cycles */
-        }
-5:      mtspr   INTERRUPT_CRITICAL_SECTION, zero
-        mfspr   r26, CYCLE_LOW  /* get start point for this backoff */
-6:      mfspr   r22, CYCLE_LOW  /* test to see if we've backed off enough */
-        sub     r22, r22, r26
-        slt     r22, r22, r25
-        bbst    r22, 6b
-        {
-         mtspr  INTERRUPT_CRITICAL_SECTION, r24
-         shli   r25, r25, 1     /* double the backoff; retry the tns */
-        }
-        {
-         tns    r21, ATOMIC_LOCK_REG_NAME
-         slt    r26, r23, r25   /* is the proposed backoff too big? */
-        }
-        {
-         bzt    r21, 1b         /* branch if lock acquired */
-         mvnz   r25, r26, r23
-        }
-        j       5b
-#endif
-        STD_ENDPROC(__atomic\name)
-        .ifc \bitwidth,32
-        .pushsection __ex_table,"a"
-        .align  4
-        .word   1b, __atomic\name
-        .word   2b, __atomic\name
-        .word   __atomic\name, __atomic_bad_address
-        .popsection
-        .endif
-        .endm
-
-
-/*
- * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions.
- */
-
-atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
-atomic_op 32_xchg, 32, "move r24, r2"
-atomic_op 32_xchg_add, 32, "add r24, r22, r2"
-atomic_op 32_xchg_add_unless, 32, \
-        "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
-atomic_op 32_fetch_or, 32, "or r24, r22, r2"
-atomic_op 32_fetch_and, 32, "and r24, r22, r2"
-atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2"
-atomic_op 32_fetch_xor, 32, "xor r24, r22, r2"
-
-atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
-        { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
-atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
-atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
-        slt_u r26, r24, r22; add r25, r25, r26"
-atomic_op 64_xchg_add_unless, 64, \
-        "{ sne r26, r22, r2; sne r27, r23, r3 }; \
-        { bbns r26, 3f; add r24, r22, r4 }; \
-        { bbns r27, 3f; add r25, r23, r5 }; \
-        slt_u r26, r24, r22; add r25, r25, r26"
-atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
-atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
-atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
-
-        jrp     lr              /* happy backtracer */
-
-ENTRY(__end_atomic_asm_code)
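
For readers skimming the removed code, the technique it implements can be summarized in plain C. The sketch below is an illustration written for this page, not the kernel's implementation: the names (lock_for(), lock_with_backoff(), emulated_cmpxchg()) and the constants are invented, and C11 atomics stand in for the TILEPro "tns" instruction and the CYCLE_LOW counter. It shows the three ideas the assembly relies on: hash the target address to one lock in a small table, acquire that lock with exponential backoff capped at a maximum, and bypass the store when the value would not change.

/*
 * Illustrative sketch only; the real routine is the TILEPro assembly above.
 * atomic_exchange() stands in for the hardware "tns" (test-and-set)
 * instruction, and a counted spin loop stands in for reading CYCLE_LOW.
 * All names and constants here are invented for the example.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define LOCK_TABLE_SIZE 128   /* power of two; one lock guards many words */
#define BACKOFF_START   32    /* initial backoff (cycles in the real code) */
#define BACKOFF_MAX     2048  /* cap, mirroring the 2048-cycle maximum */

static _Atomic uint32_t lock_table[LOCK_TABLE_SIZE];  /* 0 = free, 1 = held */

/* Hash the target address to one lock, as the hashed atomic locks do. */
static _Atomic uint32_t *lock_for(volatile uint32_t *addr)
{
        return &lock_table[((uintptr_t)addr >> 2) & (LOCK_TABLE_SIZE - 1)];
}

/* Spin on the lock, doubling the backoff after each failed attempt. */
static void lock_with_backoff(_Atomic uint32_t *lock)
{
        uint32_t backoff = BACKOFF_START;

        /* exchange(lock, 1) returns the old value; nonzero means "held". */
        while (atomic_exchange_explicit(lock, 1, memory_order_acquire)) {
                for (volatile uint32_t i = 0; i < backoff; i++)
                        ;                       /* wait out the backoff window */
                if (backoff < BACKOFF_MAX)
                        backoff <<= 1;          /* double, but never past the cap */
        }
}

/* Emulated 32-bit cmpxchg: returns the old value, stores only if it changes. */
static uint32_t emulated_cmpxchg(volatile uint32_t *addr, uint32_t cmp, uint32_t val)
{
        _Atomic uint32_t *lock = lock_for(addr);
        uint32_t old;

        lock_with_backoff(lock);
        old = *addr;
        if (old == cmp && old != val)   /* bypass the store if nothing changes */
                *addr = val;
        atomic_store_explicit(lock, 0, memory_order_release);
        return old;
}

int main(void)
{
        volatile uint32_t v = 5;
        uint32_t old;

        old = emulated_cmpxchg(&v, 5, 7);
        printf("old=%u now=%u\n", (unsigned)old, (unsigned)v);  /* old=5 now=7 */
        old = emulated_cmpxchg(&v, 5, 9);       /* comparison fails, no store */
        printf("old=%u now=%u\n", (unsigned)old, (unsigned)v);  /* old=7 now=7 */
        return 0;
}

In the real routine the lock table is the kernel's hashed atomic-lock array, the backoff is measured against the CYCLE_LOW special-purpose register rather than a counted loop, and INTERRUPT_CRITICAL_SECTION is raised while the lock is held so that only page faults can interrupt the sequence; the sketch models none of that and only shows the control flow.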