diff options
Diffstat (limited to 'arch/tile/lib')
-rw-r--r-- | arch/tile/lib/memchr_32.c | 35 | ||||
-rw-r--r-- | arch/tile/lib/memcpy_tile64.c | 11 | ||||
-rw-r--r-- | arch/tile/lib/spinlock_32.c | 29 |
3 files changed, 45 insertions, 30 deletions
diff --git a/arch/tile/lib/memchr_32.c b/arch/tile/lib/memchr_32.c index 6235283b4859..cc3d9badf030 100644 --- a/arch/tile/lib/memchr_32.c +++ b/arch/tile/lib/memchr_32.c @@ -18,12 +18,24 @@ void *memchr(const void *s, int c, size_t n) { + const uint32_t *last_word_ptr; + const uint32_t *p; + const char *last_byte_ptr; + uintptr_t s_int; + uint32_t goal, before_mask, v, bits; + char *ret; + + if (__builtin_expect(n == 0, 0)) { + /* Don't dereference any memory if the array is empty. */ + return NULL; + } + /* Get an aligned pointer. */ - const uintptr_t s_int = (uintptr_t) s; - const uint32_t *p = (const uint32_t *)(s_int & -4); + s_int = (uintptr_t) s; + p = (const uint32_t *)(s_int & -4); /* Create four copies of the byte for which we are looking. */ - const uint32_t goal = 0x01010101 * (uint8_t) c; + goal = 0x01010101 * (uint8_t) c; /* Read the first word, but munge it so that bytes before the array * will not match goal. @@ -31,23 +43,14 @@ void *memchr(const void *s, int c, size_t n) * Note that this shift count expression works because we know * shift counts are taken mod 32. */ - const uint32_t before_mask = (1 << (s_int << 3)) - 1; - uint32_t v = (*p | before_mask) ^ (goal & before_mask); + before_mask = (1 << (s_int << 3)) - 1; + v = (*p | before_mask) ^ (goal & before_mask); /* Compute the address of the last byte. */ - const char *const last_byte_ptr = (const char *)s + n - 1; + last_byte_ptr = (const char *)s + n - 1; /* Compute the address of the word containing the last byte. */ - const uint32_t *const last_word_ptr = - (const uint32_t *)((uintptr_t) last_byte_ptr & -4); - - uint32_t bits; - char *ret; - - if (__builtin_expect(n == 0, 0)) { - /* Don't dereference any memory if the array is empty. */ - return NULL; - } + last_word_ptr = (const uint32_t *)((uintptr_t) last_byte_ptr & -4); while ((bits = __insn_seqb(v, goal)) == 0) { if (__builtin_expect(p == last_word_ptr, 0)) { diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c index dfedea7b266b..f7d4a6ad61e8 100644 --- a/arch/tile/lib/memcpy_tile64.c +++ b/arch/tile/lib/memcpy_tile64.c @@ -54,7 +54,7 @@ typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long); * we must run with interrupts disabled to avoid the risk of some * other code seeing the incoherent data in our cache. (Recall that * our cache is indexed by PA, so even if the other code doesn't use - * our KM_MEMCPY virtual addresses, they'll still hit in cache using + * our kmap_atomic virtual addresses, they'll still hit in cache using * the normal VAs that aren't supposed to hit in cache.) */ static void memcpy_multicache(void *dest, const void *source, @@ -64,6 +64,7 @@ static void memcpy_multicache(void *dest, const void *source, unsigned long flags, newsrc, newdst; pmd_t *pmdp; pte_t *ptep; + int type0, type1; int cpu = get_cpu(); /* @@ -77,7 +78,8 @@ static void memcpy_multicache(void *dest, const void *source, sim_allow_multiple_caching(1); /* Set up the new dest mapping */ - idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + KM_MEMCPY0; + type0 = kmap_atomic_idx_push(); + idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0; newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1)); pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst); ptep = pte_offset_kernel(pmdp, newdst); @@ -87,7 +89,8 @@ static void memcpy_multicache(void *dest, const void *source, } /* Set up the new source mapping */ - idx += (KM_MEMCPY0 - KM_MEMCPY1); + type1 = kmap_atomic_idx_push(); + idx += (type0 - type1); src_pte = hv_pte_set_nc(src_pte); src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */ newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); @@ -119,6 +122,8 @@ static void memcpy_multicache(void *dest, const void *source, * We're done: notify the simulator that all is back to normal, * and re-enable interrupts and pre-emption. */ + kmap_atomic_idx_pop(); + kmap_atomic_idx_pop(); sim_allow_multiple_caching(0); local_irq_restore(flags); put_cpu(); diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 485e24d62c6b..5cd1c4004eca 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -167,23 +167,30 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) * when we compare them. */ u32 my_ticket_; + u32 iterations = 0; - /* Take out the next ticket; this will also stop would-be readers. */ - if (val & 1) - val = get_rwlock(rwlock); - rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); + /* + * Wait until there are no readers, then bump up the next + * field and capture the ticket value. + */ + for (;;) { + if (!(val & 1)) { + if ((val >> RD_COUNT_SHIFT) == 0) + break; + rwlock->lock = val; + } + delay_backoff(iterations++); + val = __insn_tns((int *)&rwlock->lock); + } - /* Extract my ticket value from the original word. */ + /* Take out the next ticket and extract my ticket value. */ + rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); my_ticket_ = val >> WR_NEXT_SHIFT; - /* - * Wait until the "current" field matches our ticket, and - * there are no remaining readers. - */ + /* Wait until the "current" field matches our ticket. */ for (;;) { u32 curr_ = val >> WR_CURR_SHIFT; - u32 readers = val >> RD_COUNT_SHIFT; - u32 delta = ((my_ticket_ - curr_) & WR_MASK) + !!readers; + u32 delta = ((my_ticket_ - curr_) & WR_MASK); if (likely(delta == 0)) break; |