summaryrefslogtreecommitdiff
path: root/arch/s390/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/lib')
-rw-r--r--arch/s390/lib/mem.S64
-rw-r--r--arch/s390/lib/spinlock.c343
-rw-r--r--arch/s390/lib/string.c28
3 files changed, 267 insertions, 168 deletions
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index d66751397e72..495c9c4bacc7 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -79,21 +79,25 @@ ENTRY(memset)
ex %r4,0(%r3)
br %r14
.Lmemset_fill:
- stc %r3,0(%r2)
cghi %r4,1
lgr %r1,%r2
- ber %r14
+ je .Lmemset_fill_exit
aghi %r4,-2
- srlg %r3,%r4,8
- ltgr %r3,%r3
+ srlg %r5,%r4,8
+ ltgr %r5,%r5
jz .Lmemset_fill_remainder
.Lmemset_fill_loop:
- mvc 1(256,%r1),0(%r1)
+ stc %r3,0(%r1)
+ mvc 1(255,%r1),0(%r1)
la %r1,256(%r1)
- brctg %r3,.Lmemset_fill_loop
+ brctg %r5,.Lmemset_fill_loop
.Lmemset_fill_remainder:
- larl %r3,.Lmemset_mvc
- ex %r4,0(%r3)
+ stc %r3,0(%r1)
+ larl %r5,.Lmemset_mvc
+ ex %r4,0(%r5)
+ br %r14
+.Lmemset_fill_exit:
+ stc %r3,0(%r1)
br %r14
.Lmemset_xc:
xc 0(1,%r1),0(%r1)
@@ -127,3 +131,47 @@ ENTRY(memcpy)
.Lmemcpy_mvc:
mvc 0(1,%r1),0(%r3)
EXPORT_SYMBOL(memcpy)
+
+/*
+ * __memset16/32/64
+ *
+ * void *__memset16(uint16_t *s, uint16_t v, size_t count)
+ * void *__memset32(uint32_t *s, uint32_t v, size_t count)
+ * void *__memset64(uint64_t *s, uint64_t v, size_t count)
+ */
+.macro __MEMSET bits,bytes,insn
+ENTRY(__memset\bits)
+ ltgr %r4,%r4
+ bzr %r14
+ cghi %r4,\bytes
+ je .L__memset_exit\bits
+ aghi %r4,-(\bytes+1)
+ srlg %r5,%r4,8
+ ltgr %r5,%r5
+ lgr %r1,%r2
+ jz .L__memset_remainder\bits
+.L__memset_loop\bits:
+ \insn %r3,0(%r1)
+ mvc \bytes(256-\bytes,%r1),0(%r1)
+ la %r1,256(%r1)
+ brctg %r5,.L__memset_loop\bits
+.L__memset_remainder\bits:
+ \insn %r3,0(%r1)
+ larl %r5,.L__memset_mvc\bits
+ ex %r4,0(%r5)
+ br %r14
+.L__memset_exit\bits:
+ \insn %r3,0(%r2)
+ br %r14
+.L__memset_mvc\bits:
+ mvc \bytes(1,%r1),0(%r1)
+.endm
+
+__MEMSET 16,2,sth
+EXPORT_SYMBOL(__memset16)
+
+__MEMSET 32,4,st
+EXPORT_SYMBOL(__memset32)
+
+__MEMSET 64,8,stg
+EXPORT_SYMBOL(__memset64)
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 2d0af8667695..84c0faeaf7ea 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -9,8 +9,11 @@
#include <linux/types.h>
#include <linux/export.h>
#include <linux/spinlock.h>
+#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <asm/alternative.h>
#include <asm/io.h>
int spin_retry = -1;
@@ -33,14 +36,46 @@ static int __init spin_retry_setup(char *str)
}
__setup("spin_retry=", spin_retry_setup);
+struct spin_wait {
+ struct spin_wait *next, *prev;
+ int node_id;
+} __aligned(32);
+
+static DEFINE_PER_CPU_ALIGNED(struct spin_wait, spin_wait[4]);
+
+#define _Q_LOCK_CPU_OFFSET 0
+#define _Q_LOCK_STEAL_OFFSET 16
+#define _Q_TAIL_IDX_OFFSET 18
+#define _Q_TAIL_CPU_OFFSET 20
+
+#define _Q_LOCK_CPU_MASK 0x0000ffff
+#define _Q_LOCK_STEAL_ADD 0x00010000
+#define _Q_LOCK_STEAL_MASK 0x00030000
+#define _Q_TAIL_IDX_MASK 0x000c0000
+#define _Q_TAIL_CPU_MASK 0xfff00000
+
+#define _Q_LOCK_MASK (_Q_LOCK_CPU_MASK | _Q_LOCK_STEAL_MASK)
+#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
+
+void arch_spin_lock_setup(int cpu)
+{
+ struct spin_wait *node;
+ int ix;
+
+ node = per_cpu_ptr(&spin_wait[0], cpu);
+ for (ix = 0; ix < 4; ix++, node++) {
+ memset(node, 0, sizeof(*node));
+ node->node_id = ((cpu + 1) << _Q_TAIL_CPU_OFFSET) +
+ (ix << _Q_TAIL_IDX_OFFSET);
+ }
+}
+
static inline int arch_load_niai4(int *lock)
{
int owner;
asm volatile(
-#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
- " .long 0xb2fa0040\n" /* NIAI 4 */
-#endif
+ ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */
" l %0,%1\n"
: "=d" (owner) : "Q" (*lock) : "memory");
return owner;
@@ -51,9 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
int expected = old;
asm volatile(
-#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
- " .long 0xb2fa0080\n" /* NIAI 8 */
-#endif
+ ALTERNATIVE("", ".long 0xb2fa0080", 49) /* NIAI 8 */
" cs %0,%3,%1\n"
: "=d" (old), "=Q" (*lock)
: "0" (old), "d" (new), "Q" (*lock)
@@ -61,75 +94,160 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
return expected == old;
}
-void arch_spin_lock_wait(arch_spinlock_t *lp)
+static inline struct spin_wait *arch_spin_decode_tail(int lock)
{
- int cpu = SPINLOCK_LOCKVAL;
- int owner, count;
+ int ix, cpu;
+
+ ix = (lock & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+ cpu = (lock & _Q_TAIL_CPU_MASK) >> _Q_TAIL_CPU_OFFSET;
+ return per_cpu_ptr(&spin_wait[ix], cpu - 1);
+}
+
+static inline int arch_spin_yield_target(int lock, struct spin_wait *node)
+{
+ if (lock & _Q_LOCK_CPU_MASK)
+ return lock & _Q_LOCK_CPU_MASK;
+ if (node == NULL || node->prev == NULL)
+ return 0; /* 0 -> no target cpu */
+ while (node->prev)
+ node = node->prev;
+ return node->node_id >> _Q_TAIL_CPU_OFFSET;
+}
+
+static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
+{
+ struct spin_wait *node, *next;
+ int lockval, ix, node_id, tail_id, old, new, owner, count;
+
+ ix = S390_lowcore.spinlock_index++;
+ barrier();
+ lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
+ node = this_cpu_ptr(&spin_wait[ix]);
+ node->prev = node->next = NULL;
+ node_id = node->node_id;
+
+ /* Enqueue the node for this CPU in the spinlock wait queue */
+ while (1) {
+ old = READ_ONCE(lp->lock);
+ if ((old & _Q_LOCK_CPU_MASK) == 0 &&
+ (old & _Q_LOCK_STEAL_MASK) != _Q_LOCK_STEAL_MASK) {
+ /*
+ * The lock is free but there may be waiters.
+ * With no waiters simply take the lock, if there
+ * are waiters try to steal the lock. The lock may
+ * be stolen three times before the next queued
+ * waiter will get the lock.
+ */
+ new = (old ? (old + _Q_LOCK_STEAL_ADD) : 0) | lockval;
+ if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+ /* Got the lock */
+ goto out;
+ /* lock passing in progress */
+ continue;
+ }
+ /* Make the node of this CPU the new tail. */
+ new = node_id | (old & _Q_LOCK_MASK);
+ if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+ break;
+ }
+ /* Set the 'next' pointer of the tail node in the queue */
+ tail_id = old & _Q_TAIL_MASK;
+ if (tail_id != 0) {
+ node->prev = arch_spin_decode_tail(tail_id);
+ WRITE_ONCE(node->prev->next, node);
+ }
/* Pass the virtual CPU to the lock holder if it is not running */
- owner = arch_load_niai4(&lp->lock);
- if (owner && arch_vcpu_is_preempted(~owner))
- smp_yield_cpu(~owner);
+ owner = arch_spin_yield_target(old, node);
+ if (owner && arch_vcpu_is_preempted(owner - 1))
+ smp_yield_cpu(owner - 1);
+ /* Spin on the CPU local node->prev pointer */
+ if (tail_id != 0) {
+ count = spin_retry;
+ while (READ_ONCE(node->prev) != NULL) {
+ if (count-- >= 0)
+ continue;
+ count = spin_retry;
+ /* Query running state of lock holder again. */
+ owner = arch_spin_yield_target(old, node);
+ if (owner && arch_vcpu_is_preempted(owner - 1))
+ smp_yield_cpu(owner - 1);
+ }
+ }
+
+ /* Spin on the lock value in the spinlock_t */
count = spin_retry;
while (1) {
- owner = arch_load_niai4(&lp->lock);
- /* Try to get the lock if it is free. */
+ old = READ_ONCE(lp->lock);
+ owner = old & _Q_LOCK_CPU_MASK;
if (!owner) {
- if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
- return;
+ tail_id = old & _Q_TAIL_MASK;
+ new = ((tail_id != node_id) ? tail_id : 0) | lockval;
+ if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+ /* Got the lock */
+ break;
continue;
}
if (count-- >= 0)
continue;
count = spin_retry;
- /*
- * For multiple layers of hypervisors, e.g. z/VM + LPAR
- * yield the CPU unconditionally. For LPAR rely on the
- * sense running status.
- */
- if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner))
- smp_yield_cpu(~owner);
+ if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+ smp_yield_cpu(owner - 1);
}
+
+ /* Pass lock_spin job to next CPU in the queue */
+ if (node_id && tail_id != node_id) {
+ /* Wait until the next CPU has set up the 'next' pointer */
+ while ((next = READ_ONCE(node->next)) == NULL)
+ ;
+ next->prev = NULL;
+ }
+
+ out:
+ S390_lowcore.spinlock_index--;
}
-EXPORT_SYMBOL(arch_spin_lock_wait);
-void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
+static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
{
- int cpu = SPINLOCK_LOCKVAL;
- int owner, count;
+ int lockval, old, new, owner, count;
- local_irq_restore(flags);
+ lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
/* Pass the virtual CPU to the lock holder if it is not running */
- owner = arch_load_niai4(&lp->lock);
- if (owner && arch_vcpu_is_preempted(~owner))
- smp_yield_cpu(~owner);
+ owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL);
+ if (owner && arch_vcpu_is_preempted(owner - 1))
+ smp_yield_cpu(owner - 1);
count = spin_retry;
while (1) {
- owner = arch_load_niai4(&lp->lock);
+ old = arch_load_niai4(&lp->lock);
+ owner = old & _Q_LOCK_CPU_MASK;
/* Try to get the lock if it is free. */
if (!owner) {
- local_irq_disable();
- if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
- return;
- local_irq_restore(flags);
+ new = (old & _Q_TAIL_MASK) | lockval;
+ if (arch_cmpxchg_niai8(&lp->lock, old, new))
+ /* Got the lock */
+ return;
continue;
}
if (count-- >= 0)
continue;
count = spin_retry;
- /*
- * For multiple layers of hypervisors, e.g. z/VM + LPAR
- * yield the CPU unconditionally. For LPAR rely on the
- * sense running status.
- */
- if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner))
- smp_yield_cpu(~owner);
+ if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
+ smp_yield_cpu(owner - 1);
}
}
-EXPORT_SYMBOL(arch_spin_lock_wait_flags);
+
+void arch_spin_lock_wait(arch_spinlock_t *lp)
+{
+ /* Use classic spinlocks + niai if the steal time is >= 10% */
+ if (test_cpu_flag(CIF_DEDICATED_CPU))
+ arch_spin_lock_queued(lp);
+ else
+ arch_spin_lock_classic(lp);
+}
+EXPORT_SYMBOL(arch_spin_lock_wait);
int arch_spin_trylock_retry(arch_spinlock_t *lp)
{
@@ -148,126 +266,59 @@ int arch_spin_trylock_retry(arch_spinlock_t *lp)
}
EXPORT_SYMBOL(arch_spin_trylock_retry);
-void _raw_read_lock_wait(arch_rwlock_t *rw)
+void arch_read_lock_wait(arch_rwlock_t *rw)
{
- int count = spin_retry;
- int owner, old;
-
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
- __RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD);
-#endif
- owner = 0;
- while (1) {
- if (count-- <= 0) {
- if (owner && arch_vcpu_is_preempted(~owner))
- smp_yield_cpu(~owner);
- count = spin_retry;
- }
- old = READ_ONCE(rw->lock);
- owner = READ_ONCE(rw->owner);
- if (old < 0)
- continue;
- if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1))
- return;
+ if (unlikely(in_interrupt())) {
+ while (READ_ONCE(rw->cnts) & 0x10000)
+ barrier();
+ return;
}
+
+ /* Remove this reader again to allow recursive read locking */
+ __atomic_add_const(-1, &rw->cnts);
+ /* Put the reader into the wait queue */
+ arch_spin_lock(&rw->wait);
+ /* Now add this reader to the count value again */
+ __atomic_add_const(1, &rw->cnts);
+ /* Loop until the writer is done */
+ while (READ_ONCE(rw->cnts) & 0x10000)
+ barrier();
+ arch_spin_unlock(&rw->wait);
}
-EXPORT_SYMBOL(_raw_read_lock_wait);
+EXPORT_SYMBOL(arch_read_lock_wait);
-int _raw_read_trylock_retry(arch_rwlock_t *rw)
+void arch_write_lock_wait(arch_rwlock_t *rw)
{
- int count = spin_retry;
int old;
- while (count-- > 0) {
- old = READ_ONCE(rw->lock);
- if (old < 0)
- continue;
- if (__atomic_cmpxchg_bool(&rw->lock, old, old + 1))
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL(_raw_read_trylock_retry);
-
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ /* Add this CPU to the write waiters */
+ __atomic_add(0x20000, &rw->cnts);
-void _raw_write_lock_wait(arch_rwlock_t *rw, int prev)
-{
- int count = spin_retry;
- int owner, old;
+ /* Put the writer into the wait queue */
+ arch_spin_lock(&rw->wait);
- owner = 0;
while (1) {
- if (count-- <= 0) {
- if (owner && arch_vcpu_is_preempted(~owner))
- smp_yield_cpu(~owner);
- count = spin_retry;
- }
- old = READ_ONCE(rw->lock);
- owner = READ_ONCE(rw->owner);
- smp_mb();
- if (old >= 0) {
- prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
- old = prev;
- }
- if ((old & 0x7fffffff) == 0 && prev >= 0)
+ old = READ_ONCE(rw->cnts);
+ if ((old & 0x1ffff) == 0 &&
+ __atomic_cmpxchg_bool(&rw->cnts, old, old | 0x10000))
+ /* Got the lock */
break;
+ barrier();
}
-}
-EXPORT_SYMBOL(_raw_write_lock_wait);
-
-#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-
-void _raw_write_lock_wait(arch_rwlock_t *rw)
-{
- int count = spin_retry;
- int owner, old, prev;
- prev = 0x80000000;
- owner = 0;
- while (1) {
- if (count-- <= 0) {
- if (owner && arch_vcpu_is_preempted(~owner))
- smp_yield_cpu(~owner);
- count = spin_retry;
- }
- old = READ_ONCE(rw->lock);
- owner = READ_ONCE(rw->owner);
- if (old >= 0 &&
- __atomic_cmpxchg_bool(&rw->lock, old, old | 0x80000000))
- prev = old;
- else
- smp_mb();
- if ((old & 0x7fffffff) == 0 && prev >= 0)
- break;
- }
+ arch_spin_unlock(&rw->wait);
}
-EXPORT_SYMBOL(_raw_write_lock_wait);
-
-#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+EXPORT_SYMBOL(arch_write_lock_wait);
-int _raw_write_trylock_retry(arch_rwlock_t *rw)
+void arch_spin_relax(arch_spinlock_t *lp)
{
- int count = spin_retry;
- int old;
+ int cpu;
- while (count-- > 0) {
- old = READ_ONCE(rw->lock);
- if (old)
- continue;
- if (__atomic_cmpxchg_bool(&rw->lock, 0, 0x80000000))
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL(_raw_write_trylock_retry);
-
-void arch_lock_relax(int cpu)
-{
+ cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK;
if (!cpu)
return;
- if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(~cpu))
+ if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1))
return;
- smp_yield_cpu(~cpu);
+ smp_yield_cpu(cpu - 1);
}
-EXPORT_SYMBOL(arch_lock_relax);
+EXPORT_SYMBOL(arch_spin_relax);
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index dbf2fdad2724..a10e11f7a5f7 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -56,7 +56,7 @@ EXPORT_SYMBOL(strlen);
*
* returns the minimum of the length of @s and @n
*/
-size_t strnlen(const char * s, size_t n)
+size_t strnlen(const char *s, size_t n)
{
return __strnend(s, n) - s;
}
@@ -195,14 +195,14 @@ EXPORT_SYMBOL(strncat);
/**
* strcmp - Compare two strings
- * @cs: One string
- * @ct: Another string
+ * @s1: One string
+ * @s2: Another string
*
- * returns 0 if @cs and @ct are equal,
- * < 0 if @cs is less than @ct
- * > 0 if @cs is greater than @ct
+ * returns 0 if @s1 and @s2 are equal,
+ * < 0 if @s1 is less than @s2
+ * > 0 if @s1 is greater than @s2
*/
-int strcmp(const char *cs, const char *ct)
+int strcmp(const char *s1, const char *s2)
{
register int r0 asm("0") = 0;
int ret = 0;
@@ -214,7 +214,7 @@ int strcmp(const char *cs, const char *ct)
" ic %1,0(%3)\n"
" sr %0,%1\n"
"1:"
- : "+d" (ret), "+d" (r0), "+a" (cs), "+a" (ct)
+ : "+d" (ret), "+d" (r0), "+a" (s1), "+a" (s2)
: : "cc", "memory");
return ret;
}
@@ -225,7 +225,7 @@ EXPORT_SYMBOL(strcmp);
* @s: The string to be searched
* @c: The character to search for
*/
-char * strrchr(const char * s, int c)
+char *strrchr(const char *s, int c)
{
size_t len = __strend(s) - s;
@@ -261,7 +261,7 @@ static inline int clcle(const char *s1, unsigned long l1,
* @s1: The string to be searched
* @s2: The string to search for
*/
-char * strstr(const char * s1,const char * s2)
+char *strstr(const char *s1, const char *s2)
{
int l1, l2;
@@ -307,15 +307,15 @@ EXPORT_SYMBOL(memchr);
/**
* memcmp - Compare two areas of memory
- * @cs: One area of memory
- * @ct: Another area of memory
+ * @s1: One area of memory
+ * @s2: Another area of memory
* @count: The size of the area.
*/
-int memcmp(const void *cs, const void *ct, size_t n)
+int memcmp(const void *s1, const void *s2, size_t n)
{
int ret;
- ret = clcle(cs, n, ct, n);
+ ret = clcle(s1, n, s2, n);
if (ret)
ret = ret == 1 ? -1 : 1;
return ret;