diff options
author | leyfoon.tan <leyfoon.tan@starfivetech.com> | 2024-02-27 05:22:01 +0300 |
---|---|---|
committer | leyfoon.tan <leyfoon.tan@starfivetech.com> | 2024-02-27 05:22:01 +0300 |
commit | 200f10e0ed6d5767c1774f3bd842016d4d8c860d (patch) | |
tree | a871121475c83c3c158916d79f39a2dc23571387 | |
parent | 65fbb1ef216996b5245fca7c774147d6196cdd7f (diff) | |
parent | f8e99b19804b80aaeb3c328fde07a208b2af1227 (diff) | |
download | linux-200f10e0ed6d5767c1774f3bd842016d4d8c860d.tar.xz |
Merge branch 'starfive-6.6.10-dubhe-add-zicbop' into 'starfive-6.6.10-dubhe'
riscv: dts: starfive: dubhe: Add ZICBOP isa-extension
See merge request starfive-tech/linux!265
-rw-r--r-- | arch/riscv/Kconfig | 15 | ||||
-rw-r--r-- | arch/riscv/boot/dts/starfive/dubhe70.dtsi | 4 | ||||
-rw-r--r-- | arch/riscv/include/asm/atomic.h | 164 | ||||
-rw-r--r-- | arch/riscv/include/asm/cmpxchg.h | 408 | ||||
-rw-r--r-- | arch/riscv/include/asm/hwcap.h | 1 | ||||
-rw-r--r-- | arch/riscv/include/asm/insn-def.h | 60 | ||||
-rw-r--r-- | arch/riscv/include/asm/processor.h | 16 | ||||
-rw-r--r-- | arch/riscv/kernel/cpufeature.c | 1 |
8 files changed, 298 insertions, 371 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 9e6d442773ee..d25e14d91083 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -572,6 +572,21 @@ config RISCV_ISA_ZICBOZ If you don't know what to do here, say Y. +config RISCV_ISA_ZICBOP + bool "Zicbop extension support for cache block prefetch" + depends on MMU + depends on RISCV_ALTERNATIVE + default y + help + Adds support to dynamically detect the presence of the ZICBOP + extension (Cache Block Prefetch Operations) and enable its + usage. + + The Zicbop extension can be used to prefetch cache block for + read/write fetch. + + If you don't know what to do here, say Y. + config TOOLCHAIN_HAS_ZIHINTPAUSE bool default y diff --git a/arch/riscv/boot/dts/starfive/dubhe70.dtsi b/arch/riscv/boot/dts/starfive/dubhe70.dtsi index b9dad5cbb376..86795a768374 100644 --- a/arch/riscv/boot/dts/starfive/dubhe70.dtsi +++ b/arch/riscv/boot/dts/starfive/dubhe70.dtsi @@ -8,7 +8,7 @@ riscv,isa = "rv64imafdcbh"; riscv,isa-base = "rv64i"; riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "h", "zba", "zbb", - "zbc", "zbs", "zicbom", "zicboz", "zicntr", + "zbc", "zbs", "zicbom", "zicbop", "zicboz", "zicntr", "zicond", "zicsr", "zifencei", "zihintpause", "zihpm", "svinval", "svnapot", "svpbmt", "sscofpmf"; riscv,cbom-block-size = <64>; @@ -31,7 +31,7 @@ riscv,isa = "rv64imafdcbh"; riscv,isa-base = "rv64i"; riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "h", "zba", "zbb", - "zbc", "zbs", "zicbom", "zicboz", "zicntr", + "zbc", "zbs", "zicbom", "zicbop", "zicboz", "zicntr", "zicond", "zicsr", "zifencei", "zihintpause", "zihpm", "svinval", "svnapot", "svpbmt", "sscofpmf"; riscv,cbom-block-size = <64>; diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index f5dfef6c2153..80cca7ac16fd 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -196,22 +196,28 @@ ATOMIC_OPS(xor, xor, i) #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN +#define _arch_atomic_fetch_add_unless(_prev, _rc, counter, _a, _u, sfx) \ +({ \ + __asm__ __volatile__ ( \ + "0: lr." sfx " %[p], %[c]\n" \ + " beq %[p], %[u], 1f\n" \ + " add %[rc], %[p], %[a]\n" \ + " sc." sfx ".rl %[rc], %[rc], %[c]\n" \ + " bnez %[rc], 0b\n" \ + " fence rw, rw\n" \ + "1:\n" \ + : [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \ + : [a]"r" (_a), [u]"r" (_u) \ + : "memory"); \ +}) + /* This is required to provide a full barrier on success. */ static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) { int prev, rc; - __asm__ __volatile__ ( - "0: lr.w %[p], %[c]\n" - " beq %[p], %[u], 1f\n" - " add %[rc], %[p], %[a]\n" - " sc.w.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : [a]"r" (a), [u]"r" (u) - : "memory"); + _arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "w"); + return prev; } #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless @@ -222,77 +228,86 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 prev; long rc; - __asm__ __volatile__ ( - "0: lr.d %[p], %[c]\n" - " beq %[p], %[u], 1f\n" - " add %[rc], %[p], %[a]\n" - " sc.d.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : [a]"r" (a), [u]"r" (u) - : "memory"); + _arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "d"); + return prev; } #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless #endif +#define _arch_atomic_inc_unless_negative(_prev, _rc, counter, sfx) \ +({ \ + __asm__ __volatile__ ( \ + "0: lr." sfx " %[p], %[c]\n" \ + " bltz %[p], 1f\n" \ + " addi %[rc], %[p], 1\n" \ + " sc." sfx ".rl %[rc], %[rc], %[c]\n" \ + " bnez %[rc], 0b\n" \ + " fence rw, rw\n" \ + "1:\n" \ + : [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \ + : \ + : "memory"); \ +}) + static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v) { int prev, rc; - __asm__ __volatile__ ( - "0: lr.w %[p], %[c]\n" - " bltz %[p], 1f\n" - " addi %[rc], %[p], 1\n" - " sc.w.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : - : "memory"); + _arch_atomic_inc_unless_negative(prev, rc, v->counter, "w"); + return !(prev < 0); } #define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative +#define _arch_atomic_dec_unless_positive(_prev, _rc, counter, sfx) \ +({ \ + __asm__ __volatile__ ( \ + "0: lr." sfx " %[p], %[c]\n" \ + " bgtz %[p], 1f\n" \ + " addi %[rc], %[p], -1\n" \ + " sc." sfx ".rl %[rc], %[rc], %[c]\n" \ + " bnez %[rc], 0b\n" \ + " fence rw, rw\n" \ + "1:\n" \ + : [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \ + : \ + : "memory"); \ +}) + static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v) { int prev, rc; - __asm__ __volatile__ ( - "0: lr.w %[p], %[c]\n" - " bgtz %[p], 1f\n" - " addi %[rc], %[p], -1\n" - " sc.w.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : - : "memory"); + _arch_atomic_dec_unless_positive(prev, rc, v->counter, "w"); + return !(prev > 0); } #define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive +#define _arch_atomic_dec_if_positive(_prev, _rc, counter, sfx) \ +({ \ + __asm__ __volatile__ ( \ + "0: lr." sfx " %[p], %[c]\n" \ + " addi %[rc], %[p], -1\n" \ + " bltz %[rc], 1f\n" \ + " sc." sfx ".rl %[rc], %[rc], %[c]\n" \ + " bnez %[rc], 0b\n" \ + " fence rw, rw\n" \ + "1:\n" \ + : [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter) \ + : \ + : "memory"); \ +}) + static __always_inline int arch_atomic_dec_if_positive(atomic_t *v) { int prev, rc; - __asm__ __volatile__ ( - "0: lr.w %[p], %[c]\n" - " addi %[rc], %[p], -1\n" - " bltz %[rc], 1f\n" - " sc.w.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : - : "memory"); + _arch_atomic_dec_if_positive(prev, rc, v->counter, "w"); + return prev - 1; } @@ -304,17 +319,8 @@ static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v) s64 prev; long rc; - __asm__ __volatile__ ( - "0: lr.d %[p], %[c]\n" - " bltz %[p], 1f\n" - " addi %[rc], %[p], 1\n" - " sc.d.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : - : "memory"); + _arch_atomic_inc_unless_negative(prev, rc, v->counter, "d"); + return !(prev < 0); } @@ -325,17 +331,8 @@ static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v) s64 prev; long rc; - __asm__ __volatile__ ( - "0: lr.d %[p], %[c]\n" - " bgtz %[p], 1f\n" - " addi %[rc], %[p], -1\n" - " sc.d.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : - : "memory"); + _arch_atomic_dec_unless_positive(prev, rc, v->counter, "d"); + return !(prev > 0); } @@ -346,17 +343,8 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) s64 prev; long rc; - __asm__ __volatile__ ( - "0: lr.d %[p], %[c]\n" - " addi %[rc], %[p], -1\n" - " bltz %[rc], 1f\n" - " sc.d.rl %[rc], %[rc], %[c]\n" - " bnez %[rc], 0b\n" - " fence rw, rw\n" - "1:\n" - : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) - : - : "memory"); + _arch_atomic_dec_if_positive(prev, rc, v->counter, "d"); + return prev - 1; } diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 2f4726d3cfcc..d7b9d7951f08 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -10,141 +10,82 @@ #include <asm/barrier.h> #include <asm/fence.h> - -#define __xchg_relaxed(ptr, new, size) \ +#include <asm/processor.h> + +#define __arch_xchg_masked(prepend, append, r, p, n) \ +({ \ + u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ + ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ + ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ + << __s; \ + ulong __newx = (ulong)(n) << __s; \ + ulong __retx; \ + ulong __rc; \ + \ + __asm__ __volatile__ ( \ + prepend \ + PREFETCHW_ASM(%5) \ + "0: lr.w %0, %2\n" \ + " and %1, %0, %z4\n" \ + " or %1, %1, %z3\n" \ + " sc.w %1, %1, %2\n" \ + " bnez %1, 0b\n" \ + append \ + : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ + : "memory"); \ + \ + r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ +}) + +#define __arch_xchg(sfx, prepend, append, r, p, n) \ +({ \ + __asm__ __volatile__ ( \ + prepend \ + " amoswap" sfx " %0, %2, %1\n" \ + append \ + : "=r" (r), "+A" (*(p)) \ + : "r" (n) \ + : "memory"); \ +}) + +#define _arch_xchg(ptr, new, sfx, prepend, append) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ - __typeof__(new) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - switch (size) { \ + __typeof__(*(__ptr)) __new = (new); \ + __typeof__(*(__ptr)) __ret; \ + \ + switch (sizeof(*__ptr)) { \ + case 1: \ + case 2: \ + __arch_xchg_masked(prepend, append, \ + __ret, __ptr, __new); \ + break; \ case 4: \ - __asm__ __volatile__ ( \ - " amoswap.w %0, %2, %1\n" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ + __arch_xchg(".w" sfx, prepend, append, \ + __ret, __ptr, __new); \ break; \ case 8: \ - __asm__ __volatile__ ( \ - " amoswap.d %0, %2, %1\n" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ + __arch_xchg(".d" sfx, prepend, append, \ + __ret, __ptr, __new); \ break; \ default: \ BUILD_BUG(); \ } \ - __ret; \ + (__typeof__(*(__ptr)))__ret; \ }) #define arch_xchg_relaxed(ptr, x) \ -({ \ - __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __xchg_relaxed((ptr), \ - _x_, sizeof(*(ptr))); \ -}) - -#define __xchg_acquire(ptr, new, size) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(new) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - " amoswap.w %0, %2, %1\n" \ - RISCV_ACQUIRE_BARRIER \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - " amoswap.d %0, %2, %1\n" \ - RISCV_ACQUIRE_BARRIER \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) + _arch_xchg(ptr, x, "", "", "") #define arch_xchg_acquire(ptr, x) \ -({ \ - __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __xchg_acquire((ptr), \ - _x_, sizeof(*(ptr))); \ -}) - -#define __xchg_release(ptr, new, size) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(new) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - RISCV_RELEASE_BARRIER \ - " amoswap.w %0, %2, %1\n" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - RISCV_RELEASE_BARRIER \ - " amoswap.d %0, %2, %1\n" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) + _arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER) #define arch_xchg_release(ptr, x) \ -({ \ - __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __xchg_release((ptr), \ - _x_, sizeof(*(ptr))); \ -}) - -#define __arch_xchg(ptr, new, size) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(new) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - " amoswap.w.aqrl %0, %2, %1\n" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - " amoswap.d.aqrl %0, %2, %1\n" \ - : "=r" (__ret), "+A" (*__ptr) \ - : "r" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) + _arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "") #define arch_xchg(ptr, x) \ -({ \ - __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __arch_xchg((ptr), _x_, sizeof(*(ptr))); \ -}) + _arch_xchg(ptr, x, ".aqrl", "", "") #define xchg32(ptr, x) \ ({ \ @@ -163,190 +104,95 @@ * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ -#define __cmpxchg_relaxed(ptr, old, new, size) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - register unsigned int __rc; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - "0: lr.w %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.w %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" ((long)__old), "rJ" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - "0: lr.d %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.d %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) -#define arch_cmpxchg_relaxed(ptr, o, n) \ +#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \ +({ \ + u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ + ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ + ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ + << __s; \ + ulong __newx = (ulong)(n) << __s; \ + ulong __oldx = (ulong)(o) << __s; \ + ulong __retx; \ + ulong __rc; \ + \ + __asm__ __volatile__ ( \ + prepend \ + "0: lr.w %0, %2\n" \ + " and %1, %0, %z5\n" \ + " bne %1, %z3, 1f\n" \ + " and %1, %0, %z6\n" \ + " or %1, %1, %z4\n" \ + " sc.w" sc_sfx " %1, %1, %2\n" \ + " bnez %1, 0b\n" \ + append \ + "1:\n" \ + : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ + : "rJ" ((long)__oldx), "rJ" (__newx), \ + "rJ" (__mask), "rJ" (~__mask) \ + : "memory"); \ + \ + r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ +}) + +#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ ({ \ - __typeof__(*(ptr)) _o_ = (o); \ - __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \ - _o_, _n_, sizeof(*(ptr))); \ -}) - -#define __cmpxchg_acquire(ptr, old, new, size) \ + register unsigned int __rc; \ + \ + __asm__ __volatile__ ( \ + prepend \ + "0: lr" lr_sfx " %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc" sc_sfx " %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + append \ + "1:\n" \ + : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ + : "rJ" (co o), "rJ" (n) \ + : "memory"); \ +}) + +#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - register unsigned int __rc; \ - switch (size) { \ + __typeof__(*(__ptr)) __old = (old); \ + __typeof__(*(__ptr)) __new = (new); \ + __typeof__(*(__ptr)) __ret; \ + \ + switch (sizeof(*__ptr)) { \ + case 1: \ + case 2: \ + __arch_cmpxchg_masked(sc_sfx, prepend, append, \ + __ret, __ptr, __old, __new); \ + break; \ case 4: \ - __asm__ __volatile__ ( \ - "0: lr.w %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.w %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - RISCV_ACQUIRE_BARRIER \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" ((long)__old), "rJ" (__new) \ - : "memory"); \ + __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ + __ret, __ptr, (long), __old, __new); \ break; \ case 8: \ - __asm__ __volatile__ ( \ - "0: lr.d %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.d %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - RISCV_ACQUIRE_BARRIER \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ - : "memory"); \ + __arch_cmpxchg(".d", ".d" sc_sfx, prepend, append, \ + __ret, __ptr, /**/, __old, __new); \ break; \ default: \ BUILD_BUG(); \ } \ - __ret; \ + (__typeof__(*(__ptr)))__ret; \ }) -#define arch_cmpxchg_acquire(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) _o_ = (o); \ - __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \ - _o_, _n_, sizeof(*(ptr))); \ -}) +#define arch_cmpxchg_relaxed(ptr, o, n) \ + _arch_cmpxchg((ptr), (o), (n), "", "", "") -#define __cmpxchg_release(ptr, old, new, size) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - register unsigned int __rc; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - RISCV_RELEASE_BARRIER \ - "0: lr.w %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.w %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" ((long)__old), "rJ" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - RISCV_RELEASE_BARRIER \ - "0: lr.d %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.d %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) +#define arch_cmpxchg_acquire(ptr, o, n) \ + _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER) #define arch_cmpxchg_release(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) _o_ = (o); \ - __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg_release((ptr), \ - _o_, _n_, sizeof(*(ptr))); \ -}) - -#define __cmpxchg(ptr, old, new, size) \ -({ \ - __typeof__(ptr) __ptr = (ptr); \ - __typeof__(*(ptr)) __old = (old); \ - __typeof__(*(ptr)) __new = (new); \ - __typeof__(*(ptr)) __ret; \ - register unsigned int __rc; \ - switch (size) { \ - case 4: \ - __asm__ __volatile__ ( \ - "0: lr.w %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.w.rl %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - " fence rw, rw\n" \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" ((long)__old), "rJ" (__new) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__ ( \ - "0: lr.d %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc.d.rl %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - " fence rw, rw\n" \ - "1:\n" \ - : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ - : "memory"); \ - break; \ - default: \ - BUILD_BUG(); \ - } \ - __ret; \ -}) + _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "") #define arch_cmpxchg(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) _o_ = (o); \ - __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg((ptr), \ - _o_, _n_, sizeof(*(ptr))); \ -}) + _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") #define arch_cmpxchg_local(ptr, o, n) \ - (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr)))) + arch_cmpxchg_relaxed((ptr), (o), (n)) #define arch_cmpxchg64(ptr, o, n) \ ({ \ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index fe6656af967a..b3da825a6fc7 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -60,6 +60,7 @@ #define RISCV_ISA_EXT_ZIHPM 42 #define RISCV_ISA_EXT_SMSTATEEN 43 #define RISCV_ISA_EXT_ZICOND 44 +#define RISCV_ISA_EXT_ZICBOP 45 #define RISCV_ISA_EXT_MAX 64 diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index 6960beb75f32..3652de4ff19f 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -18,6 +18,13 @@ #define INSN_I_RD_SHIFT 7 #define INSN_I_OPCODE_SHIFT 0 +#define INSN_S_SIMM7_SHIFT 25 +#define INSN_S_RS2_SHIFT 20 +#define INSN_S_RS1_SHIFT 15 +#define INSN_S_FUNC3_SHIFT 12 +#define INSN_S_SIMM5_SHIFT 7 +#define INSN_S_OPCODE_SHIFT 0 + #ifdef __ASSEMBLY__ #ifdef CONFIG_AS_HAS_INSN @@ -30,6 +37,10 @@ .insn i \opcode, \func3, \rd, \rs1, \simm12 .endm + .macro insn_s, opcode, func3, rs2, simm12, rs1 + .insn s \opcode, \func3, \rs2, \simm12(\rs1) + .endm + #else #include <asm/gpr-num.h> @@ -51,10 +62,20 @@ (\simm12 << INSN_I_SIMM12_SHIFT)) .endm + .macro insn_s, opcode, func3, rs2, simm12, rs1 + .4byte ((\opcode << INSN_S_OPCODE_SHIFT) | \ + (\func3 << INSN_S_FUNC3_SHIFT) | \ + (.L__gpr_num_\rs2 << INSN_S_RS2_SHIFT) | \ + (.L__gpr_num_\rs1 << INSN_S_RS1_SHIFT) | \ + ((\simm12 & 0x1f) << INSN_S_SIMM5_SHIFT) | \ + (((\simm12 >> 5) & 0x7f) << INSN_S_SIMM7_SHIFT)) + .endm + #endif #define __INSN_R(...) insn_r __VA_ARGS__ #define __INSN_I(...) insn_i __VA_ARGS__ +#define __INSN_S(...) insn_s __VA_ARGS__ #else /* ! __ASSEMBLY__ */ @@ -66,6 +87,9 @@ #define __INSN_I(opcode, func3, rd, rs1, simm12) \ ".insn i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" +#define __INSN_S(opcode, func3, rs2, simm12, rs1) \ + ".insn s " opcode ", " func3 ", " rs2 ", " simm12 "(" rs1 ")\n" + #else #include <linux/stringify.h> @@ -92,12 +116,26 @@ " (\\simm12 << " __stringify(INSN_I_SIMM12_SHIFT) "))\n" \ " .endm\n" +#define DEFINE_INSN_S \ + __DEFINE_ASM_GPR_NUMS \ +" .macro insn_s, opcode, func3, rs2, simm12, rs1\n" \ +" .4byte ((\\opcode << " __stringify(INSN_S_OPCODE_SHIFT) ") |" \ +" (\\func3 << " __stringify(INSN_S_FUNC3_SHIFT) ") |" \ +" (.L__gpr_num_\\rs2 << " __stringify(INSN_S_RS2_SHIFT) ") |" \ +" (.L__gpr_num_\\rs1 << " __stringify(INSN_S_RS1_SHIFT) ") |" \ +" ((\\simm12 & 0x1f) << " __stringify(INSN_S_SIMM5_SHIFT) ") |" \ +" (((\\simm12 >> 5) & 0x7f) << " __stringify(INSN_S_SIMM7_SHIFT) "))\n" \ +" .endm\n" + #define UNDEFINE_INSN_R \ " .purgem insn_r\n" #define UNDEFINE_INSN_I \ " .purgem insn_i\n" +#define UNDEFINE_INSN_S \ +" .purgem insn_s\n" + #define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \ DEFINE_INSN_R \ "insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \ @@ -108,6 +146,11 @@ "insn_i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" \ UNDEFINE_INSN_I +#define __INSN_S(opcode, func3, rs2, simm12, rs1) \ + DEFINE_INSN_S \ + "insn_s " opcode ", " func3 ", " rs2 ", " simm12 ", " rs1 "\n" \ + UNDEFINE_INSN_S + #endif #endif /* ! __ASSEMBLY__ */ @@ -120,6 +163,10 @@ __INSN_I(RV_##opcode, RV_##func3, RV_##rd, \ RV_##rs1, RV_##simm12) +#define INSN_S(opcode, func3, rs2, simm12, rs1) \ + __INSN_S(RV_##opcode, RV_##func3, RV_##rs2, \ + RV_##simm12, RV_##rs1) + #define RV_OPCODE(v) __ASM_STR(v) #define RV_FUNC3(v) __ASM_STR(v) #define RV_FUNC7(v) __ASM_STR(v) @@ -133,6 +180,7 @@ #define RV___RS2(v) __RV_REG(v) #define RV_OPCODE_MISC_MEM RV_OPCODE(15) +#define RV_OPCODE_OP_IMM RV_OPCODE(19) #define RV_OPCODE_SYSTEM RV_OPCODE(115) #define HFENCE_VVMA(vaddr, asid) \ @@ -196,4 +244,16 @@ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(4)) +#define CBO_PREFETCH_I(base, offset) \ + INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(0), \ + SIMM12(offset), RS1(base)) + +#define CBO_PREFETCH_R(base, offset) \ + INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(1), \ + SIMM12(offset), RS1(base)) + +#define CBO_PREFETCH_W(base, offset) \ + INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \ + SIMM12(offset), RS1(base)) + #endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 3e23e1786d05..69be17365f2f 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -12,6 +12,8 @@ #include <vdso/processor.h> #include <asm/ptrace.h> +#include <asm/insn-def.h> +#include <asm/alternative-macros.h> #ifdef CONFIG_64BIT #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) @@ -103,6 +105,20 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, #define KSTK_EIP(tsk) (task_pt_regs(tsk)->epc) #define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp) +#ifdef CONFIG_RISCV_ISA_ZICBOP +#define ARCH_HAS_PREFETCHW +#define RISCV_ISA_EXT_ZICBOP 45 + +#define PREFETCHW_ASM(x) \ + ALTERNATIVE(__nops(1), CBO_PREFETCH_W(x, 0), 0, \ + RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP) + + +static inline void prefetchw(const void *x) +{ + __asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory"); +} +#endif /* CONFIG_RISCV_ISA_ZICBOP */ /* Do necessary setup to start up a newly executed thread. */ extern void start_thread(struct pt_regs *regs, diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index a7ef18dabcd8..0997e1da664f 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -166,6 +166,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), + __RISCV_ISA_EXT_DATA(zicbop, RISCV_ISA_EXT_ZICBOP), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND), __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), |