author		Daniel Borkmann <daniel@iogearbox.net>		2019-04-26 22:48:22 +0300
committer	Alexei Starovoitov <ast@kernel.org>		2019-04-27 04:53:40 +0300
commit		34b8ab091f9ef57a2bb3c8c8359a0a03a8abf2f9 (patch)
tree		00146898135b5e972cea6c930509f1ecdf8fc1ad /arch/arm64/net
parent		8968c67a82ab7501bc3b9439c3624a49b42fe54c (diff)
download	linux-34b8ab091f9ef57a2bb3c8c8359a0a03a8abf2f9.tar.xz
bpf, arm64: use more scalable stadd over ldxr / stxr loop in xadd
Since the ARMv8.1 supplement introduced LSE atomic instructions back in
2016, let's add support for STADD and use it in favor of the LDXR / STXR
loop for the XADD mapping when available. STADD is encoded as an alias
for LDADD with XZR as the destination register, so add LDADD to the
instruction encoder along with STADD as a special case, and use it in the
JIT for CPUs that advertise LSE atomics in the CPUID register. If the
immediate offset in the BPF XADD insn is 0, use the dst register directly
instead of a temporary one.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
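
As a side note on the alias described above, the STADD/LDADD relationship
can be illustrated with a small stand-alone sketch. The helper below is
hypothetical and is not the kernel's encoder (which lives outside this
diff); it builds the LDADD encoding and picks XZR as the destination
register Rt to obtain STADD:

#include <stdint.h>
#include <stdio.h>

/*
 * Hypothetical illustration only, not the kernel's encoder: STADD is
 * the LDADD form whose destination register Rt is XZR (register 31).
 */
static uint32_t stadd_sketch(int is64, unsigned int rn, unsigned int rs)
{
	/* LDADD base opcode; bits [31:30] select the access size */
	uint32_t insn = is64 ? 0xf8200000u : 0xb8200000u;

	insn |= (rs & 0x1f) << 16;	/* Rs: register holding the addend  */
	insn |= (rn & 0x1f) << 5;	/* Rn: register holding the address */
	insn |= 0x1f;			/* Rt = XZR, i.e. the STADD alias   */
	return insn;
}

int main(void)
{
	/* stadd x1, [x0] */
	printf("0x%08x\n", stadd_sketch(1, 0, 1));
	return 0;
}
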
Diffstat (limited to 'arch/arm64/net')
-rw-r--r--	arch/arm64/net/bpf_jit.h	|  4
-rw-r--r--	arch/arm64/net/bpf_jit_comp.c	| 28
2 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index 6c881659ee8a..76606e87233f 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -100,6 +100,10 @@
 #define A64_STXR(sf, Rt, Rn, Rs) \
 	A64_LSX(sf, Rt, Rn, Rs, STORE_EX)
 
+/* LSE atomics */
+#define A64_STADD(sf, Rn, Rs) \
+	aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf))
+
 /* Add/subtract (immediate) */
 #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
 	aarch64_insn_gen_add_sub_imm(Rd, Rn, imm12, \
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index a1420626fca2..df845cee438e 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -365,7 +365,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
 			  BPF_CLASS(code) == BPF_JMP;
 	const bool isdw = BPF_SIZE(code) == BPF_DW;
-	u8 jmp_cond;
+	u8 jmp_cond, reg;
 	s32 jmp_offset;
 
 #define check_imm(bits, imm) do {				\
@@ -756,18 +756,28 @@ emit_cond_jmp:
 			break;
 		}
 		break;
 
+	/* STX XADD: lock *(u32 *)(dst + off) += src */
 	case BPF_STX | BPF_XADD | BPF_W:
 	/* STX XADD: lock *(u64 *)(dst + off) += src */
 	case BPF_STX | BPF_XADD | BPF_DW:
-		emit_a64_mov_i(1, tmp, off, ctx);
-		emit(A64_ADD(1, tmp, tmp, dst), ctx);
-		emit(A64_LDXR(isdw, tmp2, tmp), ctx);
-		emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
-		emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx);
-		jmp_offset = -3;
-		check_imm19(jmp_offset);
-		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+		if (!off) {
+			reg = dst;
+		} else {
+			emit_a64_mov_i(1, tmp, off, ctx);
+			emit(A64_ADD(1, tmp, tmp, dst), ctx);
+			reg = tmp;
+		}
+		if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) {
+			emit(A64_STADD(isdw, reg, src), ctx);
+		} else {
+			emit(A64_LDXR(isdw, tmp2, reg), ctx);
+			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
+			emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
+			jmp_offset = -3;
+			check_imm19(jmp_offset);
+			emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+		}
 		break;
 	default:
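
For reference, the two instruction sequences the JIT now chooses between
can be reproduced with a small user-space sketch (not part of the patch;
it must be built natively on arm64, and the STADD path additionally needs
an ARMv8.1+ CPU with LSE). The first helper mirrors the LDXR/ADD/STXR/CBNZ
loop emitted on pre-LSE CPUs, the second the single STADD:

#include <stdio.h>

/* Pre-LSE path: same pattern as the JIT's LDXR / STXR loop. */
static inline void xadd_llsc(unsigned long *addr, unsigned long val)
{
	unsigned long tmp;
	unsigned int ret;

	asm volatile(
	"1:	ldxr	%0, %2\n"
	"	add	%0, %0, %3\n"
	"	stxr	%w1, %0, %2\n"
	"	cbnz	%w1, 1b\n"
	: "=&r" (tmp), "=&r" (ret), "+Q" (*addr)
	: "r" (val)
	: "memory");
}

/* LSE path: the single STADD emitted when the CPU advertises LSE. */
static inline void xadd_lse(unsigned long *addr, unsigned long val)
{
	asm volatile(
	"	.arch_extension lse\n"
	"	stadd	%1, %0\n"
	: "+Q" (*addr)
	: "r" (val)
	: "memory");
}

int main(void)
{
	unsigned long v = 0;

	xadd_llsc(&v, 2);
	xadd_lse(&v, 3);
	printf("%lu\n", v);	/* 5 */
	return 0;
}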