Diffstat (limited to 'arch'): 200 files changed, 7221 insertions, 1331 deletions
diff --git a/arch/arc/Kbuild b/arch/arc/Kbuild index b94102fff68b..20ea7dd482d4 100644 --- a/arch/arc/Kbuild +++ b/arch/arc/Kbuild @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += kernel/ obj-y += mm/ +obj-y += net/ # for cleaning subdir- += boot diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 4092bec198be..fd0b0a0d4686 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -51,6 +51,7 @@ config ARC select PCI_SYSCALL if PCI select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32 select TRACE_IRQFLAGS_SUPPORT + select HAVE_EBPF_JIT if ISA_ARCV2 config LOCKDEP_SUPPORT def_bool y diff --git a/arch/arc/net/Makefile b/arch/arc/net/Makefile new file mode 100644 index 000000000000..ea5790952e9a --- /dev/null +++ b/arch/arc/net/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only + +ifeq ($(CONFIG_ISA_ARCV2),y) + obj-$(CONFIG_BPF_JIT) += bpf_jit_core.o + obj-$(CONFIG_BPF_JIT) += bpf_jit_arcv2.o +endif diff --git a/arch/arc/net/bpf_jit.h b/arch/arc/net/bpf_jit.h new file mode 100644 index 000000000000..ec44873c42d1 --- /dev/null +++ b/arch/arc/net/bpf_jit.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * The interface that a back-end should provide to bpf_jit_core.c. + * + * Copyright (c) 2024 Synopsys Inc. + * Author: Shahab Vahedi <shahab@synopsys.com> + */ + +#ifndef _ARC_BPF_JIT_H +#define _ARC_BPF_JIT_H + +#include <linux/bpf.h> +#include <linux/filter.h> + +/* Print debug info and assert. */ +//#define ARC_BPF_JIT_DEBUG + +/* Determine the address type of the target. */ +#ifdef CONFIG_ISA_ARCV2 +#define ARC_ADDR u32 +#endif + +/* + * For the translation of some BPF instructions, a temporary register + * might be needed for some interim data. + */ +#define JIT_REG_TMP MAX_BPF_JIT_REG + +/* + * Buffer access: If buffer "b" is not NULL, advance by "n" bytes. + * + * This macro must be used in any place that potentially requires a + * "buf + len". This way, we make sure that the "buf" argument for + * the underlying "arc_*(buf, ...)" ends up as NULL instead of something + * like "0+4" or "0+8", etc. Those "arc_*()" functions check their "buf" + * value to decide if instructions should be emitted or not. + */ +#define BUF(b, n) (((b) != NULL) ? ((b) + (n)) : (b)) + +/************** Functions that the back-end must provide **************/ +/* Extension for 32-bit operations. 
*/ +inline u8 zext(u8 *buf, u8 rd); +/***** Moves *****/ +u8 mov_r32(u8 *buf, u8 rd, u8 rs, u8 sign_ext); +u8 mov_r32_i32(u8 *buf, u8 reg, s32 imm); +u8 mov_r64(u8 *buf, u8 rd, u8 rs, u8 sign_ext); +u8 mov_r64_i32(u8 *buf, u8 reg, s32 imm); +u8 mov_r64_i64(u8 *buf, u8 reg, u32 lo, u32 hi); +/***** Loads and stores *****/ +u8 load_r(u8 *buf, u8 rd, u8 rs, s16 off, u8 size, bool sign_ext); +u8 store_r(u8 *buf, u8 rd, u8 rs, s16 off, u8 size); +u8 store_i(u8 *buf, s32 imm, u8 rd, s16 off, u8 size); +/***** Addition *****/ +u8 add_r32(u8 *buf, u8 rd, u8 rs); +u8 add_r32_i32(u8 *buf, u8 rd, s32 imm); +u8 add_r64(u8 *buf, u8 rd, u8 rs); +u8 add_r64_i32(u8 *buf, u8 rd, s32 imm); +/***** Subtraction *****/ +u8 sub_r32(u8 *buf, u8 rd, u8 rs); +u8 sub_r32_i32(u8 *buf, u8 rd, s32 imm); +u8 sub_r64(u8 *buf, u8 rd, u8 rs); +u8 sub_r64_i32(u8 *buf, u8 rd, s32 imm); +/***** Multiplication *****/ +u8 mul_r32(u8 *buf, u8 rd, u8 rs); +u8 mul_r32_i32(u8 *buf, u8 rd, s32 imm); +u8 mul_r64(u8 *buf, u8 rd, u8 rs); +u8 mul_r64_i32(u8 *buf, u8 rd, s32 imm); +/***** Division *****/ +u8 div_r32(u8 *buf, u8 rd, u8 rs, bool sign_ext); +u8 div_r32_i32(u8 *buf, u8 rd, s32 imm, bool sign_ext); +/***** Remainder *****/ +u8 mod_r32(u8 *buf, u8 rd, u8 rs, bool sign_ext); +u8 mod_r32_i32(u8 *buf, u8 rd, s32 imm, bool sign_ext); +/***** Bitwise AND *****/ +u8 and_r32(u8 *buf, u8 rd, u8 rs); +u8 and_r32_i32(u8 *buf, u8 rd, s32 imm); +u8 and_r64(u8 *buf, u8 rd, u8 rs); +u8 and_r64_i32(u8 *buf, u8 rd, s32 imm); +/***** Bitwise OR *****/ +u8 or_r32(u8 *buf, u8 rd, u8 rs); +u8 or_r32_i32(u8 *buf, u8 rd, s32 imm); +u8 or_r64(u8 *buf, u8 rd, u8 rs); +u8 or_r64_i32(u8 *buf, u8 rd, s32 imm); +/***** Bitwise XOR *****/ +u8 xor_r32(u8 *buf, u8 rd, u8 rs); +u8 xor_r32_i32(u8 *buf, u8 rd, s32 imm); +u8 xor_r64(u8 *buf, u8 rd, u8 rs); +u8 xor_r64_i32(u8 *buf, u8 rd, s32 imm); +/***** Bitwise Negate *****/ +u8 neg_r32(u8 *buf, u8 r); +u8 neg_r64(u8 *buf, u8 r); +/***** Bitwise left shift *****/ +u8 lsh_r32(u8 *buf, u8 rd, u8 rs); +u8 lsh_r32_i32(u8 *buf, u8 rd, u8 imm); +u8 lsh_r64(u8 *buf, u8 rd, u8 rs); +u8 lsh_r64_i32(u8 *buf, u8 rd, s32 imm); +/***** Bitwise right shift (logical) *****/ +u8 rsh_r32(u8 *buf, u8 rd, u8 rs); +u8 rsh_r32_i32(u8 *buf, u8 rd, u8 imm); +u8 rsh_r64(u8 *buf, u8 rd, u8 rs); +u8 rsh_r64_i32(u8 *buf, u8 rd, s32 imm); +/***** Bitwise right shift (arithmetic) *****/ +u8 arsh_r32(u8 *buf, u8 rd, u8 rs); +u8 arsh_r32_i32(u8 *buf, u8 rd, u8 imm); +u8 arsh_r64(u8 *buf, u8 rd, u8 rs); +u8 arsh_r64_i32(u8 *buf, u8 rd, s32 imm); +/***** Frame related *****/ +u32 mask_for_used_regs(u8 bpf_reg, bool is_call); +u8 arc_prologue(u8 *buf, u32 usage, u16 frame_size); +u8 arc_epilogue(u8 *buf, u32 usage, u16 frame_size); +/***** Jumps *****/ +/* + * Different sorts of conditions (ARC enum as opposed to BPF_*). + * + * Do not change the order of enums here. ARC_CC_SLE+1 is used + * to determine the number of JCCs. + */ +enum ARC_CC { + ARC_CC_UGT = 0, /* unsigned > */ + ARC_CC_UGE, /* unsigned >= */ + ARC_CC_ULT, /* unsigned < */ + ARC_CC_ULE, /* unsigned <= */ + ARC_CC_SGT, /* signed > */ + ARC_CC_SGE, /* signed >= */ + ARC_CC_SLT, /* signed < */ + ARC_CC_SLE, /* signed <= */ + ARC_CC_AL, /* always */ + ARC_CC_EQ, /* == */ + ARC_CC_NE, /* != */ + ARC_CC_SET, /* test */ + ARC_CC_LAST +}; + +/* + * A few notes: + * + * - check_jmp_*() are prerequisites before calling the gen_jmp_*(). + * They return "true" if the jump is possible and "false" otherwise. 
+ * + * - The notion of "*_off" is to emphasize that these parameters are + * merely offsets in the JIT stream and not absolute addresses. One + * can look at them as addresses if the JIT code would start from + * address 0x0000_0000. Nonetheless, since the buffer address for the + * JIT is on a word-aligned address, this works and actually makes + * things simpler (offsets are in the range of u32 which is more than + * enough). + */ +bool check_jmp_32(u32 curr_off, u32 targ_off, u8 cond); +bool check_jmp_64(u32 curr_off, u32 targ_off, u8 cond); +u8 gen_jmp_32(u8 *buf, u8 rd, u8 rs, u8 cond, u32 c_off, u32 t_off); +u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 c_off, u32 t_off); +/***** Miscellaneous *****/ +u8 gen_func_call(u8 *buf, ARC_ADDR func_addr, bool external_func); +u8 arc_to_bpf_return(u8 *buf); +/* + * - Perform byte swaps on "rd" based on the "size". + * - If "force" is set, do it unconditionally. Otherwise, consider the + * desired "endian"ness and the host endianness. + * - For data "size"s up to 32 bits, perform a zero-extension if asked + * by the "do_zext" boolean. + */ +u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext); + +#endif /* _ARC_BPF_JIT_H */ diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c new file mode 100644 index 000000000000..31bfb6e9ce00 --- /dev/null +++ b/arch/arc/net/bpf_jit_arcv2.c @@ -0,0 +1,3005 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The ARCv2 backend of Just-In-Time compiler for eBPF bytecode. + * + * Copyright (c) 2024 Synopsys Inc. + * Author: Shahab Vahedi <shahab@synopsys.com> + */ +#include <linux/bug.h> +#include "bpf_jit.h" + +/* ARC core registers. */ +enum { + ARC_R_0, ARC_R_1, ARC_R_2, ARC_R_3, ARC_R_4, ARC_R_5, + ARC_R_6, ARC_R_7, ARC_R_8, ARC_R_9, ARC_R_10, ARC_R_11, + ARC_R_12, ARC_R_13, ARC_R_14, ARC_R_15, ARC_R_16, ARC_R_17, + ARC_R_18, ARC_R_19, ARC_R_20, ARC_R_21, ARC_R_22, ARC_R_23, + ARC_R_24, ARC_R_25, ARC_R_26, ARC_R_FP, ARC_R_SP, ARC_R_ILINK, + ARC_R_30, ARC_R_BLINK, + /* + * Having ARC_R_IMM encoded as source register means there is an + * immediate that must be interpreted from the next 4 bytes. If + * encoded as the destination register though, it implies that the + * output of the operation is not assigned to any register. The + * latter is helpful if we only care about updating the CPU status + * flags. + */ + ARC_R_IMM = 62 +}; + +/* + * Remarks about the rationale behind the chosen mapping: + * + * - BPF_REG_{1,2,3,4} are the argument registers and must be mapped to + * argument registers in ARCv2 ABI: r0-r7. The r7 registers is the last + * argument register in the ABI. Therefore BPF_REG_5, as the fifth + * argument, must be pushed onto the stack. This is a must for calling + * in-kernel functions. + * + * - In ARCv2 ABI, the return value is in r0 for 32-bit results and (r1,r0) + * for 64-bit results. However, because they're already used for BPF_REG_1, + * the next available scratch registers, r8 and r9, are the best candidates + * for BPF_REG_0. After a "call" to a(n) (in-kernel) function, the result + * is "mov"ed to these registers. At a BPF_EXIT, their value is "mov"ed to + * (r1,r0). + * It is worth mentioning that scratch registers are the best choice for + * BPF_REG_0, because it is very popular in BPF instruction encoding. + * + * - JIT_REG_TMP is an artifact needed to translate some BPF instructions. + * Its life span is one single BPF instruction. 
Since during the + * analyze_reg_usage(), it is not known if temporary registers are used, + * it is mapped to ARC's scratch registers: r10 and r11. Therefore, they + * don't matter in analysing phase and don't need saving. This temporary + * register is added as yet another index in the bpf2arc array, so it will + * unfold like the rest of registers during the code generation process. + * + * - Mapping of callee-saved BPF registers, BPF_REG_{6,7,8,9}, starts from + * (r15,r14) register pair. The (r13,r12) is not a good choice, because + * in ARCv2 ABI, r12 is not a callee-saved register and this can cause + * problem when calling an in-kernel function. Theoretically, the mapping + * could start from (r14,r13), but it is not a conventional ARCv2 register + * pair. To have a future proof design, I opted for this arrangement. + * If/when we decide to add ARCv2 instructions that do use register pairs, + * the mapping, hopefully, doesn't need to be revisited. + */ +const u8 bpf2arc[][2] = { + /* Return value from in-kernel function, and exit value from eBPF */ + [BPF_REG_0] = {ARC_R_8, ARC_R_9}, + /* Arguments from eBPF program to in-kernel function */ + [BPF_REG_1] = {ARC_R_0, ARC_R_1}, + [BPF_REG_2] = {ARC_R_2, ARC_R_3}, + [BPF_REG_3] = {ARC_R_4, ARC_R_5}, + [BPF_REG_4] = {ARC_R_6, ARC_R_7}, + /* Remaining arguments, to be passed on the stack per 32-bit ABI */ + [BPF_REG_5] = {ARC_R_22, ARC_R_23}, + /* Callee-saved registers that in-kernel function will preserve */ + [BPF_REG_6] = {ARC_R_14, ARC_R_15}, + [BPF_REG_7] = {ARC_R_16, ARC_R_17}, + [BPF_REG_8] = {ARC_R_18, ARC_R_19}, + [BPF_REG_9] = {ARC_R_20, ARC_R_21}, + /* Read-only frame pointer to access the eBPF stack. 32-bit only. */ + [BPF_REG_FP] = {ARC_R_FP, }, + /* Register for blinding constants */ + [BPF_REG_AX] = {ARC_R_24, ARC_R_25}, + /* Temporary registers for internal use */ + [JIT_REG_TMP] = {ARC_R_10, ARC_R_11} +}; + +#define ARC_CALLEE_SAVED_REG_FIRST ARC_R_13 +#define ARC_CALLEE_SAVED_REG_LAST ARC_R_25 + +#define REG_LO(r) (bpf2arc[(r)][0]) +#define REG_HI(r) (bpf2arc[(r)][1]) + +/* + * To comply with ARCv2 ABI, BPF's arg5 must be put on stack. After which, + * the stack needs to be restored by ARG5_SIZE. + */ +#define ARG5_SIZE 8 + +/* Instruction lengths in bytes. */ +enum { + INSN_len_normal = 4, /* Normal instructions length. */ + INSN_len_imm = 4 /* Length of an extra 32-bit immediate. */ +}; + +/* ZZ defines the size of operation in encodings that it is used. */ +enum { + ZZ_1_byte = 1, + ZZ_2_byte = 2, + ZZ_4_byte = 0, + ZZ_8_byte = 3 +}; + +/* + * AA is mostly about address write back mode. It determines if the + * address in question should be updated before usage or after: + * addr += offset; data = *addr; + * data = *addr; addr += offset; + * + * In "scaling" mode, the effective address will become the sum + * of "address" + "index"*"size". The "size" is specified by the + * "ZZ" field. There is no write back when AA is set for scaling: + * data = *(addr + offset<<zz) + */ +enum { + AA_none = 0, + AA_pre = 1, /* in assembly known as "a/aw". */ + AA_post = 2, /* in assembly known as "ab". */ + AA_scale = 3 /* in assembly known as "as". */ +}; + +/* X flag determines the mode of extension. */ +enum { + X_zero = 0, + X_sign = 1 +}; + +/* Condition codes. 
*/ +enum { + CC_always = 0, /* condition is true all the time */ + CC_equal = 1, /* if status32.z flag is set */ + CC_unequal = 2, /* if status32.z flag is clear */ + CC_positive = 3, /* if status32.n flag is clear */ + CC_negative = 4, /* if status32.n flag is set */ + CC_less_u = 5, /* less than (unsigned) */ + CC_less_eq_u = 14, /* less than or equal (unsigned) */ + CC_great_eq_u = 6, /* greater than or equal (unsigned) */ + CC_great_u = 13, /* greater than (unsigned) */ + CC_less_s = 11, /* less than (signed) */ + CC_less_eq_s = 12, /* less than or equal (signed) */ + CC_great_eq_s = 10, /* greater than or equal (signed) */ + CC_great_s = 9 /* greater than (signed) */ +}; + +#define IN_U6_RANGE(x) ((x) <= (0x40 - 1) && (x) >= 0) +#define IN_S9_RANGE(x) ((x) <= (0x100 - 1) && (x) >= -0x100) +#define IN_S12_RANGE(x) ((x) <= (0x800 - 1) && (x) >= -0x800) +#define IN_S21_RANGE(x) ((x) <= (0x100000 - 1) && (x) >= -0x100000) +#define IN_S25_RANGE(x) ((x) <= (0x1000000 - 1) && (x) >= -0x1000000) + +/* Operands in most of the encodings. */ +#define OP_A(x) ((x) & 0x03f) +#define OP_B(x) ((((x) & 0x07) << 24) | (((x) & 0x38) << 9)) +#define OP_C(x) (((x) & 0x03f) << 6) +#define OP_IMM (OP_C(ARC_R_IMM)) +#define COND(x) (OP_A((x) & 31)) +#define FLAG(x) (((x) & 1) << 15) + +/* + * The 4-byte encoding of "mov b,c": + * + * 0010_0bbb 0000_1010 0BBB_cccc cc00_0000 + * + * b: BBBbbb destination register + * c: cccccc source register + */ +#define OPC_MOV 0x200a0000 + +/* + * The 4-byte encoding of "mov b,s12" (used for moving small immediates): + * + * 0010_0bbb 1000_1010 0BBB_ssss ssSS_SSSS + * + * b: BBBbbb destination register + * s: SSSSSSssssss source immediate (signed) + */ +#define OPC_MOVI 0x208a0000 +#define MOVI_S12(x) ((((x) & 0xfc0) >> 6) | (((x) & 0x3f) << 6)) + +/* + * The 4-byte encoding of "mov[.qq] b,u6", used for conditional + * moving of even smaller immediates: + * + * 0010_0bbb 1100_1010 0BBB_cccc cciq_qqqq + * + * qq: qqqqq condition code + * i: If set, c is considered a 6-bit immediate, else a reg. + * + * b: BBBbbb destination register + * c: cccccc source + */ +#define OPC_MOV_CC 0x20ca0000 +#define MOV_CC_I BIT(5) +#define OPC_MOVU_CC (OPC_MOV_CC | MOV_CC_I) + +/* + * The 4-byte encoding of "sexb b,c" (8-bit sign extension): + * + * 0010_0bbb 0010_1111 0BBB_cccc cc00_0101 + * + * b: BBBbbb destination register + * c: cccccc source register + */ +#define OPC_SEXB 0x202f0005 + +/* + * The 4-byte encoding of "sexh b,c" (16-bit sign extension): + * + * 0010_0bbb 0010_1111 0BBB_cccc cc00_0110 + * + * b: BBBbbb destination register + * c: cccccc source register + */ +#define OPC_SEXH 0x202f0006 + +/* + * The 4-byte encoding of "ld[zz][.x][.aa] c,[b,s9]": + * + * 0001_0bbb ssss_ssss SBBB_0aaz zxcc_cccc + * + * zz: size mode + * aa: address write back mode + * x: extension mode + * + * s9: S_ssss_ssss 9-bit signed number + * b: BBBbbb source reg for address + * c: cccccc destination register + */ +#define OPC_LOAD 0x10000000 +#define LOAD_X(x) ((x) << 6) +#define LOAD_ZZ(x) ((x) << 7) +#define LOAD_AA(x) ((x) << 9) +#define LOAD_S9(x) ((((x) & 0x0ff) << 16) | (((x) & 0x100) << 7)) +#define LOAD_C(x) ((x) & 0x03f) +/* Unsigned and signed loads. */ +#define OPC_LDU (OPC_LOAD | LOAD_X(X_zero)) +#define OPC_LDS (OPC_LOAD | LOAD_X(X_sign)) +/* 32-bit load. */ +#define OPC_LD32 (OPC_LDU | LOAD_ZZ(ZZ_4_byte)) +/* "pop reg" is merely a "ld.ab reg,[sp,4]". 
*/ +#define OPC_POP \ + (OPC_LD32 | LOAD_AA(AA_post) | LOAD_S9(4) | OP_B(ARC_R_SP)) + +/* + * The 4-byte encoding of "st[zz][.aa] c,[b,s9]": + * + * 0001_1bbb ssss_ssss SBBB_cccc cc0a_azz0 + * + * zz: zz size mode + * aa: aa address write back mode + * + * s9: S_ssss_ssss 9-bit signed number + * b: BBBbbb source reg for address + * c: cccccc source reg to be stored + */ +#define OPC_STORE 0x18000000 +#define STORE_ZZ(x) ((x) << 1) +#define STORE_AA(x) ((x) << 3) +#define STORE_S9(x) ((((x) & 0x0ff) << 16) | (((x) & 0x100) << 7)) +/* 32-bit store. */ +#define OPC_ST32 (OPC_STORE | STORE_ZZ(ZZ_4_byte)) +/* "push reg" is merely a "st.aw reg,[sp,-4]". */ +#define OPC_PUSH \ + (OPC_ST32 | STORE_AA(AA_pre) | STORE_S9(-4) | OP_B(ARC_R_SP)) + +/* + * The 4-byte encoding of "add a,b,c": + * + * 0010_0bbb 0i00_0000 fBBB_cccc ccaa_aaaa + * + * f: indicates if flags (carry, etc.) should be updated + * i: If set, c is considered a 6-bit immediate, else a reg. + * + * a: aaaaaa result + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand + */ +#define OPC_ADD 0x20000000 +/* Addition with updating the pertinent flags in "status32" register. */ +#define OPC_ADDF (OPC_ADD | FLAG(1)) +#define ADDI BIT(22) +#define ADDI_U6(x) OP_C(x) +#define OPC_ADDI (OPC_ADD | ADDI) +#define OPC_ADDIF (OPC_ADDI | FLAG(1)) +#define OPC_ADD_I (OPC_ADD | OP_IMM) + +/* + * The 4-byte encoding of "adc a,b,c" (addition with carry): + * + * 0010_0bbb 0i00_0001 0BBB_cccc ccaa_aaaa + * + * i: if set, c is considered a 6-bit immediate, else a reg. + * + * a: aaaaaa result + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand + */ +#define OPC_ADC 0x20010000 +#define ADCI BIT(22) +#define ADCI_U6(x) OP_C(x) +#define OPC_ADCI (OPC_ADC | ADCI) + +/* + * The 4-byte encoding of "sub a,b,c": + * + * 0010_0bbb 0i00_0010 fBBB_cccc ccaa_aaaa + * + * f: indicates if flags (carry, etc.) should be updated + * i: if set, c is considered a 6-bit immediate, else a reg. + * + * a: aaaaaa result + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand + */ +#define OPC_SUB 0x20020000 +/* Subtraction with updating the pertinent flags in "status32" register. */ +#define OPC_SUBF (OPC_SUB | FLAG(1)) +#define SUBI BIT(22) +#define SUBI_U6(x) OP_C(x) +#define OPC_SUBI (OPC_SUB | SUBI) +#define OPC_SUB_I (OPC_SUB | OP_IMM) + +/* + * The 4-byte encoding of "sbc a,b,c" (subtraction with carry): + * + * 0010_0bbb 0000_0011 fBBB_cccc ccaa_aaaa + * + * f: indicates if flags (carry, etc.) should be updated + * + * a: aaaaaa result + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand + */ +#define OPC_SBC 0x20030000 + +/* + * The 4-byte encoding of "cmp[.qq] b,c": + * + * 0010_0bbb 1100_1100 1BBB_cccc cc0q_qqqq + * + * qq: qqqqq condition code + * + * b: BBBbbb the 1st operand + * c: cccccc the 2nd operand + */ +#define OPC_CMP 0x20cc8000 + +/* + * The 4-byte encoding of "neg a,b": + * + * 0010_0bbb 0100_1110 0BBB_0000 00aa_aaaa + * + * a: aaaaaa result + * b: BBBbbb input + */ +#define OPC_NEG 0x204e0000 + +/* + * The 4-byte encoding of "mpy a,b,c". + * mpy is the signed 32-bit multiplication with the lower 32-bit + * of the product as the result. + * + * 0010_0bbb 0001_1010 0BBB_cccc ccaa_aaaa + * + * a: aaaaaa result + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand + */ +#define OPC_MPY 0x201a0000 +#define OPC_MPYI (OPC_MPY | OP_IMM) + +/* + * The 4-byte encoding of "mpydu a,b,c". 
+ * mpydu is the unsigned 32-bit multiplication with the lower 32-bit of + * the product in register "a" and the higher 32-bit in register "a+1". + * + * 0010_1bbb 0001_1001 0BBB_cccc ccaa_aaaa + * + * a: aaaaaa 64-bit result in registers (R_a+1,R_a) + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand + */ +#define OPC_MPYDU 0x28190000 +#define OPC_MPYDUI (OPC_MPYDU | OP_IMM) + +/* + * The 4-byte encoding of "divu a,b,c" (unsigned division): + * + * 0010_1bbb 0000_0101 0BBB_cccc ccaa_aaaa + * + * a: aaaaaa result (quotient) + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand (divisor) + */ +#define OPC_DIVU 0x28050000 +#define OPC_DIVUI (OPC_DIVU | OP_IMM) + +/* + * The 4-byte encoding of "div a,b,c" (signed division): + * + * 0010_1bbb 0000_0100 0BBB_cccc ccaa_aaaa + * + * a: aaaaaa result (quotient) + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand (divisor) + */ +#define OPC_DIVS 0x28040000 +#define OPC_DIVSI (OPC_DIVS | OP_IMM) + +/* + * The 4-byte encoding of "remu a,b,c" (unsigned remainder): + * + * 0010_1bbb 0000_1001 0BBB_cccc ccaa_aaaa + * + * a: aaaaaa result (remainder) + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand (divisor) + */ +#define OPC_REMU 0x28090000 +#define OPC_REMUI (OPC_REMU | OP_IMM) + +/* + * The 4-byte encoding of "rem a,b,c" (signed remainder): + * + * 0010_1bbb 0000_1000 0BBB_cccc ccaa_aaaa + * + * a: aaaaaa result (remainder) + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand (divisor) + */ +#define OPC_REMS 0x28080000 +#define OPC_REMSI (OPC_REMS | OP_IMM) + +/* + * The 4-byte encoding of "and a,b,c": + * + * 0010_0bbb 0000_0100 fBBB_cccc ccaa_aaaa + * + * f: indicates if zero and negative flags should be updated + * + * a: aaaaaa result + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand + */ +#define OPC_AND 0x20040000 +#define OPC_ANDI (OPC_AND | OP_IMM) + +/* + * The 4-byte encoding of "tst[.qq] b,c". + * Checks if the two input operands have any bit set at the same + * position. + * + * 0010_0bbb 1100_1011 1BBB_cccc cc0q_qqqq + * + * qq: qqqqq condition code + * + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand + */ +#define OPC_TST 0x20cb8000 + +/* + * The 4-byte encoding of "or a,b,c": + * + * 0010_0bbb 0000_0101 0BBB_cccc ccaa_aaaa + * + * a: aaaaaa result + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand + */ +#define OPC_OR 0x20050000 +#define OPC_ORI (OPC_OR | OP_IMM) + +/* + * The 4-byte encoding of "xor a,b,c": + * + * 0010_0bbb 0000_0111 0BBB_cccc ccaa_aaaa + * + * a: aaaaaa result + * b: BBBbbb the 1st input operand + * c: cccccc the 2nd input operand + */ +#define OPC_XOR 0x20070000 +#define OPC_XORI (OPC_XOR | OP_IMM) + +/* + * The 4-byte encoding of "not b,c": + * + * 0010_0bbb 0010_1111 0BBB_cccc cc00_1010 + * + * b: BBBbbb result + * c: cccccc input + */ +#define OPC_NOT 0x202f000a + +/* + * The 4-byte encoding of "btst b,u6": + * + * 0010_0bbb 0101_0001 1BBB_uuuu uu00_0000 + * + * b: BBBbbb input number to check + * u6: uuuuuu 6-bit unsigned number specifying bit position to check + */ +#define OPC_BTSTU6 0x20518000 +#define BTST_U6(x) (OP_C((x) & 63)) + +/* + * The 4-byte encoding of "asl[.qq] b,b,c" (arithmetic shift left): + * + * 0010_1bbb 0i00_0000 0BBB_cccc ccaa_aaaa + * + * i: if set, c is considered a 5-bit immediate, else a reg. 
+ * + * b: BBBbbb result and the first operand (number to be shifted) + * c: cccccc amount to be shifted + */ +#define OPC_ASL 0x28000000 +#define ASL_I BIT(22) +#define ASLI_U6(x) OP_C((x) & 31) +#define OPC_ASLI (OPC_ASL | ASL_I) + +/* + * The 4-byte encoding of "asr a,b,c" (arithmetic shift right): + * + * 0010_1bbb 0i00_0010 0BBB_cccc ccaa_aaaa + * + * i: if set, c is considered a 6-bit immediate, else a reg. + * + * a: aaaaaa result + * b: BBBbbb first input: number to be shifted + * c: cccccc second input: amount to be shifted + */ +#define OPC_ASR 0x28020000 +#define ASR_I ASL_I +#define ASRI_U6(x) ASLI_U6(x) +#define OPC_ASRI (OPC_ASR | ASR_I) + +/* + * The 4-byte encoding of "lsr a,b,c" (logical shift right): + * + * 0010_1bbb 0i00_0001 0BBB_cccc ccaa_aaaa + * + * i: if set, c is considered a 6-bit immediate, else a reg. + * + * a: aaaaaa result + * b: BBBbbb first input: number to be shifted + * c: cccccc second input: amount to be shifted + */ +#define OPC_LSR 0x28010000 +#define LSR_I ASL_I +#define LSRI_U6(x) ASLI_U6(x) +#define OPC_LSRI (OPC_LSR | LSR_I) + +/* + * The 4-byte encoding of "swape b,c": + * + * 0010_1bbb 0010_1111 0bbb_cccc cc00_1001 + * + * b: BBBbbb destination register + * c: cccccc source register + */ +#define OPC_SWAPE 0x282f0009 + +/* + * Encoding for jump to an address in register: + * j reg_c + * + * 0010_0000 1110_0000 0000_cccc cc00_0000 + * + * c: cccccc register holding the destination address + */ +#define OPC_JMP 0x20e00000 +/* Jump to "branch-and-link" register, which effectively is a "return". */ +#define OPC_J_BLINK (OPC_JMP | OP_C(ARC_R_BLINK)) + +/* + * Encoding for jump-and-link to an address in register: + * jl reg_c + * + * 0010_0000 0010_0010 0000_cccc cc00_0000 + * + * c: cccccc register holding the destination address + */ +#define OPC_JL 0x20220000 + +/* + * Encoding for (conditional) branch to an offset from the current location + * that is word aligned: (PC & 0xffff_fffc) + s21 + * B[qq] s21 + * + * 0000_0sss ssss_sss0 SSSS_SSSS SS0q_qqqq + * + * qq: qqqqq condition code + * s21: SSSS SSSS_SSss ssss_ssss The displacement (21-bit signed) + * + * The displacement is supposed to be 16-bit (2-byte) aligned. Therefore, + * it should be a multiple of 2. Hence, there is an implied '0' bit at its + * LSB: S_SSSS SSSS_Ssss ssss_sss0 + */ +#define OPC_BCC 0x00000000 +#define BCC_S21(d) ((((d) & 0x7fe) << 16) | (((d) & 0x1ff800) >> 5)) + +/* + * Encoding for unconditional branch to an offset from the current location + * that is word aligned: (PC & 0xffff_fffc) + s25 + * B s25 + * + * 0000_0sss ssss_sss1 SSSS_SSSS SS00_TTTT + * + * s25: TTTT SSSS SSSS_SSss ssss_ssss The displacement (25-bit signed) + * + * The displacement is supposed to be 16-bit (2-byte) aligned. Therefore, + * it should be a multiple of 2. 
Hence, there is an implied '0' bit at its + * LSB: T TTTS_SSSS SSSS_Ssss ssss_sss0 + */ +#define OPC_B 0x00010000 +#define B_S25(d) ((((d) & 0x1e00000) >> 21) | BCC_S21(d)) + +static inline void emit_2_bytes(u8 *buf, u16 bytes) +{ + *((u16 *)buf) = bytes; +} + +static inline void emit_4_bytes(u8 *buf, u32 bytes) +{ + emit_2_bytes(buf, bytes >> 16); + emit_2_bytes(buf + 2, bytes & 0xffff); +} + +static inline u8 bpf_to_arc_size(u8 size) +{ + switch (size) { + case BPF_B: + return ZZ_1_byte; + case BPF_H: + return ZZ_2_byte; + case BPF_W: + return ZZ_4_byte; + case BPF_DW: + return ZZ_8_byte; + default: + return ZZ_4_byte; + } +} + +/************** Encoders (Deal with ARC regs) ************/ + +/* Move an immediate to register with a 4-byte instruction. */ +static u8 arc_movi_r(u8 *buf, u8 reg, s16 imm) +{ + const u32 insn = OPC_MOVI | OP_B(reg) | MOVI_S12(imm); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* rd <- rs */ +static u8 arc_mov_r(u8 *buf, u8 rd, u8 rs) +{ + const u32 insn = OPC_MOV | OP_B(rd) | OP_C(rs); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* The emitted code may have different sizes based on "imm". */ +static u8 arc_mov_i(u8 *buf, u8 rd, s32 imm) +{ + const u32 insn = OPC_MOV | OP_B(rd) | OP_IMM; + + if (IN_S12_RANGE(imm)) + return arc_movi_r(buf, rd, imm); + + if (buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +/* The emitted code will always have the same size (8). */ +static u8 arc_mov_i_fixed(u8 *buf, u8 rd, s32 imm) +{ + const u32 insn = OPC_MOV | OP_B(rd) | OP_IMM; + + if (buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +/* Conditional move. */ +static u8 arc_mov_cc_r(u8 *buf, u8 cc, u8 rd, u8 rs) +{ + const u32 insn = OPC_MOV_CC | OP_B(rd) | OP_C(rs) | COND(cc); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* Conditional move of a small immediate to rd. */ +static u8 arc_movu_cc_r(u8 *buf, u8 cc, u8 rd, u8 imm) +{ + const u32 insn = OPC_MOVU_CC | OP_B(rd) | OP_C(imm) | COND(cc); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* Sign extension from a byte. */ +static u8 arc_sexb_r(u8 *buf, u8 rd, u8 rs) +{ + const u32 insn = OPC_SEXB | OP_B(rd) | OP_C(rs); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* Sign extension from two bytes. 
*/ +static u8 arc_sexh_r(u8 *buf, u8 rd, u8 rs) +{ + const u32 insn = OPC_SEXH | OP_B(rd) | OP_C(rs); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* st reg, [reg_mem, off] */ +static u8 arc_st_r(u8 *buf, u8 reg, u8 reg_mem, s16 off, u8 zz) +{ + const u32 insn = OPC_STORE | STORE_ZZ(zz) | OP_C(reg) | + OP_B(reg_mem) | STORE_S9(off); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* st.aw reg, [sp, -4] */ +static u8 arc_push_r(u8 *buf, u8 reg) +{ + const u32 insn = OPC_PUSH | OP_C(reg); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* ld reg, [reg_mem, off] (unsigned) */ +static u8 arc_ld_r(u8 *buf, u8 reg, u8 reg_mem, s16 off, u8 zz) +{ + const u32 insn = OPC_LDU | LOAD_ZZ(zz) | LOAD_C(reg) | + OP_B(reg_mem) | LOAD_S9(off); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* ld.x reg, [reg_mem, off] (sign extend) */ +static u8 arc_ldx_r(u8 *buf, u8 reg, u8 reg_mem, s16 off, u8 zz) +{ + const u32 insn = OPC_LDS | LOAD_ZZ(zz) | LOAD_C(reg) | + OP_B(reg_mem) | LOAD_S9(off); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* ld.ab reg,[sp,4] */ +static u8 arc_pop_r(u8 *buf, u8 reg) +{ + const u32 insn = OPC_POP | LOAD_C(reg); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* add Ra,Ra,Rc */ +static u8 arc_add_r(u8 *buf, u8 ra, u8 rc) +{ + const u32 insn = OPC_ADD | OP_A(ra) | OP_B(ra) | OP_C(rc); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* add.f Ra,Ra,Rc */ +static u8 arc_addf_r(u8 *buf, u8 ra, u8 rc) +{ + const u32 insn = OPC_ADDF | OP_A(ra) | OP_B(ra) | OP_C(rc); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* add.f Ra,Ra,u6 */ +static u8 arc_addif_r(u8 *buf, u8 ra, u8 u6) +{ + const u32 insn = OPC_ADDIF | OP_A(ra) | OP_B(ra) | ADDI_U6(u6); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* add Ra,Ra,u6 */ +static u8 arc_addi_r(u8 *buf, u8 ra, u8 u6) +{ + const u32 insn = OPC_ADDI | OP_A(ra) | OP_B(ra) | ADDI_U6(u6); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* add Ra,Rb,imm */ +static u8 arc_add_i(u8 *buf, u8 ra, u8 rb, s32 imm) +{ + const u32 insn = OPC_ADD_I | OP_A(ra) | OP_B(rb); + + if (buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +/* adc Ra,Ra,Rc */ +static u8 arc_adc_r(u8 *buf, u8 ra, u8 rc) +{ + const u32 insn = OPC_ADC | OP_A(ra) | OP_B(ra) | OP_C(rc); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* adc Ra,Ra,u6 */ +static u8 arc_adci_r(u8 *buf, u8 ra, u8 u6) +{ + const u32 insn = OPC_ADCI | OP_A(ra) | OP_B(ra) | ADCI_U6(u6); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* sub Ra,Ra,Rc */ +static u8 arc_sub_r(u8 *buf, u8 ra, u8 rc) +{ + const u32 insn = OPC_SUB | OP_A(ra) | OP_B(ra) | OP_C(rc); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* sub.f Ra,Ra,Rc */ +static u8 arc_subf_r(u8 *buf, u8 ra, u8 rc) +{ + const u32 insn = OPC_SUBF | OP_A(ra) | OP_B(ra) | OP_C(rc); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* sub Ra,Ra,u6 */ +static u8 arc_subi_r(u8 *buf, u8 ra, u8 u6) +{ + const u32 insn = OPC_SUBI | OP_A(ra) | OP_B(ra) | SUBI_U6(u6); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* sub Ra,Ra,imm */ +static u8 arc_sub_i(u8 *buf, u8 ra, s32 imm) +{ + const u32 insn = OPC_SUB_I | OP_A(ra) | OP_B(ra); + + if 
(buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +/* sbc Ra,Ra,Rc */ +static u8 arc_sbc_r(u8 *buf, u8 ra, u8 rc) +{ + const u32 insn = OPC_SBC | OP_A(ra) | OP_B(ra) | OP_C(rc); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* cmp Rb,Rc */ +static u8 arc_cmp_r(u8 *buf, u8 rb, u8 rc) +{ + const u32 insn = OPC_CMP | OP_B(rb) | OP_C(rc); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* + * cmp.z Rb,Rc + * + * This "cmp.z" variant of compare instruction is used on lower + * 32-bits of register pairs after "cmp"ing their upper parts. If the + * upper parts are equal (z), then this one will proceed to check the + * rest. + */ +static u8 arc_cmpz_r(u8 *buf, u8 rb, u8 rc) +{ + const u32 insn = OPC_CMP | OP_B(rb) | OP_C(rc) | CC_equal; + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* neg Ra,Rb */ +static u8 arc_neg_r(u8 *buf, u8 ra, u8 rb) +{ + const u32 insn = OPC_NEG | OP_A(ra) | OP_B(rb); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* mpy Ra,Rb,Rc */ +static u8 arc_mpy_r(u8 *buf, u8 ra, u8 rb, u8 rc) +{ + const u32 insn = OPC_MPY | OP_A(ra) | OP_B(rb) | OP_C(rc); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* mpy Ra,Rb,imm */ +static u8 arc_mpy_i(u8 *buf, u8 ra, u8 rb, s32 imm) +{ + const u32 insn = OPC_MPYI | OP_A(ra) | OP_B(rb); + + if (buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +/* mpydu Ra,Ra,Rc */ +static u8 arc_mpydu_r(u8 *buf, u8 ra, u8 rc) +{ + const u32 insn = OPC_MPYDU | OP_A(ra) | OP_B(ra) | OP_C(rc); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* mpydu Ra,Ra,imm */ +static u8 arc_mpydu_i(u8 *buf, u8 ra, s32 imm) +{ + const u32 insn = OPC_MPYDUI | OP_A(ra) | OP_B(ra); + + if (buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +/* divu Rd,Rd,Rs */ +static u8 arc_divu_r(u8 *buf, u8 rd, u8 rs) +{ + const u32 insn = OPC_DIVU | OP_A(rd) | OP_B(rd) | OP_C(rs); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* divu Rd,Rd,imm */ +static u8 arc_divu_i(u8 *buf, u8 rd, s32 imm) +{ + const u32 insn = OPC_DIVUI | OP_A(rd) | OP_B(rd); + + if (buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +/* div Rd,Rd,Rs */ +static u8 arc_divs_r(u8 *buf, u8 rd, u8 rs) +{ + const u32 insn = OPC_DIVS | OP_A(rd) | OP_B(rd) | OP_C(rs); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* div Rd,Rd,imm */ +static u8 arc_divs_i(u8 *buf, u8 rd, s32 imm) +{ + const u32 insn = OPC_DIVSI | OP_A(rd) | OP_B(rd); + + if (buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +/* remu Rd,Rd,Rs */ +static u8 arc_remu_r(u8 *buf, u8 rd, u8 rs) +{ + const u32 insn = OPC_REMU | OP_A(rd) | OP_B(rd) | OP_C(rs); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* remu Rd,Rd,imm */ +static u8 arc_remu_i(u8 *buf, u8 rd, s32 imm) +{ + const u32 insn = OPC_REMUI | OP_A(rd) | OP_B(rd); + + if (buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +/* rem Rd,Rd,Rs */ +static u8 arc_rems_r(u8 *buf, u8 rd, u8 rs) +{ + const u32 insn = OPC_REMS | OP_A(rd) | 
OP_B(rd) | OP_C(rs); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* rem Rd,Rd,imm */ +static u8 arc_rems_i(u8 *buf, u8 rd, s32 imm) +{ + const u32 insn = OPC_REMSI | OP_A(rd) | OP_B(rd); + + if (buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +/* and Rd,Rd,Rs */ +static u8 arc_and_r(u8 *buf, u8 rd, u8 rs) +{ + const u32 insn = OPC_AND | OP_A(rd) | OP_B(rd) | OP_C(rs); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* and Rd,Rd,limm */ +static u8 arc_and_i(u8 *buf, u8 rd, s32 imm) +{ + const u32 insn = OPC_ANDI | OP_A(rd) | OP_B(rd); + + if (buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +/* tst Rd,Rs */ +static u8 arc_tst_r(u8 *buf, u8 rd, u8 rs) +{ + const u32 insn = OPC_TST | OP_B(rd) | OP_C(rs); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* + * This particular version, "tst.z ...", is meant to be used after a + * "tst" on the low 32-bit of register pairs. If that "tst" is not + * zero, then we don't need to test the upper 32-bits lest it sets + * the zero flag. + */ +static u8 arc_tstz_r(u8 *buf, u8 rd, u8 rs) +{ + const u32 insn = OPC_TST | OP_B(rd) | OP_C(rs) | CC_equal; + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +static u8 arc_or_r(u8 *buf, u8 rd, u8 rs1, u8 rs2) +{ + const u32 insn = OPC_OR | OP_A(rd) | OP_B(rs1) | OP_C(rs2); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +static u8 arc_or_i(u8 *buf, u8 rd, s32 imm) +{ + const u32 insn = OPC_ORI | OP_A(rd) | OP_B(rd); + + if (buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +static u8 arc_xor_r(u8 *buf, u8 rd, u8 rs) +{ + const u32 insn = OPC_XOR | OP_A(rd) | OP_B(rd) | OP_C(rs); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +static u8 arc_xor_i(u8 *buf, u8 rd, s32 imm) +{ + const u32 insn = OPC_XORI | OP_A(rd) | OP_B(rd); + + if (buf) { + emit_4_bytes(buf, insn); + emit_4_bytes(buf + INSN_len_normal, imm); + } + return INSN_len_normal + INSN_len_imm; +} + +static u8 arc_not_r(u8 *buf, u8 rd, u8 rs) +{ + const u32 insn = OPC_NOT | OP_B(rd) | OP_C(rs); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +static u8 arc_btst_i(u8 *buf, u8 rs, u8 imm) +{ + const u32 insn = OPC_BTSTU6 | OP_B(rs) | BTST_U6(imm); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +static u8 arc_asl_r(u8 *buf, u8 rd, u8 rs1, u8 rs2) +{ + const u32 insn = OPC_ASL | OP_A(rd) | OP_B(rs1) | OP_C(rs2); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +static u8 arc_asli_r(u8 *buf, u8 rd, u8 rs, u8 imm) +{ + const u32 insn = OPC_ASLI | OP_A(rd) | OP_B(rs) | ASLI_U6(imm); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +static u8 arc_asr_r(u8 *buf, u8 rd, u8 rs1, u8 rs2) +{ + const u32 insn = OPC_ASR | OP_A(rd) | OP_B(rs1) | OP_C(rs2); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +static u8 arc_asri_r(u8 *buf, u8 rd, u8 rs, u8 imm) +{ + const u32 insn = OPC_ASRI | OP_A(rd) | OP_B(rs) | ASRI_U6(imm); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +static u8 arc_lsr_r(u8 *buf, u8 rd, u8 rs1, u8 rs2) +{ + const u32 insn = OPC_LSR | OP_A(rd) | OP_B(rs1) | OP_C(rs2); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +static 
u8 arc_lsri_r(u8 *buf, u8 rd, u8 rs, u8 imm) +{ + const u32 insn = OPC_LSRI | OP_A(rd) | OP_B(rs) | LSRI_U6(imm); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +static u8 arc_swape_r(u8 *buf, u8 r) +{ + const u32 insn = OPC_SWAPE | OP_B(r) | OP_C(r); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +static u8 arc_jmp_return(u8 *buf) +{ + if (buf) + emit_4_bytes(buf, OPC_J_BLINK); + return INSN_len_normal; +} + +static u8 arc_jl(u8 *buf, u8 reg) +{ + const u32 insn = OPC_JL | OP_C(reg); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* + * Conditional jump to an address that is max 21 bits away (signed). + * + * b<cc> s21 + */ +static u8 arc_bcc(u8 *buf, u8 cc, int offset) +{ + const u32 insn = OPC_BCC | BCC_S21(offset) | COND(cc); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/* + * Unconditional jump to an address that is max 25 bits away (signed). + * + * b s25 + */ +static u8 arc_b(u8 *buf, s32 offset) +{ + const u32 insn = OPC_B | B_S25(offset); + + if (buf) + emit_4_bytes(buf, insn); + return INSN_len_normal; +} + +/************* Packers (Deal with BPF_REGs) **************/ + +inline u8 zext(u8 *buf, u8 rd) +{ + if (rd != BPF_REG_FP) + return arc_movi_r(buf, REG_HI(rd), 0); + else + return 0; +} + +u8 mov_r32(u8 *buf, u8 rd, u8 rs, u8 sign_ext) +{ + u8 len = 0; + + if (sign_ext) { + if (sign_ext == 8) + len = arc_sexb_r(buf, REG_LO(rd), REG_LO(rs)); + else if (sign_ext == 16) + len = arc_sexh_r(buf, REG_LO(rd), REG_LO(rs)); + else if (sign_ext == 32 && rd != rs) + len = arc_mov_r(buf, REG_LO(rd), REG_LO(rs)); + + return len; + } + + /* Unsigned move. */ + + if (rd != rs) + len = arc_mov_r(buf, REG_LO(rd), REG_LO(rs)); + + return len; +} + +u8 mov_r32_i32(u8 *buf, u8 reg, s32 imm) +{ + return arc_mov_i(buf, REG_LO(reg), imm); +} + +u8 mov_r64(u8 *buf, u8 rd, u8 rs, u8 sign_ext) +{ + u8 len = 0; + + if (sign_ext) { + /* First handle the low 32-bit part. */ + len = mov_r32(buf, rd, rs, sign_ext); + + /* Now propagate the sign bit of LO to HI. */ + if (sign_ext == 8 || sign_ext == 16 || sign_ext == 32) { + len += arc_asri_r(BUF(buf, len), + REG_HI(rd), REG_LO(rd), 31); + } + + return len; + } + + /* Unsigned move. */ + + if (rd == rs) + return 0; + + len = arc_mov_r(buf, REG_LO(rd), REG_LO(rs)); + + if (rs != BPF_REG_FP) + len += arc_mov_r(BUF(buf, len), REG_HI(rd), REG_HI(rs)); + /* BPF_REG_FP is mapped to 32-bit "fp" register. */ + else + len += arc_movi_r(BUF(buf, len), REG_HI(rd), 0); + + return len; +} + +/* Sign extend the 32-bit immediate into 64-bit register pair. */ +u8 mov_r64_i32(u8 *buf, u8 reg, s32 imm) +{ + u8 len = 0; + + len = arc_mov_i(buf, REG_LO(reg), imm); + + /* BPF_REG_FP is mapped to 32-bit "fp" register. */ + if (reg != BPF_REG_FP) { + if (imm >= 0) + len += arc_movi_r(BUF(buf, len), REG_HI(reg), 0); + else + len += arc_movi_r(BUF(buf, len), REG_HI(reg), -1); + } + + return len; +} + +/* + * This is merely used for translation of "LD R, IMM64" instructions + * of the BPF. These sort of instructions are sometimes used for + * relocations. 
If during the normal pass, the relocation value is + * not known, the BPF instruction may look something like: + * + * LD R <- 0x0000_0001_0000_0001 + * + * Which will nicely translate to two 4-byte ARC instructions: + * + * mov R_lo, 1 # imm is small enough to be s12 + * mov R_hi, 1 # same + * + * However, during the extra pass, the IMM64 will have changed + * to the resolved address and looks something like: + * + * LD R <- 0x0000_0000_1234_5678 + * + * Now, the translated code will require 12 bytes: + * + * mov R_lo, 0x12345678 # this is an 8-byte instruction + * mov R_hi, 0 # still 4 bytes + * + * Which in practice will result in overwriting the following + * instruction. To avoid such cases, we will always emit codes + * with fixed sizes. + */ +u8 mov_r64_i64(u8 *buf, u8 reg, u32 lo, u32 hi) +{ + u8 len; + + len = arc_mov_i_fixed(buf, REG_LO(reg), lo); + len += arc_mov_i_fixed(BUF(buf, len), REG_HI(reg), hi); + + return len; +} + +/* + * If the "off"set is too big (doesn't encode as S9) for: + * + * {ld,st} r, [rm, off] + * + * Then emit: + * + * add r10, REG_LO(rm), off + * + * and make sure that r10 becomes the effective address: + * + * {ld,st} r, [r10, 0] + */ +static u8 adjust_mem_access(u8 *buf, s16 *off, u8 size, + u8 rm, u8 *arc_reg_mem) +{ + u8 len = 0; + *arc_reg_mem = REG_LO(rm); + + if (!IN_S9_RANGE(*off) || + (size == BPF_DW && !IN_S9_RANGE(*off + 4))) { + len += arc_add_i(BUF(buf, len), + REG_LO(JIT_REG_TMP), REG_LO(rm), (u32)(*off)); + *arc_reg_mem = REG_LO(JIT_REG_TMP); + *off = 0; + } + + return len; +} + +/* store rs, [rd, off] */ +u8 store_r(u8 *buf, u8 rs, u8 rd, s16 off, u8 size) +{ + u8 len, arc_reg_mem; + + len = adjust_mem_access(buf, &off, size, rd, &arc_reg_mem); + + if (size == BPF_DW) { + len += arc_st_r(BUF(buf, len), REG_LO(rs), arc_reg_mem, + off, ZZ_4_byte); + len += arc_st_r(BUF(buf, len), REG_HI(rs), arc_reg_mem, + off + 4, ZZ_4_byte); + } else { + u8 zz = bpf_to_arc_size(size); + + len += arc_st_r(BUF(buf, len), REG_LO(rs), arc_reg_mem, + off, zz); + } + + return len; +} + +/* + * For {8,16,32}-bit stores: + * mov r21, imm + * st r21, [...] + * For 64-bit stores: + * mov r21, imm + * st r21, [...] + * mov r21, {0,-1} + * st r21, [...+4] + */ +u8 store_i(u8 *buf, s32 imm, u8 rd, s16 off, u8 size) +{ + u8 len, arc_reg_mem; + /* REG_LO(JIT_REG_TMP) might be used by "adjust_mem_access()". */ + const u8 arc_rs = REG_HI(JIT_REG_TMP); + + len = adjust_mem_access(buf, &off, size, rd, &arc_reg_mem); + + if (size == BPF_DW) { + len += arc_mov_i(BUF(buf, len), arc_rs, imm); + len += arc_st_r(BUF(buf, len), arc_rs, arc_reg_mem, + off, ZZ_4_byte); + imm = (imm >= 0 ? 0 : -1); + len += arc_mov_i(BUF(buf, len), arc_rs, imm); + len += arc_st_r(BUF(buf, len), arc_rs, arc_reg_mem, + off + 4, ZZ_4_byte); + } else { + u8 zz = bpf_to_arc_size(size); + + len += arc_mov_i(BUF(buf, len), arc_rs, imm); + len += arc_st_r(BUF(buf, len), arc_rs, arc_reg_mem, off, zz); + } + + return len; +} + +/* + * For the calling convention of a little endian machine, the LO part + * must be on top of the stack. + */ +static u8 push_r64(u8 *buf, u8 reg) +{ + u8 len = 0; + +#ifdef __LITTLE_ENDIAN + /* BPF_REG_FP is mapped to 32-bit "fp" register. 
*/ + if (reg != BPF_REG_FP) + len += arc_push_r(BUF(buf, len), REG_HI(reg)); + len += arc_push_r(BUF(buf, len), REG_LO(reg)); +#else + len += arc_push_r(BUF(buf, len), REG_LO(reg)); + if (reg != BPF_REG_FP) + len += arc_push_r(BUF(buf, len), REG_HI(reg)); +#endif + + return len; +} + +/* load rd, [rs, off] */ +u8 load_r(u8 *buf, u8 rd, u8 rs, s16 off, u8 size, bool sign_ext) +{ + u8 len, arc_reg_mem; + + len = adjust_mem_access(buf, &off, size, rs, &arc_reg_mem); + + if (size == BPF_B || size == BPF_H || size == BPF_W) { + const u8 zz = bpf_to_arc_size(size); + + /* Use LD.X only if the data size is less than 32-bit. */ + if (sign_ext && (zz == ZZ_1_byte || zz == ZZ_2_byte)) { + len += arc_ldx_r(BUF(buf, len), REG_LO(rd), + arc_reg_mem, off, zz); + } else { + len += arc_ld_r(BUF(buf, len), REG_LO(rd), + arc_reg_mem, off, zz); + } + + if (sign_ext) { + /* Propagate the sign bit to the higher reg. */ + len += arc_asri_r(BUF(buf, len), + REG_HI(rd), REG_LO(rd), 31); + } else { + len += arc_movi_r(BUF(buf, len), REG_HI(rd), 0); + } + } else if (size == BPF_DW) { + /* + * We are about to issue 2 consecutive loads: + * + * ld rx, [rb, off+0] + * ld ry, [rb, off+4] + * + * If "rx" and "rb" are the same registers, then the order + * should change to guarantee that "rb" remains intact + * during these 2 operations: + * + * ld ry, [rb, off+4] + * ld rx, [rb, off+0] + */ + if (REG_LO(rd) != arc_reg_mem) { + len += arc_ld_r(BUF(buf, len), REG_LO(rd), arc_reg_mem, + off, ZZ_4_byte); + len += arc_ld_r(BUF(buf, len), REG_HI(rd), arc_reg_mem, + off + 4, ZZ_4_byte); + } else { + len += arc_ld_r(BUF(buf, len), REG_HI(rd), arc_reg_mem, + off + 4, ZZ_4_byte); + len += arc_ld_r(BUF(buf, len), REG_LO(rd), arc_reg_mem, + off, ZZ_4_byte); + } + } + + return len; +} + +u8 add_r32(u8 *buf, u8 rd, u8 rs) +{ + return arc_add_r(buf, REG_LO(rd), REG_LO(rs)); +} + +u8 add_r32_i32(u8 *buf, u8 rd, s32 imm) +{ + if (IN_U6_RANGE(imm)) + return arc_addi_r(buf, REG_LO(rd), imm); + else + return arc_add_i(buf, REG_LO(rd), REG_LO(rd), imm); +} + +u8 add_r64(u8 *buf, u8 rd, u8 rs) +{ + u8 len; + + len = arc_addf_r(buf, REG_LO(rd), REG_LO(rs)); + len += arc_adc_r(BUF(buf, len), REG_HI(rd), REG_HI(rs)); + return len; +} + +u8 add_r64_i32(u8 *buf, u8 rd, s32 imm) +{ + u8 len; + + if (IN_U6_RANGE(imm)) { + len = arc_addif_r(buf, REG_LO(rd), imm); + len += arc_adci_r(BUF(buf, len), REG_HI(rd), 0); + } else { + len = mov_r64_i32(buf, JIT_REG_TMP, imm); + len += add_r64(BUF(buf, len), rd, JIT_REG_TMP); + } + return len; +} + +u8 sub_r32(u8 *buf, u8 rd, u8 rs) +{ + return arc_sub_r(buf, REG_LO(rd), REG_LO(rs)); +} + +u8 sub_r32_i32(u8 *buf, u8 rd, s32 imm) +{ + if (IN_U6_RANGE(imm)) + return arc_subi_r(buf, REG_LO(rd), imm); + else + return arc_sub_i(buf, REG_LO(rd), imm); +} + +u8 sub_r64(u8 *buf, u8 rd, u8 rs) +{ + u8 len; + + len = arc_subf_r(buf, REG_LO(rd), REG_LO(rs)); + len += arc_sbc_r(BUF(buf, len), REG_HI(rd), REG_HI(rs)); + return len; +} + +u8 sub_r64_i32(u8 *buf, u8 rd, s32 imm) +{ + u8 len; + + len = mov_r64_i32(buf, JIT_REG_TMP, imm); + len += sub_r64(BUF(buf, len), rd, JIT_REG_TMP); + return len; +} + +static u8 cmp_r32(u8 *buf, u8 rd, u8 rs) +{ + return arc_cmp_r(buf, REG_LO(rd), REG_LO(rs)); +} + +u8 neg_r32(u8 *buf, u8 r) +{ + return arc_neg_r(buf, REG_LO(r), REG_LO(r)); +} + +/* In a two's complement system, -r is (~r + 1). 
*/ +u8 neg_r64(u8 *buf, u8 r) +{ + u8 len; + + len = arc_not_r(buf, REG_LO(r), REG_LO(r)); + len += arc_not_r(BUF(buf, len), REG_HI(r), REG_HI(r)); + len += add_r64_i32(BUF(buf, len), r, 1); + return len; +} + +u8 mul_r32(u8 *buf, u8 rd, u8 rs) +{ + return arc_mpy_r(buf, REG_LO(rd), REG_LO(rd), REG_LO(rs)); +} + +u8 mul_r32_i32(u8 *buf, u8 rd, s32 imm) +{ + return arc_mpy_i(buf, REG_LO(rd), REG_LO(rd), imm); +} + +/* + * MUL B, C + * -------- + * mpy t0, B_hi, C_lo + * mpy t1, B_lo, C_hi + * mpydu B_lo, B_lo, C_lo + * add B_hi, B_hi, t0 + * add B_hi, B_hi, t1 + */ +u8 mul_r64(u8 *buf, u8 rd, u8 rs) +{ + const u8 t0 = REG_LO(JIT_REG_TMP); + const u8 t1 = REG_HI(JIT_REG_TMP); + const u8 C_lo = REG_LO(rs); + const u8 C_hi = REG_HI(rs); + const u8 B_lo = REG_LO(rd); + const u8 B_hi = REG_HI(rd); + u8 len; + + len = arc_mpy_r(buf, t0, B_hi, C_lo); + len += arc_mpy_r(BUF(buf, len), t1, B_lo, C_hi); + len += arc_mpydu_r(BUF(buf, len), B_lo, C_lo); + len += arc_add_r(BUF(buf, len), B_hi, t0); + len += arc_add_r(BUF(buf, len), B_hi, t1); + + return len; +} + +/* + * MUL B, imm + * ---------- + * + * To get a 64-bit result from a signed 64x32 multiplication: + * + * B_hi B_lo * + * sign imm + * ----------------------------- + * HI(B_lo*imm) LO(B_lo*imm) + + * B_hi*imm + + * B_lo*sign + * ----------------------------- + * res_hi res_lo + * + * mpy t1, B_lo, sign(imm) + * mpy t0, B_hi, imm + * mpydu B_lo, B_lo, imm + * add B_hi, B_hi, t0 + * add B_hi, B_hi, t1 + * + * Note: We can't use signed double multiplication, "mpyd", instead of an + * unsigned version, "mpydu", and then get rid of the sign adjustments + * calculated in "t1". The signed multiplication, "mpyd", will consider + * both operands, "B_lo" and "imm", as signed inputs. However, for this + * 64x32 multiplication, "B_lo" must be treated as an unsigned number. + */ +u8 mul_r64_i32(u8 *buf, u8 rd, s32 imm) +{ + const u8 t0 = REG_LO(JIT_REG_TMP); + const u8 t1 = REG_HI(JIT_REG_TMP); + const u8 B_lo = REG_LO(rd); + const u8 B_hi = REG_HI(rd); + u8 len = 0; + + if (imm == 1) + return 0; + + /* Is the sign-extension of the immediate "-1"? */ + if (imm < 0) + len += arc_neg_r(BUF(buf, len), t1, B_lo); + + len += arc_mpy_i(BUF(buf, len), t0, B_hi, imm); + len += arc_mpydu_i(BUF(buf, len), B_lo, imm); + len += arc_add_r(BUF(buf, len), B_hi, t0); + + /* Add the "sign*B_lo" part, if necessary. 
*/ + if (imm < 0) + len += arc_add_r(BUF(buf, len), B_hi, t1); + + return len; +} + +u8 div_r32(u8 *buf, u8 rd, u8 rs, bool sign_ext) +{ + if (sign_ext) + return arc_divs_r(buf, REG_LO(rd), REG_LO(rs)); + else + return arc_divu_r(buf, REG_LO(rd), REG_LO(rs)); +} + +u8 div_r32_i32(u8 *buf, u8 rd, s32 imm, bool sign_ext) +{ + if (imm == 0) + return 0; + + if (sign_ext) + return arc_divs_i(buf, REG_LO(rd), imm); + else + return arc_divu_i(buf, REG_LO(rd), imm); +} + +u8 mod_r32(u8 *buf, u8 rd, u8 rs, bool sign_ext) +{ + if (sign_ext) + return arc_rems_r(buf, REG_LO(rd), REG_LO(rs)); + else + return arc_remu_r(buf, REG_LO(rd), REG_LO(rs)); +} + +u8 mod_r32_i32(u8 *buf, u8 rd, s32 imm, bool sign_ext) +{ + if (imm == 0) + return 0; + + if (sign_ext) + return arc_rems_i(buf, REG_LO(rd), imm); + else + return arc_remu_i(buf, REG_LO(rd), imm); +} + +u8 and_r32(u8 *buf, u8 rd, u8 rs) +{ + return arc_and_r(buf, REG_LO(rd), REG_LO(rs)); +} + +u8 and_r32_i32(u8 *buf, u8 rd, s32 imm) +{ + return arc_and_i(buf, REG_LO(rd), imm); +} + +u8 and_r64(u8 *buf, u8 rd, u8 rs) +{ + u8 len; + + len = arc_and_r(buf, REG_LO(rd), REG_LO(rs)); + len += arc_and_r(BUF(buf, len), REG_HI(rd), REG_HI(rs)); + return len; +} + +u8 and_r64_i32(u8 *buf, u8 rd, s32 imm) +{ + u8 len; + + len = mov_r64_i32(buf, JIT_REG_TMP, imm); + len += and_r64(BUF(buf, len), rd, JIT_REG_TMP); + return len; +} + +static u8 tst_r32(u8 *buf, u8 rd, u8 rs) +{ + return arc_tst_r(buf, REG_LO(rd), REG_LO(rs)); +} + +u8 or_r32(u8 *buf, u8 rd, u8 rs) +{ + return arc_or_r(buf, REG_LO(rd), REG_LO(rd), REG_LO(rs)); +} + +u8 or_r32_i32(u8 *buf, u8 rd, s32 imm) +{ + return arc_or_i(buf, REG_LO(rd), imm); +} + +u8 or_r64(u8 *buf, u8 rd, u8 rs) +{ + u8 len; + + len = arc_or_r(buf, REG_LO(rd), REG_LO(rd), REG_LO(rs)); + len += arc_or_r(BUF(buf, len), REG_HI(rd), REG_HI(rd), REG_HI(rs)); + return len; +} + +u8 or_r64_i32(u8 *buf, u8 rd, s32 imm) +{ + u8 len; + + len = mov_r64_i32(buf, JIT_REG_TMP, imm); + len += or_r64(BUF(buf, len), rd, JIT_REG_TMP); + return len; +} + +u8 xor_r32(u8 *buf, u8 rd, u8 rs) +{ + return arc_xor_r(buf, REG_LO(rd), REG_LO(rs)); +} + +u8 xor_r32_i32(u8 *buf, u8 rd, s32 imm) +{ + return arc_xor_i(buf, REG_LO(rd), imm); +} + +u8 xor_r64(u8 *buf, u8 rd, u8 rs) +{ + u8 len; + + len = arc_xor_r(buf, REG_LO(rd), REG_LO(rs)); + len += arc_xor_r(BUF(buf, len), REG_HI(rd), REG_HI(rs)); + return len; +} + +u8 xor_r64_i32(u8 *buf, u8 rd, s32 imm) +{ + u8 len; + + len = mov_r64_i32(buf, JIT_REG_TMP, imm); + len += xor_r64(BUF(buf, len), rd, JIT_REG_TMP); + return len; +} + +/* "asl a,b,c" --> "a = (b << (c & 31))". */ +u8 lsh_r32(u8 *buf, u8 rd, u8 rs) +{ + return arc_asl_r(buf, REG_LO(rd), REG_LO(rd), REG_LO(rs)); +} + +u8 lsh_r32_i32(u8 *buf, u8 rd, u8 imm) +{ + return arc_asli_r(buf, REG_LO(rd), REG_LO(rd), imm); +} + +/* + * algorithm + * --------- + * if (n <= 32) + * to_hi = lo >> (32-n) # (32-n) is the negate of "n" in a 5-bit width. + * lo <<= n + * hi <<= n + * hi |= to_hi + * else + * hi = lo << (n-32) + * lo = 0 + * + * assembly translation for "LSH B, C" + * (heavily influenced by ARC gcc) + * ----------------------------------- + * not t0, C_lo # The first 3 lines are almost the same as: + * lsr t1, B_lo, 1 # neg t0, C_lo + * lsr t1, t1, t0 # lsr t1, B_lo, t0 --> t1 is "to_hi" + * mov t0, C_lo* # with one important difference. In "neg" + * asl B_lo, B_lo, t0 # version, when C_lo=0, t1 becomes B_lo while + * asl B_hi, B_hi, t0 # it should be 0. 
The "not" approach instead, + * or B_hi, B_hi, t1 # "shift"s t1 once and 31 times, practically + * btst t0, 5 # setting it to 0 when C_lo=0. + * mov.ne B_hi, B_lo** + * mov.ne B_lo, 0 + * + * *The "mov t0, C_lo" is necessary to cover the cases that C is the same + * register as B. + * + * **ARC performs a shift in this manner: B <<= (C & 31) + * For 32<=n<64, "n-32" and "n&31" are the same. Therefore, "B << n" and + * "B << (n-32)" yield the same results. e.g. the results of "B << 35" and + * "B << 3" are the same. + * + * The behaviour is undefined for n >= 64. + */ +u8 lsh_r64(u8 *buf, u8 rd, u8 rs) +{ + const u8 t0 = REG_LO(JIT_REG_TMP); + const u8 t1 = REG_HI(JIT_REG_TMP); + const u8 C_lo = REG_LO(rs); + const u8 B_lo = REG_LO(rd); + const u8 B_hi = REG_HI(rd); + u8 len; + + len = arc_not_r(buf, t0, C_lo); + len += arc_lsri_r(BUF(buf, len), t1, B_lo, 1); + len += arc_lsr_r(BUF(buf, len), t1, t1, t0); + len += arc_mov_r(BUF(buf, len), t0, C_lo); + len += arc_asl_r(BUF(buf, len), B_lo, B_lo, t0); + len += arc_asl_r(BUF(buf, len), B_hi, B_hi, t0); + len += arc_or_r(BUF(buf, len), B_hi, B_hi, t1); + len += arc_btst_i(BUF(buf, len), t0, 5); + len += arc_mov_cc_r(BUF(buf, len), CC_unequal, B_hi, B_lo); + len += arc_movu_cc_r(BUF(buf, len), CC_unequal, B_lo, 0); + + return len; +} + +/* + * if (n < 32) + * to_hi = B_lo >> 32-n # extract upper n bits + * lo <<= n + * hi <<=n + * hi |= to_hi + * else if (n < 64) + * hi = lo << n-32 + * lo = 0 + */ +u8 lsh_r64_i32(u8 *buf, u8 rd, s32 imm) +{ + const u8 t0 = REG_LO(JIT_REG_TMP); + const u8 B_lo = REG_LO(rd); + const u8 B_hi = REG_HI(rd); + const u8 n = (u8)imm; + u8 len = 0; + + if (n == 0) { + return 0; + } else if (n <= 31) { + len = arc_lsri_r(buf, t0, B_lo, 32 - n); + len += arc_asli_r(BUF(buf, len), B_lo, B_lo, n); + len += arc_asli_r(BUF(buf, len), B_hi, B_hi, n); + len += arc_or_r(BUF(buf, len), B_hi, B_hi, t0); + } else if (n <= 63) { + len = arc_asli_r(buf, B_hi, B_lo, n - 32); + len += arc_movi_r(BUF(buf, len), B_lo, 0); + } + /* n >= 64 is undefined behaviour. */ + + return len; +} + +/* "lsr a,b,c" --> "a = (b >> (c & 31))". */ +u8 rsh_r32(u8 *buf, u8 rd, u8 rs) +{ + return arc_lsr_r(buf, REG_LO(rd), REG_LO(rd), REG_LO(rs)); +} + +u8 rsh_r32_i32(u8 *buf, u8 rd, u8 imm) +{ + return arc_lsri_r(buf, REG_LO(rd), REG_LO(rd), imm); +} + +/* + * For better commentary, see lsh_r64(). 
+ * + * algorithm + * --------- + * if (n <= 32) + * to_lo = hi << (32-n) + * hi >>= n + * lo >>= n + * lo |= to_lo + * else + * lo = hi >> (n-32) + * hi = 0 + * + * RSH B,C + * ---------- + * not t0, C_lo + * asl t1, B_hi, 1 + * asl t1, t1, t0 + * mov t0, C_lo + * lsr B_hi, B_hi, t0 + * lsr B_lo, B_lo, t0 + * or B_lo, B_lo, t1 + * btst t0, 5 + * mov.ne B_lo, B_hi + * mov.ne B_hi, 0 + */ +u8 rsh_r64(u8 *buf, u8 rd, u8 rs) +{ + const u8 t0 = REG_LO(JIT_REG_TMP); + const u8 t1 = REG_HI(JIT_REG_TMP); + const u8 C_lo = REG_LO(rs); + const u8 B_lo = REG_LO(rd); + const u8 B_hi = REG_HI(rd); + u8 len; + + len = arc_not_r(buf, t0, C_lo); + len += arc_asli_r(BUF(buf, len), t1, B_hi, 1); + len += arc_asl_r(BUF(buf, len), t1, t1, t0); + len += arc_mov_r(BUF(buf, len), t0, C_lo); + len += arc_lsr_r(BUF(buf, len), B_hi, B_hi, t0); + len += arc_lsr_r(BUF(buf, len), B_lo, B_lo, t0); + len += arc_or_r(BUF(buf, len), B_lo, B_lo, t1); + len += arc_btst_i(BUF(buf, len), t0, 5); + len += arc_mov_cc_r(BUF(buf, len), CC_unequal, B_lo, B_hi); + len += arc_movu_cc_r(BUF(buf, len), CC_unequal, B_hi, 0); + + return len; +} + +/* + * if (n < 32) + * to_lo = B_lo << 32-n # extract lower n bits, right-padded with 32-n 0s + * lo >>=n + * hi >>=n + * hi |= to_lo + * else if (n < 64) + * lo = hi >> n-32 + * hi = 0 + */ +u8 rsh_r64_i32(u8 *buf, u8 rd, s32 imm) +{ + const u8 t0 = REG_LO(JIT_REG_TMP); + const u8 B_lo = REG_LO(rd); + const u8 B_hi = REG_HI(rd); + const u8 n = (u8)imm; + u8 len = 0; + + if (n == 0) { + return 0; + } else if (n <= 31) { + len = arc_asli_r(buf, t0, B_hi, 32 - n); + len += arc_lsri_r(BUF(buf, len), B_lo, B_lo, n); + len += arc_lsri_r(BUF(buf, len), B_hi, B_hi, n); + len += arc_or_r(BUF(buf, len), B_lo, B_lo, t0); + } else if (n <= 63) { + len = arc_lsri_r(buf, B_lo, B_hi, n - 32); + len += arc_movi_r(BUF(buf, len), B_hi, 0); + } + /* n >= 64 is undefined behaviour. */ + + return len; +} + +/* "asr a,b,c" --> "a = (b s>> (c & 31))". */ +u8 arsh_r32(u8 *buf, u8 rd, u8 rs) +{ + return arc_asr_r(buf, REG_LO(rd), REG_LO(rd), REG_LO(rs)); +} + +u8 arsh_r32_i32(u8 *buf, u8 rd, u8 imm) +{ + return arc_asri_r(buf, REG_LO(rd), REG_LO(rd), imm); +} + +/* + * For comparison, see rsh_r64(). 
+ * + * algorithm + * --------- + * if (n <= 32) + * to_lo = hi << (32-n) + * hi s>>= n + * lo >>= n + * lo |= to_lo + * else + * hi_sign = hi s>>31 + * lo = hi s>> (n-32) + * hi = hi_sign + * + * ARSH B,C + * ---------- + * not t0, C_lo + * asl t1, B_hi, 1 + * asl t1, t1, t0 + * mov t0, C_lo + * asr B_hi, B_hi, t0 + * lsr B_lo, B_lo, t0 + * or B_lo, B_lo, t1 + * btst t0, 5 + * asr t0, B_hi, 31 # now, t0 = 0 or -1 based on B_hi's sign + * mov.ne B_lo, B_hi + * mov.ne B_hi, t0 + */ +u8 arsh_r64(u8 *buf, u8 rd, u8 rs) +{ + const u8 t0 = REG_LO(JIT_REG_TMP); + const u8 t1 = REG_HI(JIT_REG_TMP); + const u8 C_lo = REG_LO(rs); + const u8 B_lo = REG_LO(rd); + const u8 B_hi = REG_HI(rd); + u8 len; + + len = arc_not_r(buf, t0, C_lo); + len += arc_asli_r(BUF(buf, len), t1, B_hi, 1); + len += arc_asl_r(BUF(buf, len), t1, t1, t0); + len += arc_mov_r(BUF(buf, len), t0, C_lo); + len += arc_asr_r(BUF(buf, len), B_hi, B_hi, t0); + len += arc_lsr_r(BUF(buf, len), B_lo, B_lo, t0); + len += arc_or_r(BUF(buf, len), B_lo, B_lo, t1); + len += arc_btst_i(BUF(buf, len), t0, 5); + len += arc_asri_r(BUF(buf, len), t0, B_hi, 31); + len += arc_mov_cc_r(BUF(buf, len), CC_unequal, B_lo, B_hi); + len += arc_mov_cc_r(BUF(buf, len), CC_unequal, B_hi, t0); + + return len; +} + +/* + * if (n < 32) + * to_lo = lo << 32-n # extract lower n bits, right-padded with 32-n 0s + * lo >>=n + * hi s>>=n + * hi |= to_lo + * else if (n < 64) + * lo = hi s>> n-32 + * hi = (lo[msb] ? -1 : 0) + */ +u8 arsh_r64_i32(u8 *buf, u8 rd, s32 imm) +{ + const u8 t0 = REG_LO(JIT_REG_TMP); + const u8 B_lo = REG_LO(rd); + const u8 B_hi = REG_HI(rd); + const u8 n = (u8)imm; + u8 len = 0; + + if (n == 0) { + return 0; + } else if (n <= 31) { + len = arc_asli_r(buf, t0, B_hi, 32 - n); + len += arc_lsri_r(BUF(buf, len), B_lo, B_lo, n); + len += arc_asri_r(BUF(buf, len), B_hi, B_hi, n); + len += arc_or_r(BUF(buf, len), B_lo, B_lo, t0); + } else if (n <= 63) { + len = arc_asri_r(buf, B_lo, B_hi, n - 32); + len += arc_movi_r(BUF(buf, len), B_hi, -1); + len += arc_btst_i(BUF(buf, len), B_lo, 31); + len += arc_movu_cc_r(BUF(buf, len), CC_equal, B_hi, 0); + } + /* n >= 64 is undefined behaviour. */ + + return len; +} + +u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext) +{ + u8 len = 0; +#ifdef __BIG_ENDIAN + const u8 host_endian = BPF_FROM_BE; +#else + const u8 host_endian = BPF_FROM_LE; +#endif + if (host_endian != endian || force) { + switch (size) { + case 16: + /* + * r = B4B3_B2B1 << 16 --> r = B2B1_0000 + * then, swape(r) would become the desired 0000_B1B2 + */ + len = arc_asli_r(buf, REG_LO(rd), REG_LO(rd), 16); + fallthrough; + case 32: + len += arc_swape_r(BUF(buf, len), REG_LO(rd)); + if (do_zext) + len += zext(BUF(buf, len), rd); + break; + case 64: + /* + * swap "hi" and "lo": + * hi ^= lo; + * lo ^= hi; + * hi ^= lo; + * and then swap the bytes in "hi" and "lo". + */ + len = arc_xor_r(buf, REG_HI(rd), REG_LO(rd)); + len += arc_xor_r(BUF(buf, len), REG_LO(rd), REG_HI(rd)); + len += arc_xor_r(BUF(buf, len), REG_HI(rd), REG_LO(rd)); + len += arc_swape_r(BUF(buf, len), REG_LO(rd)); + len += arc_swape_r(BUF(buf, len), REG_HI(rd)); + break; + default: + /* The caller must have handled this. */ + } + } else { + /* + * If the same endianness, there's not much to do other + * than zeroing out the upper bytes based on the "size". 
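+		 * For instance (per the switch below), a 16-bit request on a
+		 * matching host only needs the "and" with 0xffff plus the
+		 * optional zext, the 32-bit case needs only that optional
+		 * zext, and the 64-bit case emits nothing at all.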
+ */ + switch (size) { + case 16: + len = arc_and_i(buf, REG_LO(rd), 0xffff); + fallthrough; + case 32: + if (do_zext) + len += zext(BUF(buf, len), rd); + break; + case 64: + break; + default: + /* The caller must have handled this. */ + } + } + + return len; +} + +/* + * To create a frame, all that is needed is: + * + * push fp + * mov fp, sp + * sub sp, <frame_size> + * + * "push fp" is taken care of separately while saving the clobbered registers. + * All that remains is copying SP value to FP and shrinking SP's address space + * for any possible function call to come. + */ +static inline u8 frame_create(u8 *buf, u16 size) +{ + u8 len; + + len = arc_mov_r(buf, ARC_R_FP, ARC_R_SP); + if (IN_U6_RANGE(size)) + len += arc_subi_r(BUF(buf, len), ARC_R_SP, size); + else + len += arc_sub_i(BUF(buf, len), ARC_R_SP, size); + return len; +} + +/* + * mov sp, fp + * + * The value of SP upon entering was copied to FP. + */ +static inline u8 frame_restore(u8 *buf) +{ + return arc_mov_r(buf, ARC_R_SP, ARC_R_FP); +} + +/* + * Going from a JITed code to the native caller: + * + * mov ARC_ABI_RET_lo, BPF_REG_0_lo # r0 <- r8 + * mov ARC_ABI_RET_hi, BPF_REG_0_hi # r1 <- r9 + */ +static u8 bpf_to_arc_return(u8 *buf) +{ + u8 len; + + len = arc_mov_r(buf, ARC_R_0, REG_LO(BPF_REG_0)); + len += arc_mov_r(BUF(buf, len), ARC_R_1, REG_HI(BPF_REG_0)); + return len; +} + +/* + * Coming back from an external (in-kernel) function to the JITed code: + * + * mov ARC_ABI_RET_lo, BPF_REG_0_lo # r8 <- r0 + * mov ARC_ABI_RET_hi, BPF_REG_0_hi # r9 <- r1 + */ +u8 arc_to_bpf_return(u8 *buf) +{ + u8 len; + + len = arc_mov_r(buf, REG_LO(BPF_REG_0), ARC_R_0); + len += arc_mov_r(BUF(buf, len), REG_HI(BPF_REG_0), ARC_R_1); + return len; +} + +/* + * This translation leads to: + * + * mov r10, addr # always an 8-byte instruction + * jl [r10] + * + * The length of the "mov" must be fixed (8), otherwise it may diverge + * during the normal and extra passes: + * + * normal pass extra pass + * + * 180: mov r10,0 | 180: mov r10,0x700578d8 + * 184: jl [r10] | 188: jl [r10] + * 188: add.f r16,r16,0x1 | 18c: adc r17,r17,0 + * 18c: adc r17,r17,0 | + * + * In the above example, the change from "r10 <- 0" to "r10 <- 0x700578d8" + * has led to an increase in the length of the "mov" instruction. + * Inadvertently, that caused the loss of the "add.f" instruction. + */ +static u8 jump_and_link(u8 *buf, u32 addr) +{ + u8 len; + + len = arc_mov_i_fixed(buf, REG_LO(JIT_REG_TMP), addr); + len += arc_jl(BUF(buf, len), REG_LO(JIT_REG_TMP)); + return len; +} + +/* + * This function determines which ARC registers must be saved and restored. + * It does so by looking into: + * + * "bpf_reg": The clobbered (destination) BPF register + * "is_call": Indicator if the current instruction is a call + * + * When a register of interest is clobbered, its corresponding bit position + * in return value, "usage", is set to true. + */ +u32 mask_for_used_regs(u8 bpf_reg, bool is_call) +{ + u32 usage = 0; + + /* BPF registers that must be saved. */ + if (bpf_reg >= BPF_REG_6 && bpf_reg <= BPF_REG_9) { + usage |= BIT(REG_LO(bpf_reg)); + usage |= BIT(REG_HI(bpf_reg)); + /* + * Using the frame pointer register implies that it should + * be saved and reinitialised with the current frame data. + */ + } else if (bpf_reg == BPF_REG_FP) { + usage |= BIT(REG_LO(BPF_REG_FP)); + /* Could there be some ARC registers that must to be saved? 
*/ + } else { + if (REG_LO(bpf_reg) >= ARC_CALLEE_SAVED_REG_FIRST && + REG_LO(bpf_reg) <= ARC_CALLEE_SAVED_REG_LAST) + usage |= BIT(REG_LO(bpf_reg)); + + if (REG_HI(bpf_reg) >= ARC_CALLEE_SAVED_REG_FIRST && + REG_HI(bpf_reg) <= ARC_CALLEE_SAVED_REG_LAST) + usage |= BIT(REG_HI(bpf_reg)); + } + + /* A "call" indicates that ARC's "blink" reg must be saved. */ + usage |= is_call ? BIT(ARC_R_BLINK) : 0; + + return usage; +} + +/* + * push blink # if blink is marked as clobbered + * push r[0-n] # if r[i] is marked as clobbered + * push fp # if fp is marked as clobbered + * mov fp, sp # if frame_size > 0 (clobbers fp) + * sub sp, <frame_size> # same as above + */ +u8 arc_prologue(u8 *buf, u32 usage, u16 frame_size) +{ + u8 len = 0; + u32 gp_regs = 0; + + /* Deal with blink first. */ + if (usage & BIT(ARC_R_BLINK)) + len += arc_push_r(BUF(buf, len), ARC_R_BLINK); + + gp_regs = usage & ~(BIT(ARC_R_BLINK) | BIT(ARC_R_FP)); + while (gp_regs) { + u8 reg = __builtin_ffs(gp_regs) - 1; + + len += arc_push_r(BUF(buf, len), reg); + gp_regs &= ~BIT(reg); + } + + /* Deal with fp last. */ + if ((usage & BIT(ARC_R_FP)) || frame_size > 0) + len += arc_push_r(BUF(buf, len), ARC_R_FP); + + if (frame_size > 0) + len += frame_create(BUF(buf, len), frame_size); + +#ifdef ARC_BPF_JIT_DEBUG + if ((usage & BIT(ARC_R_FP)) && frame_size == 0) { + pr_err("FP is being saved while there is no frame."); + BUG(); + } +#endif + + return len; +} + +/* + * mov sp, fp # if frame_size > 0 + * pop fp # if fp is marked as clobbered + * pop r[n-0] # if r[i] is marked as clobbered + * pop blink # if blink is marked as clobbered + * mov r0, r8 # always: ABI_return <- BPF_return + * mov r1, r9 # continuation of above + * j [blink] # always + * + * "fp being marked as clobbered" and "frame_size > 0" are the two sides of + * the same coin. + */ +u8 arc_epilogue(u8 *buf, u32 usage, u16 frame_size) +{ + u32 len = 0; + u32 gp_regs = 0; + +#ifdef ARC_BPF_JIT_DEBUG + if ((usage & BIT(ARC_R_FP)) && frame_size == 0) { + pr_err("FP is being saved while there is no frame."); + BUG(); + } +#endif + + if (frame_size > 0) + len += frame_restore(BUF(buf, len)); + + /* Deal with fp first. */ + if ((usage & BIT(ARC_R_FP)) || frame_size > 0) + len += arc_pop_r(BUF(buf, len), ARC_R_FP); + + gp_regs = usage & ~(BIT(ARC_R_BLINK) | BIT(ARC_R_FP)); + while (gp_regs) { + /* "usage" is 32-bit, each bit indicating an ARC register. */ + u8 reg = 31 - __builtin_clz(gp_regs); + + len += arc_pop_r(BUF(buf, len), reg); + gp_regs &= ~BIT(reg); + } + + /* Deal with blink last. */ + if (usage & BIT(ARC_R_BLINK)) + len += arc_pop_r(BUF(buf, len), ARC_R_BLINK); + + /* Wrap up the return value and jump back to the caller. */ + len += bpf_to_arc_return(BUF(buf, len)); + len += arc_jmp_return(BUF(buf, len)); + + return len; +} + +/* + * For details on the algorithm, see the comments of "gen_jcc_64()". + * + * This data structure is holding information for jump translations. + * + * jit_off: How many bytes into the current JIT address, "b"ranch insn. occurs + * cond: The condition that the ARC branch instruction must use + * + * e.g.: + * + * BPF_JGE R1, R0, @target + * ------------------------ + * | + * v + * 0x1000: cmp r3, r1 # 0x1000 is the JIT address for "BPF_JGE ..." insn + * 0x1004: bhi @target # first jump (branch higher) + * 0x1008: blo @end # second jump acting as a skip (end is 0x1014) + * 0x100C: cmp r2, r0 # the lower 32 bits are evaluated + * 0x1010: bhs @target # third jump (branch higher or same) + * 0x1014: ... 
+ * + * The jit_off(set) of the "bhi" is 4 bytes. + * The cond(ition) for the "bhi" is "CC_great_u". + * + * The jit_off(set) is necessary for calculating the exact displacement + * to the "target" address: + * + * jit_address + jit_off(set) - @target + * 0x1000 + 4 - @target + */ +#define JCC64_NR_OF_JMPS 3 /* Number of jumps in jcc64 template. */ +#define JCC64_INSNS_TO_END 3 /* Number of insn. inclusive the 2nd jmp to end. */ +#define JCC64_SKIP_JMP 1 /* Index of the "skip" jump to "end". */ +const struct { + /* + * "jit_off" is common between all "jmp[]" and is coupled with + * "cond" of each "jmp[]" instance. e.g.: + * + * arcv2_64_jccs.jit_off[1] + * arcv2_64_jccs.jmp[ARC_CC_UGT].cond[1] + * + * Are indicating that the second jump in JITed code of "UGT" + * is at offset "jit_off[1]" while its condition is "cond[1]". + */ + u8 jit_off[JCC64_NR_OF_JMPS]; + + struct { + u8 cond[JCC64_NR_OF_JMPS]; + } jmp[ARC_CC_SLE + 1]; +} arcv2_64_jccs = { + .jit_off = { + INSN_len_normal * 1, + INSN_len_normal * 2, + INSN_len_normal * 4 + }, + /* + * cmp rd_hi, rs_hi + * bhi @target # 1: u> + * blo @end # 2: u< + * cmp rd_lo, rs_lo + * bhi @target # 3: u> + * end: + */ + .jmp[ARC_CC_UGT] = { + .cond = {CC_great_u, CC_less_u, CC_great_u} + }, + /* + * cmp rd_hi, rs_hi + * bhi @target # 1: u> + * blo @end # 2: u< + * cmp rd_lo, rs_lo + * bhs @target # 3: u>= + * end: + */ + .jmp[ARC_CC_UGE] = { + .cond = {CC_great_u, CC_less_u, CC_great_eq_u} + }, + /* + * cmp rd_hi, rs_hi + * blo @target # 1: u< + * bhi @end # 2: u> + * cmp rd_lo, rs_lo + * blo @target # 3: u< + * end: + */ + .jmp[ARC_CC_ULT] = { + .cond = {CC_less_u, CC_great_u, CC_less_u} + }, + /* + * cmp rd_hi, rs_hi + * blo @target # 1: u< + * bhi @end # 2: u> + * cmp rd_lo, rs_lo + * bls @target # 3: u<= + * end: + */ + .jmp[ARC_CC_ULE] = { + .cond = {CC_less_u, CC_great_u, CC_less_eq_u} + }, + /* + * cmp rd_hi, rs_hi + * bgt @target # 1: s> + * blt @end # 2: s< + * cmp rd_lo, rs_lo + * bhi @target # 3: u> + * end: + */ + .jmp[ARC_CC_SGT] = { + .cond = {CC_great_s, CC_less_s, CC_great_u} + }, + /* + * cmp rd_hi, rs_hi + * bgt @target # 1: s> + * blt @end # 2: s< + * cmp rd_lo, rs_lo + * bhs @target # 3: u>= + * end: + */ + .jmp[ARC_CC_SGE] = { + .cond = {CC_great_s, CC_less_s, CC_great_eq_u} + }, + /* + * cmp rd_hi, rs_hi + * blt @target # 1: s< + * bgt @end # 2: s> + * cmp rd_lo, rs_lo + * blo @target # 3: u< + * end: + */ + .jmp[ARC_CC_SLT] = { + .cond = {CC_less_s, CC_great_s, CC_less_u} + }, + /* + * cmp rd_hi, rs_hi + * blt @target # 1: s< + * bgt @end # 2: s> + * cmp rd_lo, rs_lo + * bls @target # 3: u<= + * end: + */ + .jmp[ARC_CC_SLE] = { + .cond = {CC_less_s, CC_great_s, CC_less_eq_u} + } +}; + +/* + * The displacement (offset) for ARC's "b"ranch instruction is the distance + * from the aligned version of _current_ instruction (PCL) to the target + * instruction: + * + * DISP = TARGET - PCL # PCL is the word aligned PC + */ +static inline s32 get_displacement(u32 curr_off, u32 targ_off) +{ + return (s32)(targ_off - (curr_off & ~3L)); +} + +/* + * "disp"lacement should be: + * + * 1. 16-bit aligned. + * 2. fit in S25, because no "condition code" is supposed to be encoded. + */ +static inline bool is_valid_far_disp(s32 disp) +{ + return (!(disp & 1) && IN_S25_RANGE(disp)); +} + +/* + * "disp"lacement should be: + * + * 1. 16-bit aligned. + * 2. fit in S21, because "condition code" is supposed to be encoded too. 
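+ *
+ * In other words, conditional branches reach roughly +/- 1 MiB (S21),
+ * while the far variant checked by is_valid_far_disp() reaches roughly
+ * +/- 16 MiB (S25).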
+ */ +static inline bool is_valid_near_disp(s32 disp) +{ + return (!(disp & 1) && IN_S21_RANGE(disp)); +} + +/* + * cmp rd_hi, rs_hi + * cmp.z rd_lo, rs_lo + * b{eq,ne} @target + * | | + * | `--> "eq" param is false (JNE) + * `-----> "eq" param is true (JEQ) + */ +static int gen_j_eq_64(u8 *buf, u8 rd, u8 rs, bool eq, + u32 curr_off, u32 targ_off) +{ + s32 disp; + u8 len = 0; + + len += arc_cmp_r(BUF(buf, len), REG_HI(rd), REG_HI(rs)); + len += arc_cmpz_r(BUF(buf, len), REG_LO(rd), REG_LO(rs)); + disp = get_displacement(curr_off + len, targ_off); + len += arc_bcc(BUF(buf, len), eq ? CC_equal : CC_unequal, disp); + + return len; +} + +/* + * tst rd_hi, rs_hi + * tst.z rd_lo, rs_lo + * bne @target + */ +static u8 gen_jset_64(u8 *buf, u8 rd, u8 rs, u32 curr_off, u32 targ_off) +{ + u8 len = 0; + s32 disp; + + len += arc_tst_r(BUF(buf, len), REG_HI(rd), REG_HI(rs)); + len += arc_tstz_r(BUF(buf, len), REG_LO(rd), REG_LO(rs)); + disp = get_displacement(curr_off + len, targ_off); + len += arc_bcc(BUF(buf, len), CC_unequal, disp); + + return len; +} + +/* + * Verify if all the jumps for a JITed jcc64 operation are valid, + * by consulting the data stored at "arcv2_64_jccs". + */ +static bool check_jcc_64(u32 curr_off, u32 targ_off, u8 cond) +{ + size_t i; + + if (cond >= ARC_CC_LAST) + return false; + + for (i = 0; i < JCC64_NR_OF_JMPS; i++) { + u32 from, to; + + from = curr_off + arcv2_64_jccs.jit_off[i]; + /* for the 2nd jump, we jump to the end of block. */ + if (i != JCC64_SKIP_JMP) + to = targ_off; + else + to = from + (JCC64_INSNS_TO_END * INSN_len_normal); + /* There is a "cc" in the instruction, so a "near" jump. */ + if (!is_valid_near_disp(get_displacement(from, to))) + return false; + } + + return true; +} + +/* Can the jump from "curr_off" to "targ_off" actually happen? */ +bool check_jmp_64(u32 curr_off, u32 targ_off, u8 cond) +{ + s32 disp; + + switch (cond) { + case ARC_CC_UGT: + case ARC_CC_UGE: + case ARC_CC_ULT: + case ARC_CC_ULE: + case ARC_CC_SGT: + case ARC_CC_SGE: + case ARC_CC_SLT: + case ARC_CC_SLE: + return check_jcc_64(curr_off, targ_off, cond); + case ARC_CC_EQ: + case ARC_CC_NE: + case ARC_CC_SET: + /* + * The "jump" for the JITed BPF_J{SET,EQ,NE} is actually the + * 3rd instruction. See comments of "gen_j{set,_eq}_64()". + */ + curr_off += 2 * INSN_len_normal; + disp = get_displacement(curr_off, targ_off); + /* There is a "cc" field in the issued instruction. */ + return is_valid_near_disp(disp); + case ARC_CC_AL: + disp = get_displacement(curr_off, targ_off); + return is_valid_far_disp(disp); + default: + return false; + } +} + +/* + * The template for the 64-bit jumps with the following BPF conditions + * + * u< u<= u> u>= s< s<= s> s>= + * + * Looks like below: + * + * cmp rd_hi, rs_hi + * b<c1> @target + * b<c2> @end + * cmp rd_lo, rs_lo # if execution reaches here, r{d,s}_hi are equal + * b<c3> @target + * end: + * + * "c1" is the condition that JIT is handling minus the equality part. + * For instance if we have to translate an "unsigned greater or equal", + * then "c1" will be "unsigned greater". We won't know about equality + * until all 64-bits of data (higeher and lower registers) are processed. + * + * "c2" is the counter logic of "c1". For instance, if "c1" is originated + * from "s>", then "c2" would be "s<". Notice that equality doesn't play + * a role here either, because the lower 32 bits are not processed yet. + * + * "c3" is the unsigned version of "c1", no matter if the BPF condition + * was signed or unsigned. 
An unsigned version is necessary, because the + * MSB of the lower 32 bits does not reflect a sign in the whole 64-bit + * scheme. Otherwise, 64-bit comparisons like + * (0x0000_0000,0x8000_0000) s>= (0x0000_0000,0x0000_0000) + * would yield an incorrect result. Finally, if there is an equality + * check in the BPF condition, it will be reflected in "c3". + * + * You can find all the instances of this template where the + * "arcv2_64_jccs" is getting initialised. + */ +static u8 gen_jcc_64(u8 *buf, u8 rd, u8 rs, u8 cond, + u32 curr_off, u32 targ_off) +{ + s32 disp; + u32 end_off; + const u8 *cc = arcv2_64_jccs.jmp[cond].cond; + u8 len = 0; + + /* cmp rd_hi, rs_hi */ + len += arc_cmp_r(buf, REG_HI(rd), REG_HI(rs)); + + /* b<c1> @target */ + disp = get_displacement(curr_off + len, targ_off); + len += arc_bcc(BUF(buf, len), cc[0], disp); + + /* b<c2> @end */ + end_off = curr_off + len + (JCC64_INSNS_TO_END * INSN_len_normal); + disp = get_displacement(curr_off + len, end_off); + len += arc_bcc(BUF(buf, len), cc[1], disp); + + /* cmp rd_lo, rs_lo */ + len += arc_cmp_r(BUF(buf, len), REG_LO(rd), REG_LO(rs)); + + /* b<c3> @target */ + disp = get_displacement(curr_off + len, targ_off); + len += arc_bcc(BUF(buf, len), cc[2], disp); + + return len; +} + +/* + * This function only applies the necessary logic to make the proper + * translations. All the sanity checks must have already been done + * by calling the check_jmp_64(). + */ +u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 curr_off, u32 targ_off) +{ + u8 len = 0; + bool eq = false; + s32 disp; + + switch (cond) { + case ARC_CC_AL: + disp = get_displacement(curr_off, targ_off); + len = arc_b(buf, disp); + break; + case ARC_CC_UGT: + case ARC_CC_UGE: + case ARC_CC_ULT: + case ARC_CC_ULE: + case ARC_CC_SGT: + case ARC_CC_SGE: + case ARC_CC_SLT: + case ARC_CC_SLE: + len = gen_jcc_64(buf, rd, rs, cond, curr_off, targ_off); + break; + case ARC_CC_EQ: + eq = true; + fallthrough; + case ARC_CC_NE: + len = gen_j_eq_64(buf, rd, rs, eq, curr_off, targ_off); + break; + case ARC_CC_SET: + len = gen_jset_64(buf, rd, rs, curr_off, targ_off); + break; + default: +#ifdef ARC_BPF_JIT_DEBUG + pr_err("64-bit jump condition is not known."); + BUG(); +#endif + } + return len; +} + +/* + * The condition codes to use when generating JIT instructions + * for 32-bit jumps. + * + * The "ARC_CC_AL" index is not really used by the code, but it + * is here for the sake of completeness. + * + * The "ARC_CC_SET" becomes "CC_unequal" because of the "tst" + * instruction that precedes the conditional branch. + */ +const u8 arcv2_32_jmps[ARC_CC_LAST] = { + [ARC_CC_UGT] = CC_great_u, + [ARC_CC_UGE] = CC_great_eq_u, + [ARC_CC_ULT] = CC_less_u, + [ARC_CC_ULE] = CC_less_eq_u, + [ARC_CC_SGT] = CC_great_s, + [ARC_CC_SGE] = CC_great_eq_s, + [ARC_CC_SLT] = CC_less_s, + [ARC_CC_SLE] = CC_less_eq_s, + [ARC_CC_AL] = CC_always, + [ARC_CC_EQ] = CC_equal, + [ARC_CC_NE] = CC_unequal, + [ARC_CC_SET] = CC_unequal +}; + +/* Can the jump from "curr_off" to "targ_off" actually happen? */ +bool check_jmp_32(u32 curr_off, u32 targ_off, u8 cond) +{ + u8 addendum; + s32 disp; + + if (cond >= ARC_CC_LAST) + return false; + + /* + * The unconditional jump happens immediately, while the rest + * are either preceded by a "cmp" or "tst" instruction. + */ + addendum = (cond == ARC_CC_AL) ? 
0 : INSN_len_normal; + disp = get_displacement(curr_off + addendum, targ_off); + + if (ARC_CC_AL) + return is_valid_far_disp(disp); + else + return is_valid_near_disp(disp); +} + +/* + * The JITed code for 32-bit (conditional) branches: + * + * ARC_CC_AL @target + * b @jit_targ_addr + * + * ARC_CC_SET rd, rs, @target + * tst rd, rs + * bnz @jit_targ_addr + * + * ARC_CC_xx rd, rs, @target + * cmp rd, rs + * b<cc> @jit_targ_addr # cc = arcv2_32_jmps[xx] + */ +u8 gen_jmp_32(u8 *buf, u8 rd, u8 rs, u8 cond, u32 curr_off, u32 targ_off) +{ + s32 disp; + u8 len = 0; + + /* + * Although this must have already been checked by "check_jmp_32()", + * we're not going to risk accessing "arcv2_32_jmps" array without + * the boundary check. + */ + if (cond >= ARC_CC_LAST) { +#ifdef ARC_BPF_JIT_DEBUG + pr_err("32-bit jump condition is not known."); + BUG(); +#endif + return 0; + } + + /* If there is a "condition", issue the "cmp" or "tst" first. */ + if (cond != ARC_CC_AL) { + if (cond == ARC_CC_SET) + len = tst_r32(buf, rd, rs); + else + len = cmp_r32(buf, rd, rs); + /* + * The issued instruction affects the "disp"lacement as + * it alters the "curr_off" by its "len"gth. The "curr_off" + * should always point to the jump instruction. + */ + disp = get_displacement(curr_off + len, targ_off); + len += arc_bcc(BUF(buf, len), arcv2_32_jmps[cond], disp); + } else { + /* The straight forward unconditional jump. */ + disp = get_displacement(curr_off, targ_off); + len = arc_b(buf, disp); + } + + return len; +} + +/* + * Generate code for functions calls. There can be two types of calls: + * + * - Calling another BPF function + * - Calling an in-kernel function which is compiled by ARC gcc + * + * In the later case, we must comply to ARCv2 ABI and handle arguments + * and return values accordingly. + */ +u8 gen_func_call(u8 *buf, ARC_ADDR func_addr, bool external_func) +{ + u8 len = 0; + + /* + * In case of an in-kernel function call, always push the 5th + * argument onto the stack, because that's where the ABI dictates + * it should be found. If the callee doesn't really use it, no harm + * is done. The stack is readjusted either way after the call. + */ + if (external_func) + len += push_r64(BUF(buf, len), BPF_REG_5); + + len += jump_and_link(BUF(buf, len), func_addr); + + if (external_func) + len += arc_add_i(BUF(buf, len), ARC_R_SP, ARC_R_SP, ARG5_SIZE); + + return len; +} diff --git a/arch/arc/net/bpf_jit_core.c b/arch/arc/net/bpf_jit_core.c new file mode 100644 index 000000000000..6f6b4ffccf2c --- /dev/null +++ b/arch/arc/net/bpf_jit_core.c @@ -0,0 +1,1425 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The back-end-agnostic part of Just-In-Time compiler for eBPF bytecode. + * + * Copyright (c) 2024 Synopsys Inc. + * Author: Shahab Vahedi <shahab@synopsys.com> + */ +#include <linux/bug.h> +#include "bpf_jit.h" + +/* + * Check for the return value. A pattern used often in this file. + * There must be a "ret" variable of type "int" in the scope. + */ +#define CHECK_RET(cmd) \ + do { \ + ret = (cmd); \ + if (ret < 0) \ + return ret; \ + } while (0) + +#ifdef ARC_BPF_JIT_DEBUG +/* Dumps bytes in /var/log/messages at KERN_INFO level (4). */ +static void dump_bytes(const u8 *buf, u32 len, const char *header) +{ + u8 line[64]; + size_t i, j; + + pr_info("-----------------[ %s ]-----------------\n", header); + + for (i = 0, j = 0; i < len; i++) { + /* Last input byte? */ + if (i == len - 1) { + j += scnprintf(line + j, 64 - j, "0x%02x", buf[i]); + pr_info("%s\n", line); + break; + } + /* End of line? 
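+		 * (the dump prints eight bytes per line)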
*/ + else if (i % 8 == 7) { + j += scnprintf(line + j, 64 - j, "0x%02x", buf[i]); + pr_info("%s\n", line); + j = 0; + } else { + j += scnprintf(line + j, 64 - j, "0x%02x, ", buf[i]); + } + } +} +#endif /* ARC_BPF_JIT_DEBUG */ + +/********************* JIT context ***********************/ + +/* + * buf: Translated instructions end up here. + * len: The length of whole block in bytes. + * index: The offset at which the _next_ instruction may be put. + */ +struct jit_buffer { + u8 *buf; + u32 len; + u32 index; +}; + +/* + * This is a subset of "struct jit_context" that its information is deemed + * necessary for the next extra pass to come. + * + * bpf_header: Needed to finally lock the region. + * bpf2insn: Used to find the translation for instructions of interest. + * + * Things like "jit.buf" and "jit.len" can be retrieved respectively from + * "prog->bpf_func" and "prog->jited_len". + */ +struct arc_jit_data { + struct bpf_binary_header *bpf_header; + u32 *bpf2insn; +}; + +/* + * The JIT pertinent context that is used by different functions. + * + * prog: The current eBPF program being handled. + * orig_prog: The original eBPF program before any possible change. + * jit: The JIT buffer and its length. + * bpf_header: The JITed program header. "jit.buf" points inside it. + * emit: If set, opcodes are written to memory; else, a dry-run. + * do_zext: If true, 32-bit sub-regs must be zero extended. + * bpf2insn: Maps BPF insn indices to their counterparts in jit.buf. + * bpf2insn_valid: Indicates if "bpf2ins" is populated with the mappings. + * jit_data: A piece of memory to transfer data to the next pass. + * arc_regs_clobbered: Each bit status determines if that arc reg is clobbered. + * save_blink: Whether ARC's "blink" register needs to be saved. + * frame_size: Derived from "prog->aux->stack_depth". + * epilogue_offset: Used by early "return"s in the code to jump here. + * need_extra_pass: A forecast if an "extra_pass" will occur. + * is_extra_pass: Indicates if the current pass is an extra pass. + * user_bpf_prog: True, if VM opcodes come from a real program. + * blinded: True if "constant blinding" step returned a new "prog". + * success: Indicates if the whole JIT went OK. + */ +struct jit_context { + struct bpf_prog *prog; + struct bpf_prog *orig_prog; + struct jit_buffer jit; + struct bpf_binary_header *bpf_header; + bool emit; + bool do_zext; + u32 *bpf2insn; + bool bpf2insn_valid; + struct arc_jit_data *jit_data; + u32 arc_regs_clobbered; + bool save_blink; + u16 frame_size; + u32 epilogue_offset; + bool need_extra_pass; + bool is_extra_pass; + bool user_bpf_prog; + bool blinded; + bool success; +}; + +/* + * If we're in ARC_BPF_JIT_DEBUG mode and the debug level is right, dump the + * input BPF stream. "bpf_jit_dump()" is not fully suited for this purpose. + */ +static void vm_dump(const struct bpf_prog *prog) +{ +#ifdef ARC_BPF_JIT_DEBUG + if (bpf_jit_enable > 1) + dump_bytes((u8 *)prog->insns, 8 * prog->len, " VM "); +#endif +} + +/* + * If the right level of debug is set, dump the bytes. There are 2 variants + * of this function: + * + * 1. Use the standard bpf_jit_dump() which is meant only for JITed code. + * 2. Use the dump_bytes() to match its "vm_dump()" instance. + */ +static void jit_dump(const struct jit_context *ctx) +{ +#ifdef ARC_BPF_JIT_DEBUG + u8 header[8]; +#endif + const int pass = ctx->is_extra_pass ? 
2 : 1; + + if (bpf_jit_enable <= 1 || !ctx->prog->jited) + return; + +#ifdef ARC_BPF_JIT_DEBUG + scnprintf(header, sizeof(header), "JIT:%d", pass); + dump_bytes(ctx->jit.buf, ctx->jit.len, header); + pr_info("\n"); +#else + bpf_jit_dump(ctx->prog->len, ctx->jit.len, pass, ctx->jit.buf); +#endif +} + +/* Initialise the context so there's no garbage. */ +static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog) +{ + memset(ctx, 0, sizeof(ctx)); + + ctx->orig_prog = prog; + + /* If constant blinding was requested but failed, scram. */ + ctx->prog = bpf_jit_blind_constants(prog); + if (IS_ERR(ctx->prog)) + return PTR_ERR(ctx->prog); + ctx->blinded = (ctx->prog == ctx->orig_prog ? false : true); + + /* If the verifier doesn't zero-extend, then we have to do it. */ + ctx->do_zext = !ctx->prog->aux->verifier_zext; + + ctx->is_extra_pass = ctx->prog->jited; + ctx->user_bpf_prog = ctx->prog->is_func; + + return 0; +} + +/* + * Only after the first iteration of normal pass (the dry-run), + * there are valid offsets in ctx->bpf2insn array. + */ +static inline bool offsets_available(const struct jit_context *ctx) +{ + return ctx->bpf2insn_valid; +} + +/* + * "*mem" should be freed when there is no "extra pass" to come, + * or the compilation terminated abruptly. A few of such memory + * allocations are: ctx->jit_data and ctx->bpf2insn. + */ +static inline void maybe_free(struct jit_context *ctx, void **mem) +{ + if (*mem) { + if (!ctx->success || !ctx->need_extra_pass) { + kfree(*mem); + *mem = NULL; + } + } +} + +/* + * Free memories based on the status of the context. + * + * A note about "bpf_header": On successful runs, "bpf_header" is + * not freed, because "jit.buf", a sub-array of it, is returned as + * the "bpf_func". However, "bpf_header" is lost and nothing points + * to it. This should not cause a leakage, because apparently + * "bpf_header" can be revived by "bpf_jit_binary_hdr()". This is + * how "bpf_jit_free()" in "kernel/bpf/core.c" releases the memory. + */ +static void jit_ctx_cleanup(struct jit_context *ctx) +{ + if (ctx->blinded) { + /* if all went well, release the orig_prog. */ + if (ctx->success) + bpf_jit_prog_release_other(ctx->prog, ctx->orig_prog); + else + bpf_jit_prog_release_other(ctx->orig_prog, ctx->prog); + } + + maybe_free(ctx, (void **)&ctx->bpf2insn); + maybe_free(ctx, (void **)&ctx->jit_data); + + if (!ctx->bpf2insn) + ctx->bpf2insn_valid = false; + + /* Freeing "bpf_header" is enough. "jit.buf" is a sub-array of it. */ + if (!ctx->success && ctx->bpf_header) { + bpf_jit_binary_free(ctx->bpf_header); + ctx->bpf_header = NULL; + ctx->jit.buf = NULL; + ctx->jit.index = 0; + ctx->jit.len = 0; + } + + ctx->emit = false; + ctx->do_zext = false; +} + +/* + * Analyse the register usage and record the frame size. + * The register usage is determined by consulting the back-end. + */ +static void analyze_reg_usage(struct jit_context *ctx) +{ + size_t i; + u32 usage = 0; + const struct bpf_insn *insn = ctx->prog->insnsi; + + for (i = 0; i < ctx->prog->len; i++) { + u8 bpf_reg; + bool call; + + bpf_reg = insn[i].dst_reg; + call = (insn[i].code == (BPF_JMP | BPF_CALL)) ? true : false; + usage |= mask_for_used_regs(bpf_reg, call); + } + + ctx->arc_regs_clobbered = usage; + ctx->frame_size = ctx->prog->aux->stack_depth; +} + +/* Verify that no instruction will be emitted when there is no buffer. 
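+ * The check only matters for the emission phase (ctx->emit == true); a
+ * dry-run passes trivially since nothing is written out.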
*/ +static inline int jit_buffer_check(const struct jit_context *ctx) +{ + if (ctx->emit) { + if (!ctx->jit.buf) { + pr_err("bpf-jit: inconsistence state; no " + "buffer to emit instructions.\n"); + return -EINVAL; + } else if (ctx->jit.index > ctx->jit.len) { + pr_err("bpf-jit: estimated JIT length is less " + "than the emitted instructions.\n"); + return -EFAULT; + } + } + return 0; +} + +/* On a dry-run (emit=false), "jit.len" is growing gradually. */ +static inline void jit_buffer_update(struct jit_context *ctx, u32 n) +{ + if (!ctx->emit) + ctx->jit.len += n; + else + ctx->jit.index += n; +} + +/* Based on "emit", determine the address where instructions are emitted. */ +static inline u8 *effective_jit_buf(const struct jit_context *ctx) +{ + return ctx->emit ? (ctx->jit.buf + ctx->jit.index) : NULL; +} + +/* Prologue based on context variables set by "analyze_reg_usage()". */ +static int handle_prologue(struct jit_context *ctx) +{ + int ret; + u8 *buf = effective_jit_buf(ctx); + u32 len = 0; + + CHECK_RET(jit_buffer_check(ctx)); + + len = arc_prologue(buf, ctx->arc_regs_clobbered, ctx->frame_size); + jit_buffer_update(ctx, len); + + return 0; +} + +/* The counter part for "handle_prologue()". */ +static int handle_epilogue(struct jit_context *ctx) +{ + int ret; + u8 *buf = effective_jit_buf(ctx); + u32 len = 0; + + CHECK_RET(jit_buffer_check(ctx)); + + len = arc_epilogue(buf, ctx->arc_regs_clobbered, ctx->frame_size); + jit_buffer_update(ctx, len); + + return 0; +} + +/* Tell which number of the BPF instruction we are dealing with. */ +static inline s32 get_index_for_insn(const struct jit_context *ctx, + const struct bpf_insn *insn) +{ + return (insn - ctx->prog->insnsi); +} + +/* + * In most of the cases, the "offset" is read from "insn->off". However, + * if it is an unconditional BPF_JMP32, then it comes from "insn->imm". + * + * (Courtesy of "cpu=v4" support) + */ +static inline s32 get_offset(const struct bpf_insn *insn) +{ + if ((BPF_CLASS(insn->code) == BPF_JMP32) && + (BPF_OP(insn->code) == BPF_JA)) + return insn->imm; + else + return insn->off; +} + +/* + * Determine to which number of the BPF instruction we're jumping to. + * + * The "offset" is interpreted as the "number" of BPF instructions + * from the _next_ BPF instruction. e.g.: + * + * 4 means 4 instructions after the next insn + * 0 means 0 instructions after the next insn -> fallthrough. + * -1 means 1 instruction before the next insn -> jmp to current insn. + * + * Another way to look at this, "offset" is the number of instructions + * that exist between the current instruction and the target instruction. + * + * It is worth noting that a "mov r,i64", which is 16-byte long, is + * treated as two instructions long, therefore "offset" needn't be + * treated specially for those. Everything is uniform. + */ +static inline s32 get_target_index_for_insn(const struct jit_context *ctx, + const struct bpf_insn *insn) +{ + return (get_index_for_insn(ctx, insn) + 1) + get_offset(insn); +} + +/* Is there an immediate operand encoded in the "insn"? */ +static inline bool has_imm(const struct bpf_insn *insn) +{ + return BPF_SRC(insn->code) == BPF_K; +} + +/* Is the last BPF instruction? */ +static inline bool is_last_insn(const struct bpf_prog *prog, u32 idx) +{ + return idx == (prog->len - 1); +} + +/* + * Invocation of this function, conditionally signals the need for + * an extra pass. The conditions that must be met are: + * + * 1. The current pass itself shouldn't be an extra pass. + * 2. 
The stream of bytes being JITed must come from a user program. + */ +static inline void set_need_for_extra_pass(struct jit_context *ctx) +{ + if (!ctx->is_extra_pass) + ctx->need_extra_pass = ctx->user_bpf_prog; +} + +/* + * Check if the "size" is valid and then transfer the control to + * the back-end for the swap. + */ +static int handle_swap(u8 *buf, u8 rd, u8 size, u8 endian, + bool force, bool do_zext, u8 *len) +{ + /* Sanity check on the size. */ + switch (size) { + case 16: + case 32: + case 64: + break; + default: + pr_err("bpf-jit: invalid size for swap.\n"); + return -EINVAL; + } + + *len = gen_swap(buf, rd, size, endian, force, do_zext); + + return 0; +} + +/* Checks if the (instruction) index is in valid range. */ +static inline bool check_insn_idx_valid(const struct jit_context *ctx, + const s32 idx) +{ + return (idx >= 0 && idx < ctx->prog->len); +} + +/* + * Decouple the back-end from BPF by converting BPF conditions + * to internal enum. ARC_CC_* start from 0 and are used as index + * to an array. BPF_J* usage must end after this conversion. + */ +static int bpf_cond_to_arc(const u8 op, u8 *arc_cc) +{ + switch (op) { + case BPF_JA: + *arc_cc = ARC_CC_AL; + break; + case BPF_JEQ: + *arc_cc = ARC_CC_EQ; + break; + case BPF_JGT: + *arc_cc = ARC_CC_UGT; + break; + case BPF_JGE: + *arc_cc = ARC_CC_UGE; + break; + case BPF_JSET: + *arc_cc = ARC_CC_SET; + break; + case BPF_JNE: + *arc_cc = ARC_CC_NE; + break; + case BPF_JSGT: + *arc_cc = ARC_CC_SGT; + break; + case BPF_JSGE: + *arc_cc = ARC_CC_SGE; + break; + case BPF_JLT: + *arc_cc = ARC_CC_ULT; + break; + case BPF_JLE: + *arc_cc = ARC_CC_ULE; + break; + case BPF_JSLT: + *arc_cc = ARC_CC_SLT; + break; + case BPF_JSLE: + *arc_cc = ARC_CC_SLE; + break; + default: + pr_err("bpf-jit: can't handle condition 0x%02X\n", op); + return -EINVAL; + } + return 0; +} + +/* + * Check a few things for a supposedly "jump" instruction: + * + * 0. "insn" is a "jump" instruction, but not the "call/exit" variant. + * 1. The current "insn" index is in valid range. + * 2. The index of target instruction is in valid range. + */ +static int check_bpf_jump(const struct jit_context *ctx, + const struct bpf_insn *insn) +{ + const u8 class = BPF_CLASS(insn->code); + const u8 op = BPF_OP(insn->code); + + /* Must be a jmp(32) instruction that is not a "call/exit". */ + if ((class != BPF_JMP && class != BPF_JMP32) || + (op == BPF_CALL || op == BPF_EXIT)) { + pr_err("bpf-jit: not a jump instruction.\n"); + return -EINVAL; + } + + if (!check_insn_idx_valid(ctx, get_index_for_insn(ctx, insn))) { + pr_err("bpf-jit: the bpf jump insn is not in prog.\n"); + return -EINVAL; + } + + if (!check_insn_idx_valid(ctx, get_target_index_for_insn(ctx, insn))) { + pr_err("bpf-jit: bpf jump label is out of range.\n"); + return -EINVAL; + } + + return 0; +} + +/* + * Based on input "insn", consult "ctx->bpf2insn" to get the + * related index (offset) of the translation in JIT stream. + */ +static u32 get_curr_jit_off(const struct jit_context *ctx, + const struct bpf_insn *insn) +{ + const s32 idx = get_index_for_insn(ctx, insn); +#ifdef ARC_BPF_JIT_DEBUG + BUG_ON(!offsets_available(ctx) || !check_insn_idx_valid(ctx, idx)); +#endif + return ctx->bpf2insn[idx]; +} + +/* + * The input "insn" must be a jump instruction. + * + * Based on input "insn", consult "ctx->bpf2insn" to get the + * related JIT index (offset) of "target instruction" that + * "insn" would jump to. 
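+ *
+ * e.g. for a conditional jump with "off == 4" at BPF index "i", the target
+ * index is "i + 1 + 4", so the returned offset is "ctx->bpf2insn[i + 5]".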
+ */ +static u32 get_targ_jit_off(const struct jit_context *ctx, + const struct bpf_insn *insn) +{ + const s32 tidx = get_target_index_for_insn(ctx, insn); +#ifdef ARC_BPF_JIT_DEBUG + BUG_ON(!offsets_available(ctx) || !check_insn_idx_valid(ctx, tidx)); +#endif + return ctx->bpf2insn[tidx]; +} + +/* + * This function will return 0 for a feasible jump. + * + * Consult the back-end to check if it finds it feasible to emit + * the necessary instructions based on "cond" and the displacement + * between the "from_off" and the "to_off". + */ +static int feasible_jit_jump(u32 from_off, u32 to_off, u8 cond, bool j32) +{ + int ret = 0; + + if (j32) { + if (!check_jmp_32(from_off, to_off, cond)) + ret = -EFAULT; + } else { + if (!check_jmp_64(from_off, to_off, cond)) + ret = -EFAULT; + } + + if (ret != 0) + pr_err("bpf-jit: the JIT displacement is not OK.\n"); + + return ret; +} + +/* + * This jump handler performs the following steps: + * + * 1. Compute ARC's internal condition code from BPF's + * 2. Determine the bitness of the operation (32 vs. 64) + * 3. Sanity check on BPF stream + * 4. Sanity check on what is supposed to be JIT's displacement + * 5. And finally, emit the necessary instructions + * + * The last two steps are performed through the back-end. + * The value of steps 1 and 2 are necessary inputs for the back-end. + */ +static int handle_jumps(const struct jit_context *ctx, + const struct bpf_insn *insn, + u8 *len) +{ + u8 cond; + int ret = 0; + u8 *buf = effective_jit_buf(ctx); + const bool j32 = (BPF_CLASS(insn->code) == BPF_JMP32) ? true : false; + const u8 rd = insn->dst_reg; + u8 rs = insn->src_reg; + u32 curr_off = 0, targ_off = 0; + + *len = 0; + + /* Map the BPF condition to internal enum. */ + CHECK_RET(bpf_cond_to_arc(BPF_OP(insn->code), &cond)); + + /* Sanity check on the BPF byte stream. */ + CHECK_RET(check_bpf_jump(ctx, insn)); + + /* + * Move the immediate into a temporary register _now_ for 2 reasons: + * + * 1. "gen_jmp_{32,64}()" deal with operands in registers. + * + * 2. The "len" parameter will grow so that the current jit offset + * (curr_off) will have increased to a point where the necessary + * instructions can be inserted by "gen_jmp_{32,64}()". + */ + if (has_imm(insn) && cond != ARC_CC_AL) { + if (j32) { + *len += mov_r32_i32(BUF(buf, *len), JIT_REG_TMP, + insn->imm); + } else { + *len += mov_r64_i32(BUF(buf, *len), JIT_REG_TMP, + insn->imm); + } + rs = JIT_REG_TMP; + } + + /* If the offsets are known, check if the branch can occur. */ + if (offsets_available(ctx)) { + curr_off = get_curr_jit_off(ctx, insn) + *len; + targ_off = get_targ_jit_off(ctx, insn); + + /* Sanity check on the back-end side. */ + CHECK_RET(feasible_jit_jump(curr_off, targ_off, cond, j32)); + } + + if (j32) { + *len += gen_jmp_32(BUF(buf, *len), rd, rs, cond, + curr_off, targ_off); + } else { + *len += gen_jmp_64(BUF(buf, *len), rd, rs, cond, + curr_off, targ_off); + } + + return ret; +} + +/* Jump to translated epilogue address. */ +static int handle_jmp_epilogue(struct jit_context *ctx, + const struct bpf_insn *insn, u8 *len) +{ + u8 *buf = effective_jit_buf(ctx); + u32 curr_off = 0, epi_off = 0; + + /* Check the offset only if the data is available. */ + if (offsets_available(ctx)) { + curr_off = get_curr_jit_off(ctx, insn); + epi_off = ctx->epilogue_offset; + + if (!check_jmp_64(curr_off, epi_off, ARC_CC_AL)) { + pr_err("bpf-jit: epilogue offset is not valid.\n"); + return -EINVAL; + } + } + + /* Jump to "epilogue offset" (rd and rs don't matter). 
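+	 * With ARC_CC_AL, gen_jmp_64() boils down to a single "b" instruction,
+	 * so no register comparison is emitted beforehand.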
*/ + *len = gen_jmp_64(buf, 0, 0, ARC_CC_AL, curr_off, epi_off); + + return 0; +} + +/* Try to get the resolved address and generate the instructions. */ +static int handle_call(struct jit_context *ctx, + const struct bpf_insn *insn, + u8 *len) +{ + int ret; + bool in_kernel_func, fixed = false; + u64 addr = 0; + u8 *buf = effective_jit_buf(ctx); + + ret = bpf_jit_get_func_addr(ctx->prog, insn, ctx->is_extra_pass, + &addr, &fixed); + if (ret < 0) { + pr_err("bpf-jit: can't get the address for call.\n"); + return ret; + } + in_kernel_func = (fixed ? true : false); + + /* No valuable address retrieved (yet). */ + if (!fixed && !addr) + set_need_for_extra_pass(ctx); + + *len = gen_func_call(buf, (ARC_ADDR)addr, in_kernel_func); + + if (insn->src_reg != BPF_PSEUDO_CALL) { + /* Assigning ABI's return reg to JIT's return reg. */ + *len += arc_to_bpf_return(BUF(buf, *len)); + } + + return 0; +} + +/* + * Try to generate instructions for loading a 64-bit immediate. + * These sort of instructions are usually associated with the 64-bit + * relocations: R_BPF_64_64. Therefore, signal the need for an extra + * pass if the circumstances are right. + */ +static int handle_ld_imm64(struct jit_context *ctx, + const struct bpf_insn *insn, + u8 *len) +{ + const s32 idx = get_index_for_insn(ctx, insn); + u8 *buf = effective_jit_buf(ctx); + + /* We're about to consume 2 VM instructions. */ + if (is_last_insn(ctx->prog, idx)) { + pr_err("bpf-jit: need more data for 64-bit immediate.\n"); + return -EINVAL; + } + + *len = mov_r64_i64(buf, insn->dst_reg, insn->imm, (insn + 1)->imm); + + if (bpf_pseudo_func(insn)) + set_need_for_extra_pass(ctx); + + return 0; +} + +/* + * Handles one eBPF instruction at a time. To make this function faster, + * it does not call "jit_buffer_check()". Else, it would call it for every + * instruction. As a result, it should not be invoked directly. Only + * "handle_body()", that has already executed the "check", may call this + * function. + * + * If the "ret" value is negative, something has went wrong. Else, + * it mostly holds the value 0 and rarely 1. Number 1 signals + * the loop in "handle_body()" to skip the next instruction, because + * it has been consumed as part of a 64-bit immediate value. 
+ */ +static int handle_insn(struct jit_context *ctx, u32 idx) +{ + const struct bpf_insn *insn = &ctx->prog->insnsi[idx]; + const u8 code = insn->code; + const u8 dst = insn->dst_reg; + const u8 src = insn->src_reg; + const s16 off = insn->off; + const s32 imm = insn->imm; + u8 *buf = effective_jit_buf(ctx); + u8 len = 0; + int ret = 0; + + switch (code) { + /* dst += src (32-bit) */ + case BPF_ALU | BPF_ADD | BPF_X: + len = add_r32(buf, dst, src); + break; + /* dst += imm (32-bit) */ + case BPF_ALU | BPF_ADD | BPF_K: + len = add_r32_i32(buf, dst, imm); + break; + /* dst -= src (32-bit) */ + case BPF_ALU | BPF_SUB | BPF_X: + len = sub_r32(buf, dst, src); + break; + /* dst -= imm (32-bit) */ + case BPF_ALU | BPF_SUB | BPF_K: + len = sub_r32_i32(buf, dst, imm); + break; + /* dst = -dst (32-bit) */ + case BPF_ALU | BPF_NEG: + len = neg_r32(buf, dst); + break; + /* dst *= src (32-bit) */ + case BPF_ALU | BPF_MUL | BPF_X: + len = mul_r32(buf, dst, src); + break; + /* dst *= imm (32-bit) */ + case BPF_ALU | BPF_MUL | BPF_K: + len = mul_r32_i32(buf, dst, imm); + break; + /* dst /= src (32-bit) */ + case BPF_ALU | BPF_DIV | BPF_X: + len = div_r32(buf, dst, src, off == 1); + break; + /* dst /= imm (32-bit) */ + case BPF_ALU | BPF_DIV | BPF_K: + len = div_r32_i32(buf, dst, imm, off == 1); + break; + /* dst %= src (32-bit) */ + case BPF_ALU | BPF_MOD | BPF_X: + len = mod_r32(buf, dst, src, off == 1); + break; + /* dst %= imm (32-bit) */ + case BPF_ALU | BPF_MOD | BPF_K: + len = mod_r32_i32(buf, dst, imm, off == 1); + break; + /* dst &= src (32-bit) */ + case BPF_ALU | BPF_AND | BPF_X: + len = and_r32(buf, dst, src); + break; + /* dst &= imm (32-bit) */ + case BPF_ALU | BPF_AND | BPF_K: + len = and_r32_i32(buf, dst, imm); + break; + /* dst |= src (32-bit) */ + case BPF_ALU | BPF_OR | BPF_X: + len = or_r32(buf, dst, src); + break; + /* dst |= imm (32-bit) */ + case BPF_ALU | BPF_OR | BPF_K: + len = or_r32_i32(buf, dst, imm); + break; + /* dst ^= src (32-bit) */ + case BPF_ALU | BPF_XOR | BPF_X: + len = xor_r32(buf, dst, src); + break; + /* dst ^= imm (32-bit) */ + case BPF_ALU | BPF_XOR | BPF_K: + len = xor_r32_i32(buf, dst, imm); + break; + /* dst <<= src (32-bit) */ + case BPF_ALU | BPF_LSH | BPF_X: + len = lsh_r32(buf, dst, src); + break; + /* dst <<= imm (32-bit) */ + case BPF_ALU | BPF_LSH | BPF_K: + len = lsh_r32_i32(buf, dst, imm); + break; + /* dst >>= src (32-bit) [unsigned] */ + case BPF_ALU | BPF_RSH | BPF_X: + len = rsh_r32(buf, dst, src); + break; + /* dst >>= imm (32-bit) [unsigned] */ + case BPF_ALU | BPF_RSH | BPF_K: + len = rsh_r32_i32(buf, dst, imm); + break; + /* dst >>= src (32-bit) [signed] */ + case BPF_ALU | BPF_ARSH | BPF_X: + len = arsh_r32(buf, dst, src); + break; + /* dst >>= imm (32-bit) [signed] */ + case BPF_ALU | BPF_ARSH | BPF_K: + len = arsh_r32_i32(buf, dst, imm); + break; + /* dst = src (32-bit) */ + case BPF_ALU | BPF_MOV | BPF_X: + len = mov_r32(buf, dst, src, (u8)off); + break; + /* dst = imm32 (32-bit) */ + case BPF_ALU | BPF_MOV | BPF_K: + len = mov_r32_i32(buf, dst, imm); + break; + /* dst = swap(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: { + CHECK_RET(handle_swap(buf, dst, imm, BPF_SRC(code), + BPF_CLASS(code) == BPF_ALU64, + ctx->do_zext, &len)); + break; + } + /* dst += src (64-bit) */ + case BPF_ALU64 | BPF_ADD | BPF_X: + len = add_r64(buf, dst, src); + break; + /* dst += imm32 (64-bit) */ + case BPF_ALU64 | BPF_ADD | BPF_K: + len = add_r64_i32(buf, dst, imm); + break; + 
/* dst -= src (64-bit) */ + case BPF_ALU64 | BPF_SUB | BPF_X: + len = sub_r64(buf, dst, src); + break; + /* dst -= imm32 (64-bit) */ + case BPF_ALU64 | BPF_SUB | BPF_K: + len = sub_r64_i32(buf, dst, imm); + break; + /* dst = -dst (64-bit) */ + case BPF_ALU64 | BPF_NEG: + len = neg_r64(buf, dst); + break; + /* dst *= src (64-bit) */ + case BPF_ALU64 | BPF_MUL | BPF_X: + len = mul_r64(buf, dst, src); + break; + /* dst *= imm32 (64-bit) */ + case BPF_ALU64 | BPF_MUL | BPF_K: + len = mul_r64_i32(buf, dst, imm); + break; + /* dst &= src (64-bit) */ + case BPF_ALU64 | BPF_AND | BPF_X: + len = and_r64(buf, dst, src); + break; + /* dst &= imm32 (64-bit) */ + case BPF_ALU64 | BPF_AND | BPF_K: + len = and_r64_i32(buf, dst, imm); + break; + /* dst |= src (64-bit) */ + case BPF_ALU64 | BPF_OR | BPF_X: + len = or_r64(buf, dst, src); + break; + /* dst |= imm32 (64-bit) */ + case BPF_ALU64 | BPF_OR | BPF_K: + len = or_r64_i32(buf, dst, imm); + break; + /* dst ^= src (64-bit) */ + case BPF_ALU64 | BPF_XOR | BPF_X: + len = xor_r64(buf, dst, src); + break; + /* dst ^= imm32 (64-bit) */ + case BPF_ALU64 | BPF_XOR | BPF_K: + len = xor_r64_i32(buf, dst, imm); + break; + /* dst <<= src (64-bit) */ + case BPF_ALU64 | BPF_LSH | BPF_X: + len = lsh_r64(buf, dst, src); + break; + /* dst <<= imm32 (64-bit) */ + case BPF_ALU64 | BPF_LSH | BPF_K: + len = lsh_r64_i32(buf, dst, imm); + break; + /* dst >>= src (64-bit) [unsigned] */ + case BPF_ALU64 | BPF_RSH | BPF_X: + len = rsh_r64(buf, dst, src); + break; + /* dst >>= imm32 (64-bit) [unsigned] */ + case BPF_ALU64 | BPF_RSH | BPF_K: + len = rsh_r64_i32(buf, dst, imm); + break; + /* dst >>= src (64-bit) [signed] */ + case BPF_ALU64 | BPF_ARSH | BPF_X: + len = arsh_r64(buf, dst, src); + break; + /* dst >>= imm32 (64-bit) [signed] */ + case BPF_ALU64 | BPF_ARSH | BPF_K: + len = arsh_r64_i32(buf, dst, imm); + break; + /* dst = src (64-bit) */ + case BPF_ALU64 | BPF_MOV | BPF_X: + len = mov_r64(buf, dst, src, (u8)off); + break; + /* dst = imm32 (sign extend to 64-bit) */ + case BPF_ALU64 | BPF_MOV | BPF_K: + len = mov_r64_i32(buf, dst, imm); + break; + /* dst = imm64 */ + case BPF_LD | BPF_DW | BPF_IMM: + CHECK_RET(handle_ld_imm64(ctx, insn, &len)); + /* Tell the loop to skip the next instruction. 
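+		 * ("ld r64, imm64" occupies two 8-byte BPF slots; the second
+		 * slot only carries the upper 32 bits of the immediate.)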
*/ + ret = 1; + break; + /* dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_DW: + len = load_r(buf, dst, src, off, BPF_SIZE(code), false); + break; + case BPF_LDX | BPF_MEMSX | BPF_W: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_B: + len = load_r(buf, dst, src, off, BPF_SIZE(code), true); + break; + /* *(size *)(dst + off) = src */ + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_DW: + len = store_r(buf, src, dst, off, BPF_SIZE(code)); + break; + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_DW: + len = store_i(buf, imm, dst, off, BPF_SIZE(code)); + break; + case BPF_JMP | BPF_JA: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JA: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_K: + CHECK_RET(handle_jumps(ctx, insn, &len)); + break; + case BPF_JMP | BPF_CALL: + CHECK_RET(handle_call(ctx, insn, &len)); + break; + + case BPF_JMP | BPF_EXIT: + /* If this is the last instruction, epilogue will follow. */ + if (is_last_insn(ctx->prog, idx)) + break; + CHECK_RET(handle_jmp_epilogue(ctx, insn, &len)); + break; + default: + pr_err("bpf-jit: can't handle instruction code 0x%02X\n", code); + return -EOPNOTSUPP; + } + + if (BPF_CLASS(code) == BPF_ALU) { + /* + * Skip the "swap" instructions. Even 64-bit swaps are of type + * BPF_ALU (and not BPF_ALU64). Therefore, for the swaps, one + * has to look at the "size" of the operations rather than the + * ALU type. "gen_swap()" specifically takes care of that. 
+ */ + if (BPF_OP(code) != BPF_END && ctx->do_zext) + len += zext(BUF(buf, len), dst); + } + + jit_buffer_update(ctx, len); + + return ret; +} + +static int handle_body(struct jit_context *ctx) +{ + int ret; + bool populate_bpf2insn = false; + const struct bpf_prog *prog = ctx->prog; + + CHECK_RET(jit_buffer_check(ctx)); + + /* + * Record the mapping for the instructions during the dry-run. + * Doing it this way allows us to have the mapping ready for + * the jump instructions during the real compilation phase. + */ + if (!ctx->emit) + populate_bpf2insn = true; + + for (u32 i = 0; i < prog->len; i++) { + /* During the dry-run, jit.len grows gradually per BPF insn. */ + if (populate_bpf2insn) + ctx->bpf2insn[i] = ctx->jit.len; + + CHECK_RET(handle_insn(ctx, i)); + if (ret > 0) { + /* "ret" is 1 if two (64-bit) chunks were consumed. */ + ctx->bpf2insn[i + 1] = ctx->bpf2insn[i]; + i++; + } + } + + /* If bpf2insn had to be populated, then it is done at this point. */ + if (populate_bpf2insn) + ctx->bpf2insn_valid = true; + + return 0; +} + +/* + * Initialize the memory with "unimp_s" which is the mnemonic for + * "unimplemented" instruction and always raises an exception. + * + * The instruction is 2 bytes. If "size" is odd, there is not much + * that can be done about the last byte in "area". Because, the + * CPU always fetches instructions in two bytes. Therefore, the + * byte beyond the last one is going to accompany it during a + * possible fetch. In the most likely case of a little endian + * system, that beyond-byte will become the major opcode and + * we have no control over its initialisation. + */ +static void fill_ill_insn(void *area, unsigned int size) +{ + const u16 unimp_s = 0x79e0; + + if (size & 1) { + *((u8 *)area + (size - 1)) = 0xff; + size -= 1; + } + + memset16(area, unimp_s, size >> 1); +} + +/* Piece of memory that can be allocated at the beginning of jit_prepare(). */ +static int jit_prepare_early_mem_alloc(struct jit_context *ctx) +{ + ctx->bpf2insn = kcalloc(ctx->prog->len, sizeof(ctx->jit.len), + GFP_KERNEL); + + if (!ctx->bpf2insn) { + pr_err("bpf-jit: could not allocate memory for " + "mapping of the instructions.\n"); + return -ENOMEM; + } + + return 0; +} + +/* + * Memory allocations that rely on parameters known at the end of + * jit_prepare(). + */ +static int jit_prepare_final_mem_alloc(struct jit_context *ctx) +{ + const size_t alignment = sizeof(u32); + + ctx->bpf_header = bpf_jit_binary_alloc(ctx->jit.len, &ctx->jit.buf, + alignment, fill_ill_insn); + if (!ctx->bpf_header) { + pr_err("bpf-jit: could not allocate memory for translation.\n"); + return -ENOMEM; + } + + if (ctx->need_extra_pass) { + ctx->jit_data = kzalloc(sizeof(*ctx->jit_data), GFP_KERNEL); + if (!ctx->jit_data) + return -ENOMEM; + } + + return 0; +} + +/* + * The first phase of the translation without actually emitting any + * instruction. It helps in getting a forecast on some aspects, such + * as the length of the whole program or where the epilogue starts. + * + * Whenever the necessary parameters are known, memories are allocated. + */ +static int jit_prepare(struct jit_context *ctx) +{ + int ret; + + /* Dry run. */ + ctx->emit = false; + + CHECK_RET(jit_prepare_early_mem_alloc(ctx)); + + /* Get the length of prologue section after some register analysis. */ + analyze_reg_usage(ctx); + CHECK_RET(handle_prologue(ctx)); + + CHECK_RET(handle_body(ctx)); + + /* Record at which offset epilogue begins. */ + ctx->epilogue_offset = ctx->jit.len; + + /* Process the epilogue section now. 
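+	 * Its length still contributes to "jit.len", which the final memory
+	 * allocation below relies on.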
*/ + CHECK_RET(handle_epilogue(ctx)); + + CHECK_RET(jit_prepare_final_mem_alloc(ctx)); + + return 0; +} + +/* + * All the "handle_*()" functions have been called before by the + * "jit_prepare()". If there was an error, we would know by now. + * Therefore, no extra error checking at this point, other than + * a sanity check at the end that expects the calculated length + * (jit.len) to be equal to the length of generated instructions + * (jit.index). + */ +static int jit_compile(struct jit_context *ctx) +{ + int ret; + + /* Let there be code. */ + ctx->emit = true; + + CHECK_RET(handle_prologue(ctx)); + + CHECK_RET(handle_body(ctx)); + + CHECK_RET(handle_epilogue(ctx)); + + if (ctx->jit.index != ctx->jit.len) { + pr_err("bpf-jit: divergence between the phases; " + "%u vs. %u (bytes).\n", + ctx->jit.len, ctx->jit.index); + return -EFAULT; + } + + return 0; +} + +/* + * Calling this function implies a successful JIT. A successful + * translation is signaled by setting the right parameters: + * + * prog->jited=1, prog->jited_len=..., prog->bpf_func=... + */ +static int jit_finalize(struct jit_context *ctx) +{ + struct bpf_prog *prog = ctx->prog; + + /* We're going to need this information for the "do_extra_pass()". */ + if (ctx->need_extra_pass) { + ctx->jit_data->bpf_header = ctx->bpf_header; + ctx->jit_data->bpf2insn = ctx->bpf2insn; + prog->aux->jit_data = (void *)ctx->jit_data; + } else { + /* + * If things seem finalised, then mark the JITed memory + * as R-X and flush it. + */ + if (bpf_jit_binary_lock_ro(ctx->bpf_header)) { + pr_err("bpf-jit: Could not lock the JIT memory.\n"); + return -EFAULT; + } + flush_icache_range((unsigned long)ctx->bpf_header, + (unsigned long) + BUF(ctx->jit.buf, ctx->jit.len)); + prog->aux->jit_data = NULL; + bpf_prog_fill_jited_linfo(prog, ctx->bpf2insn); + } + + ctx->success = true; + prog->bpf_func = (void *)ctx->jit.buf; + prog->jited_len = ctx->jit.len; + prog->jited = 1; + + jit_ctx_cleanup(ctx); + jit_dump(ctx); + + return 0; +} + +/* + * A lenient verification for the existence of JIT context in "prog". + * Apparently the JIT internals, namely jit_subprogs() in bpf/verifier.c, + * may request for a second compilation although nothing needs to be done. + */ +static inline int check_jit_context(const struct bpf_prog *prog) +{ + if (!prog->aux->jit_data) { + pr_notice("bpf-jit: no jit data for the extra pass.\n"); + return 1; + } else { + return 0; + } +} + +/* Reuse the previous pass's data. */ +static int jit_resume_context(struct jit_context *ctx) +{ + struct arc_jit_data *jdata = + (struct arc_jit_data *)ctx->prog->aux->jit_data; + + if (!jdata) { + pr_err("bpf-jit: no jit data for the extra pass.\n"); + return -EINVAL; + } + + ctx->jit.buf = (u8 *)ctx->prog->bpf_func; + ctx->jit.len = ctx->prog->jited_len; + ctx->bpf_header = jdata->bpf_header; + ctx->bpf2insn = (u32 *)jdata->bpf2insn; + ctx->bpf2insn_valid = ctx->bpf2insn ? true : false; + ctx->jit_data = jdata; + + return 0; +} + +/* + * Patch in the new addresses. The instructions of interest are: + * + * - call + * - ld r64, imm64 + * + * For "call"s, it resolves the addresses one more time through the + * handle_call(). + * + * For 64-bit immediate loads, it just retranslates them, because the BPF + * core in kernel might have changed the value since the normal pass. 
+ */ +static int jit_patch_relocations(struct jit_context *ctx) +{ + const u8 bpf_opc_call = BPF_JMP | BPF_CALL; + const u8 bpf_opc_ldi64 = BPF_LD | BPF_DW | BPF_IMM; + const struct bpf_prog *prog = ctx->prog; + int ret; + + ctx->emit = true; + for (u32 i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + u8 dummy; + /* + * Adjust "ctx.jit.index", so "gen_*()" functions below + * can use it for their output addresses. + */ + ctx->jit.index = ctx->bpf2insn[i]; + + if (insn->code == bpf_opc_call) { + CHECK_RET(handle_call(ctx, insn, &dummy)); + } else if (insn->code == bpf_opc_ldi64) { + CHECK_RET(handle_ld_imm64(ctx, insn, &dummy)); + /* Skip the next instruction. */ + ++i; + } + } + return 0; +} + +/* + * A normal pass that involves a "dry-run" phase, jit_prepare(), + * to get the necessary data for the real compilation phase, + * jit_compile(). + */ +static struct bpf_prog *do_normal_pass(struct bpf_prog *prog) +{ + struct jit_context ctx; + + /* Bail out if JIT is disabled. */ + if (!prog->jit_requested) + return prog; + + if (jit_ctx_init(&ctx, prog)) { + jit_ctx_cleanup(&ctx); + return prog; + } + + /* Get the lengths and allocate buffer. */ + if (jit_prepare(&ctx)) { + jit_ctx_cleanup(&ctx); + return prog; + } + + if (jit_compile(&ctx)) { + jit_ctx_cleanup(&ctx); + return prog; + } + + if (jit_finalize(&ctx)) { + jit_ctx_cleanup(&ctx); + return prog; + } + + return ctx.prog; +} + +/* + * If there are multi-function BPF programs that call each other, + * their translated addresses are not known all at once. Therefore, + * an extra pass is needed to consult the bpf_jit_get_func_addr() + * again to get the newly translated addresses in order to resolve + * the "call"s. + */ +static struct bpf_prog *do_extra_pass(struct bpf_prog *prog) +{ + struct jit_context ctx; + + /* Skip if there's no context to resume from. */ + if (check_jit_context(prog)) + return prog; + + if (jit_ctx_init(&ctx, prog)) { + jit_ctx_cleanup(&ctx); + return prog; + } + + if (jit_resume_context(&ctx)) { + jit_ctx_cleanup(&ctx); + return prog; + } + + if (jit_patch_relocations(&ctx)) { + jit_ctx_cleanup(&ctx); + return prog; + } + + if (jit_finalize(&ctx)) { + jit_ctx_cleanup(&ctx); + return prog; + } + + return ctx.prog; +} + +/* + * This function may be invoked twice for the same stream of BPF + * instructions. The "extra pass" happens, when there are "call"s + * involved that their addresses are not known during the first + * invocation. + */ +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + vm_dump(prog); + + /* Was this program already translated? 
*/ + if (!prog->jited) + return do_normal_pass(prog); + else + return do_extra_pass(prog); + + return prog; +} diff --git a/arch/arm/boot/dts/st/stm32mp131.dtsi b/arch/arm/boot/dts/st/stm32mp131.dtsi index ecfa120827ba..6704ceef284d 100644 --- a/arch/arm/boot/dts/st/stm32mp131.dtsi +++ b/arch/arm/boot/dts/st/stm32mp131.dtsi @@ -783,10 +783,82 @@ }; exti: interrupt-controller@5000d000 { - compatible = "st,stm32mp13-exti", "syscon"; + compatible = "st,stm32mp1-exti", "syscon"; interrupt-controller; #interrupt-cells = <2>; reg = <0x5000d000 0x400>; + interrupts-extended = + <&intc GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_0 */ + <&intc GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 66 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_10 */ + <&intc GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 78 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <&intc GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, + <0>, /* EXTI_20 */ + <&intc GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_30 */ + <&intc GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, /* EXTI_40 */ + <0>, + <0>, + <0>, + <&intc GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <&intc GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <&intc GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_50 */ + <0>, + <&intc GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, /* EXTI_60 */ + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, + <&intc GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <&intc GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>; /* EXTI_70 */ }; syscfg: syscon@50020000 { diff --git a/arch/arm/boot/dts/st/stm32mp151.dtsi b/arch/arm/boot/dts/st/stm32mp151.dtsi index 16bd6eee32b4..90c5c72c87ab 100644 --- a/arch/arm/boot/dts/st/stm32mp151.dtsi +++ b/arch/arm/boot/dts/st/stm32mp151.dtsi @@ -176,6 +176,81 @@ interrupt-controller; #interrupt-cells = <2>; reg = <0x5000d000 0x400>; + interrupts-extended = + <&intc GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_0 */ + <&intc GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 66 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_10 */ + <&intc GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 121 
IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <&intc GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, + <0>, /* EXTI_20 */ + <&intc GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_30 */ + <&intc GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, /* EXTI_40 */ + <0>, + <0>, + <0>, + <0>, + <0>, + <&intc GIC_SPI 151 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <&intc GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_50 */ + <0>, + <&intc GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, /* EXTI_60 */ + <&intc GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <0>, + <&intc GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <&intc GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <&intc GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_70 */ + <0>, + <0>, + <&intc GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>; }; syscfg: syscon@50020000 { diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig index 01b5a5a73f03..42cb1c854118 100644 --- a/arch/arm/configs/collie_defconfig +++ b/arch/arm/configs/collie_defconfig @@ -3,7 +3,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_EPOLL is not set CONFIG_ARCH_MULTI_V4=y # CONFIG_ARCH_MULTI_V7 is not set diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig index 59c4835ffc97..c1291ca290b2 100644 --- a/arch/arm/configs/keystone_defconfig +++ b/arch/arm/configs/keystone_defconfig @@ -12,7 +12,7 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_BLK_DEV_INITRD=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y CONFIG_KALLSYMS_ALL=y CONFIG_EXPERT=y CONFIG_PROFILING=y diff --git a/arch/arm/configs/lpc18xx_defconfig b/arch/arm/configs/lpc18xx_defconfig index d169da9b2824..f55c231e0870 100644 --- a/arch/arm/configs/lpc18xx_defconfig +++ b/arch/arm/configs/lpc18xx_defconfig @@ -8,7 +8,7 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_LZ4 is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_UID16 is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig index 1d41e73f4903..34d079e03b3c 100644 --- a/arch/arm/configs/moxart_defconfig +++ b/arch/arm/configs/moxart_defconfig @@ -6,7 +6,7 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_SIGNALFD is not set # CONFIG_TIMERFD is not set # CONFIG_EVENTFD is not set diff --git a/arch/arm/configs/mps2_defconfig b/arch/arm/configs/mps2_defconfig index 3ed73f184d83..e995e50537ef 100644 --- a/arch/arm/configs/mps2_defconfig +++ b/arch/arm/configs/mps2_defconfig @@ -5,7 +5,7 @@ 
CONFIG_LOG_BUF_SHIFT=16 CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EXPERT=y # CONFIG_UID16 is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig index 729ea8157e2a..025b595dd837 100644 --- a/arch/arm/configs/omap1_defconfig +++ b/arch/arm/configs/omap1_defconfig @@ -9,7 +9,7 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_SHMEM is not set # CONFIG_KALLSYMS is not set CONFIG_PROFILING=y diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig index b9fe3fbed5ae..3baec075d1ef 100644 --- a/arch/arm/configs/stm32_defconfig +++ b/arch/arm/configs/stm32_defconfig @@ -6,7 +6,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EXPERT=y # CONFIG_UID16 is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c index 661b3fc43275..1e4cd502340e 100644 --- a/arch/arm/mach-pxa/devices.c +++ b/arch/arm/mach-pxa/devices.c @@ -7,7 +7,6 @@ #include <linux/clk-provider.h> #include <linux/dma-mapping.h> #include <linux/dmaengine.h> -#include <linux/spi/pxa2xx_spi.h> #include <linux/platform_data/i2c-pxa.h> #include <linux/soc/pxa/cpu.h> @@ -665,23 +664,6 @@ struct platform_device pxa27x_device_gpio = { .resource = pxa_resource_gpio, }; -/* pxa2xx-spi platform-device ID equals respective SSP platform-device ID + 1. - * See comment in arch/arm/mach-pxa/ssp.c::ssp_probe() */ -void __init pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_controller *info) -{ - struct platform_device *pd; - - pd = platform_device_alloc("pxa2xx-spi", id); - if (pd == NULL) { - printk(KERN_ERR "pxa2xx-spi: failed to allocate device id %d\n", - id); - return; - } - - pd->dev.platform_data = info; - platform_device_add(pd); -} - static struct resource pxa_dma_resource[] = { [0] = { .start = 0x40000000, diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index cc691b199429..3c5f5a3cb480 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -18,10 +18,10 @@ #include <linux/i2c.h> #include <linux/platform_data/i2c-pxa.h> #include <linux/platform_data/pca953x.h> +#include <linux/property.h> #include <linux/spi/spi.h> #include <linux/spi/ads7846.h> #include <linux/spi/corgi_lcd.h> -#include <linux/spi/pxa2xx_spi.h> #include <linux/mtd/sharpsl.h> #include <linux/mtd/physmap.h> #include <linux/input-event-codes.h> @@ -569,10 +569,6 @@ static struct spi_board_info spitz_spi_devices[] = { }, }; -static struct pxa2xx_spi_controller spitz_spi_info = { - .num_chipselect = 3, -}; - static struct gpiod_lookup_table spitz_spi_gpio_table = { .dev_id = "spi2", .table = { @@ -583,8 +579,21 @@ static struct gpiod_lookup_table spitz_spi_gpio_table = { }, }; +static const struct property_entry spitz_spi_properties[] = { + PROPERTY_ENTRY_U32("num-cs", 3), + { } +}; + +static const struct software_node spitz_spi_node = { + .properties = spitz_spi_properties, +}; + static void __init spitz_spi_init(void) { + struct platform_device *pd; + int id = 2; + int err; + if (machine_is_akita()) gpiod_add_lookup_table(&akita_lcdcon_gpio_table); else @@ -592,7 +601,21 @@ static void __init spitz_spi_init(void) gpiod_add_lookup_table(&spitz_ads7846_gpio_table); 
gpiod_add_lookup_table(&spitz_spi_gpio_table); - pxa2xx_set_spi_info(2, &spitz_spi_info); + + /* pxa2xx-spi platform-device ID equals respective SSP platform-device ID + 1 */ + pd = platform_device_alloc("pxa2xx-spi", id); + if (pd == NULL) { + pr_err("pxa2xx-spi: failed to allocate device id %d\n", id); + } else { + err = device_add_software_node(&pd->dev, &spitz_spi_node); + if (err) { + platform_device_put(pd); + pr_err("pxa2xx-spi: failed to add software node\n"); + } else { + platform_device_add(pd); + } + } + spi_register_board_info(ARRAY_AND_SIZE(spitz_spi_devices)); } #else diff --git a/arch/arm/mach-pxa/spitz_pm.c b/arch/arm/mach-pxa/spitz_pm.c index 8bc4ea51a0c1..03b4b347f11a 100644 --- a/arch/arm/mach-pxa/spitz_pm.c +++ b/arch/arm/mach-pxa/spitz_pm.c @@ -35,18 +35,20 @@ static int spitz_last_ac_status; -static struct gpio spitz_charger_gpios[] = { - { SPITZ_GPIO_KEY_INT, GPIOF_IN, "Keyboard Interrupt" }, - { SPITZ_GPIO_SYNC, GPIOF_IN, "Sync" }, - { SPITZ_GPIO_AC_IN, GPIOF_IN, "Charger Detection" }, - { SPITZ_GPIO_ADC_TEMP_ON, GPIOF_OUT_INIT_LOW, "ADC Temp On" }, - { SPITZ_GPIO_JK_B, GPIOF_OUT_INIT_LOW, "JK B" }, - { SPITZ_GPIO_CHRG_ON, GPIOF_OUT_INIT_LOW, "Charger On" }, -}; - static void spitz_charger_init(void) { - gpio_request_array(ARRAY_AND_SIZE(spitz_charger_gpios)); + gpio_request(SPITZ_GPIO_KEY_INT, "Keyboard Interrupt"); + gpio_direction_input(SPITZ_GPIO_KEY_INT); + gpio_request(SPITZ_GPIO_SYNC, "Sync"); + gpio_direction_input(SPITZ_GPIO_SYNC); + gpio_request(SPITZ_GPIO_AC_IN, "Charger Detection"); + gpio_direction_input(SPITZ_GPIO_AC_IN); + gpio_request(SPITZ_GPIO_ADC_TEMP_ON, "ADC Temp On"); + gpio_direction_output(SPITZ_GPIO_ADC_TEMP_ON, 0); + gpio_request(SPITZ_GPIO_JK_B, "JK B"); + gpio_direction_output(SPITZ_GPIO_JK_B, 0); + gpio_request(SPITZ_GPIO_CHRG_ON, "Charger On"); + gpio_direction_output(SPITZ_GPIO_CHRG_ON, 0); } static void spitz_measure_temp(int on) diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 5e25dfa752e9..1cfc0b1fa41c 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -20,16 +20,6 @@ #include "generic.h" -/* - * helper for sa1100fb - */ -static struct gpio h3600_lcd_gpio[] = { - { H3XXX_EGPIO_LCD_ON, GPIOF_OUT_INIT_LOW, "LCD power" }, - { H3600_EGPIO_LCD_PCI, GPIOF_OUT_INIT_LOW, "LCD control" }, - { H3600_EGPIO_LCD_5V_ON, GPIOF_OUT_INIT_LOW, "LCD 5v" }, - { H3600_EGPIO_LVDD_ON, GPIOF_OUT_INIT_LOW, "LCD 9v/-6.5v" }, -}; - static bool h3600_lcd_request(void) { static bool h3600_lcd_ok; @@ -38,7 +28,42 @@ static bool h3600_lcd_request(void) if (h3600_lcd_ok) return true; - rc = gpio_request_array(h3600_lcd_gpio, ARRAY_SIZE(h3600_lcd_gpio)); + rc = gpio_request(H3XXX_EGPIO_LCD_ON, "LCD power"); + if (rc) + goto out; + rc = gpio_direction_output(H3XXX_EGPIO_LCD_ON, 0); + if (rc) + goto out_free_on; + rc = gpio_request(H3600_EGPIO_LCD_PCI, "LCD control"); + if (rc) + goto out_free_on; + rc = gpio_direction_output(H3600_EGPIO_LCD_PCI, 0); + if (rc) + goto out_free_pci; + rc = gpio_request(H3600_EGPIO_LCD_5V_ON, "LCD 5v"); + if (rc) + goto out_free_pci; + rc = gpio_direction_output(H3600_EGPIO_LCD_5V_ON, 0); + if (rc) + goto out_free_5v_on; + rc = gpio_request(H3600_EGPIO_LVDD_ON, "LCD 9v/-6.5v"); + if (rc) + goto out_free_5v_on; + rc = gpio_direction_output(H3600_EGPIO_LVDD_ON, 0); + if (rc) + goto out_free_lvdd_on; + + goto out; + +out_free_lvdd_on: + gpio_free(H3600_EGPIO_LVDD_ON); +out_free_5v_on: + gpio_free(H3600_EGPIO_LCD_5V_ON); +out_free_pci: + gpio_free(H3600_EGPIO_LCD_PCI); +out_free_on: 
+ gpio_free(H3XXX_EGPIO_LCD_ON); +out: if (rc) pr_err("%s: can't request GPIOs\n", __func__); else diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 72b5cd697f5d..deeb8f292454 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -2252,28 +2252,21 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* If building the body of the JITed code fails somehow, * we fall back to the interpretation. */ - if (build_body(&ctx) < 0) { - image_ptr = NULL; - bpf_jit_binary_free(header); - prog = orig_prog; - goto out_imms; - } + if (build_body(&ctx) < 0) + goto out_free; build_epilogue(&ctx); /* 3.) Extra pass to validate JITed Code */ - if (validate_code(&ctx)) { - image_ptr = NULL; - bpf_jit_binary_free(header); - prog = orig_prog; - goto out_imms; - } + if (validate_code(&ctx)) + goto out_free; flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx)); if (bpf_jit_enable > 1) /* there are 2 passes here */ bpf_jit_dump(prog->len, image_size, 2, ctx.target); - bpf_jit_binary_lock_ro(header); + if (bpf_jit_binary_lock_ro(header)) + goto out_free; prog->bpf_func = (void *)ctx.target; prog->jited = 1; prog->jited_len = image_size; @@ -2290,5 +2283,11 @@ out: bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog); return prog; + +out_free: + image_ptr = NULL; + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_imms; } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 74b34a78b7ac..2df0818c3ca9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -256,9 +256,11 @@ config ARM64 select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD + select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD select TRACE_IRQFLAGS_SUPPORT select TRACE_IRQFLAGS_NMI_SUPPORT select HAVE_SOFTIRQ_ON_OWN_STACK + select USER_STACKTRACE_SUPPORT help ARM 64-bit (AArch64) Linux support. 
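Both the h3600.c LCD-GPIO rework and the bpf_jit_32.c change above converge on the same idiom: one linear success path, with labelled error exits that undo each acquisition in reverse order so that every failure site becomes a one-line goto. A generic illustration only (plain user-space C, names invented here, not code from this series):

#include <stdlib.h>

/* Acquire three resources; on any failure, release what was taken so far. */
static int setup_three_buffers(char **pa, char **pb, char **pc)
{
	char *a, *b, *c;

	a = malloc(16);
	if (!a)
		goto err;

	b = malloc(16);
	if (!b)
		goto err_free_a;

	c = malloc(16);
	if (!c)
		goto err_free_b;

	*pa = a;
	*pb = b;
	*pc = c;
	return 0;

err_free_b:
	free(b);
err_free_a:
	free(a);
err:
	return -1;
}

int main(void)
{
	char *a, *b, *c;

	if (setup_three_buffers(&a, &b, &c))
		return EXIT_FAILURE;

	free(c);
	free(b);
	free(a);
	return EXIT_SUCCESS;
}

Because the labels sit in the exact reverse order of acquisition, a failure late in the sequence simply falls through the earlier cleanup steps; that is what lets the bpf_jit_32.c hunk collapse its duplicated cleanup blocks into a single out_free target.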
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index a028ea312378..a52618073de2 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -309,6 +309,7 @@ config ARCH_STM32 select GPIOLIB select PINCTRL select PINCTRL_STM32MP257 + select STM32_EXTI select ARM_SMC_MBOX select ARM_SCMI_PROTOCOL select COMMON_CLK_SCMI diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 0e075d3c546b..b8b1d4f4a572 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -154,6 +154,10 @@ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a # Default target when executing plain make boot := arch/arm64/boot +BOOT_TARGETS := Image vmlinuz.efi image.fit + +PHONY += $(BOOT_TARGETS) + ifeq ($(CONFIG_EFI_ZBOOT),) KBUILD_IMAGE := $(boot)/Image.gz else @@ -162,8 +166,10 @@ endif all: $(notdir $(KBUILD_IMAGE)) -vmlinuz.efi: Image -Image vmlinuz.efi: vmlinux +image.fit: dtbs + +vmlinuz.efi image.fit: Image +$(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ Image.%: Image @@ -215,6 +221,7 @@ virtconfig: define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' + echo ' image.fit - Flat Image Tree (arch/$(ARCH)/boot/image.fit)' echo ' install - Install uncompressed kernel' echo ' zinstall - Install compressed kernel' echo ' Install using (your) ~/bin/installkernel or' diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore index af5dc61f8b43..abaae9de1bdd 100644 --- a/arch/arm64/boot/.gitignore +++ b/arch/arm64/boot/.gitignore @@ -2,3 +2,4 @@ Image Image.gz vmlinuz* +image.fit diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index a5a787371117..607a67a649c4 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -16,7 +16,8 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo Image.zst +targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo \ + Image.zst image.fit $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) @@ -39,6 +40,9 @@ $(obj)/Image.lzo: $(obj)/Image FORCE $(obj)/Image.zst: $(obj)/Image FORCE $(call if_changed,zstd) +$(obj)/image.fit: $(obj)/Image $(obj)/dts/dtbs-list FORCE + $(call if_changed,fit) + EFI_ZBOOT_PAYLOAD := Image EFI_ZBOOT_BFD_TARGET := elf64-littleaarch64 EFI_ZBOOT_MACH_TYPE := ARM64 diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1.dtsi index af421ba24ce0..d12b01c5f41b 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-bigtreetech-cb1.dtsi @@ -6,6 +6,7 @@ /dts-v1/; #include "sun50i-h616.dtsi" +#include "sun50i-h616-cpu-opp.dtsi" #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/interrupt-controller/arm-gic.h> @@ -62,6 +63,10 @@ }; }; +&cpu0 { + cpu-supply = <®_dcdc2>; +}; + &mmc0 { vmmc-supply = <®_dldo1>; /* Card detection pin is not connected */ diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-cpu-opp.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h616-cpu-opp.dtsi new file mode 100644 index 000000000000..aca22a7f0191 --- /dev/null +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-cpu-opp.dtsi @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +// Copyright (C) 2023 Martin Botka <martin@somainline.org> + +/ { + cpu_opp_table: opp-table-cpu { + compatible = 
"allwinner,sun50i-h616-operating-points"; + nvmem-cells = <&cpu_speed_grade>; + opp-shared; + + opp-480000000 { + opp-hz = /bits/ 64 <480000000>; + opp-microvolt = <900000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-supported-hw = <0x1f>; + }; + + opp-600000000 { + opp-hz = /bits/ 64 <600000000>; + opp-microvolt = <900000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-supported-hw = <0x12>; + }; + + opp-720000000 { + opp-hz = /bits/ 64 <720000000>; + opp-microvolt = <900000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-supported-hw = <0x0d>; + }; + + opp-792000000 { + opp-hz = /bits/ 64 <792000000>; + opp-microvolt-speed1 = <900000>; + opp-microvolt-speed4 = <940000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-supported-hw = <0x12>; + }; + + opp-936000000 { + opp-hz = /bits/ 64 <936000000>; + opp-microvolt = <900000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-supported-hw = <0x0d>; + }; + + opp-1008000000 { + opp-hz = /bits/ 64 <1008000000>; + opp-microvolt-speed0 = <950000>; + opp-microvolt-speed1 = <940000>; + opp-microvolt-speed2 = <950000>; + opp-microvolt-speed3 = <950000>; + opp-microvolt-speed4 = <1020000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-supported-hw = <0x1f>; + }; + + opp-1104000000 { + opp-hz = /bits/ 64 <1104000000>; + opp-microvolt-speed0 = <1000000>; + opp-microvolt-speed2 = <1000000>; + opp-microvolt-speed3 = <1000000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-supported-hw = <0x0d>; + }; + + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt-speed0 = <1050000>; + opp-microvolt-speed1 = <1020000>; + opp-microvolt-speed2 = <1050000>; + opp-microvolt-speed3 = <1050000>; + opp-microvolt-speed4 = <1100000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-supported-hw = <0x1f>; + }; + + opp-1320000000 { + opp-hz = /bits/ 64 <1320000000>; + opp-microvolt = <1100000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-supported-hw = <0x1d>; + }; + + opp-1416000000 { + opp-hz = /bits/ 64 <1416000000>; + opp-microvolt = <1100000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-supported-hw = <0x0d>; + }; + + opp-1512000000 { + opp-hz = /bits/ 64 <1512000000>; + opp-microvolt-speed1 = <1100000>; + opp-microvolt-speed3 = <1100000>; + clock-latency-ns = <244144>; /* 8 32k periods */ + opp-supported-hw = <0x0a>; + }; + }; +}; + +&cpu0 { + operating-points-v2 = <&cpu_opp_table>; +}; + +&cpu1 { + operating-points-v2 = <&cpu_opp_table>; +}; + +&cpu2 { + operating-points-v2 = <&cpu_opp_table>; +}; + +&cpu3 { + operating-points-v2 = <&cpu_opp_table>; +}; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts index b5d713926a34..a360d8567f95 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616-orangepi-zero2.dts @@ -6,12 +6,17 @@ /dts-v1/; #include "sun50i-h616-orangepi-zero.dtsi" +#include "sun50i-h616-cpu-opp.dtsi" / { model = "OrangePi Zero2"; compatible = "xunlong,orangepi-zero2", "allwinner,sun50i-h616"; }; +&cpu0 { + cpu-supply = <®_dcdca>; +}; + &emac0 { allwinner,rx-delay-ps = <3100>; allwinner,tx-delay-ps = <700>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616-x96-mate.dts b/arch/arm64/boot/dts/allwinner/sun50i-h616-x96-mate.dts index 959b6fd18483..26d25b5b59e0 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616-x96-mate.dts +++ 
b/arch/arm64/boot/dts/allwinner/sun50i-h616-x96-mate.dts @@ -6,6 +6,7 @@ /dts-v1/; #include "sun50i-h616.dtsi" +#include "sun50i-h616-cpu-opp.dtsi" #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/interrupt-controller/arm-gic.h> @@ -32,6 +33,10 @@ }; }; +&cpu0 { + cpu-supply = <®_dcdca>; +}; + &ehci0 { status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h616.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h616.dtsi index f8ecd7db4868..921d5f61d8d6 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h616.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h616.dtsi @@ -26,6 +26,7 @@ reg = <0>; enable-method = "psci"; clocks = <&ccu CLK_CPUX>; + #cooling-cells = <2>; }; cpu1: cpu@1 { @@ -34,6 +35,7 @@ reg = <1>; enable-method = "psci"; clocks = <&ccu CLK_CPUX>; + #cooling-cells = <2>; }; cpu2: cpu@2 { @@ -42,6 +44,7 @@ reg = <2>; enable-method = "psci"; clocks = <&ccu CLK_CPUX>; + #cooling-cells = <2>; }; cpu3: cpu@3 { @@ -50,6 +53,7 @@ reg = <3>; enable-method = "psci"; clocks = <&ccu CLK_CPUX>; + #cooling-cells = <2>; }; }; @@ -156,6 +160,10 @@ ths_calibration: thermal-sensor-calibration@14 { reg = <0x14 0x8>; }; + + cpu_speed_grade: cpu-speed-grade@0 { + reg = <0x0 2>; + }; }; watchdog: watchdog@30090a0 { diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-longan-module-3h.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h618-longan-module-3h.dtsi index 8c1263a3939e..e92d150aaf1c 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h618-longan-module-3h.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-longan-module-3h.dtsi @@ -4,6 +4,11 @@ */ #include "sun50i-h616.dtsi" +#include "sun50i-h616-cpu-opp.dtsi" + +&cpu0 { + cpu-supply = <®_dcdc2>; +}; &mmc2 { pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero2w.dts b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero2w.dts index 21ca1977055d..6a4f0da97233 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero2w.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero2w.dts @@ -6,6 +6,7 @@ /dts-v1/; #include "sun50i-h616.dtsi" +#include "sun50i-h616-cpu-opp.dtsi" #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/interrupt-controller/arm-gic.h> @@ -53,6 +54,10 @@ }; }; +&cpu0 { + cpu-supply = <®_dcdc2>; +}; + &ehci1 { status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts index b3b1b8692125..e1cd7572a14c 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-orangepi-zero3.dts @@ -6,12 +6,17 @@ /dts-v1/; #include "sun50i-h616-orangepi-zero.dtsi" +#include "sun50i-h616-cpu-opp.dtsi" / { model = "OrangePi Zero3"; compatible = "xunlong,orangepi-zero3", "allwinner,sun50i-h618"; }; +&cpu0 { + cpu-supply = <®_dcdc2>; +}; + &emac0 { allwinner,tx-delay-ps = <700>; phy-mode = "rgmii-rxid"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h618-transpeed-8k618-t.dts b/arch/arm64/boot/dts/allwinner/sun50i-h618-transpeed-8k618-t.dts index a1d0cac4d244..d6631bfe629f 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h618-transpeed-8k618-t.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h618-transpeed-8k618-t.dts @@ -6,6 +6,7 @@ /dts-v1/; #include "sun50i-h616.dtsi" +#include "sun50i-h616-cpu-opp.dtsi" #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/interrupt-controller/arm-gic.h> @@ -51,6 +52,10 @@ }; }; +&cpu0 { + cpu-supply = <®_dcdc2>; +}; + &ehci0 { status = "okay"; }; diff --git 
a/arch/arm64/boot/dts/st/stm32mp251.dtsi b/arch/arm64/boot/dts/st/stm32mp251.dtsi index 4b48e4ed2d28..dcd0656d67a8 100644 --- a/arch/arm64/boot/dts/st/stm32mp251.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp251.dtsi @@ -443,6 +443,99 @@ <&clk_dsi_txbyte>; }; + exti1: interrupt-controller@44220000 { + compatible = "st,stm32mp1-exti", "syscon"; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x44220000 0x400>; + interrupts-extended = + <&intc GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_0 */ + <&intc GIC_SPI 269 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 270 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 271 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 272 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 273 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 274 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 275 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 276 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 277 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 278 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_10 */ + <&intc GIC_SPI 279 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 280 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 281 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 282 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 283 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 260 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 259 IRQ_TYPE_LEVEL_HIGH>, + <0>, /* EXTI_20 */ + <&intc GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 181 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_30 */ + <&intc GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 149 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 150 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <&intc GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_40 */ + <&intc GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 182 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 209 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 229 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 166 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 210 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_50 */ + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, + <&intc GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>, + <0>, /* EXTI_60 */ + <&intc GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <&intc GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <&intc GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <&intc GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_70 */ + <0>, + <&intc GIC_SPI 224 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 202 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 253 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 254 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 255 IRQ_TYPE_LEVEL_HIGH>, + <0>, /* EXTI_80 */ + <0>, + <0>, + <&intc GIC_SPI 257 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 258 IRQ_TYPE_LEVEL_HIGH>; + }; + syscfg: syscon@44230000 { compatible = 
"st,stm32mp25-syscfg", "syscon"; reg = <0x44230000 0x10000>; @@ -453,6 +546,8 @@ #size-cells = <1>; compatible = "st,stm32mp257-pinctrl"; ranges = <0 0x44240000 0xa0400>; + interrupt-parent = <&exti1>; + st,syscfg = <&exti1 0x60 0xff>; pins-are-numbered; gpioa: gpio@44240000 { @@ -582,6 +677,8 @@ #size-cells = <1>; compatible = "st,stm32mp257-z-pinctrl"; ranges = <0 0x46200000 0x400>; + interrupt-parent = <&exti1>; + st,syscfg = <&exti1 0x60 0xff>; pins-are-numbered; gpioz: gpio@46200000 { @@ -597,5 +694,84 @@ }; }; + + exti2: interrupt-controller@46230000 { + compatible = "st,stm32mp1-exti", "syscon"; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x46230000 0x400>; + interrupts-extended = + <&intc GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_0 */ + <&intc GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_10 */ + <&intc GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <0>, /* EXTI_20 */ + <&intc GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <&intc GIC_SPI 212 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 151 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <&intc GIC_SPI 216 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_30 */ + <&intc GIC_SPI 218 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <&intc GIC_SPI 207 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 175 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <&intc GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <&intc GIC_SPI 199 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_40 */ + <0>, + <0>, + <&intc GIC_SPI 200 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <&intc GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <&intc GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, /* EXTI_50 */ + <&intc GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, + <0>, /* EXTI_60 */ + <&intc GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 246 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <&intc GIC_SPI 247 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 248 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 249 IRQ_TYPE_LEVEL_HIGH>, + <&intc GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>, + <0>, + <0>, + <&intc GIC_SPI 213 IRQ_TYPE_LEVEL_HIGH>; /* EXTI_70 */ + }; }; }; diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ab8b396428da..bc0b0d75acef 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -50,16 +50,12 @@ msr daif, \flags .endm - .macro enable_dbg - msr daifclr, #8 - .endm - .macro disable_step_tsk, flgs, tmp tbz \flgs, #TIF_SINGLESTEP, 9990f mrs \tmp, mdscr_el1 bic \tmp, \tmp, #DBG_MDSCR_SS msr mdscr_el1, \tmp - isb // Synchronise with enable_dbg + isb // Take effect before a subsequent clear of DAIF.D 9990: .endm @@ -480,9 +476,10 @@ alternative_endif */ .macro reset_pmuserenr_el0, tmpreg mrs \tmpreg, 
id_aa64dfr0_el1 - sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 - cmp \tmpreg, #1 // Skip if no PMU present - b.lt 9000f + ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq 9000f // Skip if no PMU present or IMP_DEF msr pmuserenr_el0, xzr // Disable PMU access from EL0 9000: .endm diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 52f076afeb96..936389e9aecb 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_NEOVERSE_V2 0xD4F #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -159,6 +160,7 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b7afaa026842..e4546b29dd0c 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -59,13 +59,14 @@ .macro __init_el2_debug mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 - cmp x0, #1 - b.lt .Lskip_pmu_\@ // Skip if no PMU present + ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp x0, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp x0, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq .Lskip_pmu_\@ // Skip if no PMU present or IMP_DEF mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to .Lskip_pmu_\@: - csel x2, xzr, x0, lt // all PMU counters from EL1 + csel x2, xzr, x0, eq // all PMU counters from EL1 /* Statistical profiling */ ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index db1aeacd4cd9..8c0a36f72d6f 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -135,6 +135,12 @@ enum aarch64_insn_special_register { AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210 }; +enum aarch64_insn_system_register { + AARCH64_INSN_SYSREG_TPIDR_EL1 = 0x4684, + AARCH64_INSN_SYSREG_TPIDR_EL2 = 0x6682, + AARCH64_INSN_SYSREG_SP_EL0 = 0x4208, +}; + enum aarch64_insn_variant { AARCH64_INSN_VARIANT_32BIT, AARCH64_INSN_VARIANT_64BIT @@ -686,6 +692,8 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, } #endif u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); +u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, + enum aarch64_insn_system_register sysreg); s32 aarch64_get_branch_offset(u32 insn); u32 aarch64_set_branch_offset(u32 insn, s32 offset); diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 0a7186a93882..d4d7451c2c12 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -5,7 +5,6 @@ #ifndef __ASM_IRQFLAGS_H #define __ASM_IRQFLAGS_H -#include <asm/alternative.h> #include <asm/barrier.h> #include 
<asm/ptrace.h> #include <asm/sysreg.h> diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 6aafbb789991..4e753908b801 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -15,17 +15,23 @@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE +#define JUMP_TABLE_ENTRY(key, label) \ + ".pushsection __jump_table, \"aw\"\n\t" \ + ".align 3\n\t" \ + ".long 1b - ., %l["#label"] - .\n\t" \ + ".quad %c0 - .\n\t" \ + ".popsection\n\t" \ + : : "i"(key) : : label + static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { + char *k = &((char *)key)[branch]; + asm goto( "1: nop \n\t" - " .pushsection __jump_table, \"aw\" \n\t" - " .align 3 \n\t" - " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" - " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); + JUMP_TABLE_ENTRY(k, l_yes) + ); return false; l_yes: @@ -35,15 +41,11 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { + char *k = &((char *)key)[branch]; asm goto( "1: b %l[l_yes] \n\t" - " .pushsection __jump_table, \"aw\" \n\t" - " .align 3 \n\t" - " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" - " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); - + JUMP_TABLE_ENTRY(k, l_yes) + ); return false; l_yes: return true; diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index dd9ee67d1d87..b11cfb9fdd37 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -18,14 +18,21 @@ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* - * This bit indicates that the entry is present i.e. pmd_page() - * still points to a valid huge page in memory even if the pmd - * has been invalidated. + * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be + * interpreted according to the HW layout by SW but any attempted HW access to + * the address will result in a fault. pte_present() returns true. 
*/ -#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */ +#define PTE_PRESENT_INVALID (PTE_NG) /* only when !PTE_VALID */ + +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */ +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) /* only for swp ptes */ +#else +#define PTE_UFFD_WP (_AT(pteval_t, 0)) +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 0)) +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ #define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) @@ -103,7 +110,7 @@ static inline bool __pure lpa2_is_enabled(void) __val; \ }) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PRESENT_INVALID | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) /* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ #define PAGE_SHARED __pgprot(_PAGE_SHARED) #define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index afdd56d26ad7..bde9fd179388 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -105,7 +105,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) /* * The following only work if pte_present(). Undefined behaviour otherwise. */ -#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) +#define pte_present(pte) (pte_valid(pte) || pte_present_invalid(pte)) #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) @@ -132,6 +132,8 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) +#define pte_present_invalid(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID) /* * Execute-only user mappings do not have the PTE_USER bit set. All valid * kernel mappings have the PTE_UXN bit set. 
@@ -261,6 +263,13 @@ static inline pte_t pte_mkpresent(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_VALID)); } +static inline pte_t pte_mkinvalid(pte_t pte) +{ + pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID)); + pte = clear_pte_bit(pte, __pgprot(PTE_VALID)); + return pte; +} + static inline pmd_t pmd_mkcont(pmd_t pmd) { return __pmd(pmd_val(pmd) | PMD_SECT_CONT); @@ -271,9 +280,31 @@ static inline pte_t pte_mkdevmap(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); } -static inline void __set_pte(pte_t *ptep, pte_t pte) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) +{ + return !!(pte_val(pte) & PTE_UFFD_WP); +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD_WP))); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_UFFD_WP)); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +static inline void __set_pte_nosync(pte_t *ptep, pte_t pte) { WRITE_ONCE(*ptep, pte); +} + +static inline void __set_pte(pte_t *ptep, pte_t pte) +{ + __set_pte_nosync(ptep, pte); /* * Only if the new pte is valid and kernel, otherwise TLB maintenance @@ -463,13 +494,39 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP)); +} + +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return !!(pte_val(pte) & PTE_SWP_UFFD_WP); +} + +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP)); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + #ifdef CONFIG_NUMA_BALANCING /* * See the comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { - return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE; + /* + * pte_present_invalid() tells us that the pte is invalid from HW + * perspective but present from SW perspective, so the fields are to be + * interpretted as per the HW layout. The second 2 checks are the unique + * encoding that we use for PROT_NONE. It is insufficient to only use + * the first check because we share the same encoding scheme with pmds + * which support pmd_mkinvalid(), so can be present-invalid without + * being PROT_NONE. + */ + return pte_present_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte); } static inline int pmd_protnone(pmd_t pmd) @@ -478,12 +535,7 @@ static inline int pmd_protnone(pmd_t pmd) } #endif -#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID)) - -static inline int pmd_present(pmd_t pmd) -{ - return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd); -} +#define pmd_present(pmd) pte_present(pmd_pte(pmd)) /* * THP definitions. 
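As a simplified model of the encoding introduced above: PTE_PRESENT_INVALID marks an entry that software still treats as present while any hardware access faults, and PROT_NONE is just the present-invalid case that is neither user-readable nor user-executable. The sketch below is illustrative user-space C, not the kernel's real definitions (bit positions follow the arm64 descriptor layout, but the helpers are reduced to the minimum needed here):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t pteval_t;

#define PTE_VALID            (1ULL << 0)
#define PTE_USER             (1ULL << 6)	/* AP[1] */
#define PTE_NG               (1ULL << 11)
#define PTE_UXN              (1ULL << 54)
#define PTE_PRESENT_INVALID  PTE_NG		/* only meaningful when !PTE_VALID */

static bool pte_valid(pteval_t pte) { return pte & PTE_VALID; }
static bool pte_user(pteval_t pte) { return pte & PTE_USER; }
static bool pte_user_exec(pteval_t pte) { return !(pte & PTE_UXN); }

static bool pte_present_invalid(pteval_t pte)
{
	return (pte & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID;
}

static bool pte_present(pteval_t pte)
{
	return pte_valid(pte) || pte_present_invalid(pte);
}

static pteval_t pte_mkinvalid(pteval_t pte)
{
	return (pte | PTE_PRESENT_INVALID) & ~PTE_VALID;
}

static bool pte_protnone(pteval_t pte)
{
	/* Present-invalid and neither user-readable nor user-executable. */
	return pte_present_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte);
}

int main(void)
{
	pteval_t user_pte = PTE_VALID | PTE_USER;		/* ordinary user mapping */
	pteval_t none_pte = PTE_PRESENT_INVALID | PTE_UXN;	/* PAGE_NONE-style entry */

	user_pte = pte_mkinvalid(user_pte);			/* e.g. a THP split path */

	printf("invalidated user pte: present=%d protnone=%d\n",
	       pte_present(user_pte), pte_protnone(user_pte));	/* 1 0 */
	printf("prot_none pte:        present=%d protnone=%d\n",
	       pte_present(none_pte), pte_protnone(none_pte));	/* 1 1 */
	return 0;
}

An entry made invalid via pte_mkinvalid()/pmd_mkinvalid() shares this encoding but keeps its user bits, which is why pte_protnone() must check more than the present-invalid bit alone.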
@@ -508,14 +560,16 @@ static inline int pmd_trans_huge(pmd_t pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) - -static inline pmd_t pmd_mkinvalid(pmd_t pmd) -{ - pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID)); - pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID)); - - return pmd; -} +#define pmd_mkinvalid(pmd) pte_pmd(pte_mkinvalid(pmd_pte(pmd))) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define pmd_uffd_wp(pmd) pte_uffd_wp(pmd_pte(pmd)) +#define pmd_mkuffd_wp(pmd) pte_pmd(pte_mkuffd_wp(pmd_pte(pmd))) +#define pmd_clear_uffd_wp(pmd) pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd))) +#define pmd_swp_uffd_wp(pmd) pte_swp_uffd_wp(pmd_pte(pmd)) +#define pmd_swp_mkuffd_wp(pmd) pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd))) +#define pmd_swp_clear_uffd_wp(pmd) \ + pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd))) +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) @@ -760,6 +814,7 @@ static inline pmd_t *pud_pgtable(pud_t pud) #else +#define pud_valid(pud) false #define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) #define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */ @@ -1005,6 +1060,8 @@ static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr) static inline bool pgtable_l5_enabled(void) { return false; } +#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + /* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */ #define p4d_set_fixmap(addr) NULL #define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp) @@ -1027,8 +1084,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK. 
*/ const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP | - PTE_ATTRINDX_MASK; + PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE | + PTE_GP | PTE_ATTRINDX_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); @@ -1076,17 +1133,17 @@ static inline int pgd_devmap(pgd_t pgd) #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { - return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte)); + return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte)); } static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) { - return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud)); + return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud)); } #endif @@ -1248,15 +1305,16 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, * Encode and decode a swap entry: * bits 0-1: present (must be zero) * bits 2: remember PG_anon_exclusive - * bits 3-7: swap type - * bits 8-57: swap offset - * bit 58: PTE_PROT_NONE (must be zero) + * bit 3: remember uffd-wp state + * bits 6-10: swap type + * bit 11: PTE_PRESENT_INVALID (must be zero) + * bits 12-61: swap offset */ -#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_SHIFT 6 #define __SWP_TYPE_BITS 5 -#define __SWP_OFFSET_BITS 50 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) -#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) +#define __SWP_OFFSET_SHIFT 12 +#define __SWP_OFFSET_BITS 50 #define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1) #define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9e8999592f3a..af3b206fa423 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1036,18 +1036,18 @@ * Permission Indirection Extension (PIE) permission encodings. * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension). */ -#define PIE_NONE_O 0x0 -#define PIE_R_O 0x1 -#define PIE_X_O 0x2 -#define PIE_RX_O 0x3 -#define PIE_RW_O 0x5 -#define PIE_RWnX_O 0x6 -#define PIE_RWX_O 0x7 -#define PIE_R 0x8 -#define PIE_GCS 0x9 -#define PIE_RX 0xa -#define PIE_RW 0xc -#define PIE_RWX 0xe +#define PIE_NONE_O UL(0x0) +#define PIE_R_O UL(0x1) +#define PIE_X_O UL(0x2) +#define PIE_RX_O UL(0x3) +#define PIE_RW_O UL(0x5) +#define PIE_RWnX_O UL(0x6) +#define PIE_RWX_O UL(0x7) +#define PIE_R UL(0x8) +#define PIE_GCS UL(0x9) +#define PIE_RX UL(0xa) +#define PIE_RW UL(0xc) +#define PIE_RWX UL(0xe) #define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index a75de2665d84..95fbc8c05607 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -142,17 +142,24 @@ static inline unsigned long get_trans_granule(void) * EL1, Inner Shareable". * */ -#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ - ({ \ - unsigned long __ta = (baddr); \ - unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? 
ttl : 0; \ - __ta &= GENMASK_ULL(36, 0); \ - __ta |= __ttl << 37; \ - __ta |= (unsigned long)(num) << 39; \ - __ta |= (unsigned long)(scale) << 44; \ - __ta |= get_trans_granule() << 46; \ - __ta |= (unsigned long)(asid) << 48; \ - __ta; \ +#define TLBIR_ASID_MASK GENMASK_ULL(63, 48) +#define TLBIR_TG_MASK GENMASK_ULL(47, 46) +#define TLBIR_SCALE_MASK GENMASK_ULL(45, 44) +#define TLBIR_NUM_MASK GENMASK_ULL(43, 39) +#define TLBIR_TTL_MASK GENMASK_ULL(38, 37) +#define TLBIR_BADDR_MASK GENMASK_ULL(36, 0) + +#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ + ({ \ + unsigned long __ta = 0; \ + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ + __ta |= FIELD_PREP(TLBIR_BADDR_MASK, baddr); \ + __ta |= FIELD_PREP(TLBIR_TTL_MASK, __ttl); \ + __ta |= FIELD_PREP(TLBIR_NUM_MASK, num); \ + __ta |= FIELD_PREP(TLBIR_SCALE_MASK, scale); \ + __ta |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule()); \ + __ta |= FIELD_PREP(TLBIR_ASID_MASK, asid); \ + __ta; \ }) /* These macros are used by the TLBI RANGE feature. */ @@ -439,11 +446,11 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma, * When not uses TLB range ops, we can handle up to * (MAX_DVM_OPS - 1) pages; * When uses TLB range ops, we can handle up to - * (MAX_TLBI_RANGE_PAGES - 1) pages. + * MAX_TLBI_RANGE_PAGES pages. */ if ((!system_supports_tlb_range() && (end - start) >= (MAX_DVM_OPS * stride)) || - pages >= MAX_TLBI_RANGE_PAGES) { + pages > MAX_TLBI_RANGE_PAGES) { flush_tlb_mm(vma->vm_mm); return; } diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index dba8fcec7f33..e0e7b93c16cc 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -26,6 +26,7 @@ #include <linux/libfdt.h> #include <linux/smp.h> #include <linux/serial_core.h> +#include <linux/suspend.h> #include <linux/pgtable.h> #include <acpi/ghes.h> @@ -227,6 +228,15 @@ done: if (earlycon_acpi_spcr_enable) early_init_dt_scan_chosen_stdout(); } else { +#ifdef CONFIG_HIBERNATION + struct acpi_table_header *facs = NULL; + acpi_get_table(ACPI_SIG_FACS, 1, &facs); + if (facs) { + swsusp_hardware_signature = + ((struct acpi_table_facs *)facs)->hardware_signature; + acpi_put_table(facs); + } +#endif acpi_parse_spcr(earlycon_acpi_spcr_enable, true); if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 6d157f32187b..e8ed5673f481 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -10,94 +10,12 @@ #include <asm/pointer_auth.h> -struct frame_tail { - struct frame_tail __user *fp; - unsigned long lr; -} __attribute__((packed)); - -/* - * Get the return address for a single stackframe and return a pointer to the - * next frame tail. - */ -static struct frame_tail __user * -user_backtrace(struct frame_tail __user *tail, - struct perf_callchain_entry_ctx *entry) -{ - struct frame_tail buftail; - unsigned long err; - unsigned long lr; - - /* Also check accessibility of one struct frame_tail beyond */ - if (!access_ok(tail, sizeof(buftail))) - return NULL; - - pagefault_disable(); - err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); - pagefault_enable(); - - if (err) - return NULL; - - lr = ptrauth_strip_user_insn_pac(buftail.lr); - - perf_callchain_store(entry, lr); - - /* - * Frame pointers should strictly progress back up the stack - * (towards higher addresses). 
- */ - if (tail >= buftail.fp) - return NULL; - - return buftail.fp; -} - -#ifdef CONFIG_COMPAT -/* - * The registers we're interested in are at the end of the variable - * length saved register structure. The fp points at the end of this - * structure so the address of this struct is: - * (struct compat_frame_tail *)(xxx->fp)-1 - * - * This code has been adapted from the ARM OProfile support. - */ -struct compat_frame_tail { - compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ - u32 sp; - u32 lr; -} __attribute__((packed)); - -static struct compat_frame_tail __user * -compat_user_backtrace(struct compat_frame_tail __user *tail, - struct perf_callchain_entry_ctx *entry) +static bool callchain_trace(void *data, unsigned long pc) { - struct compat_frame_tail buftail; - unsigned long err; - - /* Also check accessibility of one struct frame_tail beyond */ - if (!access_ok(tail, sizeof(buftail))) - return NULL; - - pagefault_disable(); - err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); - pagefault_enable(); - - if (err) - return NULL; - - perf_callchain_store(entry, buftail.lr); - - /* - * Frame pointers should strictly progress back up the stack - * (towards higher addresses). - */ - if (tail + 1 >= (struct compat_frame_tail __user *) - compat_ptr(buftail.fp)) - return NULL; + struct perf_callchain_entry_ctx *entry = data; - return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; + return perf_callchain_store(entry, pc) == 0; } -#endif /* CONFIG_COMPAT */ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) @@ -107,35 +25,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, return; } - perf_callchain_store(entry, regs->pc); - - if (!compat_user_mode(regs)) { - /* AARCH64 mode */ - struct frame_tail __user *tail; - - tail = (struct frame_tail __user *)regs->regs[29]; - - while (entry->nr < entry->max_stack && - tail && !((unsigned long)tail & 0x7)) - tail = user_backtrace(tail, entry); - } else { -#ifdef CONFIG_COMPAT - /* AARCH32 compat mode */ - struct compat_frame_tail __user *tail; - - tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; - - while ((entry->nr < entry->max_stack) && - tail && !((unsigned long)tail & 0x3)) - tail = compat_user_backtrace(tail, entry); -#endif - } -} - -static bool callchain_trace(void *data, unsigned long pc) -{ - struct perf_callchain_entry_ctx *entry = data; - return perf_callchain_store(entry, pc) == 0; + arch_stack_walk_user(callchain_trace, entry, regs); } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c index aad399796e81..48c1aa456af9 100644 --- a/arch/arm64/kernel/pi/idreg-override.c +++ b/arch/arm64/kernel/pi/idreg-override.c @@ -108,6 +108,7 @@ static const struct ftr_set_desc pfr0 __prel64_initconst = { .override = &id_aa64pfr0_override, .fields = { FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter), + FIELD("el0", ID_AA64PFR0_EL1_EL0_SHIFT, NULL), {} }, }; @@ -223,6 +224,7 @@ static const struct { { "nokaslr", "arm64_sw.nokaslr=1" }, { "rodata=off", "arm64_sw.rodataoff=1" }, { "arm64.nolva", "id_aa64mmfr2.varange=0" }, + { "arm64.no32bit_el0", "id_aa64pfr0.el0=1" }, }; static int __init parse_hexdigit(const char *p, u64 *v) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 65a052bf741f..a096e2451044 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -298,8 +298,15 @@ void __init 
__no_sanitize_address setup_arch(char **cmdline_p) dynamic_scs_init(); /* - * Unmask SError as soon as possible after initializing earlycon so - * that we can report any SErrors immediately. + * The primary CPU enters the kernel with all DAIF exceptions masked. + * + * We must unmask Debug and SError before preemption or scheduling is + * possible to ensure that these are consistently unmasked across + * threads, and we want to unmask SError as soon as possible after + * initializing earlycon so that we can report any SErrors immediately. + * + * IRQ and FIQ will be unmasked after the root irqchip has been + * detected and initialized. */ local_daif_restore(DAIF_PROCCTX_NOIRQ); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 4ced34f62dab..31c8b3094dd7 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -264,6 +264,13 @@ asmlinkage notrace void secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); + /* + * Secondary CPUs enter the kernel with all DAIF exceptions masked. + * + * As with setup_arch() we must unmask Debug and SError exceptions, and + * as the root irqchip has already been detected and initialized we can + * unmask IRQ and FIQ at the same time. + */ local_daif_restore(DAIF_PROCCTX); /* diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 684c26511696..6b3258860377 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -324,3 +324,123 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) dump_backtrace(NULL, tsk, loglvl); barrier(); } + +/* + * The struct defined for userspace stack frame in AARCH64 mode. + */ +struct frame_tail { + struct frame_tail __user *fp; + unsigned long lr; +} __attribute__((packed)); + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static struct frame_tail __user * +unwind_user_frame(struct frame_tail __user *tail, void *cookie, + stack_trace_consume_fn consume_entry) +{ + struct frame_tail buftail; + unsigned long err; + unsigned long lr; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + lr = ptrauth_strip_user_insn_pac(buftail.lr); + + if (!consume_entry(cookie, lr)) + return NULL; + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail >= buftail.fp) + return NULL; + + return buftail.fp; +} + +#ifdef CONFIG_COMPAT +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct compat_frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. 
+ */ +struct compat_frame_tail { + compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ + u32 sp; + u32 lr; +} __attribute__((packed)); + +static struct compat_frame_tail __user * +unwind_compat_user_frame(struct compat_frame_tail __user *tail, void *cookie, + stack_trace_consume_fn consume_entry) +{ + struct compat_frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + if (!consume_entry(cookie, buftail.lr)) + return NULL; + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail + 1 >= (struct compat_frame_tail __user *) + compat_ptr(buftail.fp)) + return NULL; + + return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; +} +#endif /* CONFIG_COMPAT */ + + +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) +{ + if (!consume_entry(cookie, regs->pc)) + return; + + if (!compat_user_mode(regs)) { + /* AARCH64 mode */ + struct frame_tail __user *tail; + + tail = (struct frame_tail __user *)regs->regs[29]; + while (tail && !((unsigned long)tail & 0x7)) + tail = unwind_user_frame(tail, cookie, consume_entry); + } else { +#ifdef CONFIG_COMPAT + /* AARCH32 compat mode */ + struct compat_frame_tail __user *tail; + + tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; + while (tail && !((unsigned long)tail & 0x3)) + tail = unwind_compat_user_frame(tail, cookie, consume_entry); +#endif + } +} diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index a635ab83fee3..b008a9b46a7f 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -1515,3 +1515,14 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) return insn; } + +u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, + enum aarch64_insn_system_register sysreg) +{ + u32 insn = aarch64_insn_get_mrs_value(); + + insn &= ~GENMASK(19, 0); + insn |= sysreg << 5; + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, + insn, result); +} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 495b732d5af3..c927e9312f10 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -109,28 +109,12 @@ EXPORT_SYMBOL(phys_mem_access_prot); static phys_addr_t __init early_pgtable_alloc(int shift) { phys_addr_t phys; - void *ptr; phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, MEMBLOCK_ALLOC_NOLEAKTRACE); if (!phys) panic("Failed to allocate page table page\n"); - /* - * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE - * slot will be free, so we can (ab)use the FIX_PTE slot to initialise - * any level of table. - */ - ptr = pte_set_fixmap(phys); - - memset(ptr, 0, PAGE_SIZE); - - /* - * Implicit barriers also ensure the zeroed page is visible to the page - * table walker - */ - pte_clear_fixmap(); - return phys; } @@ -172,16 +156,25 @@ bool pgattr_change_is_safe(u64 old, u64 new) return ((old ^ new) & ~mask) == 0; } -static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot) +static void init_clear_pgtable(void *table) { - pte_t *ptep; + clear_page(table); - ptep = pte_set_fixmap_offset(pmdp, addr); + /* Ensure the zeroing is observed by page table walks. 
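The new arch_stack_walk_user() above hands each user-space return address to a stack_trace_consume_fn and stops as soon as the callback returns false, which is how perf_callchain_user() now drives the walk. A hedged sketch of a consumer follows; the typedef mirrors the kernel's callback shape, while the collector struct and function names are made up for illustration:

#include <stdbool.h>

/* Same shape as the kernel's stack_trace_consume_fn. */
typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr);

/* Hypothetical collector, not a kernel structure. */
struct callchain_buf {
	unsigned long entries[64];
	unsigned int nr;
};

/* Record each address; returning false asks the walker to stop early. */
static bool collect_entry(void *cookie, unsigned long addr)
{
	struct callchain_buf *buf = cookie;

	if (buf->nr >= 64)
		return false;
	buf->entries[buf->nr++] = addr;
	return true;
}

/* A caller would then do: arch_stack_walk_user(collect_entry, &buf, regs); */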
*/ + dsb(ishst); +} + +static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot) +{ do { pte_t old_pte = __ptep_get(ptep); - __set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot)); + /* + * Required barriers to make this visible to the table walker + * are deferred to the end of alloc_init_cont_pte(). + */ + __set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot)); /* * After the PTE entry has been populated once, we @@ -192,8 +185,6 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, phys += PAGE_SIZE; } while (ptep++, addr += PAGE_SIZE, addr != end); - - pte_clear_fixmap(); } static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, @@ -204,6 +195,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, { unsigned long next; pmd_t pmd = READ_ONCE(*pmdp); + pte_t *ptep; BUG_ON(pmd_sect(pmd)); if (pmd_none(pmd)) { @@ -214,10 +206,14 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(PAGE_SHIFT); + ptep = pte_set_fixmap(pte_phys); + init_clear_pgtable(ptep); + ptep += pte_index(addr); __pmd_populate(pmdp, pte_phys, pmdval); - pmd = READ_ONCE(*pmdp); + } else { + BUG_ON(pmd_bad(pmd)); + ptep = pte_set_fixmap_offset(pmdp, addr); } - BUG_ON(pmd_bad(pmd)); do { pgprot_t __prot = prot; @@ -229,20 +225,26 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, (flags & NO_CONT_MAPPINGS) == 0) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); - init_pte(pmdp, addr, next, phys, __prot); + init_pte(ptep, addr, next, phys, __prot); + ptep += pte_index(next) - pte_index(addr); phys += next - addr; } while (addr = next, addr != end); + + /* + * Note: barriers and maintenance necessary to clear the fixmap slot + * ensure that all previous pgtable writes are visible to the table + * walker. + */ + pte_clear_fixmap(); } -static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, +static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long next; - pmd_t *pmdp; - pmdp = pmd_set_fixmap_offset(pudp, addr); do { pmd_t old_pmd = READ_ONCE(*pmdp); @@ -268,8 +270,6 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, } phys += next - addr; } while (pmdp++, addr = next, addr != end); - - pmd_clear_fixmap(); } static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, @@ -279,6 +279,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, { unsigned long next; pud_t pud = READ_ONCE(*pudp); + pmd_t *pmdp; /* * Check for initial section mappings in the pgd/pud. 
@@ -292,10 +293,14 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(PMD_SHIFT); + pmdp = pmd_set_fixmap(pmd_phys); + init_clear_pgtable(pmdp); + pmdp += pmd_index(addr); __pud_populate(pudp, pmd_phys, pudval); - pud = READ_ONCE(*pudp); + } else { + BUG_ON(pud_bad(pud)); + pmdp = pmd_set_fixmap_offset(pudp, addr); } - BUG_ON(pud_bad(pud)); do { pgprot_t __prot = prot; @@ -307,10 +312,13 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, (flags & NO_CONT_MAPPINGS) == 0) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); - init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags); + init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); + pmdp += pmd_index(next) - pmd_index(addr); phys += next - addr; } while (addr = next, addr != end); + + pmd_clear_fixmap(); } static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, @@ -330,12 +338,15 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); pud_phys = pgtable_alloc(PUD_SHIFT); + pudp = pud_set_fixmap(pud_phys); + init_clear_pgtable(pudp); + pudp += pud_index(addr); __p4d_populate(p4dp, pud_phys, p4dval); - p4d = READ_ONCE(*p4dp); + } else { + BUG_ON(p4d_bad(p4d)); + pudp = pud_set_fixmap_offset(p4dp, addr); } - BUG_ON(p4d_bad(p4d)); - pudp = pud_set_fixmap_offset(p4dp, addr); do { pud_t old_pud = READ_ONCE(*pudp); @@ -385,12 +396,15 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, pgdval |= PGD_TABLE_PXN; BUG_ON(!pgtable_alloc); p4d_phys = pgtable_alloc(P4D_SHIFT); + p4dp = p4d_set_fixmap(p4d_phys); + init_clear_pgtable(p4dp); + p4dp += p4d_index(addr); __pgd_populate(pgdp, p4d_phys, pgdval); - pgd = READ_ONCE(*pgdp); + } else { + BUG_ON(pgd_bad(pgd)); + p4dp = p4d_set_fixmap_offset(pgdp, addr); } - BUG_ON(pgd_bad(pgd)); - p4dp = p4d_set_fixmap_offset(pgdp, addr); do { p4d_t old_p4d = READ_ONCE(*p4dp); @@ -457,11 +471,10 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, static phys_addr_t __pgd_pgtable_alloc(int shift) { - void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); - BUG_ON(!ptr); + /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */ + void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO); - /* Ensure the zeroed page is visible to the page table walker */ - dsb(ishst); + BUG_ON(!ptr); return __pa(ptr); } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9d40f3ffd8d2..f4bc6c5bac06 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -135,14 +135,6 @@ SYM_FUNC_START(cpu_do_resume) msr tcr_el1, x8 msr vbar_el1, x9 - - /* - * __cpu_setup() cleared MDSCR_EL1.MDE and friends, before unmasking - * debug exceptions. By restoring MDSCR_EL1 here, we may take a debug - * exception. Mask them until local_daif_restore() in cpu_suspend() - * resets them. 
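The mmu.c rework above batches early page-table construction: a new table is zeroed once through init_clear_pgtable(), entries are written with the nosync setters, and a single barrier plus fixmap teardown at the end publishes the whole run to the table walker. A plain-C model of that zero/fill/publish pattern; the barrier is a compiler-only stand-in for dsb(ishst) and the sizes are illustrative, not the arm64 helpers:

#include <stdint.h>
#include <string.h>

#define PTRS_PER_PTE	512
#define PAGE_SIZE	4096ULL

/* Stand-in for dsb(ishst): one ordering point after the whole batch. */
static inline void publish_tables(void)
{
	__asm__ volatile("" ::: "memory");
}

static void fill_pte_table(uint64_t *table, uint64_t phys, uint64_t prot, int n)
{
	/* Zero the fresh table once (init_clear_pgtable() in the patch)... */
	memset(table, 0, PTRS_PER_PTE * sizeof(*table));

	/* ...fill entries with no per-entry sync (__set_pte_nosync())... */
	for (int i = 0; i < n; i++)
		table[i] = (phys + i * PAGE_SIZE) | prot;

	/* ...then publish everything with a single barrier at the end. */
	publish_tables();
}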
- */ - disable_daif msr mdscr_el1, x10 msr sctlr_el1, x12 @@ -466,8 +458,6 @@ SYM_FUNC_START(__cpu_setup) msr cpacr_el1, xzr // Reset cpacr_el1 mov x1, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x1 // access to the DCC from EL0 - isb // Unmask debug exceptions now, - enable_dbg // since this is per-cpu reset_pmuserenr_el0 x1 // Disable PMU access from EL0 reset_amuserenr_el0 x1 // Disable AMU access from EL0 diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 23b1b34db088..b22ab2f97a30 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -297,4 +297,12 @@ #define A64_ADR(Rd, offset) \ aarch64_insn_gen_adr(0, offset, Rd, AARCH64_INSN_ADR_TYPE_ADR) +/* MRS */ +#define A64_MRS_TPIDR_EL1(Rt) \ + aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_TPIDR_EL1) +#define A64_MRS_TPIDR_EL2(Rt) \ + aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_TPIDR_EL2) +#define A64_MRS_SP_EL0(Rt) \ + aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_SP_EL0) + #endif /* _BPF_JIT_H */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 074b1d156223..720336d28856 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -29,6 +29,7 @@ #define TCALL_CNT (MAX_BPF_JIT_REG + 2) #define TMP_REG_3 (MAX_BPF_JIT_REG + 3) #define FP_BOTTOM (MAX_BPF_JIT_REG + 4) +#define ARENA_VM_START (MAX_BPF_JIT_REG + 5) #define check_imm(bits, imm) do { \ if ((((imm) > 0) && ((imm) >> (bits))) || \ @@ -67,6 +68,8 @@ static const int bpf2a64[] = { /* temporary register for blinding constants */ [BPF_REG_AX] = A64_R(9), [FP_BOTTOM] = A64_R(27), + /* callee saved register for kern_vm_start address */ + [ARENA_VM_START] = A64_R(28), }; struct jit_ctx { @@ -79,6 +82,7 @@ struct jit_ctx { __le32 *ro_image; u32 stack_size; int fpb_offset; + u64 user_vm_start; }; struct bpf_plt { @@ -295,7 +299,7 @@ static bool is_lsi_offset(int offset, int scale) #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8) static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, - bool is_exception_cb) + bool is_exception_cb, u64 arena_vm_start) { const struct bpf_prog *prog = ctx->prog; const bool is_main_prog = !bpf_is_subprog(prog); @@ -306,6 +310,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, const u8 fp = bpf2a64[BPF_REG_FP]; const u8 tcc = bpf2a64[TCALL_CNT]; const u8 fpb = bpf2a64[FP_BOTTOM]; + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const int idx0 = ctx->idx; int cur_offset; @@ -411,6 +416,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, /* Set up function call stack */ emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + + if (arena_vm_start) + emit_a64_mov_i64(arena_vm_base, arena_vm_start, ctx); + return 0; } @@ -485,20 +494,26 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 code = insn->code; + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const u8 dst = bpf2a64[insn->dst_reg]; const u8 src = bpf2a64[insn->src_reg]; const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; const bool isdw = BPF_SIZE(code) == BPF_DW; + const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC; const s16 off = insn->off; - u8 reg; + u8 reg = dst; - if (!off) { - reg = dst; - } else { - emit_a64_mov_i(1, tmp, off, ctx); - emit(A64_ADD(1, tmp, tmp, dst), ctx); - reg = tmp; + if (off || arena) { + if (off) { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_ADD(1, tmp, tmp, dst), ctx); + reg = tmp; + } + if 
(arena) { + emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx); + reg = tmp; + } } switch (insn->imm) { @@ -567,6 +582,12 @@ static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) u8 reg; s32 jmp_offset; + if (BPF_MODE(code) == BPF_PROBE_ATOMIC) { + /* ll_sc based atomics don't support unsafe pointers yet. */ + pr_err_once("unknown atomic opcode %02x\n", code); + return -EINVAL; + } + if (!off) { reg = dst; } else { @@ -738,6 +759,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb) #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) +#define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */ bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs) @@ -745,7 +767,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex, off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); - regs->regs[dst_reg] = 0; + if (dst_reg != DONT_CLEAR) + regs->regs[dst_reg] = 0; regs->pc = (unsigned long)&ex->fixup - offset; return true; } @@ -765,7 +788,9 @@ static int add_exception_handler(const struct bpf_insn *insn, return 0; if (BPF_MODE(insn->code) != BPF_PROBE_MEM && - BPF_MODE(insn->code) != BPF_PROBE_MEMSX) + BPF_MODE(insn->code) != BPF_PROBE_MEMSX && + BPF_MODE(insn->code) != BPF_PROBE_MEM32 && + BPF_MODE(insn->code) != BPF_PROBE_ATOMIC) return 0; if (!ctx->prog->aux->extable || @@ -810,6 +835,9 @@ static int add_exception_handler(const struct bpf_insn *insn, ex->insn = ins_offset; + if (BPF_CLASS(insn->code) != BPF_LDX) + dst_reg = DONT_CLEAR; + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); @@ -829,12 +857,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass) { const u8 code = insn->code; - const u8 dst = bpf2a64[insn->dst_reg]; - const u8 src = bpf2a64[insn->src_reg]; + u8 dst = bpf2a64[insn->dst_reg]; + u8 src = bpf2a64[insn->src_reg]; const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; const u8 fp = bpf2a64[BPF_REG_FP]; const u8 fpb = bpf2a64[FP_BOTTOM]; + const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const s16 off = insn->off; const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; @@ -853,6 +882,24 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: + if (insn_is_cast_user(insn)) { + emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits + emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx); + emit(A64_LSL(1, dst, dst, 32), ctx); + emit(A64_CBZ(1, tmp, 2), ctx); + emit(A64_ORR(1, tmp, dst, tmp), ctx); + emit(A64_MOV(1, dst, tmp), ctx); + break; + } else if (insn_is_mov_percpu_addr(insn)) { + if (dst != src) + emit(A64_MOV(1, dst, src), ctx); + if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) + emit(A64_MRS_TPIDR_EL2(tmp), ctx); + else + emit(A64_MRS_TPIDR_EL1(tmp), ctx); + emit(A64_ADD(1, dst, dst, tmp), ctx); + break; + } switch (insn->off) { case 0: emit(A64_MOV(is64, dst, src), ctx); @@ -1181,6 +1228,21 @@ emit_cond_jmp: const u8 r0 = bpf2a64[BPF_REG_0]; bool func_addr_fixed; u64 func_addr; + u32 cpu_offset; + + /* Implement helper call to bpf_get_smp_processor_id() inline */ + if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) { + cpu_offset = offsetof(struct thread_info, cpu); + + emit(A64_MRS_SP_EL0(tmp), ctx); + if (is_lsi_offset(cpu_offset, 2)) { + 
emit(A64_LDR32I(r0, tmp, cpu_offset), ctx); + } else { + emit_a64_mov_i(1, tmp2, cpu_offset, ctx); + emit(A64_LDR32(r0, tmp, tmp2), ctx); + } + break; + } ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &func_addr, &func_addr_fixed); @@ -1237,7 +1299,15 @@ emit_cond_jmp: case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: - if (ctx->fpb_offset > 0 && src == fp) { + case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { + emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx); + src = tmp2; + } + if (ctx->fpb_offset > 0 && src == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { src_adj = fpb; off_adj = off + ctx->fpb_offset; } else { @@ -1322,7 +1392,15 @@ emit_cond_jmp: case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_DW: - if (ctx->fpb_offset > 0 && dst == fp) { + case BPF_ST | BPF_PROBE_MEM32 | BPF_B: + case BPF_ST | BPF_PROBE_MEM32 | BPF_H: + case BPF_ST | BPF_PROBE_MEM32 | BPF_W: + case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { + emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); + dst = tmp2; + } + if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { dst_adj = fpb; off_adj = off + ctx->fpb_offset; } else { @@ -1365,6 +1443,10 @@ emit_cond_jmp: } break; } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break; /* STX: *(size *)(dst + off) = src */ @@ -1372,7 +1454,15 @@ emit_cond_jmp: case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_DW: - if (ctx->fpb_offset > 0 && dst == fp) { + case BPF_STX | BPF_PROBE_MEM32 | BPF_B: + case BPF_STX | BPF_PROBE_MEM32 | BPF_H: + case BPF_STX | BPF_PROBE_MEM32 | BPF_W: + case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { + emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); + dst = tmp2; + } + if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { dst_adj = fpb; off_adj = off + ctx->fpb_offset; } else { @@ -1413,16 +1503,26 @@ emit_cond_jmp: } break; } + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break; case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) ret = emit_lse_atomic(insn, ctx); else ret = emit_ll_sc_atomic(insn, ctx); if (ret) return ret; + + ret = add_exception_handler(insn, ctx, dst); + if (ret) + return ret; break; default: @@ -1594,6 +1694,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bool tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; + u64 arena_vm_start; u8 *image_ptr; u8 *ro_image_ptr; @@ -1611,6 +1712,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = tmp; } + arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); jit_data = prog->aux->jit_data; if (!jit_data) { jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); @@ -1641,6 +1743,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } ctx.fpb_offset = find_fpb_offset(prog); + ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); /* * 1. Initial fake pass to compute ctx->idx and ctx->offset. 
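For the arena accesses above (BPF_PROBE_MEM32 loads/stores and BPF_PROBE_ATOMIC), the JIT keeps the arena's kernel base in the callee-saved ARENA_VM_START register set up in the prologue and adds it to the 32-bit arena offset before the access. A plain-C model of that translation; the base value is made up, the real one comes from bpf_arena_get_kern_vm_start():

#include <stdint.h>

/* Hypothetical arena base; the JIT materializes the real value once. */
static const uint64_t kern_vm_start = 0xffff800080000000ULL;

/*
 * BPF_PROBE_MEM32 semantics: the program supplies a 32-bit offset into
 * the arena and the generated code forms the kernel address by adding
 * the arena base held in ARENA_VM_START.
 */
static inline uint64_t arena_kern_addr(uint32_t offset)
{
	return kern_vm_start + offset;
}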
@@ -1648,7 +1751,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * BPF line info needs ctx->offset[i] to be the offset of * instruction[i] in jited image, so build prologue first. */ - if (build_prologue(&ctx, was_classic, prog->aux->exception_cb)) { + if (build_prologue(&ctx, was_classic, prog->aux->exception_cb, + arena_vm_start)) { prog = orig_prog; goto out_off; } @@ -1696,7 +1800,7 @@ skip_init_ctx: ctx.idx = 0; ctx.exentry_idx = 0; - build_prologue(&ctx, was_classic, prog->aux->exception_cb); + build_prologue(&ctx, was_classic, prog->aux->exception_cb, arena_vm_start); if (build_body(&ctx, extra_pass)) { prog = orig_prog; @@ -2165,12 +2269,9 @@ void arch_free_bpf_trampoline(void *image, unsigned int size) bpf_prog_pack_free(image, size); } -void arch_protect_bpf_trampoline(void *image, unsigned int size) -{ -} - -void arch_unprotect_bpf_trampoline(void *image, unsigned int size) +int arch_protect_bpf_trampoline(void *image, unsigned int size) { + return 0; } int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, @@ -2453,6 +2554,39 @@ bool bpf_jit_supports_exceptions(void) return true; } +bool bpf_jit_supports_arena(void) +{ + return true; +} + +bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) +{ + if (!in_arena) + return true; + switch (insn->code) { + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) + return false; + } + return true; +} + +bool bpf_jit_supports_percpu_insn(void) +{ + return true; +} + +bool bpf_jit_inlines_helper_call(s32 imm) +{ + switch (imm) { + case BPF_FUNC_get_smp_processor_id: + return true; + default: + return false; + } +} + void bpf_jit_free(struct bpf_prog *prog) { if (prog->jited) { diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index e73323d759d0..7dbefd4ba210 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -1294,16 +1294,19 @@ skip_init_ctx: flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx)); if (!prog->is_func || extra_pass) { + int err; + if (extra_pass && ctx.idx != jit_data->ctx.idx) { pr_err_once("multi-func JIT bug %d != %d\n", ctx.idx, jit_data->ctx.idx); - bpf_jit_binary_free(header); - prog->bpf_func = NULL; - prog->jited = 0; - prog->jited_len = 0; - goto out_offset; + goto out_free; + } + err = bpf_jit_binary_lock_ro(header); + if (err) { + pr_err_once("bpf_jit_binary_lock_ro() returned %d\n", + err); + goto out_free; } - bpf_jit_binary_lock_ro(header); } else { jit_data->ctx = ctx; jit_data->image = image_ptr; @@ -1334,6 +1337,13 @@ out: out_offset = -1; return prog; + +out_free: + bpf_jit_binary_free(header); + prog->bpf_func = NULL; + prog->jited = 0; + prog->jited_len = 0; + goto out_offset; } /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. 
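bpf_jit_inlines_helper_call() above lets the arm64 JIT replace calls to bpf_get_smp_processor_id() with an MRS of SP_EL0 (which holds the current task pointer, whose first member is struct thread_info) plus a load of the cpu field. A conceptual C model of what the emitted code computes; the struct layout here is illustrative only, the JIT uses the real offsetof(struct thread_info, cpu):

#include <stddef.h>

/* Illustrative layout; the real struct thread_info differs. */
struct thread_info_model {
	unsigned long flags;
	int cpu;
};

/*
 * Equivalent of "mrs tmp, sp_el0; ldr w0, [tmp, #cpu_offset]": one load
 * from the current thread_info instead of a helper call.
 */
static int inlined_smp_processor_id(const struct thread_info_model *ti)
{
	return ti->cpu;
}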
*/ diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 6ffa29585194..cc26df907bfe 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -3,8 +3,8 @@ config M68K bool default y select ARCH_32BIT_OFF_T - select ARCH_HAS_CPU_CACHE_ALIASING select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CPU_CACHE_ALIASING select ARCH_HAS_CPU_FINALIZE_INIT if MMU select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DMA_PREP_COHERENT if M68K_NONCOHERENT_DMA && !COLDFIRE @@ -18,7 +18,7 @@ config M68K select DMA_DIRECT_REMAP if M68K_NONCOHERENT_DMA && !COLDFIRE select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES - select GENERIC_IOMAP + select GENERIC_IOMAP if HAS_IOPORT select GENERIC_IRQ_SHOW select GENERIC_LIB_ASHLDI3 select GENERIC_LIB_ASHRDI3 diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 99718f3dc686..d4b170c861bf 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -836,7 +836,7 @@ static void amiga_get_hardware_list(struct seq_file *m) seq_printf(m, "\tZorro II%s AutoConfig: %d Expansion " "Device%s\n", AMIGAHW_PRESENT(ZORRO3) ? "I" : "", - zorro_num_autocon, zorro_num_autocon == 1 ? "" : "s"); + zorro_num_autocon, str_plural(zorro_num_autocon)); #endif /* CONFIG_ZORRO */ #undef AMIGAHW_ANNOUNCE diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index a6f6607efe79..6bb202d03d34 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -26,6 +26,7 @@ CONFIG_AMIGA_BUILTIN_SERIAL=y CONFIG_SERIAL_CONSOLE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_TRIM_UNUSED_KSYMS=y CONFIG_PARTITION_ADVANCED=y CONFIG_ATARI_PARTITION=y CONFIG_MAC_PARTITION=y @@ -217,7 +218,6 @@ CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m @@ -624,8 +624,6 @@ CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m -CONFIG_STRING_SELFTEST=m -CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 0ca1f9f930bc..3598e0cc66b0 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -21,6 +21,7 @@ CONFIG_HEARTBEAT=y CONFIG_PROC_HARDWARE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_TRIM_UNUSED_KSYMS=y CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y CONFIG_ATARI_PARTITION=y @@ -213,7 +214,6 @@ CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m @@ -581,8 +581,6 @@ CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m -CONFIG_STRING_SELFTEST=m -CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index be030659d8d7..3fbb8a9a307d 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -29,6 +29,7 @@ CONFIG_ATARI_ETHERNEC=y CONFIG_ATARI_DSP56K=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_TRIM_UNUSED_KSYMS=y CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y CONFIG_MAC_PARTITION=y @@ -220,7 +221,6 @@ CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m 
CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m @@ -601,8 +601,6 @@ CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m -CONFIG_STRING_SELFTEST=m -CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index ad8f81fbb630..9a2b0c7871ce 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -19,6 +19,7 @@ CONFIG_BVME6000=y CONFIG_PROC_HARDWARE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_TRIM_UNUSED_KSYMS=y CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y CONFIG_ATARI_PARTITION=y @@ -210,7 +211,6 @@ CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m @@ -573,8 +573,6 @@ CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m -CONFIG_STRING_SELFTEST=m -CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index ff253b6accec..2408785e0e48 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -20,6 +20,7 @@ CONFIG_HP300=y CONFIG_PROC_HARDWARE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_TRIM_UNUSED_KSYMS=y CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y CONFIG_ATARI_PARTITION=y @@ -212,7 +213,6 @@ CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m @@ -583,8 +583,6 @@ CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m -CONFIG_STRING_SELFTEST=m -CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index f92b866620a7..6789d03b7b52 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -20,6 +20,7 @@ CONFIG_MAC=y CONFIG_PROC_HARDWARE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_TRIM_UNUSED_KSYMS=y CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y CONFIG_ATARI_PARTITION=y @@ -211,7 +212,6 @@ CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m @@ -600,8 +600,6 @@ CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m -CONFIG_STRING_SELFTEST=m -CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index bd813beaa8a7..b57af39db3b4 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -44,6 +44,7 @@ CONFIG_AMIGA_BUILTIN_SERIAL=y CONFIG_SERIAL_CONSOLE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_TRIM_UNUSED_KSYMS=y CONFIG_PARTITION_ADVANCED=y CONFIG_BSD_DISKLABEL=y CONFIG_MINIX_SUBPARTITION=y @@ -231,7 +232,6 @@ CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m @@ -686,8 +686,6 @@ 
CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m -CONFIG_STRING_SELFTEST=m -CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 2237ee0fe433..29b70f27aedd 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -18,6 +18,7 @@ CONFIG_MVME147=y CONFIG_PROC_HARDWARE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_TRIM_UNUSED_KSYMS=y CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y CONFIG_ATARI_PARTITION=y @@ -209,7 +210,6 @@ CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m @@ -572,8 +572,6 @@ CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m -CONFIG_STRING_SELFTEST=m -CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index afb5aa9c5012..757c582ffe84 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -19,6 +19,7 @@ CONFIG_MVME16x=y CONFIG_PROC_HARDWARE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_TRIM_UNUSED_KSYMS=y CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y CONFIG_ATARI_PARTITION=y @@ -210,7 +211,6 @@ CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m @@ -573,8 +573,6 @@ CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m -CONFIG_STRING_SELFTEST=m -CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index e40f7a308966..f15d1009108a 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -19,6 +19,7 @@ CONFIG_HEARTBEAT=y CONFIG_PROC_HARDWARE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_TRIM_UNUSED_KSYMS=y CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y CONFIG_ATARI_PARTITION=y @@ -211,7 +212,6 @@ CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m @@ -590,8 +590,6 @@ CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m -CONFIG_STRING_SELFTEST=m -CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 4df397c0395f..5218c1e614b5 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -16,6 +16,7 @@ CONFIG_SUN3=y CONFIG_PROC_HARDWARE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_TRIM_UNUSED_KSYMS=y CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y CONFIG_ATARI_PARTITION=y @@ -206,7 +207,6 @@ CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m @@ -570,8 +570,6 @@ CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m 
-CONFIG_STRING_SELFTEST=m -CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index aa7719b3947f..acdf4eb7af28 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -16,6 +16,7 @@ CONFIG_SUN3X=y CONFIG_PROC_HARDWARE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_TRIM_UNUSED_KSYMS=y CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y CONFIG_ATARI_PARTITION=y @@ -207,7 +208,6 @@ CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m @@ -571,8 +571,6 @@ CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m -CONFIG_STRING_SELFTEST=m -CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m diff --git a/arch/m68k/include/asm/pgtable.h b/arch/m68k/include/asm/pgtable.h index 27525c6a12fd..49fcfd734860 100644 --- a/arch/m68k/include/asm/pgtable.h +++ b/arch/m68k/include/asm/pgtable.h @@ -2,6 +2,8 @@ #ifndef __M68K_PGTABLE_H #define __M68K_PGTABLE_H +#include <asm/page.h> + #ifdef __uClinux__ #include <asm/pgtable_no.h> #else diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h index 31be2ad999ca..3e31adbddc75 100644 --- a/arch/m68k/include/asm/thread_info.h +++ b/arch/m68k/include/asm/thread_info.h @@ -12,14 +12,15 @@ */ #if PAGE_SHIFT < 13 #ifdef CONFIG_4KSTACKS -#define THREAD_SIZE 4096 +#define THREAD_SIZE_ORDER 0 #else -#define THREAD_SIZE 8192 +#define THREAD_SIZE_ORDER 1 #endif #else -#define THREAD_SIZE PAGE_SIZE +#define THREAD_SIZE_ORDER 0 #endif -#define THREAD_SIZE_ORDER ((THREAD_SIZE / PAGE_SIZE) - 1) + +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #ifndef __ASSEMBLY__ diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 3bcdd32a6b36..338b474910f7 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -430,7 +430,9 @@ resume: movec %a0,%dfc /* restore status register */ - movew %a1@(TASK_THREAD+THREAD_SR),%sr + movew %a1@(TASK_THREAD+THREAD_SR),%d0 + oriw #0x0700,%d0 + movew %d0,%sr rts diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c index 4c8f8cbfa05f..e7f0f72c1b36 100644 --- a/arch/m68k/mac/misc.c +++ b/arch/m68k/mac/misc.c @@ -453,30 +453,18 @@ void mac_poweroff(void) void mac_reset(void) { - if (macintosh_config->adb_type == MAC_ADB_II && - macintosh_config->ident != MAC_MODEL_SE30) { - /* need ROMBASE in booter */ - /* indeed, plus need to MAP THE ROM !! */ - - if (mac_bi_data.rombase == 0) - mac_bi_data.rombase = 0x40800000; - - /* works on some */ - rom_reset = (void *) (mac_bi_data.rombase + 0xa); - - local_irq_disable(); - rom_reset(); #ifdef CONFIG_ADB_CUDA - } else if (macintosh_config->adb_type == MAC_ADB_EGRET || - macintosh_config->adb_type == MAC_ADB_CUDA) { + if (macintosh_config->adb_type == MAC_ADB_EGRET || + macintosh_config->adb_type == MAC_ADB_CUDA) { cuda_restart(); + } else #endif #ifdef CONFIG_ADB_PMU - } else if (macintosh_config->adb_type == MAC_ADB_PB2) { + if (macintosh_config->adb_type == MAC_ADB_PB2) { pmu_restart(); + } else #endif - } else if (CPU_IS_030) { - + if (CPU_IS_030) { /* 030-specific reset routine. The idea is general, but the * specific registers to reset are '030-specific. Until I * have a non-030 machine, I can't test anything else. 
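The m68k thread_info.h change above inverts the derivation: THREAD_SIZE_ORDER is now chosen directly per configuration and THREAD_SIZE is computed as PAGE_SIZE << THREAD_SIZE_ORDER, rather than back-computing the order from the size. A quick sanity check of the three cases; it assumes 4 KiB pages for the first two and 8 KiB pages for the last, purely for illustration:

#include <assert.h>

int main(void)
{
	assert((4096u << 0) == 4096u);	/* PAGE_SHIFT < 13, CONFIG_4KSTACKS */
	assert((4096u << 1) == 8192u);	/* PAGE_SHIFT < 13, 8 KiB stacks */
	assert((8192u << 0) == 8192u);	/* PAGE_SHIFT >= 13, one page */
	return 0;
}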
@@ -524,6 +512,18 @@ void mac_reset(void) "jmp %/a0@\n\t" /* jump to the reset vector */ ".chip 68k" : : "r" (offset), "a" (rombase) : "a0"); + } else { + /* need ROMBASE in booter */ + /* indeed, plus need to MAP THE ROM !! */ + + if (mac_bi_data.rombase == 0) + mac_bi_data.rombase = 0x40800000; + + /* works on some */ + rom_reset = (void *)(mac_bi_data.rombase + 0xa); + + local_irq_disable(); + rom_reset(); } /* should never get here */ diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index 4da7bc4ac4a3..176314f3c9aa 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -4,7 +4,7 @@ CONFIG_AUDIT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_EXPERT=y -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y CONFIG_KALLSYMS_ALL=y CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1 CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1 diff --git a/arch/mips/configs/rs90_defconfig b/arch/mips/configs/rs90_defconfig index 4b9e36d6400e..a53dd66e9b86 100644 --- a/arch/mips/configs/rs90_defconfig +++ b/arch/mips/configs/rs90_defconfig @@ -9,7 +9,7 @@ CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y # CONFIG_SGETMASK_SYSCALL is not set # CONFIG_SYSFS_SYSCALL is not set # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_TIMERFD is not set # CONFIG_AIO is not set # CONFIG_IO_URING is not set diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index 6c3704f51d0d..87f0a1436bf9 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -756,7 +756,7 @@ void __init arch_init_irq(void) NULL)) pr_err("Failed to register fpu interrupt\n"); desc_fpu = irq_to_desc(irq_fpu); - fpu_kstat_irq = this_cpu_ptr(desc_fpu->kstat_irqs); + fpu_kstat_irq = this_cpu_ptr(&desc_fpu->kstat_irqs->cnt); } if (dec_interrupt[DEC_IRQ_CASCADE] >= 0) { if (request_irq(dec_interrupt[DEC_IRQ_CASCADE], no_action, diff --git a/arch/mips/net/bpf_jit_comp.c b/arch/mips/net/bpf_jit_comp.c index a40d926b6513..e355dfca4400 100644 --- a/arch/mips/net/bpf_jit_comp.c +++ b/arch/mips/net/bpf_jit_comp.c @@ -1012,7 +1012,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bpf_prog_fill_jited_linfo(prog, &ctx.descriptors[1]); /* Set as read-only exec and flush instruction cache */ - bpf_jit_binary_lock_ro(header); + if (bpf_jit_binary_lock_ro(header)) + goto out_err; flush_icache_range((unsigned long)header, (unsigned long)&ctx.target[ctx.jit_index]); diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 3586cda55bde..69c0258700b2 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -188,6 +188,15 @@ config SMP If you don't know what to do here, say N. +config FPU + bool "FPU support" + default y + help + Say N here if you want to disable all floating-point related procedures + in the kernel and reduce binary size. + + If you don't know what to do here, say Y. 
+ source "kernel/Kconfig.hz" config OPENRISC_NO_SPR_SR_DSX diff --git a/arch/openrisc/include/asm/fpu.h b/arch/openrisc/include/asm/fpu.h new file mode 100644 index 000000000000..57bc44d80d53 --- /dev/null +++ b/arch/openrisc/include/asm/fpu.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_OPENRISC_FPU_H +#define __ASM_OPENRISC_FPU_H + +struct task_struct; + +#ifdef CONFIG_FPU +static inline void save_fpu(struct task_struct *task) +{ + task->thread.fpcsr = mfspr(SPR_FPCSR); +} + +static inline void restore_fpu(struct task_struct *task) +{ + mtspr(SPR_FPCSR, task->thread.fpcsr); +} +#else +#define save_fpu(tsk) do { } while (0) +#define restore_fpu(tsk) do { } while (0) +#endif + +#endif /* __ASM_OPENRISC_FPU_H */ diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index 3b736e74e6ed..e05d1b59e24e 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -44,6 +44,7 @@ struct task_struct; struct thread_struct { + long fpcsr; /* Floating point control status register. */ }; /* diff --git a/arch/openrisc/include/asm/ptrace.h b/arch/openrisc/include/asm/ptrace.h index 375147ff71fc..1da3e66292e2 100644 --- a/arch/openrisc/include/asm/ptrace.h +++ b/arch/openrisc/include/asm/ptrace.h @@ -59,7 +59,7 @@ struct pt_regs { * -1 for all other exceptions. */ long orig_gpr11; /* For restarting system calls */ - long fpcsr; /* Floating point control status register. */ + long dummy; /* Cheap alignment fix */ long dummy2; /* Cheap alignment fix */ }; @@ -115,6 +115,5 @@ static inline long regs_return_value(struct pt_regs *regs) #define PT_GPR31 124 #define PT_PC 128 #define PT_ORIG_GPR11 132 -#define PT_FPCSR 136 #endif /* __ASM_OPENRISC_PTRACE_H */ diff --git a/arch/openrisc/include/uapi/asm/elf.h b/arch/openrisc/include/uapi/asm/elf.h index 6868f81c281e..441e343f8268 100644 --- a/arch/openrisc/include/uapi/asm/elf.h +++ b/arch/openrisc/include/uapi/asm/elf.h @@ -34,15 +34,72 @@ #include <asm/ptrace.h> /* The OR1K relocation types... 
not all relevant for module loader */ -#define R_OR32_NONE 0 -#define R_OR32_32 1 -#define R_OR32_16 2 -#define R_OR32_8 3 -#define R_OR32_CONST 4 -#define R_OR32_CONSTH 5 -#define R_OR32_JUMPTARG 6 -#define R_OR32_VTINHERIT 7 -#define R_OR32_VTENTRY 8 +#define R_OR1K_NONE 0 +#define R_OR1K_32 1 +#define R_OR1K_16 2 +#define R_OR1K_8 3 +#define R_OR1K_LO_16_IN_INSN 4 +#define R_OR1K_HI_16_IN_INSN 5 +#define R_OR1K_INSN_REL_26 6 +#define R_OR1K_GNU_VTENTRY 7 +#define R_OR1K_GNU_VTINHERIT 8 +#define R_OR1K_32_PCREL 9 +#define R_OR1K_16_PCREL 10 +#define R_OR1K_8_PCREL 11 +#define R_OR1K_GOTPC_HI16 12 +#define R_OR1K_GOTPC_LO16 13 +#define R_OR1K_GOT16 14 +#define R_OR1K_PLT26 15 +#define R_OR1K_GOTOFF_HI16 16 +#define R_OR1K_GOTOFF_LO16 17 +#define R_OR1K_COPY 18 +#define R_OR1K_GLOB_DAT 19 +#define R_OR1K_JMP_SLOT 20 +#define R_OR1K_RELATIVE 21 +#define R_OR1K_TLS_GD_HI16 22 +#define R_OR1K_TLS_GD_LO16 23 +#define R_OR1K_TLS_LDM_HI16 24 +#define R_OR1K_TLS_LDM_LO16 25 +#define R_OR1K_TLS_LDO_HI16 26 +#define R_OR1K_TLS_LDO_LO16 27 +#define R_OR1K_TLS_IE_HI16 28 +#define R_OR1K_TLS_IE_LO16 29 +#define R_OR1K_TLS_LE_HI16 30 +#define R_OR1K_TLS_LE_LO16 31 +#define R_OR1K_TLS_TPOFF 32 +#define R_OR1K_TLS_DTPOFF 33 +#define R_OR1K_TLS_DTPMOD 34 +#define R_OR1K_AHI16 35 +#define R_OR1K_GOTOFF_AHI16 36 +#define R_OR1K_TLS_IE_AHI16 37 +#define R_OR1K_TLS_LE_AHI16 38 +#define R_OR1K_SLO16 39 +#define R_OR1K_GOTOFF_SLO16 40 +#define R_OR1K_TLS_LE_SLO16 41 +#define R_OR1K_PCREL_PG21 42 +#define R_OR1K_GOT_PG21 43 +#define R_OR1K_TLS_GD_PG21 44 +#define R_OR1K_TLS_LDM_PG21 45 +#define R_OR1K_TLS_IE_PG21 46 +#define R_OR1K_LO13 47 +#define R_OR1K_GOT_LO13 48 +#define R_OR1K_TLS_GD_LO13 49 +#define R_OR1K_TLS_LDM_LO13 50 +#define R_OR1K_TLS_IE_LO13 51 +#define R_OR1K_SLO13 52 +#define R_OR1K_PLTA26 53 +#define R_OR1K_GOT_AHI16 54 + +/* Old relocation names */ +#define R_OR32_NONE R_OR1K_NONE +#define R_OR32_32 R_OR1K_32 +#define R_OR32_16 R_OR1K_16 +#define R_OR32_8 R_OR1K_8 +#define R_OR32_CONST R_OR1K_LO_16_IN_INSN +#define R_OR32_CONSTH R_OR1K_HI_16_IN_INSN +#define R_OR32_JUMPTARG R_OR1K_INSN_REL_26 +#define R_OR32_VTENTRY R_OR1K_GNU_VTENTRY +#define R_OR32_VTINHERIT R_OR1K_GNU_VTINHERIT typedef unsigned long elf_greg_t; diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index c9f48e750b72..440711d7bf40 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -106,8 +106,6 @@ l.mtspr r0,r3,SPR_EPCR_BASE ;\ l.lwz r3,PT_SR(r1) ;\ l.mtspr r0,r3,SPR_ESR_BASE ;\ - l.lwz r3,PT_FPCSR(r1) ;\ - l.mtspr r0,r3,SPR_FPCSR ;\ l.lwz r2,PT_GPR2(r1) ;\ l.lwz r3,PT_GPR3(r1) ;\ l.lwz r4,PT_GPR4(r1) ;\ @@ -177,8 +175,6 @@ handler: ;\ /* r30 already save */ ;\ l.sw PT_GPR31(r1),r31 ;\ TRACE_IRQS_OFF_ENTRY ;\ - l.mfspr r30,r0,SPR_FPCSR ;\ - l.sw PT_FPCSR(r1),r30 ;\ /* Store -1 in orig_gpr11 for non-syscall exceptions */ ;\ l.addi r30,r0,-1 ;\ l.sw PT_ORIG_GPR11(r1),r30 @@ -219,8 +215,6 @@ handler: ;\ /* Store -1 in orig_gpr11 for non-syscall exceptions */ ;\ l.addi r30,r0,-1 ;\ l.sw PT_ORIG_GPR11(r1),r30 ;\ - l.mfspr r30,r0,SPR_FPCSR ;\ - l.sw PT_FPCSR(r1),r30 ;\ l.addi r3,r1,0 ;\ /* r4 is exception EA */ ;\ l.addi r5,r0,vector ;\ @@ -852,6 +846,7 @@ _syscall_badsys: EXCEPTION_ENTRY(_fpe_trap_handler) CLEAR_LWA_FLAG(r3) + /* r4: EA of fault (set by EXCEPTION_HANDLE) */ l.jal do_fpe_trap l.addi r3,r1,0 /* pt_regs */ @@ -1100,10 +1095,6 @@ ENTRY(_switch) l.sw PT_GPR28(r1),r28 l.sw PT_GPR30(r1),r30 - /* Store the old FPU state to new pt_regs */ - l.mfspr r29,r0,SPR_FPCSR - l.sw 
PT_FPCSR(r1),r29 - l.addi r11,r10,0 /* Save old 'current' to 'last' return value*/ /* We use thread_info->ksp for storing the address of the above @@ -1126,10 +1117,6 @@ ENTRY(_switch) l.lwz r29,PT_SP(r1) l.sw TI_KSP(r10),r29 - /* Restore the old value of FPCSR */ - l.lwz r29,PT_FPCSR(r1) - l.mtspr r0,r29,SPR_FPCSR - /* ...and restore the registers, except r11 because the return value * has already been set above. */ diff --git a/arch/openrisc/kernel/module.c b/arch/openrisc/kernel/module.c index 532013f523ac..c9ff4c4a0b29 100644 --- a/arch/openrisc/kernel/module.c +++ b/arch/openrisc/kernel/module.c @@ -39,22 +39,32 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, value = sym->st_value + rel[i].r_addend; switch (ELF32_R_TYPE(rel[i].r_info)) { - case R_OR32_32: + case R_OR1K_32: *location = value; break; - case R_OR32_CONST: + case R_OR1K_LO_16_IN_INSN: *((uint16_t *)location + 1) = value; break; - case R_OR32_CONSTH: + case R_OR1K_HI_16_IN_INSN: *((uint16_t *)location + 1) = value >> 16; break; - case R_OR32_JUMPTARG: + case R_OR1K_INSN_REL_26: value -= (uint32_t)location; value >>= 2; value &= 0x03ffffff; value |= *location & 0xfc000000; *location = value; break; + case R_OR1K_AHI16: + /* Adjust the operand to match with a signed LO16. */ + value += 0x8000; + *((uint16_t *)location + 1) = value >> 16; + break; + case R_OR1K_SLO16: + /* Split value lower 16-bits. */ + value = ((value & 0xf800) << 10) | (value & 0x7ff); + *location = (*location & ~0x3e007ff) | value; + break; default: pr_err("module %s: Unknown relocation: %u\n", me->name, ELF32_R_TYPE(rel[i].r_info)); diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 86e02929f3ac..eef99fee2110 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -36,6 +36,7 @@ #include <linux/reboot.h> #include <linux/uaccess.h> +#include <asm/fpu.h> #include <asm/io.h> #include <asm/processor.h> #include <asm/spr_defs.h> @@ -65,7 +66,7 @@ void machine_restart(char *cmd) } /* - * This is used if pm_power_off has not been set by a power management + * This is used if a sys-off handler was not set by a power management * driver, in this case we can assume we are on a simulator. On * OpenRISC simulators l.nop 1 will trigger the simulator exit. */ @@ -89,10 +90,8 @@ void machine_halt(void) void machine_power_off(void) { printk(KERN_INFO "*** MACHINE POWER OFF ***\n"); - if (pm_power_off != NULL) - pm_power_off(); - else - default_power_off(); + do_kernel_power_off(); + default_power_off(); } /* @@ -246,6 +245,8 @@ struct task_struct *__switch_to(struct task_struct *old, local_irq_save(flags); + save_fpu(current); + /* current_set is an array of saved current pointers * (one for each cpu). we need them at user->kernel transition, * while we save them at kernel->user transition @@ -258,6 +259,8 @@ struct task_struct *__switch_to(struct task_struct *old, current_thread_info_set[smp_processor_id()] = new_ti; last = (_switch(old_ti, new_ti))->task; + restore_fpu(current); + local_irq_restore(flags); return last; diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c index 1eeac3b62e9d..5091b18eab4c 100644 --- a/arch/openrisc/kernel/ptrace.c +++ b/arch/openrisc/kernel/ptrace.c @@ -88,6 +88,7 @@ static int genregs_set(struct task_struct *target, return ret; } +#ifdef CONFIG_FPU /* * As OpenRISC shares GPRs and floating point registers we don't need to export * the floating point registers again. 
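The R_OR1K_AHI16 handler above uses the usual "adjusted high" trick: biasing by 0x8000 before taking the upper half so that a following signed 16-bit low part reconstructs the full value (R_OR1K_SLO16 then only has to scatter the low half into the store-instruction encoding). A stand-alone check of that identity, with an arbitrary test value:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t value = 0x12348765;	/* arbitrary symbol value + addend */

	/* R_OR1K_AHI16: pre-bias by 0x8000 so a signed low half works. */
	uint16_t hi = (value + 0x8000) >> 16;
	int16_t lo = (int16_t)(value & 0xffff);

	/* A high move of hi plus a signed 16-bit low part restores the value. */
	assert(((uint32_t)hi << 16) + (uint32_t)lo == value);
	return 0;
}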
So here we only export the fpcsr special @@ -97,9 +98,7 @@ static int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - const struct pt_regs *regs = task_pt_regs(target); - - return membuf_store(&to, regs->fpcsr); + return membuf_store(&to, target->thread.fpcsr); } static int fpregs_set(struct task_struct *target, @@ -107,21 +106,20 @@ static int fpregs_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct pt_regs *regs = task_pt_regs(target); - int ret; - /* FPCSR */ - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - ®s->fpcsr, 0, 4); - return ret; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpcsr, 0, 4); } +#endif /* * Define the register sets available on OpenRISC under Linux */ enum or1k_regset { REGSET_GENERAL, +#ifdef CONFIG_FPU REGSET_FPU, +#endif }; static const struct user_regset or1k_regsets[] = { @@ -133,6 +131,7 @@ static const struct user_regset or1k_regsets[] = { .regset_get = genregs_get, .set = genregs_set, }, +#ifdef CONFIG_FPU [REGSET_FPU] = { .core_note_type = NT_PRFPREG, .n = sizeof(struct __or1k_fpu_state) / sizeof(long), @@ -141,6 +140,7 @@ static const struct user_regset or1k_regsets[] = { .regset_get = fpregs_get, .set = fpregs_set, }, +#endif }; static const struct user_regset_view user_or1k_native_view = { diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index e2f21a5d8ad9..c7ab42e2cb7a 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -23,6 +23,7 @@ #include <linux/stddef.h> #include <linux/resume_user_mode.h> +#include <asm/fpu.h> #include <asm/processor.h> #include <asm/syscall.h> #include <asm/ucontext.h> @@ -39,6 +40,37 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs); asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall); +#ifdef CONFIG_FPU +static long restore_fp_state(struct sigcontext __user *sc) +{ + long err; + + err = __copy_from_user(¤t->thread.fpcsr, &sc->fpcsr, sizeof(unsigned long)); + if (unlikely(err)) + return err; + + /* Restore the FPU state */ + restore_fpu(current); + + return 0; +} + +static long save_fp_state(struct sigcontext __user *sc) +{ + long err; + + /* Sync the user FPU state so we can copy to sigcontext */ + save_fpu(current); + + err = __copy_to_user(&sc->fpcsr, ¤t->thread.fpcsr, sizeof(unsigned long)); + + return err; +} +#else +#define save_fp_state(sc) (0) +#define restore_fp_state(sc) (0) +#endif + static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { @@ -55,7 +87,7 @@ static int restore_sigcontext(struct pt_regs *regs, err |= __copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long)); err |= __copy_from_user(®s->pc, &sc->regs.pc, sizeof(unsigned long)); err |= __copy_from_user(®s->sr, &sc->regs.sr, sizeof(unsigned long)); - err |= __copy_from_user(®s->fpcsr, &sc->fpcsr, sizeof(unsigned long)); + err |= restore_fp_state(sc); /* make sure the SM-bit is cleared so user-mode cannot fool us */ regs->sr &= ~SPR_SR_SM; @@ -118,7 +150,7 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) err |= __copy_to_user(sc->regs.gpr, regs, 32 * sizeof(unsigned long)); err |= __copy_to_user(&sc->regs.pc, ®s->pc, sizeof(unsigned long)); err |= __copy_to_user(&sc->regs.sr, ®s->sr, sizeof(unsigned long)); - err |= __copy_to_user(&sc->fpcsr, ®s->fpcsr, sizeof(unsigned long)); + err |= save_fp_state(sc); return err; } diff --git 
a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 9370888c9a7e..c195be9cc9fc 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -31,6 +31,7 @@ #include <linux/uaccess.h> #include <asm/bug.h> +#include <asm/fpu.h> #include <asm/io.h> #include <asm/processor.h> #include <asm/unwinder.h> @@ -51,16 +52,16 @@ static void print_trace(void *data, unsigned long addr, int reliable) { const char *loglvl = data; - printk("%s[<%p>] %s%pS\n", loglvl, (void *) addr, reliable ? "" : "? ", - (void *) addr); + pr_info("%s[<%p>] %s%pS\n", loglvl, (void *) addr, reliable ? "" : "? ", + (void *) addr); } static void print_data(unsigned long base_addr, unsigned long word, int i) { if (i == 0) - printk("(%08lx:)\t%08lx", base_addr + (i * 4), word); + pr_info("(%08lx:)\t%08lx", base_addr + (i * 4), word); else - printk(" %08lx:\t%08lx", base_addr + (i * 4), word); + pr_info(" %08lx:\t%08lx", base_addr + (i * 4), word); } /* displays a short stack trace */ @@ -69,7 +70,7 @@ void show_stack(struct task_struct *task, unsigned long *esp, const char *loglvl if (esp == NULL) esp = (unsigned long *)&esp; - printk("%sCall trace:\n", loglvl); + pr_info("%sCall trace:\n", loglvl); unwind_stack((void *)loglvl, esp, print_trace); } @@ -83,57 +84,56 @@ void show_registers(struct pt_regs *regs) if (user_mode(regs)) in_kernel = 0; - printk("CPU #: %d\n" - " PC: %08lx SR: %08lx SP: %08lx FPCSR: %08lx\n", - smp_processor_id(), regs->pc, regs->sr, regs->sp, - regs->fpcsr); - printk("GPR00: %08lx GPR01: %08lx GPR02: %08lx GPR03: %08lx\n", - 0L, regs->gpr[1], regs->gpr[2], regs->gpr[3]); - printk("GPR04: %08lx GPR05: %08lx GPR06: %08lx GPR07: %08lx\n", - regs->gpr[4], regs->gpr[5], regs->gpr[6], regs->gpr[7]); - printk("GPR08: %08lx GPR09: %08lx GPR10: %08lx GPR11: %08lx\n", - regs->gpr[8], regs->gpr[9], regs->gpr[10], regs->gpr[11]); - printk("GPR12: %08lx GPR13: %08lx GPR14: %08lx GPR15: %08lx\n", - regs->gpr[12], regs->gpr[13], regs->gpr[14], regs->gpr[15]); - printk("GPR16: %08lx GPR17: %08lx GPR18: %08lx GPR19: %08lx\n", - regs->gpr[16], regs->gpr[17], regs->gpr[18], regs->gpr[19]); - printk("GPR20: %08lx GPR21: %08lx GPR22: %08lx GPR23: %08lx\n", - regs->gpr[20], regs->gpr[21], regs->gpr[22], regs->gpr[23]); - printk("GPR24: %08lx GPR25: %08lx GPR26: %08lx GPR27: %08lx\n", - regs->gpr[24], regs->gpr[25], regs->gpr[26], regs->gpr[27]); - printk("GPR28: %08lx GPR29: %08lx GPR30: %08lx GPR31: %08lx\n", - regs->gpr[28], regs->gpr[29], regs->gpr[30], regs->gpr[31]); - printk(" RES: %08lx oGPR11: %08lx\n", - regs->gpr[11], regs->orig_gpr11); - - printk("Process %s (pid: %d, stackpage=%08lx)\n", - current->comm, current->pid, (unsigned long)current); + pr_info("CPU #: %d\n" + " PC: %08lx SR: %08lx SP: %08lx\n", + smp_processor_id(), regs->pc, regs->sr, regs->sp); + pr_info("GPR00: %08lx GPR01: %08lx GPR02: %08lx GPR03: %08lx\n", + 0L, regs->gpr[1], regs->gpr[2], regs->gpr[3]); + pr_info("GPR04: %08lx GPR05: %08lx GPR06: %08lx GPR07: %08lx\n", + regs->gpr[4], regs->gpr[5], regs->gpr[6], regs->gpr[7]); + pr_info("GPR08: %08lx GPR09: %08lx GPR10: %08lx GPR11: %08lx\n", + regs->gpr[8], regs->gpr[9], regs->gpr[10], regs->gpr[11]); + pr_info("GPR12: %08lx GPR13: %08lx GPR14: %08lx GPR15: %08lx\n", + regs->gpr[12], regs->gpr[13], regs->gpr[14], regs->gpr[15]); + pr_info("GPR16: %08lx GPR17: %08lx GPR18: %08lx GPR19: %08lx\n", + regs->gpr[16], regs->gpr[17], regs->gpr[18], regs->gpr[19]); + pr_info("GPR20: %08lx GPR21: %08lx GPR22: %08lx GPR23: %08lx\n", + regs->gpr[20], regs->gpr[21], 
regs->gpr[22], regs->gpr[23]); + pr_info("GPR24: %08lx GPR25: %08lx GPR26: %08lx GPR27: %08lx\n", + regs->gpr[24], regs->gpr[25], regs->gpr[26], regs->gpr[27]); + pr_info("GPR28: %08lx GPR29: %08lx GPR30: %08lx GPR31: %08lx\n", + regs->gpr[28], regs->gpr[29], regs->gpr[30], regs->gpr[31]); + pr_info(" RES: %08lx oGPR11: %08lx\n", + regs->gpr[11], regs->orig_gpr11); + + pr_info("Process %s (pid: %d, stackpage=%08lx)\n", + current->comm, current->pid, (unsigned long)current); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. */ if (in_kernel) { - printk("\nStack: "); + pr_info("\nStack: "); show_stack(NULL, (unsigned long *)esp, KERN_EMERG); if (esp < PAGE_OFFSET) goto bad_stack; - printk("\n"); + pr_info("\n"); for (i = -8; i < 24; i += 1) { unsigned long word; if (__get_user(word, &((unsigned long *)esp)[i])) { bad_stack: - printk(" Bad Stack value."); + pr_info(" Bad Stack value."); break; } print_data(esp, word, i); } - printk("\nCode: "); + pr_info("\nCode: "); if (regs->pc < PAGE_OFFSET) goto bad; @@ -142,14 +142,14 @@ bad_stack: if (__get_user(word, &((unsigned long *)regs->pc)[i])) { bad: - printk(" Bad PC value."); + pr_info(" Bad PC value."); break; } print_data(regs->pc, word, i); } } - printk("\n"); + pr_info("\n"); } /* This is normally the 'Oops' routine */ @@ -157,10 +157,10 @@ void __noreturn die(const char *str, struct pt_regs *regs, long err) { console_verbose(); - printk("\n%s#: %04lx\n", str, err & 0xffff); + pr_emerg("\n%s#: %04lx\n", str, err & 0xffff); show_registers(regs); #ifdef CONFIG_JUMP_UPON_UNHANDLED_EXCEPTION - printk("\n\nUNHANDLED_EXCEPTION: entering infinite loop\n"); + pr_emerg("\n\nUNHANDLED_EXCEPTION: entering infinite loop\n"); /* shut down interrupts */ local_irq_disable(); @@ -173,36 +173,51 @@ void __noreturn die(const char *str, struct pt_regs *regs, long err) asmlinkage void unhandled_exception(struct pt_regs *regs, int ea, int vector) { - printk("Unable to handle exception at EA =0x%x, vector 0x%x", - ea, vector); + pr_emerg("Unable to handle exception at EA =0x%x, vector 0x%x", + ea, vector); die("Oops", regs, 9); } asmlinkage void do_fpe_trap(struct pt_regs *regs, unsigned long address) { - int code = FPE_FLTUNK; - unsigned long fpcsr = regs->fpcsr; - - if (fpcsr & SPR_FPCSR_IVF) - code = FPE_FLTINV; - else if (fpcsr & SPR_FPCSR_OVF) - code = FPE_FLTOVF; - else if (fpcsr & SPR_FPCSR_UNF) - code = FPE_FLTUND; - else if (fpcsr & SPR_FPCSR_DZF) - code = FPE_FLTDIV; - else if (fpcsr & SPR_FPCSR_IXF) - code = FPE_FLTRES; - - /* Clear all flags */ - regs->fpcsr &= ~SPR_FPCSR_ALLF; - - force_sig_fault(SIGFPE, code, (void __user *)regs->pc); + if (user_mode(regs)) { + int code = FPE_FLTUNK; +#ifdef CONFIG_FPU + unsigned long fpcsr; + + save_fpu(current); + fpcsr = current->thread.fpcsr; + + if (fpcsr & SPR_FPCSR_IVF) + code = FPE_FLTINV; + else if (fpcsr & SPR_FPCSR_OVF) + code = FPE_FLTOVF; + else if (fpcsr & SPR_FPCSR_UNF) + code = FPE_FLTUND; + else if (fpcsr & SPR_FPCSR_DZF) + code = FPE_FLTDIV; + else if (fpcsr & SPR_FPCSR_IXF) + code = FPE_FLTRES; + + /* Clear all flags */ + current->thread.fpcsr &= ~SPR_FPCSR_ALLF; + restore_fpu(current); +#endif + force_sig_fault(SIGFPE, code, (void __user *)regs->pc); + } else { + pr_emerg("KERNEL: Illegal fpe exception 0x%.8lx\n", regs->pc); + die("Die:", regs, SIGFPE); + } } asmlinkage void do_trap(struct pt_regs *regs, unsigned long address) { - force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc); + if (user_mode(regs)) { + force_sig_fault(SIGTRAP, TRAP_BRKPT, 
(void __user *)regs->pc); + } else { + pr_emerg("KERNEL: Illegal trap exception 0x%.8lx\n", regs->pc); + die("Die:", regs, SIGILL); + } } asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address) @@ -211,8 +226,7 @@ asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address) /* Send a SIGBUS */ force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address); } else { - printk("KERNEL: Unaligned Access 0x%.8lx\n", address); - show_registers(regs); + pr_emerg("KERNEL: Unaligned Access 0x%.8lx\n", address); die("Die:", regs, address); } @@ -224,8 +238,7 @@ asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address) /* Send a SIGBUS */ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } else { /* Kernel mode */ - printk("KERNEL: Bus error (SIGBUS) 0x%.8lx\n", address); - show_registers(regs); + pr_emerg("KERNEL: Bus error (SIGBUS) 0x%.8lx\n", address); die("Die:", regs, address); } } @@ -419,9 +432,8 @@ asmlinkage void do_illegal_instruction(struct pt_regs *regs, /* Send a SIGILL */ force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)address); } else { /* Kernel mode */ - printk("KERNEL: Illegal instruction (SIGILL) 0x%.8lx\n", - address); - show_registers(regs); + pr_emerg("KERNEL: Illegal instruction (SIGILL) 0x%.8lx\n", + address); die("Die:", regs, address); } } diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index ee4febb30386..5ce258f3fffa 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -131,7 +131,7 @@ CONFIG_PPDEV=m CONFIG_I2C=y CONFIG_HWMON=m CONFIG_DRM=m -CONFIG_DRM_DP_CEC=y +CONFIG_DRM_DISPLAY_DP_AUX_CEC=y # CONFIG_DRM_I2C_CH7006 is not set # CONFIG_DRM_I2C_SIL164 is not set CONFIG_DRM_RADEON=m diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 444154271f23..800eb64e91ad 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -344,7 +344,7 @@ static int smp_boot_one_cpu(int cpuid, struct task_struct *idle) struct irq_desc *desc = irq_to_desc(i); if (desc && desc->kstat_irqs) - *per_cpu_ptr(desc->kstat_irqs, cpuid) = 0; + *per_cpu_ptr(desc->kstat_irqs, cpuid) = (struct irqstat) { }; } #endif diff --git a/arch/parisc/net/bpf_jit_core.c b/arch/parisc/net/bpf_jit_core.c index d6ee2fd45550..979f45d4d1fb 100644 --- a/arch/parisc/net/bpf_jit_core.c +++ b/arch/parisc/net/bpf_jit_core.c @@ -167,7 +167,13 @@ skip_init_ctx: bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); if (!prog->is_func || extra_pass) { - bpf_jit_binary_lock_ro(jit_data->header); + if (bpf_jit_binary_lock_ro(jit_data->header)) { + bpf_jit_binary_free(jit_data->header); + prog->bpf_func = NULL; + prog->jited = 0; + prog->jited_len = 0; + goto out_offset; + } prologue_len = ctx->epilogue_offset - ctx->body_len; for (i = 0; i < prog->len; i++) ctx->offset[i] += prologue_len; diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index 7f35d5bc1229..97f4d4851735 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -4,7 +4,7 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index a98ef6a4abef..50cc59eb36cf 100644 --- 
a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -6,7 +6,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index 5c56d36cdfc5..6f449411abf7 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -6,7 +6,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_BUG is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_EPOLL is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 56b876e418e9..77306be62e9e 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -7,7 +7,7 @@ CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index 083c2e57520a..383c0966e92f 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -6,7 +6,7 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_MODULES=y diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 78302f6c2580..c6390890a60c 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -13,6 +13,17 @@ #define VDSO_HAS_TIME 1 +/* + * powerpc specific delta calculation. + * + * This variant removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. + */ +#define VDSO_DELTA_NOMASK 1 + static __always_inline int do_syscall_2(const unsigned long _r0, const unsigned long _r3, const unsigned long _r4) { @@ -104,21 +115,6 @@ static inline bool vdso_clocksource_ok(const struct vdso_data *vd) } #define vdso_clocksource_ok vdso_clocksource_ok -/* - * powerpc specific delta calculation. - * - * This variant removes the masking of the subtraction because the - * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX - * which would result in a pointless operation. The compiler cannot - * optimize it away as the mask comes from the vdso data and is not compile - * time constant. 
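
As an illustration (not part of the patch): the VDSO_DELTA_NOMASK define added above replaces the per-arch vdso_calc_delta() override removed just below (and the same switch is made for s390 later in this series). A minimal user-space sketch of the two delta computations, with stand-in values since the real mask/mult come from the vdso data:

#include <stdint.h>
#include <stdio.h>

/* Generic vDSO path: mask the cycle delta before scaling. */
static uint64_t calc_delta_masked(uint64_t cycles, uint64_t last,
				  uint64_t mask, uint32_t mult)
{
	return ((cycles - last) & mask) * mult;
}

/*
 * VDSO_DELTA_NOMASK path: on powerpc/s390 the clocksource mask of all
 * VDSO-capable clocksources is U64_MAX, so the AND is a no-op and the
 * generic code can simply drop it.
 */
static uint64_t calc_delta_nomask(uint64_t cycles, uint64_t last, uint32_t mult)
{
	return (cycles - last) * mult;
}

int main(void)
{
	uint64_t cycles = 1000, last = 400;

	printf("%llu %llu\n",
	       (unsigned long long)calc_delta_masked(cycles, last, UINT64_MAX, 3),
	       (unsigned long long)calc_delta_nomask(cycles, last, 3));
	return 0;
}
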
- */ -static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) -{ - return (cycles - last) * mult; -} -#define vdso_calc_delta vdso_calc_delta - #ifndef __powerpc64__ static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift) { diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index e42984878503..f2636414d82a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -837,7 +837,7 @@ static inline void this_cpu_inc_rm(unsigned int __percpu *addr) */ static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc) { - this_cpu_inc_rm(desc->kstat_irqs); + this_cpu_inc_rm(&desc->kstat_irqs->cnt); __this_cpu_inc(kstat.irqs_sum); } diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index be09c8836d56..9e87287942dc 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -173,6 +173,8 @@ config RISCV select PCI_DOMAINS_GENERIC if PCI select PCI_MSI if PCI select RISCV_ALTERNATIVE if !XIP_KERNEL + select RISCV_APLIC + select RISCV_IMSIC select RISCV_INTC select RISCV_TIMER if RISCV_SBI select SIFIVE_PLIC diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index 2552e78074a3..af9601da4643 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -11,7 +11,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_SYSFS_SYSCALL is not set # CONFIG_FHANDLE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 8f67fb830585..dd460c649152 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -3,7 +3,7 @@ CONFIG_LOG_BUF_SHIFT=13 CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_SYSFS_SYSCALL is not set # CONFIG_FHANDLE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig index de8143d1f738..d4b03dc3c2c0 100644 --- a/arch/riscv/configs/nommu_virt_defconfig +++ b/arch/riscv/configs/nommu_virt_defconfig @@ -10,7 +10,7 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set # CONFIG_FHANDLE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set # CONFIG_TIMERFD is not set diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index f4b6b3b9edda..fdbf88ca8b70 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -81,6 +81,8 @@ struct rv_jit_context { int nexentries; unsigned long flags; int stack_size; + u64 arena_vm_start; + u64 user_vm_start; }; /* Convert from ninsns to bytes. */ @@ -606,7 +608,7 @@ static inline u32 rv_nop(void) return rv_i_insn(0, 0, 0, 0, 0x13); } -/* RVC instrutions. */ +/* RVC instructions. */ static inline u16 rvc_addi4spn(u8 rd, u32 imm10) { @@ -735,7 +737,7 @@ static inline u16 rvc_swsp(u32 imm8, u8 rs2) return rv_css_insn(0x6, imm, rs2, 0x2); } -/* RVZBB instrutions. */ +/* RVZBB instructions. 
*/ static inline u32 rvzbb_sextb(u8 rd, u8 rs1) { return rv_i_insn(0x604, rs1, 1, rd, 0x13); diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index ec9d692838fc..79a001d5533e 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -12,12 +12,14 @@ #include <linux/stop_machine.h> #include <asm/patch.h> #include <asm/cfi.h> +#include <asm/percpu.h> #include "bpf_jit.h" #define RV_FENTRY_NINSNS 2 #define RV_REG_TCC RV_REG_A6 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */ +#define RV_REG_ARENA RV_REG_S7 /* For storing arena_vm_start */ static const int regmap[] = { [BPF_REG_0] = RV_REG_A5, @@ -255,6 +257,10 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx); store_offset -= 8; } + if (ctx->arena_vm_start) { + emit_ld(RV_REG_ARENA, store_offset, RV_REG_SP, ctx); + store_offset -= 8; + } emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx); /* Set return value. */ @@ -498,33 +504,33 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, break; /* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */ case BPF_ADD | BPF_FETCH: - emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) : - rv_amoadd_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoadd_d(rs, rs, rd, 1, 1) : + rv_amoadd_w(rs, rs, rd, 1, 1), ctx); if (!is64) emit_zextw(rs, rs, ctx); break; case BPF_AND | BPF_FETCH: - emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) : - rv_amoand_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoand_d(rs, rs, rd, 1, 1) : + rv_amoand_w(rs, rs, rd, 1, 1), ctx); if (!is64) emit_zextw(rs, rs, ctx); break; case BPF_OR | BPF_FETCH: - emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) : - rv_amoor_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoor_d(rs, rs, rd, 1, 1) : + rv_amoor_w(rs, rs, rd, 1, 1), ctx); if (!is64) emit_zextw(rs, rs, ctx); break; case BPF_XOR | BPF_FETCH: - emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) : - rv_amoxor_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoxor_d(rs, rs, rd, 1, 1) : + rv_amoxor_w(rs, rs, rd, 1, 1), ctx); if (!is64) emit_zextw(rs, rs, ctx); break; /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */ case BPF_XCHG: - emit(is64 ? rv_amoswap_d(rs, rs, rd, 0, 0) : - rv_amoswap_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? 
rv_amoswap_d(rs, rs, rd, 1, 1) : + rv_amoswap_w(rs, rs, rd, 1, 1), ctx); if (!is64) emit_zextw(rs, rs, ctx); break; @@ -548,6 +554,7 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) +#define REG_DONT_CLEAR_MARKER 0 /* RV_REG_ZERO unused in pt_regmap */ bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs) @@ -555,7 +562,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex, off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); - *(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0; + if (regs_offset != REG_DONT_CLEAR_MARKER) + *(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0; regs->epc = (unsigned long)&ex->fixup - offset; return true; @@ -572,7 +580,8 @@ static int add_exception_handler(const struct bpf_insn *insn, off_t fixup_offset; if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable || - (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX)) + (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX && + BPF_MODE(insn->code) != BPF_PROBE_MEM32)) return 0; if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries)) @@ -1073,6 +1082,33 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: + if (insn_is_cast_user(insn)) { + emit_mv(RV_REG_T1, rs, ctx); + emit_zextw(RV_REG_T1, RV_REG_T1, ctx); + emit_imm(rd, (ctx->user_vm_start >> 32) << 32, ctx); + emit(rv_beq(RV_REG_T1, RV_REG_ZERO, 4), ctx); + emit_or(RV_REG_T1, rd, RV_REG_T1, ctx); + emit_mv(rd, RV_REG_T1, ctx); + break; + } else if (insn_is_mov_percpu_addr(insn)) { + if (rd != rs) + emit_mv(rd, rs, ctx); +#ifdef CONFIG_SMP + /* Load current CPU number in T1 */ + emit_ld(RV_REG_T1, offsetof(struct thread_info, cpu), + RV_REG_TP, ctx); + /* << 3 because offsets are 8 bytes */ + emit_slli(RV_REG_T1, RV_REG_T1, 3, ctx); + /* Load address of __per_cpu_offset array in T2 */ + emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx); + /* Add offset of current CPU to __per_cpu_offset */ + emit_add(RV_REG_T1, RV_REG_T2, RV_REG_T1, ctx); + /* Load __per_cpu_offset[cpu] in T1 */ + emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx); + /* Add the offset to Rd */ + emit_add(rd, rd, RV_REG_T1, ctx); +#endif + } if (imm == 1) { /* Special mov32 for zext */ emit_zextw(rd, rd, ctx); @@ -1457,6 +1493,22 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, bool fixed_addr; u64 addr; + /* Inline calls to bpf_get_smp_processor_id() + * + * RV_REG_TP holds the address of the current CPU's task_struct and thread_info is + * at offset 0 in task_struct. 
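
As an illustration (not part of the patch): the mov_percpu_addr sequence above, and the bpf_get_smp_processor_id() inlining whose comment continues below, come down to the same arithmetic — read the CPU id from thread_info (reachable through the tp register) and, for per-cpu addresses, add __per_cpu_offset[cpu]. A user-space model with stand-in tables; the real thread_info layout and offset table are kernel internals:

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

/* Stand-in for the kernel's __per_cpu_offset[] table (8-byte entries). */
static uintptr_t per_cpu_offset[NR_CPUS] = { 0x1000, 0x2000, 0x3000, 0x4000 };

/* Stand-in for ((struct thread_info *)tp)->cpu, i.e. the emit_ld() from tp. */
static unsigned int current_cpu(void)
{
	return 1;
}

/*
 * rd += __per_cpu_offset[cpu]: what the emitted ld/slli/add sequence does.
 * The "<< 3" in the JIT is the byte offset into this 8-byte-entry table,
 * which plain array indexing expresses here.
 */
static uintptr_t percpu_address(uintptr_t rd)
{
	unsigned int cpu = current_cpu();
	uintptr_t off = per_cpu_offset[cpu];

	return rd + off;
}

int main(void)
{
	printf("%#lx\n", (unsigned long)percpu_address(0x8000));
	return 0;
}
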
+ * Load cpu from thread_info: + * Set R0 to ((struct thread_info *)(RV_REG_TP))->cpu + * + * This replicates the implementation of raw_smp_processor_id() on RISCV + */ + if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) { + /* Load current CPU number in R0 */ + emit_ld(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu), + RV_REG_TP, ctx); + break; + } + mark_call(ctx); ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr, &fixed_addr); @@ -1539,6 +1591,11 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: + /* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + RV_REG_ARENA + off) */ + case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: { int insn_len, insns_start; bool sign_ext; @@ -1546,6 +1603,11 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, sign_ext = BPF_MODE(insn->code) == BPF_MEMSX || BPF_MODE(insn->code) == BPF_PROBE_MEMSX; + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { + emit_add(RV_REG_T2, rs, RV_REG_ARENA, ctx); + rs = RV_REG_T2; + } + switch (BPF_SIZE(code)) { case BPF_B: if (is_12b_int(off)) { @@ -1682,6 +1744,86 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx); break; + case BPF_ST | BPF_PROBE_MEM32 | BPF_B: + case BPF_ST | BPF_PROBE_MEM32 | BPF_H: + case BPF_ST | BPF_PROBE_MEM32 | BPF_W: + case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: + { + int insn_len, insns_start; + + emit_add(RV_REG_T3, rd, RV_REG_ARENA, ctx); + rd = RV_REG_T3; + + /* Load imm to a register then store it */ + emit_imm(RV_REG_T1, imm, ctx); + + switch (BPF_SIZE(code)) { + case BPF_B: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_sb(rd, off, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); + insns_start = ctx->ninsns; + emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_H: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_sh(rd, off, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); + insns_start = ctx->ninsns; + emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_W: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_sw(rd, off, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); + insns_start = ctx->ninsns; + emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_DW: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_sd(rd, off, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); + insns_start = ctx->ninsns; + emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER, + insn_len); + if (ret) + return ret; + + break; + } + /* STX: *(size *)(dst + off) = src */ case BPF_STX | BPF_MEM | BPF_B: if (is_12b_int(off)) { @@ -1728,6 
+1870,84 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_atomic(rd, rs, off, imm, BPF_SIZE(code) == BPF_DW, ctx); break; + + case BPF_STX | BPF_PROBE_MEM32 | BPF_B: + case BPF_STX | BPF_PROBE_MEM32 | BPF_H: + case BPF_STX | BPF_PROBE_MEM32 | BPF_W: + case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: + { + int insn_len, insns_start; + + emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx); + rd = RV_REG_T2; + + switch (BPF_SIZE(code)) { + case BPF_B: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_sb(rd, off, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + insns_start = ctx->ninsns; + emit(rv_sb(RV_REG_T1, 0, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_H: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_sh(rd, off, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + insns_start = ctx->ninsns; + emit(rv_sh(RV_REG_T1, 0, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_W: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_sw(rd, off, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + insns_start = ctx->ninsns; + emit_sw(RV_REG_T1, 0, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_DW: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_sd(rd, off, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + insns_start = ctx->ninsns; + emit_sd(RV_REG_T1, 0, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER, + insn_len); + if (ret) + return ret; + + break; + } + default: pr_err("bpf-jit: unknown opcode %02x\n", code); return -EINVAL; @@ -1759,6 +1979,8 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog) stack_adjust += 8; if (seen_reg(RV_REG_S6, ctx)) stack_adjust += 8; + if (ctx->arena_vm_start) + stack_adjust += 8; stack_adjust = round_up(stack_adjust, 16); stack_adjust += bpf_stack_adjust; @@ -1810,6 +2032,10 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog) emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx); store_offset -= 8; } + if (ctx->arena_vm_start) { + emit_sd(RV_REG_SP, store_offset, RV_REG_ARENA, ctx); + store_offset -= 8; + } emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx); @@ -1823,6 +2049,9 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog) emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx); ctx->stack_size = stack_adjust; + + if (ctx->arena_vm_start) + emit_imm(RV_REG_ARENA, ctx->arena_vm_start, ctx); } void bpf_jit_build_epilogue(struct rv_jit_context *ctx) @@ -1839,3 +2068,23 @@ bool bpf_jit_supports_ptr_xchg(void) { return true; } + +bool bpf_jit_supports_arena(void) +{ + return true; +} + +bool bpf_jit_supports_percpu_insn(void) +{ + return true; +} + +bool bpf_jit_inlines_helper_call(s32 imm) +{ + switch (imm) { + case BPF_FUNC_get_smp_processor_id: + return true; + default: + return false; + } +} diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index e238fdbd5dbc..0a96abdaca65 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -80,6 +80,8 @@ struct bpf_prog 
*bpf_int_jit_compile(struct bpf_prog *prog) goto skip_init_ctx; } + ctx->arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); + ctx->user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); ctx->prog = prog; ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); if (!ctx->offset) { diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index db84942eb78f..7937765ccfa5 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -6,16 +6,13 @@ #define VDSO_HAS_CLOCK_GETRES 1 +#define VDSO_DELTA_NOMASK 1 + #include <asm/syscall.h> #include <asm/timex.h> #include <asm/unistd.h> #include <linux/compiler.h> -#define vdso_calc_delta __arch_vdso_calc_delta -static __always_inline u64 __arch_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) -{ - return (cycles - last) * mult; -} static __always_inline const struct vdso_data *__arch_get_vdso_data(void) { diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 5af0402e94b8..4be8f5cadd02 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1427,8 +1427,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64), \ (insn->imm & BPF_FETCH) ? src_reg : REG_W0, \ src_reg, dst_reg, off); \ - if (is32 && (insn->imm & BPF_FETCH)) \ - EMIT_ZERO(src_reg); \ + if (insn->imm & BPF_FETCH) { \ + /* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */ \ + _EMIT2(0x07e0); \ + if (is32) \ + EMIT_ZERO(src_reg); \ + } \ } while (0) case BPF_ADD: case BPF_ADD | BPF_FETCH: @@ -2108,7 +2112,11 @@ skip_init_ctx: print_fn_code(jit.prg_buf, jit.size_prg); } if (!fp->is_func || extra_pass) { - bpf_jit_binary_lock_ro(header); + if (bpf_jit_binary_lock_ro(header)) { + bpf_jit_binary_free(header); + fp = orig_fp; + goto free_addrs; + } } else { jit_data->header = header; jit_data->ctx = jit; diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 2ad3e29f0ebe..6bc60f964f96 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -125,7 +125,8 @@ config ARCH_HAS_ILOG2_U64 config NO_IOPORT_MAP def_bool !PCI - depends on !SH_SHMIN && !SH_HP6XX && !SH_SOLUTION_ENGINE + depends on !SH_SHMIN && !SH_HP6XX && !SH_SOLUTION_ENGINE && \ + !SH_DREAMCAST config IO_TRAPPED bool diff --git a/arch/sh/boards/board-sh7757lcr.c b/arch/sh/boards/board-sh7757lcr.c index f39c8196efdf..689ea14a6678 100644 --- a/arch/sh/boards/board-sh7757lcr.c +++ b/arch/sh/boards/board-sh7757lcr.c @@ -569,7 +569,7 @@ static int __init sh7757lcr_devices_setup(void) arch_initcall(sh7757lcr_devices_setup); /* Initialize IRQ setting */ -void __init init_sh7757lcr_IRQ(void) +static void __init init_sh7757lcr_IRQ(void) { plat_irq_setup_pins(IRQ_MODE_IRQ7654); plat_irq_setup_pins(IRQ_MODE_IRQ3210); diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c index 77dad1e511b4..25c4968f0d8b 100644 --- a/arch/sh/boards/board-sh7785lcr.c +++ b/arch/sh/boards/board-sh7785lcr.c @@ -295,7 +295,7 @@ static int __init sh7785lcr_devices_setup(void) device_initcall(sh7785lcr_devices_setup); /* Initialize IRQ setting */ -void __init init_sh7785lcr_IRQ(void) +static void __init init_sh7785lcr_IRQ(void) { plat_irq_setup_pins(IRQ_MODE_IRQ7654); plat_irq_setup_pins(IRQ_MODE_IRQ3210); diff --git a/arch/sh/boards/mach-dreamcast/setup.c b/arch/sh/boards/mach-dreamcast/setup.c index 2d966c1c2cc1..daa8455549fa 100644 --- a/arch/sh/boards/mach-dreamcast/setup.c +++ 
b/arch/sh/boards/mach-dreamcast/setup.c @@ -25,10 +25,13 @@ #include <asm/irq.h> #include <asm/rtc.h> #include <asm/machvec.h> +#include <cpu/addrspace.h> #include <mach/sysasic.h> static void __init dreamcast_setup(char **cmdline_p) { + /* GAPS PCI bridge assumes P2 area relative addresses. */ + __set_io_port_base(P2SEG); } static struct sh_machine_vector mv_dreamcast __initmv = { diff --git a/arch/sh/boards/mach-highlander/pinmux-r7785rp.c b/arch/sh/boards/mach-highlander/pinmux-r7785rp.c index 703179faf652..689bd8732d9e 100644 --- a/arch/sh/boards/mach-highlander/pinmux-r7785rp.c +++ b/arch/sh/boards/mach-highlander/pinmux-r7785rp.c @@ -5,6 +5,7 @@ #include <linux/init.h> #include <linux/gpio.h> #include <cpu/sh7785.h> +#include <mach/highlander.h> void __init highlander_plat_pinmux_setup(void) { diff --git a/arch/sh/boards/mach-sh03/rtc.c b/arch/sh/boards/mach-sh03/rtc.c index 7fb474844a2d..bc6cf995128c 100644 --- a/arch/sh/boards/mach-sh03/rtc.c +++ b/arch/sh/boards/mach-sh03/rtc.c @@ -120,7 +120,7 @@ static int set_rtc_mmss(struct rtc_time *tm) return retval; } -int sh03_rtc_settimeofday(struct device *dev, struct rtc_time *tm) +static int sh03_rtc_settimeofday(struct device *dev, struct rtc_time *tm) { return set_rtc_mmss(tm); } diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c index f7f3e618e85b..cc88cb8908cc 100644 --- a/arch/sh/boards/of-generic.c +++ b/arch/sh/boards/of-generic.c @@ -10,6 +10,8 @@ #include <linux/of_fdt.h> #include <linux/clocksource.h> #include <linux/irqchip.h> + +#include <asm/clock.h> #include <asm/machvec.h> #include <asm/rtc.h> diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile index 6c6c791a1d06..54efed53c891 100644 --- a/arch/sh/boot/compressed/Makefile +++ b/arch/sh/boot/compressed/Makefile @@ -5,7 +5,7 @@ # create a compressed vmlinux image from the original vmlinux # -OBJECTS := head_32.o misc.o cache.o piggy.o \ +OBJECTS := head_32.o misc.o piggy.o \ ashiftrt.o ashldi3.o ashrsi3.o ashlsi3.o lshrsi3.o targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \ diff --git a/arch/sh/boot/compressed/cache.c b/arch/sh/boot/compressed/cache.c deleted file mode 100644 index 31e04ff4841e..000000000000 --- a/arch/sh/boot/compressed/cache.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -int cache_control(unsigned int command) -{ - volatile unsigned int *p = (volatile unsigned int *) 0x80000000; - int i; - - for (i = 0; i < (32 * 1024); i += 32) { - (void)*p; - p += (32 / sizeof(int)); - } - - return 0; -} diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c index ca05c99a3d5b..3690379cc86b 100644 --- a/arch/sh/boot/compressed/misc.c +++ b/arch/sh/boot/compressed/misc.c @@ -16,6 +16,8 @@ #include <asm/addrspace.h> #include <asm/page.h> +#include "misc.h" + /* * gzip declarations */ @@ -26,11 +28,6 @@ #undef memcpy #define memzero(s, n) memset ((s), 0, (n)) -/* cache.c */ -#define CACHE_ENABLE 0 -#define CACHE_DISABLE 1 -int cache_control(unsigned int command); - extern char input_data[]; extern int input_len; static unsigned char *output; @@ -139,8 +136,6 @@ void decompress_kernel(void) free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; puts("Uncompressing Linux... 
"); - cache_control(CACHE_ENABLE); __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); - cache_control(CACHE_DISABLE); puts("Ok, booting the kernel.\n"); } diff --git a/arch/sh/boot/compressed/misc.h b/arch/sh/boot/compressed/misc.h new file mode 100644 index 000000000000..2b4534faa305 --- /dev/null +++ b/arch/sh/boot/compressed/misc.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef MISC_H +#define MISC_H + +void arch_ftrace_ops_list_func(void); +void decompress_kernel(void); +void ftrace_stub(void); + +#endif /* MISC_H */ diff --git a/arch/sh/boot/dts/j2_mimas_v2.dts b/arch/sh/boot/dts/j2_mimas_v2.dts index fa9562f78d53..faf884f53804 100644 --- a/arch/sh/boot/dts/j2_mimas_v2.dts +++ b/arch/sh/boot/dts/j2_mimas_v2.dts @@ -71,8 +71,6 @@ #address-cells = <1>; #size-cells = <0>; - spi-max-frequency = <25000000>; - reg = <0x40 0x8>; sdcard@0 { diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig index 9ee35269bee2..ab3bf72264df 100644 --- a/arch/sh/configs/edosk7705_defconfig +++ b/arch/sh/configs/edosk7705_defconfig @@ -6,7 +6,7 @@ # CONFIG_PRINTK is not set # CONFIG_BUG is not set # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig index 14d0f5ead502..4765966fec99 100644 --- a/arch/sh/configs/se7619_defconfig +++ b/arch/sh/configs/se7619_defconfig @@ -4,7 +4,7 @@ CONFIG_LOG_BUF_SHIFT=14 # CONFIG_KALLSYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index dc854293da43..20f07aee5bde 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -7,7 +7,7 @@ CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_KALLSYMS_ALL=y # CONFIG_BUG is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_SHMEM is not set CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index c891945b8a90..00862d3c030d 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -7,7 +7,7 @@ CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_KALLSYMS_ALL=y # CONFIG_BUG is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_SHMEM is not set CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig index e078b193a78a..bfeb004f130e 100644 --- a/arch/sh/configs/shmin_defconfig +++ b/arch/sh/configs/shmin_defconfig @@ -5,7 +5,7 @@ CONFIG_LOG_BUF_SHIFT=14 # CONFIG_HOTPLUG is not set # CONFIG_BUG is not set # CONFIG_ELF_CORE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SHMEM is not set diff --git a/arch/sh/drivers/dma/dma-api.c b/arch/sh/drivers/dma/dma-api.c index 89cd4a3b4cca..87e5a8928873 100644 --- a/arch/sh/drivers/dma/dma-api.c +++ b/arch/sh/drivers/dma/dma-api.c @@ -41,21 +41,6 @@ struct dma_info *get_dma_info(unsigned int chan) } EXPORT_SYMBOL(get_dma_info); -struct dma_info *get_dma_info_by_name(const char *dmac_name) -{ - struct dma_info *info; - - list_for_each_entry(info, 
®istered_dmac_list, list) { - if (dmac_name && (strcmp(dmac_name, info->name) != 0)) - continue; - else - return info; - } - - return NULL; -} -EXPORT_SYMBOL(get_dma_info_by_name); - static unsigned int get_nr_channels(void) { struct dma_info *info; @@ -101,93 +86,6 @@ int get_dma_residue(unsigned int chan) } EXPORT_SYMBOL(get_dma_residue); -static int search_cap(const char **haystack, const char *needle) -{ - const char **p; - - for (p = haystack; *p; p++) - if (strcmp(*p, needle) == 0) - return 1; - - return 0; -} - -/** - * request_dma_bycap - Allocate a DMA channel based on its capabilities - * @dmac: List of DMA controllers to search - * @caps: List of capabilities - * - * Search all channels of all DMA controllers to find a channel which - * matches the requested capabilities. The result is the channel - * number if a match is found, or %-ENODEV if no match is found. - * - * Note that not all DMA controllers export capabilities, in which - * case they can never be allocated using this API, and so - * request_dma() must be used specifying the channel number. - */ -int request_dma_bycap(const char **dmac, const char **caps, const char *dev_id) -{ - unsigned int found = 0; - struct dma_info *info; - const char **p; - int i; - - BUG_ON(!dmac || !caps); - - list_for_each_entry(info, ®istered_dmac_list, list) - if (strcmp(*dmac, info->name) == 0) { - found = 1; - break; - } - - if (!found) - return -ENODEV; - - for (i = 0; i < info->nr_channels; i++) { - struct dma_channel *channel = &info->channels[i]; - - if (unlikely(!channel->caps)) - continue; - - for (p = caps; *p; p++) { - if (!search_cap(channel->caps, *p)) - break; - if (request_dma(channel->chan, dev_id) == 0) - return channel->chan; - } - } - - return -EINVAL; -} -EXPORT_SYMBOL(request_dma_bycap); - -int dmac_search_free_channel(const char *dev_id) -{ - struct dma_channel *channel = { 0 }; - struct dma_info *info = get_dma_info(0); - int i; - - for (i = 0; i < info->nr_channels; i++) { - channel = &info->channels[i]; - if (unlikely(!channel)) - return -ENODEV; - - if (atomic_read(&channel->busy) == 0) - break; - } - - if (info->ops->request) { - int result = info->ops->request(channel); - if (result) - return result; - - atomic_set(&channel->busy, 1); - return channel->chan; - } - - return -ENOSYS; -} - int request_dma(unsigned int chan, const char *dev_id) { struct dma_channel *channel = { 0 }; @@ -240,35 +138,6 @@ void dma_wait_for_completion(unsigned int chan) } EXPORT_SYMBOL(dma_wait_for_completion); -int register_chan_caps(const char *dmac, struct dma_chan_caps *caps) -{ - struct dma_info *info; - unsigned int found = 0; - int i; - - list_for_each_entry(info, ®istered_dmac_list, list) - if (strcmp(dmac, info->name) == 0) { - found = 1; - break; - } - - if (unlikely(!found)) - return -ENODEV; - - for (i = 0; i < info->nr_channels; i++, caps++) { - struct dma_channel *channel; - - if ((info->first_channel_nr + i) != caps->ch_num) - return -EINVAL; - - channel = &info->channels[i]; - channel->caps = caps->caplist; - } - - return 0; -} -EXPORT_SYMBOL(register_chan_caps); - void dma_configure_channel(unsigned int chan, unsigned long flags) { struct dma_info *info = get_dma_info(chan); @@ -294,18 +163,6 @@ int dma_xfer(unsigned int chan, unsigned long from, } EXPORT_SYMBOL(dma_xfer); -int dma_extend(unsigned int chan, unsigned long op, void *param) -{ - struct dma_info *info = get_dma_info(chan); - struct dma_channel *channel = get_dma_channel(chan); - - if (info->ops->extend) - return info->ops->extend(channel, op, param); - - 
return -ENOSYS; -} -EXPORT_SYMBOL(dma_extend); - static int dma_proc_show(struct seq_file *m, void *v) { struct dma_info *info = v; diff --git a/arch/sh/drivers/push-switch.c b/arch/sh/drivers/push-switch.c index 6ecba5f521eb..362e4860bf52 100644 --- a/arch/sh/drivers/push-switch.c +++ b/arch/sh/drivers/push-switch.c @@ -91,7 +91,7 @@ err: return ret; } -static int switch_drv_remove(struct platform_device *pdev) +static void switch_drv_remove(struct platform_device *pdev) { struct push_switch *psw = platform_get_drvdata(pdev); struct push_switch_platform_info *psw_info = pdev->dev.platform_data; @@ -106,13 +106,11 @@ static int switch_drv_remove(struct platform_device *pdev) free_irq(irq, pdev); kfree(psw); - - return 0; } static struct platform_driver switch_driver = { .probe = switch_drv_probe, - .remove = switch_drv_remove, + .remove_new = switch_drv_remove, .driver = { .name = DRV_NAME, }, diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index 51112f54552b..e6642ff14889 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -104,6 +104,18 @@ void kunmap_coherent(void *kvaddr); void cpu_cache_init(void); +void __weak l2_cache_init(void); + +void __weak j2_cache_init(void); +void __weak sh2_cache_init(void); +void __weak sh2a_cache_init(void); +void __weak sh3_cache_init(void); +void __weak shx3_cache_init(void); +void __weak sh4_cache_init(void); +void __weak sh7705_cache_init(void); + +void __weak sh4__flush_region_init(void); + static inline void *sh_cacheop_vaddr(void *vaddr) { if (__in_29bit_mode()) diff --git a/arch/sh/include/asm/dma.h b/arch/sh/include/asm/dma.h index c8bee3f985a2..6b6d409956d1 100644 --- a/arch/sh/include/asm/dma.h +++ b/arch/sh/include/asm/dma.h @@ -56,7 +56,6 @@ struct dma_ops { int (*get_residue)(struct dma_channel *chan); int (*xfer)(struct dma_channel *chan); int (*configure)(struct dma_channel *chan, unsigned long flags); - int (*extend)(struct dma_channel *chan, unsigned long op, void *param); }; struct dma_channel { @@ -118,8 +117,6 @@ extern int dma_xfer(unsigned int chan, unsigned long from, #define dma_read_page(chan, from, to) \ dma_read(chan, from, to, PAGE_SIZE) -extern int request_dma_bycap(const char **dmac, const char **caps, - const char *dev_id); extern int get_dma_residue(unsigned int chan); extern struct dma_info *get_dma_info(unsigned int chan); extern struct dma_channel *get_dma_channel(unsigned int chan); @@ -128,10 +125,6 @@ extern void dma_configure_channel(unsigned int chan, unsigned long flags); extern int register_dmac(struct dma_info *info); extern void unregister_dmac(struct dma_info *info); -extern struct dma_info *get_dma_info_by_name(const char *dmac_name); - -extern int dma_extend(unsigned int chan, unsigned long op, void *param); -extern int register_chan_caps(const char *dmac, struct dma_chan_caps *capslist); /* arch/sh/drivers/dma/dma-sysfs.c */ extern int dma_create_sysfs_files(struct dma_channel *, struct dma_info *); diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h index 04584be8986c..0379f4cce5ed 100644 --- a/arch/sh/include/asm/fpu.h +++ b/arch/sh/include/asm/fpu.h @@ -64,6 +64,9 @@ static inline void clear_fpu(struct task_struct *tsk, struct pt_regs *regs) preempt_enable(); } +void float_raise(unsigned int flags); +int float_rounding_mode(void); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_SH_FPU_H */ diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h index b1c1dc0cc261..1c10e1066390 100644 --- 
a/arch/sh/include/asm/ftrace.h +++ b/arch/sh/include/asm/ftrace.h @@ -33,6 +33,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr); + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ @@ -43,6 +45,14 @@ extern void *return_address(unsigned int); #define ftrace_return_address(n) return_address(n) +#ifdef CONFIG_DYNAMIC_FTRACE +extern void arch_ftrace_nmi_enter(void); +extern void arch_ftrace_nmi_exit(void); +#else +static inline void arch_ftrace_nmi_enter(void) { } +static inline void arch_ftrace_nmi_exit(void) { } +#endif + #endif /* __ASSEMBLY__ */ #endif /* __ASM_SH_FTRACE_H */ diff --git a/arch/sh/include/asm/hw_breakpoint.h b/arch/sh/include/asm/hw_breakpoint.h index 361a0f57bdeb..74a438cea655 100644 --- a/arch/sh/include/asm/hw_breakpoint.h +++ b/arch/sh/include/asm/hw_breakpoint.h @@ -52,6 +52,8 @@ struct pmu; /* arch/sh/kernel/hw_breakpoint.c */ extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); +extern int arch_bp_generic_fields(int sh_len, int sh_type, int *gen_len, + int *gen_type); extern int hw_breakpoint_arch_parse(struct perf_event *bp, const struct perf_event_attr *attr, struct arch_hw_breakpoint *hw); diff --git a/arch/sh/include/asm/setup.h b/arch/sh/include/asm/setup.h index fc807011187f..84bb23a771f3 100644 --- a/arch/sh/include/asm/setup.h +++ b/arch/sh/include/asm/setup.h @@ -21,5 +21,6 @@ void sh_mv_setup(void); void check_for_initrd(void); void per_cpu_trap_init(void); +void sh_fdt_init(phys_addr_t dt_phys); #endif /* _SH_SETUP_H */ diff --git a/arch/sh/include/asm/syscalls.h b/arch/sh/include/asm/syscalls.h index 387105316d28..39240e06e8aa 100644 --- a/arch/sh/include/asm/syscalls.h +++ b/arch/sh/include/asm/syscalls.h @@ -8,6 +8,7 @@ asmlinkage int old_mmap(unsigned long addr, unsigned long len, asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); +asmlinkage int sys_cacheflush(unsigned long addr, unsigned long len, int op); #include <asm/syscalls_32.h> diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index aeb8915e9254..ddf324bfb9a0 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -24,6 +24,10 @@ static inline void tlb_unwire_entry(void) BUG(); } #endif /* CONFIG_CPU_SH4 */ + +asmlinkage int handle_tlbmiss(struct pt_regs *regs, unsigned long error_code, + unsigned long address); + #endif /* CONFIG_MMU */ #endif /* __ASSEMBLY__ */ #endif /* __ASM_SH_TLB_H */ diff --git a/arch/sh/kernel/cpu/sh2a/opcode_helper.c b/arch/sh/kernel/cpu/sh2a/opcode_helper.c index c509081d90b9..fcf53f5827eb 100644 --- a/arch/sh/kernel/cpu/sh2a/opcode_helper.c +++ b/arch/sh/kernel/cpu/sh2a/opcode_helper.c @@ -8,6 +8,8 @@ */ #include <linux/kernel.h> +#include <asm/processor.h> + /* * Instructions on SH are generally fixed at 16-bits, however, SH-2A * introduces some 32-bit instructions. 
Since there are no real diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c index 83ae1ad4a86e..d64d28c4f059 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c @@ -14,9 +14,12 @@ #include <linux/sh_timer.h> #include <linux/sh_intc.h> #include <linux/io.h> + +#include <asm/cacheflush.h> #include <asm/clock.h> #include <asm/mmzone.h> #include <asm/platform_early.h> + #include <cpu/sh7723.h> /* Serial */ diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c index 0d990ab1ba2a..ef4b26a4b3d6 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -21,6 +21,7 @@ #include <linux/io.h> #include <linux/notifier.h> +#include <asm/cacheflush.h> #include <asm/suspend.h> #include <asm/clock.h> #include <asm/mmzone.h> diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7757.c b/arch/sh/kernel/cpu/sh4a/setup-sh7757.c index 67e330b7ea46..2ad19a0c5e04 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7757.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7757.c @@ -17,8 +17,11 @@ #include <linux/sh_dma.h> #include <linux/sh_intc.h> #include <linux/usb/ohci_pdriver.h> + #include <cpu/dma-register.h> #include <cpu/sh7757.h> + +#include <asm/mmzone.h> #include <asm/platform_early.h> static struct plat_sci_port scif2_platform_data = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c index 74620f30b19b..c048842d8a58 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c @@ -400,20 +400,6 @@ static struct platform_device *sh7786_devices[] __initdata = { &usb_ohci_device, }; -/* - * Please call this function if your platform board - * use external clock for USB - * */ -#define USBCTL0 0xffe70858 -#define CLOCK_MODE_MASK 0xffffff7f -#define EXT_CLOCK_MODE 0x00000080 - -void __init sh7786_usb_use_exclock(void) -{ - u32 val = __raw_readl(USBCTL0) & CLOCK_MODE_MASK; - __raw_writel(val | EXT_CLOCK_MODE, USBCTL0); -} - #define USBINITREG1 0xffe70094 #define USBINITREG2 0xffe7009c #define USBINITVAL1 0x00ff0040 diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c index bf8682e71830..45c8ae20d109 100644 --- a/arch/sh/kernel/dwarf.c +++ b/arch/sh/kernel/dwarf.c @@ -344,7 +344,7 @@ out: * dwarf_lookup_fde - locate the FDE that covers pc * @pc: the program counter */ -struct dwarf_fde *dwarf_lookup_fde(unsigned long pc) +static struct dwarf_fde *dwarf_lookup_fde(unsigned long pc) { struct rb_node **rb_node = &fde_root.rb_node; struct dwarf_fde *fde = NULL; diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c index aed1ea8e2c2f..49c4ffd782d6 100644 --- a/arch/sh/kernel/kprobes.c +++ b/arch/sh/kernel/kprobes.c @@ -39,22 +39,17 @@ static DEFINE_PER_CPU(struct kprobe, saved_next_opcode2); int __kprobes arch_prepare_kprobe(struct kprobe *p) { - kprobe_opcode_t opcode = *(kprobe_opcode_t *) (p->addr); + kprobe_opcode_t opcode = *p->addr; if (OPCODE_RTE(opcode)) return -EFAULT; /* Bad breakpoint */ + memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); p->opcode = opcode; return 0; } -void __kprobes arch_copy_kprobe(struct kprobe *p) -{ - memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - p->opcode = *p->addr; -} - void __kprobes arch_arm_kprobe(struct kprobe *p) { *p->addr = BREAKPOINT_INSTRUCTION; @@ -253,7 +248,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) p = get_kprobe(addr); if (!p) { /* Not one of ours: let kernel 
handle it */ - if (*(kprobe_opcode_t *)addr != BREAKPOINT_INSTRUCTION) { + if (*addr != BREAKPOINT_INSTRUCTION) { /* * The breakpoint instruction was removed right * after we hit it. Another cpu has removed @@ -301,7 +296,7 @@ static void __used kretprobe_trampoline_holder(void) /* * Called when we hit the probe point at __kretprobe_trampoline */ -int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +static int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { regs->pc = __kretprobe_trampoline_handler(regs, NULL); diff --git a/arch/sh/kernel/return_address.c b/arch/sh/kernel/return_address.c index 8838094c9ff9..2ce22f11eab3 100644 --- a/arch/sh/kernel/return_address.c +++ b/arch/sh/kernel/return_address.c @@ -7,7 +7,9 @@ */ #include <linux/kernel.h> #include <linux/module.h> + #include <asm/dwarf.h> +#include <asm/ftrace.h> #ifdef CONFIG_DWARF_UNWINDER diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 5cf35a774dc7..108d808767fa 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -21,6 +21,8 @@ #include <linux/sched/hotplug.h> #include <linux/atomic.h> #include <linux/clockchips.h> +#include <linux/profile.h> + #include <asm/processor.h> #include <asm/mmu_context.h> #include <asm/smp.h> @@ -170,7 +172,7 @@ void native_play_dead(void) } #endif -asmlinkage void start_secondary(void) +static asmlinkage void start_secondary(void) { unsigned int cpu = smp_processor_id(); struct mm_struct *mm = &init_mm; @@ -320,11 +322,13 @@ void smp_message_recv(unsigned int msg) } } +#ifdef CONFIG_PROFILING /* Not really SMP stuff ... */ int setup_profiling_timer(unsigned int multiplier) { return 0; } +#endif #ifdef CONFIG_MMU diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 01884054aeb2..4339c4cafa79 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -15,6 +15,8 @@ #include <linux/extable.h> #include <linux/module.h> /* print_modules */ + +#include <asm/ftrace.h> #include <asm/unwinder.h> #include <asm/traps.h> @@ -170,14 +172,6 @@ BUILD_TRAP_HANDLER(bug) force_sig(SIGTRAP); } -#ifdef CONFIG_DYNAMIC_FTRACE -extern void arch_ftrace_nmi_enter(void); -extern void arch_ftrace_nmi_exit(void); -#else -static inline void arch_ftrace_nmi_enter(void) { } -static inline void arch_ftrace_nmi_exit(void) { } -#endif - BUILD_TRAP_HANDLER(nmi) { TRAP_HANDLER_DECL; diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index 6cdda3a621a1..1271b839a107 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -27,6 +27,7 @@ #include <asm/alignment.h> #include <asm/fpu.h> #include <asm/kprobes.h> +#include <asm/setup.h> #include <asm/traps.h> #include <asm/bl_bit.h> @@ -568,7 +569,7 @@ uspace_segv: /* * SH-DSP support gerg@snapgear.com. 
*/ -int is_dsp_inst(struct pt_regs *regs) +static int is_dsp_inst(struct pt_regs *regs) { unsigned short inst = 0; @@ -590,7 +591,7 @@ int is_dsp_inst(struct pt_regs *regs) return 0; } #else -#define is_dsp_inst(regs) (0) +static inline int is_dsp_inst(struct pt_regs *regs) { return 0; } #endif /* CONFIG_SH_DSP */ #ifdef CONFIG_CPU_SH2A diff --git a/arch/sh/lib/checksum.S b/arch/sh/lib/checksum.S index 3e07074e0098..06fed5a21e8b 100644 --- a/arch/sh/lib/checksum.S +++ b/arch/sh/lib/checksum.S @@ -33,7 +33,8 @@ */ /* - * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum); + * unsigned int csum_partial(const unsigned char *buf, int len, + * unsigned int sum); */ .text @@ -45,31 +46,11 @@ ENTRY(csum_partial) * Fortunately, it is easy to convert 2-byte alignment to 4-byte * alignment for the unrolled loop. */ + mov r5, r1 mov r4, r0 - tst #3, r0 ! Check alignment. - bt/s 2f ! Jump if alignment is ok. - mov r4, r7 ! Keep a copy to check for alignment + tst #2, r0 ! Check alignment. + bt 2f ! Jump if alignment is ok. ! - tst #1, r0 ! Check alignment. - bt 21f ! Jump if alignment is boundary of 2bytes. - - ! buf is odd - tst r5, r5 - add #-1, r5 - bt 9f - mov.b @r4+, r0 - extu.b r0, r0 - addc r0, r6 ! t=0 from previous tst - mov r6, r0 - shll8 r6 - shlr16 r0 - shlr8 r0 - or r0, r6 - mov r4, r0 - tst #2, r0 - bt 2f -21: - ! buf is 2 byte aligned (len could be 0) add #-2, r5 ! Alignment uses up two bytes. cmp/pz r5 ! bt/s 1f ! Jump if we had at least two bytes. @@ -77,17 +58,16 @@ ENTRY(csum_partial) bra 6f add #2, r5 ! r5 was < 2. Deal with it. 1: + mov r5, r1 ! Save new len for later use. mov.w @r4+, r0 extu.w r0, r0 addc r0, r6 bf 2f add #1, r6 2: - ! buf is 4 byte aligned (len could be 0) - mov r5, r1 mov #-5, r0 - shld r0, r1 - tst r1, r1 + shld r0, r5 + tst r5, r5 bt/s 4f ! if it's =0, go to 4f clrt .align 2 @@ -109,31 +89,30 @@ ENTRY(csum_partial) addc r0, r6 addc r2, r6 movt r0 - dt r1 + dt r5 bf/s 3b cmp/eq #1, r0 - ! here, we know r1==0 - addc r1, r6 ! add carry to r6 + ! here, we know r5==0 + addc r5, r6 ! add carry to r6 4: - mov r5, r0 + mov r1, r0 and #0x1c, r0 tst r0, r0 - bt 6f - ! 4 bytes or more remaining - mov r0, r1 - shlr2 r1 + bt/s 6f + mov r0, r5 + shlr2 r5 mov #0, r2 5: addc r2, r6 mov.l @r4+, r2 movt r0 - dt r1 + dt r5 bf/s 5b cmp/eq #1, r0 addc r2, r6 - addc r1, r6 ! r1==0 here, so it means add carry-bit + addc r5, r6 ! r5==0 here, so it means add carry-bit 6: - ! 3 bytes or less remaining + mov r1, r5 mov #3, r0 and r0, r5 tst r5, r5 @@ -159,16 +138,6 @@ ENTRY(csum_partial) mov #0, r0 addc r0, r6 9: - ! Check if the buffer was misaligned, if so realign sum - mov r7, r0 - tst #1, r0 - bt 10f - mov r6, r0 - shll8 r6 - shlr16 r0 - shlr8 r0 - or r0, r6 -10: rts mov r6, r0 diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index cdaef6501d76..b65703e06573 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -15,6 +15,8 @@ #include <linux/perf_event.h> #include <linux/uaccess.h> + +#include <asm/fpu.h> #include <asm/processor.h> #include <asm/io.h> diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index 862046f26981..195e739ee2be 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -376,8 +376,6 @@ static void __flush_cache_one(unsigned long addr, unsigned long phys, } while (--way_count != 0); } -extern void __weak sh4__flush_region_init(void); - /* * SH-4 has virtually indexed and physically tagged cache. 
*/ diff --git a/arch/sh/mm/cache-shx3.c b/arch/sh/mm/cache-shx3.c index 24c58b7dc022..dec039a75664 100644 --- a/arch/sh/mm/cache-shx3.c +++ b/arch/sh/mm/cache-shx3.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/io.h> #include <asm/cache.h> +#include <asm/cacheflush.h> #define CCR_CACHE_SNM 0x40000 /* Hardware-assisted synonym avoidance */ #define CCR_CACHE_IBE 0x1000000 /* ICBI broadcast */ diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c index 9bcaa5619eab..ceffd3ffc81e 100644 --- a/arch/sh/mm/cache.c +++ b/arch/sh/mm/cache.c @@ -320,30 +320,20 @@ void __init cpu_cache_init(void) goto skip; if (boot_cpu_data.type == CPU_J2) { - extern void __weak j2_cache_init(void); - j2_cache_init(); } else if (boot_cpu_data.family == CPU_FAMILY_SH2) { - extern void __weak sh2_cache_init(void); - sh2_cache_init(); } if (boot_cpu_data.family == CPU_FAMILY_SH2A) { - extern void __weak sh2a_cache_init(void); - sh2a_cache_init(); } if (boot_cpu_data.family == CPU_FAMILY_SH3) { - extern void __weak sh3_cache_init(void); - sh3_cache_init(); if ((boot_cpu_data.type == CPU_SH7705) && (boot_cpu_data.dcache.sets == 512)) { - extern void __weak sh7705_cache_init(void); - sh7705_cache_init(); } } @@ -351,14 +341,10 @@ void __init cpu_cache_init(void) if ((boot_cpu_data.family == CPU_FAMILY_SH4) || (boot_cpu_data.family == CPU_FAMILY_SH4A) || (boot_cpu_data.family == CPU_FAMILY_SH4AL_DSP)) { - extern void __weak sh4_cache_init(void); - sh4_cache_init(); if ((boot_cpu_data.type == CPU_SH7786) || (boot_cpu_data.type == CPU_SHX3)) { - extern void __weak shx3_cache_init(void); - shx3_cache_init(); } } diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c index 78c4b6e6d33b..fa3dc9428a73 100644 --- a/arch/sh/mm/nommu.c +++ b/arch/sh/mm/nommu.c @@ -10,6 +10,8 @@ #include <linux/init.h> #include <linux/string.h> #include <linux/mm.h> + +#include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/page.h> #include <linux/uaccess.h> diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c index cf7ce4b57359..3a4085ea0161 100644 --- a/arch/sh/mm/pgtable.c +++ b/arch/sh/mm/pgtable.c @@ -2,12 +2,14 @@ #include <linux/mm.h> #include <linux/slab.h> +#include <asm/pgalloc.h> + static struct kmem_cache *pgd_cachep; #if PAGETABLE_LEVELS > 2 static struct kmem_cache *pmd_cachep; #endif -void pgd_ctor(void *x) +static void pgd_ctor(void *x) { pgd_t *pgd = x; diff --git a/arch/sh/mm/tlbex_32.c b/arch/sh/mm/tlbex_32.c index 1c53868632ee..7d58578c15f4 100644 --- a/arch/sh/mm/tlbex_32.c +++ b/arch/sh/mm/tlbex_32.c @@ -14,6 +14,7 @@ #include <linux/kdebug.h> #include <asm/mmu_context.h> #include <asm/thread_info.h> +#include <asm/tlb.h> /* * Called with interrupts disabled. 
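
As an illustration (not part of the patch): the parisc hunk earlier and the s390 and sparc hunks that follow all apply the same fix — bpf_jit_binary_lock_ro() can now fail, so its return value must be checked and the JITed image discarded so the program falls back to the interpreter. A rough user-space sketch of that error-handling shape; the names below (lock_image_ro() and friends) are stand-ins, not kernel APIs:

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

struct jit_header { void *image; };
struct prog { void *bpf_func; bool jited; size_t jited_len; };

/* Stand-in: returns 0 on success, a negative value if the image cannot be
 * made read-only. */
static int lock_image_ro(struct jit_header *hdr)
{
	(void)hdr;
	return -1; /* simulate failure */
}

static void free_image(struct jit_header *hdr)
{
	free(hdr->image);
	hdr->image = NULL;
}

static void finalize_jit(struct prog *p, struct jit_header *hdr)
{
	if (lock_image_ro(hdr)) {
		/* Could not lock the image read-only: drop the JITed code
		 * and leave the program to the interpreter. */
		free_image(hdr);
		p->bpf_func = NULL;
		p->jited = false;
		p->jited_len = 0;
		return;
	}
	p->jited = true;
}

int main(void)
{
	struct jit_header hdr = { .image = malloc(64) };
	struct prog p = { 0 };

	finalize_jit(&p, &hdr);
	return p.jited ? 0 : 1;
}
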
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index fa0759bfe498..73bf0aea8baf 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1602,7 +1602,11 @@ skip_init_ctx: bpf_flush_icache(header, (u8 *)header + header->size); if (!prog->is_func || extra_pass) { - bpf_jit_binary_lock_ro(header); + if (bpf_jit_binary_lock_ro(header)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_off; + } } else { jit_data->ctx = ctx; jit_data->image = image_ptr; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 965d65edbae0..bc47bc9841ff 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -170,6 +170,7 @@ config X86 select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY select GENERIC_VDSO_TIME_NS + select GENERIC_VDSO_OVERFLOW_PROTECT select GUP_GET_PXX_LOW_HIGH if X86_PAE select HARDIRQS_SW_RESEND select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 @@ -466,6 +467,17 @@ config X86_X2APIC If you don't know what to do here, say N. +config X86_POSTED_MSI + bool "Enable MSI and MSI-x delivery by posted interrupts" + depends on X86_64 && IRQ_REMAP + help + This enables MSIs that are under interrupt remapping to be delivered as + posted interrupts to the host kernel. Interrupt throughput can + potentially be improved by coalescing CPU notifications during high + frequency bursts. + + If you don't know what to do here, say N. + config X86_MPPARSE bool "Enable MPS table" if ACPI default y diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index ec71846d28c9..0457a9d7e515 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -335,26 +335,6 @@ finish: sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); } -static void enforce_vmpl0(void) -{ - u64 attrs; - int err; - - /* - * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically - * higher) privilege level. Here, clear the VMPL1 permission mask of the - * GHCB page. If the guest is not running at VMPL0, this will fail. - * - * If the guest is running at VMPL0, it will succeed. Even if that operation - * modifies permission bits, it is still ok to do so currently because Linux - * SNP guests are supported only on VMPL0 so VMPL1 or higher permission masks - * changing is a don't-care. - */ - attrs = 1; - if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, attrs)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0); -} - /* * SNP_FEATURES_IMPL_REQ is the mask of SNP features that will need * guest side implementation for proper functioning of the guest. If any @@ -413,6 +393,85 @@ void snp_check_features(void) } } +/* Search for Confidential Computing blob in the EFI config table. */ +static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp) +{ + unsigned long cfg_table_pa; + unsigned int cfg_table_len; + int ret; + + ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len); + if (ret) + return NULL; + + return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa, + cfg_table_len, + EFI_CC_BLOB_GUID); +} + +/* + * Initial set up of SNP relies on information provided by the + * Confidential Computing blob, which can be passed to the boot kernel + * by firmware/bootloader in the following ways: + * + * - via an entry in the EFI config table + * - via a setup_data structure, as defined by the Linux Boot Protocol + * + * Scan for the blob in that order. 
+ */ +static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +{ + struct cc_blob_sev_info *cc_info; + + cc_info = find_cc_blob_efi(bp); + if (cc_info) + goto found_cc_info; + + cc_info = find_cc_blob_setup_data(bp); + if (!cc_info) + return NULL; + +found_cc_info: + if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + + return cc_info; +} + +/* + * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks + * will verify the SNP CPUID/MSR bits. + */ +static bool early_snp_init(struct boot_params *bp) +{ + struct cc_blob_sev_info *cc_info; + + if (!bp) + return false; + + cc_info = find_cc_blob(bp); + if (!cc_info) + return false; + + /* + * If a SNP-specific Confidential Computing blob is present, then + * firmware/bootloader have indicated SNP support. Verifying this + * involves CPUID checks which will be more reliable if the SNP + * CPUID table is used. See comments over snp_setup_cpuid_table() for + * more details. + */ + setup_cpuid_table(cc_info); + + /* + * Pass run-time kernel a pointer to CC info via boot_params so EFI + * config table doesn't need to be searched again during early startup + * phase. + */ + bp->cc_blob_address = (u32)(unsigned long)cc_info; + + return true; +} + /* * sev_check_cpu_support - Check for SEV support in the CPU capabilities * @@ -463,7 +522,7 @@ void sev_enable(struct boot_params *bp) bp->cc_blob_address = 0; /* - * Do an initial SEV capability check before snp_init() which + * Do an initial SEV capability check before early_snp_init() which * loads the CPUID page and the same checks afterwards are done * without the hypervisor and are trustworthy. * @@ -478,7 +537,7 @@ void sev_enable(struct boot_params *bp) * Setup/preliminary detection of SNP. This will be sanity-checked * against CPUID/MSR values later. */ - snp = snp_init(bp); + snp = early_snp_init(bp); /* Now repeat the checks with the SNP CPUID table. */ @@ -509,7 +568,20 @@ void sev_enable(struct boot_params *bp) if (!(get_hv_features() & GHCB_HV_FT_SNP)) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); - enforce_vmpl0(); + /* + * Enforce running at VMPL0. + * + * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically + * higher) privilege level. Here, clear the VMPL1 permission mask of the + * GHCB page. If the guest is not running at VMPL0, this will fail. + * + * If the guest is running at VMPL0, it will succeed. Even if that operation + * modifies permission bits, it is still ok to do so currently because Linux + * SNP guests running at VMPL0 only run at VMPL0, so VMPL1 or higher + * permission mask changes are a don't-care. + */ + if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, 1)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0); } if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) @@ -535,85 +607,6 @@ u64 sev_get_status(void) return m.q; } -/* Search for Confidential Computing blob in the EFI config table. 
*/ -static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp) -{ - unsigned long cfg_table_pa; - unsigned int cfg_table_len; - int ret; - - ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len); - if (ret) - return NULL; - - return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa, - cfg_table_len, - EFI_CC_BLOB_GUID); -} - -/* - * Initial set up of SNP relies on information provided by the - * Confidential Computing blob, which can be passed to the boot kernel - * by firmware/bootloader in the following ways: - * - * - via an entry in the EFI config table - * - via a setup_data structure, as defined by the Linux Boot Protocol - * - * Scan for the blob in that order. - */ -static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) -{ - struct cc_blob_sev_info *cc_info; - - cc_info = find_cc_blob_efi(bp); - if (cc_info) - goto found_cc_info; - - cc_info = find_cc_blob_setup_data(bp); - if (!cc_info) - return NULL; - -found_cc_info: - if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); - - return cc_info; -} - -/* - * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks - * will verify the SNP CPUID/MSR bits. - */ -bool snp_init(struct boot_params *bp) -{ - struct cc_blob_sev_info *cc_info; - - if (!bp) - return false; - - cc_info = find_cc_blob(bp); - if (!cc_info) - return false; - - /* - * If a SNP-specific Confidential Computing blob is present, then - * firmware/bootloader have indicated SNP support. Verifying this - * involves CPUID checks which will be more reliable if the SNP - * CPUID table is used. See comments over snp_setup_cpuid_table() for - * more details. - */ - setup_cpuid_table(cc_info); - - /* - * Pass run-time kernel a pointer to CC info via boot_params so EFI - * config table doesn't need to be searched again during early startup - * phase. - */ - bp->cc_blob_address = (u32)(unsigned long)cc_info; - - return true; -} - void sev_prep_identity_maps(unsigned long top_level_pgt) { /* diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c index 89c1476fcdd9..f004a4dc74c2 100644 --- a/arch/x86/entry/entry_fred.c +++ b/arch/x86/entry/entry_fred.c @@ -117,6 +117,8 @@ static idtentry_t sysvec_table[NR_SYSTEM_VECTORS] __ro_after_init = { SYSVEC(POSTED_INTR_VECTOR, kvm_posted_intr_ipi), SYSVEC(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi), SYSVEC(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi), + + SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR, posted_msi_notification), }; static bool fred_setup_done __initdata; diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index f896eed4516c..5af926c050f0 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -56,6 +56,8 @@ static inline void disable_acpi(void) extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); +extern int acpi_blacklisted(void); + static inline void acpi_noirq_set(void) { acpi_noirq = 1; } static inline void acpi_disable_pci(void) { diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 0cb2396de066..ba99ef75f56c 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -286,20 +286,6 @@ static inline int alternatives_text_reserved(void *start, void *end) asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \ : : "i" (0), ## input) -/* - * This is similar to alternative_input. But it has two features and - * respective instructions. 
- * - * If CPU has feature2, newinstr2 is used. - * Otherwise, if CPU has feature1, newinstr1 is used. - * Otherwise, oldinstr is used. - */ -#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \ - ft_flags2, input...) \ - asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \ - newinstr2, ft_flags2) \ - : : "i" (0), ## input) - /* Like alternative_input, but with a single output argument */ #define alternative_io(oldinstr, newinstr, ft_flags, output, input...) \ asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 839c58f95114..9327eb00e96d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -14,6 +14,7 @@ #include <asm/msr.h> #include <asm/hardirq.h> #include <asm/io.h> +#include <asm/posted_intr.h> #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -500,6 +501,11 @@ static inline bool lapic_vector_set_in_irr(unsigned int vector) return !!(irr & (1U << (vector % 32))); } +static inline bool is_vector_pending(unsigned int vector) +{ + return lapic_vector_set_in_irr(vector) || pi_pending_this_cpu(vector); +} + /* * Warm reset vector position: */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index b96f9cabea0c..0b9611da6c53 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -149,8 +149,12 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; extern void setup_clear_cpu_cap(unsigned int bit); extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); -#define setup_force_cpu_cap(bit) do { \ - set_cpu_cap(&boot_cpu_data, bit); \ +#define setup_force_cpu_cap(bit) do { \ + \ + if (!boot_cpu_has(bit)) \ + WARN_ON(alternatives_patched); \ + \ + set_cpu_cap(&boot_cpu_data, bit); \ set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index fbc7722b87d1..c67fa6ad098a 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -44,10 +44,16 @@ typedef struct { unsigned int irq_hv_reenlightenment_count; unsigned int hyperv_stimer0_count; #endif +#ifdef CONFIG_X86_POSTED_MSI + unsigned int posted_msi_notification_count; +#endif } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +#ifdef CONFIG_X86_POSTED_MSI +DECLARE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); +#endif #define __ARCH_IRQ_STAT #define inc_irq_stat(member) this_cpu_inc(irq_stat.member) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 749c7411d2f1..d4f24499b256 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -751,6 +751,12 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested # define fred_sysvec_kvm_posted_intr_nested_ipi NULL #endif +# ifdef CONFIG_X86_POSTED_MSI +DECLARE_IDTENTRY_SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR, sysvec_posted_msi_notification); +#else +# define fred_sysvec_posted_msi_notification NULL +# endif + #if IS_ENABLED(CONFIG_HYPERV) DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 7a2ed154a5e1..5036f13ab69f 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -50,6 +50,13 @@ static inline struct 
irq_domain *arch_get_ir_parent_domain(void) return x86_vector_domain; } +extern bool enable_posted_msi; + +static inline bool posted_msi_supported(void) +{ + return enable_posted_msi && irq_remapping_cap(IRQ_POSTING_CAP); +} + #else /* CONFIG_IRQ_REMAP */ static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index d18bfb238f66..13aea8fc3d45 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -97,10 +97,16 @@ #define LOCAL_TIMER_VECTOR 0xec +/* + * Posted interrupt notification vector for all device MSIs delivered to + * the host kernel. + */ +#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb + #define NR_VECTORS 256 #ifdef CONFIG_X86_LOCAL_APIC -#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +#define FIRST_SYSTEM_VECTOR POSTED_MSI_NOTIFICATION_VECTOR #else #define FIRST_SYSTEM_VECTOR NR_VECTORS #endif diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index c72c7ff78fcd..d593e52e6635 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -16,10 +16,10 @@ extern int pic_mode; * Summit or generic (i.e. installer) kernels need lots of bus entries. * Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */ -#if CONFIG_BASE_SMALL == 0 -# define MAX_MP_BUSSES 260 -#else +#ifdef CONFIG_BASE_SMALL # define MAX_MP_BUSSES 32 +#else +# define MAX_MP_BUSSES 260 #endif #define MAX_IRQ_SOURCES 256 diff --git a/arch/x86/include/asm/posted_intr.h b/arch/x86/include/asm/posted_intr.h new file mode 100644 index 000000000000..de788b400fba --- /dev/null +++ b/arch/x86/include/asm/posted_intr.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_POSTED_INTR_H +#define _X86_POSTED_INTR_H +#include <asm/irq_vectors.h> + +#define POSTED_INTR_ON 0 +#define POSTED_INTR_SN 1 + +#define PID_TABLE_ENTRY_VALID 1 + +/* Posted-Interrupt Descriptor */ +struct pi_desc { + union { + u32 pir[8]; /* Posted interrupt requested */ + u64 pir64[4]; + }; + union { + struct { + u16 notifications; /* Suppress and outstanding bits */ + u8 nv; + u8 rsvd_2; + u32 ndst; + }; + u64 control; + }; + u32 rsvd[6]; +} __aligned(64); + +static inline bool pi_test_and_set_on(struct pi_desc *pi_desc) +{ + return test_and_set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) +{ + return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); +} + +static inline bool pi_is_pir_empty(struct pi_desc *pi_desc) +{ + return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS); +} + +static inline void pi_set_sn(struct pi_desc *pi_desc) +{ + set_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline void pi_set_on(struct pi_desc *pi_desc) +{ + set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline void pi_clear_on(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline void pi_clear_sn(struct pi_desc *pi_desc) +{ + clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_on(struct pi_desc *pi_desc) +{ 
+ return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control); +} + +static inline bool pi_test_sn(struct pi_desc *pi_desc) +{ + return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control); +} + +/* Non-atomic helpers */ +static inline void __pi_set_sn(struct pi_desc *pi_desc) +{ + pi_desc->notifications |= BIT(POSTED_INTR_SN); +} + +static inline void __pi_clear_sn(struct pi_desc *pi_desc) +{ + pi_desc->notifications &= ~BIT(POSTED_INTR_SN); +} + +#ifdef CONFIG_X86_POSTED_MSI +/* + * Not all external vectors are subject to interrupt remapping, e.g. IOMMU's + * own interrupts. Here we do not distinguish them since those vector bits in + * PIR will always be zero. + */ +static inline bool pi_pending_this_cpu(unsigned int vector) +{ + struct pi_desc *pid = this_cpu_ptr(&posted_msi_pi_desc); + + if (WARN_ON_ONCE(vector > NR_VECTORS || vector < FIRST_EXTERNAL_VECTOR)) + return false; + + return test_bit(vector, (unsigned long *)pid->pir); +} + +extern void intel_posted_msi_init(void); +#else +static inline bool pi_pending_this_cpu(unsigned int vector) { return false; } + +static inline void intel_posted_msi_init(void) {}; +#endif /* X86_POSTED_MSI */ + +#endif /* _X86_POSTED_INTR_H */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 93ed60080cfe..ca20cc4e5826 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -140,7 +140,7 @@ struct secrets_os_area { #define VMPCK_KEY_LEN 32 /* See the SNP spec version 0.9 for secrets page format */ -struct snp_secrets_page_layout { +struct snp_secrets_page { u32 version; u32 imien : 1, rsvd1 : 31; diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 1be13b2dfe8b..64df897c0ee3 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -37,8 +37,6 @@ extern int phys_to_target_node(phys_addr_t start); #define phys_to_target_node phys_to_target_node extern int memory_add_physaddr_to_nid(u64 start); #define memory_add_physaddr_to_nid memory_add_physaddr_to_nid -extern int numa_fill_memblks(u64 start, u64 end); -#define numa_fill_memblks numa_fill_memblks #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 345aafbc1964..6259f1937fe7 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -15,7 +15,7 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len); -extern void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len); +extern void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len); /* * Clear and restore the kernel write-protection flag on the local CPU. diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 8e048ca980df..0ef36190abe6 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -300,7 +300,7 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) #define vdso_cycles_ok arch_vdso_cycles_ok /* - * x86 specific delta calculation. + * x86 specific calculation of nanoseconds for the current cycle count * * The regular implementation assumes that clocksource reads are globally * monotonic. The TSC can be slightly off across sockets which can cause @@ -308,8 +308,8 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) * jump. * * Therefore it needs to be verified that @cycles are greater than - * @last. 
If not then use @last, which is the base time of the current - * conversion period. + * @vd->cycles_last. If not then use @vd->cycles_last, which is the base + * time of the current conversion period. * * This variant also uses a custom mask because while the clocksource mask of * all the VDSO capable clocksources on x86 is U64_MAX, the above code uses @@ -317,25 +317,37 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) * declares everything with the MSB/Sign-bit set as invalid. Therefore the * effective mask is S64_MAX. */ -static __always_inline -u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base) { - /* - * Due to the MSB/Sign-bit being used as invalid marker (see - * arch_vdso_cycles_valid() above), the effective mask is S64_MAX. - */ - u64 delta = (cycles - last) & S64_MAX; + u64 delta = cycles - vd->cycle_last; /* - * Due to the above mentioned TSC wobbles, filter out negative motion. - * Per the above masking, the effective sign bit is now bit 62. + * Negative motion and deltas which can cause multiplication + * overflow require special treatment. This check covers both as + * negative motion is guaranteed to be greater than @vd::max_cycles + * due to unsigned comparison. + * + * Due to the MSB/Sign-bit being used as invalid marker (see + * arch_vdso_cycles_valid() above), the effective mask is S64_MAX, + * but that case is also unlikely and will also take the unlikely path + * here. */ - if (unlikely(delta & (1ULL << 62))) - return 0; + if (unlikely(delta > vd->max_cycles)) { + /* + * Due to the above mentioned TSC wobbles, filter out + * negative motion. Per the above masking, the effective + * sign bit is now bit 62. + */ + if (delta & (1ULL << 62)) + return base >> vd->shift; + + /* Handle multiplication overflow gracefully */ + return mul_u64_u32_add_u64_shr(delta & S64_MAX, vd->mult, base, vd->shift); + } - return delta * mult; + return ((delta * vd->mult) + base) >> vd->shift; } -#define vdso_calc_delta vdso_calc_delta +#define vdso_calc_ns vdso_calc_ns #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 45a280f2161c..7555c15b7183 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -125,6 +125,20 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] = }; /* + * Nomenclature for variable names to simplify and clarify this code and ease + * any potential staring at it: + * + * @instr: source address of the original instructions in the kernel text as + * generated by the compiler. + * + * @buf: temporary buffer on which the patching operates. This buffer is + * eventually text-poked into the kernel image. + * + * @replacement/@repl: pointer to the opcodes which are replacing @instr, located + * in the .altinstr_replacement section. + */ + +/* * Fill the buffer with a single effective instruction of size @len. * * In order not to issue an ORC stack depth tracking CFI entry (Call Frame Info) @@ -133,28 +147,28 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] = * each single-byte NOPs). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and * *jump* over instead of executing long and daft NOPs. 
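For reference, the overflow branch added to vdso_calc_ns() above is a 64x32+64 multiply-add evaluated with a wide intermediate; mul_u64_u32_add_u64_shr() is the kernel helper for exactly that shape. A minimal model of it, using a compiler-provided 128-bit type instead of the helper (names and types here are ours):

/*
 * Sketch only: what (delta * mult + base) >> shift means when the product
 * can exceed 64 bits. The kernel uses mul_u64_u32_add_u64_shr() so that
 * 32-bit builds avoid a generic 128-bit multiply.
 */
static inline unsigned long long calc_ns_wide(unsigned long long delta,
					      unsigned int mult,
					      unsigned long long base,
					      unsigned int shift)
{
	unsigned __int128 ns = (unsigned __int128)delta * mult + base;

	return (unsigned long long)(ns >> shift);
}
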
*/ -static void add_nop(u8 *instr, unsigned int len) +static void add_nop(u8 *buf, unsigned int len) { - u8 *target = instr + len; + u8 *target = buf + len; if (!len) return; if (len <= ASM_NOP_MAX) { - memcpy(instr, x86_nops[len], len); + memcpy(buf, x86_nops[len], len); return; } if (len < 128) { - __text_gen_insn(instr, JMP8_INSN_OPCODE, instr, target, JMP8_INSN_SIZE); - instr += JMP8_INSN_SIZE; + __text_gen_insn(buf, JMP8_INSN_OPCODE, buf, target, JMP8_INSN_SIZE); + buf += JMP8_INSN_SIZE; } else { - __text_gen_insn(instr, JMP32_INSN_OPCODE, instr, target, JMP32_INSN_SIZE); - instr += JMP32_INSN_SIZE; + __text_gen_insn(buf, JMP32_INSN_OPCODE, buf, target, JMP32_INSN_SIZE); + buf += JMP32_INSN_SIZE; } - for (;instr < target; instr++) - *instr = INT3_INSN_OPCODE; + for (;buf < target; buf++) + *buf = INT3_INSN_OPCODE; } extern s32 __retpoline_sites[], __retpoline_sites_end[]; @@ -187,12 +201,12 @@ static bool insn_is_nop(struct insn *insn) * Find the offset of the first non-NOP instruction starting at @offset * but no further than @len. */ -static int skip_nops(u8 *instr, int offset, int len) +static int skip_nops(u8 *buf, int offset, int len) { struct insn insn; for (; offset < len; offset += insn.length) { - if (insn_decode_kernel(&insn, &instr[offset])) + if (insn_decode_kernel(&insn, &buf[offset])) break; if (!insn_is_nop(&insn)) @@ -203,66 +217,32 @@ static int skip_nops(u8 *instr, int offset, int len) } /* - * Optimize a sequence of NOPs, possibly preceded by an unconditional jump - * to the end of the NOP sequence into a single NOP. - */ -static bool -__optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev, int *target) -{ - int i = *next - insn->length; - - switch (insn->opcode.bytes[0]) { - case JMP8_INSN_OPCODE: - case JMP32_INSN_OPCODE: - *prev = i; - *target = *next + insn->immediate.value; - return false; - } - - if (insn_is_nop(insn)) { - int nop = i; - - *next = skip_nops(instr, *next, len); - if (*target && *next == *target) - nop = *prev; - - add_nop(instr + nop, *next - nop); - DUMP_BYTES(ALT, instr, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, *next); - return true; - } - - *target = 0; - return false; -} - -/* * "noinline" to cause control flow change and thus invalidate I$ and * cause refetch after modification. */ -static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) +static void noinline optimize_nops(const u8 * const instr, u8 *buf, size_t len) { - int prev, target = 0; - for (int next, i = 0; i < len; i = next) { struct insn insn; - if (insn_decode_kernel(&insn, &instr[i])) + if (insn_decode_kernel(&insn, &buf[i])) return; next = i + insn.length; - __optimize_nops(instr, len, &insn, &next, &prev, &target); - } -} + if (insn_is_nop(&insn)) { + int nop = i; -static void __init_or_module noinline optimize_nops_inplace(u8 *instr, size_t len) -{ - unsigned long flags; + /* Has the NOP already been optimized? 
*/ + if (i + insn.length == len) + return; - local_irq_save(flags); - optimize_nops(instr, len); - sync_core(); - local_irq_restore(flags); + next = skip_nops(buf, next, len); + + add_nop(buf + nop, next - nop); + DUMP_BYTES(ALT, buf, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, next); + } + } } /* @@ -335,11 +315,9 @@ bool need_reloc(unsigned long offset, u8 *src, size_t src_len) return (target < src || target > src + src_len); } -void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) +static void __apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len) { - int prev, target = 0; - - for (int next, i = 0; i < len; i = next) { + for (int next, i = 0; i < instrlen; i = next) { struct insn insn; if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i]))) @@ -347,9 +325,6 @@ void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) next = i + insn.length; - if (__optimize_nops(buf, len, &insn, &next, &prev, &target)) - continue; - switch (insn.opcode.bytes[0]) { case 0x0f: if (insn.opcode.bytes[1] < 0x80 || @@ -361,10 +336,10 @@ void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) case JMP8_INSN_OPCODE: case JMP32_INSN_OPCODE: case CALL_INSN_OPCODE: - if (need_reloc(next + insn.immediate.value, src, src_len)) { + if (need_reloc(next + insn.immediate.value, repl, repl_len)) { apply_reloc(insn.immediate.nbytes, buf + i + insn_offset_immediate(&insn), - src - dest); + repl - instr); } /* @@ -372,7 +347,7 @@ void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) */ if (insn.opcode.bytes[0] == JMP32_INSN_OPCODE) { s32 imm = insn.immediate.value; - imm += src - dest; + imm += repl - instr; imm += JMP32_INSN_SIZE - JMP8_INSN_SIZE; if ((imm >> 31) == (imm >> 7)) { buf[i+0] = JMP8_INSN_OPCODE; @@ -385,15 +360,21 @@ void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) } if (insn_rip_relative(&insn)) { - if (need_reloc(next + insn.displacement.value, src, src_len)) { + if (need_reloc(next + insn.displacement.value, repl, repl_len)) { apply_reloc(insn.displacement.nbytes, buf + i + insn_offset_displacement(&insn), - src - dest); + repl - instr); } } } } +void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len) +{ + __apply_relocation(buf, instr, instrlen, repl, repl_len); + optimize_nops(instr, buf, repl_len); +} + /* Low-level backend functions usable from alternative code replacements. */ DEFINE_ASM_FUNC(nop_func, "", .entry.text); EXPORT_SYMBOL_GPL(nop_func); @@ -464,9 +445,9 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a) void __init_or_module noinline apply_alternatives(struct alt_instr *start, struct alt_instr *end) { - struct alt_instr *a; - u8 *instr, *replacement; u8 insn_buff[MAX_PATCH_LEN]; + u8 *instr, *replacement; + struct alt_instr *a; DPRINTK(ALT, "alt table %px, -> %px", start, end); @@ -504,7 +485,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * patch if feature is *NOT* present. 
*/ if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) { - optimize_nops_inplace(instr, a->instrlen); + memcpy(insn_buff, instr, a->instrlen); + optimize_nops(instr, insn_buff, a->instrlen); + text_poke_early(instr, insn_buff, a->instrlen); continue; } @@ -526,7 +509,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, for (; insn_buff_sz < a->instrlen; insn_buff_sz++) insn_buff[insn_buff_sz] = 0x90; - apply_relocation(insn_buff, a->instrlen, instr, replacement, a->replacementlen); + apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen); DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr); DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement); @@ -761,7 +744,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) len = patch_retpoline(addr, &insn, bytes); if (len == insn.length) { - optimize_nops(bytes, len); + optimize_nops(addr, bytes, len); DUMP_BYTES(RETPOLINE, ((u8*)addr), len, "%px: orig: ", addr); DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr); text_poke_early(addr, bytes, len); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0d22aefbde7f..66fd4b2a37a3 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -631,7 +631,7 @@ void lapic_update_tsc_freq(void) static __initdata int lapic_cal_loops = -1; static __initdata long lapic_cal_t1, lapic_cal_t2; static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; -static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; +static __initdata u32 lapic_cal_pm1, lapic_cal_pm2; static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; /* @@ -641,7 +641,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) { unsigned long long tsc = 0; long tapic = apic_read(APIC_TMCCT); - unsigned long pm = acpi_pm_read_early(); + u32 pm = acpi_pm_read_early(); if (boot_cpu_has(X86_FEATURE_TSC)) tsc = rdtsc(); @@ -666,7 +666,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) } static int __init -calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) +calibrate_by_pmtimer(u32 deltapm, long *delta, long *deltatsc) { const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; const long pm_thresh = pm_100ms / 100; @@ -677,7 +677,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) return -1; #endif - apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm); + apic_printk(APIC_VERBOSE, "... PM-Timer delta = %u\n", deltapm); /* Check, if the PM timer is available */ if (!deltapm) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 185738c72766..9eec52925fa3 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -965,7 +965,7 @@ static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr) lockdep_assert_held(&vector_lock); hlist_for_each_entry_safe(apicd, tmp, &cl->head, clist) { - unsigned int irr, vector = apicd->prev_vector; + unsigned int vector = apicd->prev_vector; /* * Paranoia: Check if the vector that needs to be cleaned @@ -979,8 +979,7 @@ static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr) * fixup_irqs() was just called to scan IRR for set bits and * forward them to new destination CPUs via IPIs. */ - irr = check_irr ? 
apic_read(APIC_IRR + (vector / 32 * 0x10)) : 0; - if (irr & (1U << (vector % 32))) { + if (check_irr && is_vector_pending(vector)) { pr_warn_once("Moved interrupt pending in old target APIC %u\n", apicd->irq); rearm = true; continue; diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index e92ff0c11db8..465647456753 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -185,8 +185,7 @@ static void *patch_dest(void *dest, bool direct) u8 *pad = dest - tsize; memcpy(insn_buff, skl_call_thunk_template, tsize); - apply_relocation(insn_buff, tsize, pad, - skl_call_thunk_template, tsize); + apply_relocation(insn_buff, pad, tsize, skl_call_thunk_template, tsize); /* Already patched? */ if (!bcmp(pad, insn_buff, tsize)) @@ -308,8 +307,7 @@ static bool is_callthunk(void *addr) pad = (void *)(dest - tmpl_size); memcpy(insn_buff, skl_call_thunk_template, tmpl_size); - apply_relocation(insn_buff, tmpl_size, pad, - skl_call_thunk_template, tmpl_size); + apply_relocation(insn_buff, pad, tmpl_size, skl_call_thunk_template, tmpl_size); return !bcmp(pad, insn_buff, tmpl_size); } @@ -327,8 +325,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip) return 0; memcpy(insn_buff, skl_call_thunk_template, tmpl_size); - apply_relocation(insn_buff, tmpl_size, ip, - skl_call_thunk_template, tmpl_size); + apply_relocation(insn_buff, ip, tmpl_size, skl_call_thunk_template, tmpl_size); memcpy(*pprog, insn_buff, tmpl_size); *pprog += tmpl_size; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cdaa795a9371..2b170da84f97 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -68,6 +68,7 @@ #include <asm/traps.h> #include <asm/sev.h> #include <asm/tdx.h> +#include <asm/posted_intr.h> #include "cpu.h" @@ -2222,6 +2223,8 @@ void cpu_init(void) barrier(); x2apic_setup(); + + intel_posted_msi_init(); } mmgrab(&init_mm); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 946813d816bf..b7d9f530ae16 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -114,6 +114,9 @@ static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) if (WARN_ON(feature >= MAX_FEATURE_BITS)) return; + if (boot_cpu_has(feature)) + WARN_ON(alternatives_patched); + clear_feature(c, feature); /* Collect all features to disable, handling dependencies */ diff --git a/arch/x86/kernel/cpu/mce/genpool.c b/arch/x86/kernel/cpu/mce/genpool.c index fbe8b61c3413..4284749ec803 100644 --- a/arch/x86/kernel/cpu/mce/genpool.c +++ b/arch/x86/kernel/cpu/mce/genpool.c @@ -16,14 +16,14 @@ * used to save error information organized in a lock-less list. * * This memory pool is only to be used to save MCE records in MCE context. - * MCE events are rare, so a fixed size memory pool should be enough. Use - * 2 pages to save MCE events for now (~80 MCE records at most). + * MCE events are rare, so a fixed size memory pool should be enough. + * Allocate on a sliding scale based on number of CPUs. 
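The sliding-scale policy described here is implemented in the replacement hunk that follows: max(MCE_MIN_ENTRIES, num_possible_cpus() * MCE_PER_CPU) records, each occupying the pool's minimum allocation of 1 << order bytes. A back-of-the-envelope helper, purely illustrative (the real entry size comes from struct mce_evt_llist):

/*
 * Sketch only: pool size in bytes for nr_cpus CPUs, given the pool's minimum
 * allocation order. Under the new policy a 2-CPU guest keeps the 80-record
 * floor, while a 512-CPU box gets 512 * 2 = 1024 records.
 */
static unsigned int mce_pool_bytes(unsigned int nr_cpus,
				   unsigned int entry_order)
{
	unsigned int records = nr_cpus * 2;	/* MCE_PER_CPU */

	if (records < 80)			/* MCE_MIN_ENTRIES */
		records = 80;
	return records << entry_order;
}
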
*/ -#define MCE_POOLSZ (2 * PAGE_SIZE) +#define MCE_MIN_ENTRIES 80 +#define MCE_PER_CPU 2 static struct gen_pool *mce_evt_pool; static LLIST_HEAD(mce_event_llist); -static char gen_pool_buf[MCE_POOLSZ]; /* * Compare the record "t" with each of the records on list "l" to see if @@ -118,22 +118,32 @@ int mce_gen_pool_add(struct mce *mce) static int mce_gen_pool_create(void) { - struct gen_pool *tmpp; + int mce_numrecords, mce_poolsz, order; + struct gen_pool *gpool; int ret = -ENOMEM; - - tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1); - if (!tmpp) - goto out; - - ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1); + void *mce_pool; + + order = order_base_2(sizeof(struct mce_evt_llist)); + gpool = gen_pool_create(order, -1); + if (!gpool) + return ret; + + mce_numrecords = max(MCE_MIN_ENTRIES, num_possible_cpus() * MCE_PER_CPU); + mce_poolsz = mce_numrecords * (1 << order); + mce_pool = kmalloc(mce_poolsz, GFP_KERNEL); + if (!mce_pool) { + gen_pool_destroy(gpool); + return ret; + } + ret = gen_pool_add(gpool, (unsigned long)mce_pool, mce_poolsz, -1); if (ret) { - gen_pool_destroy(tmpp); - goto out; + gen_pool_destroy(gpool); + kfree(mce_pool); + return ret; } - mce_evt_pool = tmpp; + mce_evt_pool = gpool; -out: return ret; } diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 13b45b9c806d..c0d56c02b8da 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -84,8 +84,6 @@ struct microcode_amd { unsigned int mpb[]; }; -#define PATCH_MAX_SIZE (3 * PAGE_SIZE) - static struct equiv_cpu_table { unsigned int num_entries; struct equiv_cpu_entry *entry; @@ -465,7 +463,7 @@ static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, siz return !__apply_microcode_amd(mc); } -static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) +static bool get_builtin_microcode(struct cpio_data *cp, u8 family) { char fw_name[36] = "amd-ucode/microcode_amd.bin"; struct firmware fw; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 232026a239a6..b3658d11e7b6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -60,11 +60,6 @@ module_param(force_minrev, bool, S_IRUSR | S_IWUSR); */ struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -struct cpu_info_ctx { - struct cpu_signature *cpu_sig; - int err; -}; - /* * Those patch levels cannot be updated to newer ones and thus should be final. 
*/ diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 19b4fdb94a36..a113d9aba553 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -56,14 +56,9 @@ int max_name_width, max_data_width; */ bool rdt_alloc_capable; -static void -mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, - struct rdt_resource *r); -static void -cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); -static void -mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, - struct rdt_resource *r); +static void mba_wrmsr_intel(struct msr_param *m); +static void cat_wrmsr(struct msr_param *m); +static void mba_wrmsr_amd(struct msr_param *m); #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains) @@ -309,12 +304,11 @@ static void rdt_get_cdp_l2_config(void) rdt_get_cdp_config(RDT_RESOURCE_L2); } -static void -mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) +static void mba_wrmsr_amd(struct msr_param *m) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom); unsigned int i; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); for (i = m->low; i < m->high; i++) wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); @@ -334,25 +328,22 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) return r->default_ctrl; } -static void -mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, - struct rdt_resource *r) +static void mba_wrmsr_intel(struct msr_param *m) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom); unsigned int i; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); /* Write the delay values for mba. 
*/ for (i = m->low; i < m->high; i++) - wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], r)); + wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res)); } -static void -cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) +static void cat_wrmsr(struct msr_param *m) { + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom); unsigned int i; - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); for (i = m->low; i < m->high; i++) wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]); @@ -362,6 +353,8 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r) { struct rdt_domain *d; + lockdep_assert_cpus_held(); + list_for_each_entry(d, &r->domains, list) { /* Find the domain that contains this CPU */ if (cpumask_test_cpu(cpu, &d->cpu_mask)) @@ -378,19 +371,11 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r) void rdt_ctrl_update(void *arg) { + struct rdt_hw_resource *hw_res; struct msr_param *m = arg; - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res); - struct rdt_resource *r = m->res; - int cpu = smp_processor_id(); - struct rdt_domain *d; - d = get_domain_from_cpu(cpu, r); - if (d) { - hw_res->msr_update(d, m, r); - return; - } - pr_warn_once("cpu %d not found in any domain for resource %s\n", - cpu, r->name); + hw_res = resctrl_to_arch_res(m->res); + hw_res->msr_update(m); } /* @@ -463,9 +448,11 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) hw_dom->ctrl_val = dc; setup_default_ctrlval(r, dc); + m.res = r; + m.dom = d; m.low = 0; m.high = hw_res->num_closid; - hw_res->msr_update(d, &m, r); + hw_res->msr_update(&m); return 0; } diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 7997b47743a2..b7291f60399c 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -272,22 +272,6 @@ static u32 get_config_index(u32 closid, enum resctrl_conf_type type) } } -static bool apply_config(struct rdt_hw_domain *hw_dom, - struct resctrl_staged_config *cfg, u32 idx, - cpumask_var_t cpu_mask) -{ - struct rdt_domain *dom = &hw_dom->d_resctrl; - - if (cfg->new_ctrl != hw_dom->ctrl_val[idx]) { - cpumask_set_cpu(cpumask_any(&dom->cpu_mask), cpu_mask); - hw_dom->ctrl_val[idx] = cfg->new_ctrl; - - return true; - } - - return false; -} - int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val) { @@ -302,9 +286,10 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, hw_dom->ctrl_val[idx] = cfg_val; msr_param.res = r; + msr_param.dom = d; msr_param.low = idx; msr_param.high = idx + 1; - hw_res->msr_update(d, &msr_param, r); + hw_res->msr_update(&msr_param); return 0; } @@ -315,48 +300,39 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) struct rdt_hw_domain *hw_dom; struct msr_param msr_param; enum resctrl_conf_type t; - cpumask_var_t cpu_mask; struct rdt_domain *d; u32 idx; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) - return -ENOMEM; - - msr_param.res = NULL; list_for_each_entry(d, &r->domains, list) { hw_dom = resctrl_to_arch_dom(d); + msr_param.res = NULL; for (t = 0; t < CDP_NUM_TYPES; t++) { cfg = &hw_dom->d_resctrl.staged_config[t]; if (!cfg->have_new_ctrl) 
continue; idx = get_config_index(closid, t); - if (!apply_config(hw_dom, cfg, idx, cpu_mask)) + if (cfg->new_ctrl == hw_dom->ctrl_val[idx]) continue; + hw_dom->ctrl_val[idx] = cfg->new_ctrl; if (!msr_param.res) { msr_param.low = idx; msr_param.high = msr_param.low + 1; msr_param.res = r; + msr_param.dom = d; } else { msr_param.low = min(msr_param.low, idx); msr_param.high = max(msr_param.high, idx + 1); } } + if (msr_param.res) + smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1); } - if (cpumask_empty(cpu_mask)) - goto done; - - /* Update resource control msr on all the CPUs. */ - on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1); - -done: - free_cpumask_var(cpu_mask); - return 0; } diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 1a8687f8073a..f1d926832ec8 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -379,11 +379,13 @@ static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r) /** * struct msr_param - set a range of MSRs from a domain * @res: The resource to use + * @dom: The domain to update * @low: Beginning index from base MSR * @high: End index */ struct msr_param { struct rdt_resource *res; + struct rdt_domain *dom; u32 low; u32 high; }; @@ -443,8 +445,7 @@ struct rdt_hw_resource { struct rdt_resource r_resctrl; u32 num_closid; unsigned int msr_base; - void (*msr_update) (struct rdt_domain *d, struct msr_param *m, - struct rdt_resource *r); + void (*msr_update)(struct msr_param *m); unsigned int mon_scale; unsigned int mbm_width; unsigned int mbm_cfg_mask; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index c34a35ec0f03..2345e6836593 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -24,6 +24,7 @@ #include <asm/resctrl.h> #include "internal.h" +#include "trace.h" /** * struct rmid_entry - dirty tracking for all RMID. @@ -354,6 +355,16 @@ void __check_limbo(struct rdt_domain *d, bool force_free) rmid_dirty = true; } else { rmid_dirty = (val >= resctrl_rmid_realloc_threshold); + + /* + * x86's CLOSID and RMID are independent numbers, so the entry's + * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the + * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't + * used to select the configuration. It is thus necessary to track both + * CLOSID and RMID because there may be dependencies between them + * on some architectures. 
+ */ + trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->id, val); } if (force_free || !rmid_dirty) { diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 04584a76ceb4..aacf236dfe3b 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -31,7 +31,7 @@ #include "internal.h" #define CREATE_TRACE_POINTS -#include "pseudo_lock_event.h" +#include "trace.h" /* * The bits needed to disable hardware prefetching varies based on the diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 011e17efb1a6..02f213f1c51c 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2813,16 +2813,12 @@ static int reset_all_ctrls(struct rdt_resource *r) struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_hw_domain *hw_dom; struct msr_param msr_param; - cpumask_var_t cpu_mask; struct rdt_domain *d; int i; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) - return -ENOMEM; - msr_param.res = r; msr_param.low = 0; msr_param.high = hw_res->num_closid; @@ -2834,17 +2830,13 @@ static int reset_all_ctrls(struct rdt_resource *r) */ list_for_each_entry(d, &r->domains, list) { hw_dom = resctrl_to_arch_dom(d); - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); for (i = 0; i < hw_res->num_closid; i++) hw_dom->ctrl_val[i] = r->default_ctrl; + msr_param.dom = d; + smp_call_function_any(&d->cpu_mask, rdt_ctrl_update, &msr_param, 1); } - /* Update CBM on all the CPUs in cpu_mask */ - on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1); - - free_cpumask_var(cpu_mask); - return 0; } diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h b/arch/x86/kernel/cpu/resctrl/trace.h index 428ebbd4270b..2a506316b303 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h +++ b/arch/x86/kernel/cpu/resctrl/trace.h @@ -2,8 +2,8 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM resctrl -#if !defined(_TRACE_PSEUDO_LOCK_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_PSEUDO_LOCK_H +#if !defined(_TRACE_RESCTRL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RESCTRL_H #include <linux/tracepoint.h> @@ -35,9 +35,25 @@ TRACE_EVENT(pseudo_lock_l3, TP_printk("hits=%llu miss=%llu", __entry->l3_hits, __entry->l3_miss)); -#endif /* _TRACE_PSEUDO_LOCK_H */ +TRACE_EVENT(mon_llc_occupancy_limbo, + TP_PROTO(u32 ctrl_hw_id, u32 mon_hw_id, int domain_id, u64 llc_occupancy_bytes), + TP_ARGS(ctrl_hw_id, mon_hw_id, domain_id, llc_occupancy_bytes), + TP_STRUCT__entry(__field(u32, ctrl_hw_id) + __field(u32, mon_hw_id) + __field(int, domain_id) + __field(u64, llc_occupancy_bytes)), + TP_fast_assign(__entry->ctrl_hw_id = ctrl_hw_id; + __entry->mon_hw_id = mon_hw_id; + __entry->domain_id = domain_id; + __entry->llc_occupancy_bytes = llc_occupancy_bytes;), + TP_printk("ctrl_hw_id=%u mon_hw_id=%u domain_id=%d llc_occupancy_bytes=%llu", + __entry->ctrl_hw_id, __entry->mon_hw_id, __entry->domain_id, + __entry->llc_occupancy_bytes) + ); + +#endif /* _TRACE_RESCTRL_H */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . 
-#define TRACE_INCLUDE_FILE pseudo_lock_event +#define TRACE_INCLUDE_FILE trace #include <trace/define_trace.h> diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index fc37c8d83daf..f445bec516a0 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -163,6 +163,9 @@ static const __initconst struct idt_data apic_idts[] = { # endif INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt), INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt), +# ifdef CONFIG_X86_POSTED_MSI + INTG(POSTED_MSI_NOTIFICATION_VECTOR, asm_sysvec_posted_msi_notification), +# endif #endif }; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 35fde0107901..385e3a5fc304 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -22,6 +22,8 @@ #include <asm/desc.h> #include <asm/traps.h> #include <asm/thermal.h> +#include <asm/posted_intr.h> +#include <asm/irq_remapping.h> #define CREATE_TRACE_POINTS #include <asm/trace/irq_vectors.h> @@ -182,6 +184,13 @@ int arch_show_interrupts(struct seq_file *p, int prec) irq_stats(j)->kvm_posted_intr_wakeup_ipis); seq_puts(p, " Posted-interrupt wakeup event\n"); #endif +#ifdef CONFIG_X86_POSTED_MSI + seq_printf(p, "%*s: ", prec, "PMN"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + irq_stats(j)->posted_msi_notification_count); + seq_puts(p, " Posted MSI notification event\n"); +#endif return 0; } @@ -240,24 +249,16 @@ static __always_inline void handle_irq(struct irq_desc *desc, __handle_irq(desc, regs); } -/* - * common_interrupt() handles all normal device IRQ's (the special SMP - * cross-CPU interrupts have their own entry points). - */ -DEFINE_IDTENTRY_IRQ(common_interrupt) +static __always_inline int call_irq_handler(int vector, struct pt_regs *regs) { - struct pt_regs *old_regs = set_irq_regs(regs); struct irq_desc *desc; - - /* entry code tells RCU that we're not quiescent. Check it. */ - RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); + int ret = 0; desc = __this_cpu_read(vector_irq[vector]); if (likely(!IS_ERR_OR_NULL(desc))) { handle_irq(desc, regs); } else { - apic_eoi(); - + ret = -EINVAL; if (desc == VECTOR_UNUSED) { pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n", __func__, smp_processor_id(), @@ -267,6 +268,23 @@ DEFINE_IDTENTRY_IRQ(common_interrupt) } } + return ret; +} + +/* + * common_interrupt() handles all normal device IRQ's (the special SMP + * cross-CPU interrupts have their own entry points). + */ +DEFINE_IDTENTRY_IRQ(common_interrupt) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + /* entry code tells RCU that we're not quiescent. Check it. */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); + + if (unlikely(call_irq_handler(vector, regs))) + apic_eoi(); + set_irq_regs(old_regs); } @@ -334,12 +352,139 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) } #endif +#ifdef CONFIG_X86_POSTED_MSI + +/* Posted Interrupt Descriptors for coalesced MSIs to be posted */ +DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); + +void intel_posted_msi_init(void) +{ + u32 destination; + u32 apic_id; + + this_cpu_write(posted_msi_pi_desc.nv, POSTED_MSI_NOTIFICATION_VECTOR); + + /* + * APIC destination ID is stored in bit 8:15 while in XAPIC mode. + * VT-d spec. CH 9.11 + */ + apic_id = this_cpu_read(x86_cpu_to_apicid); + destination = x2apic_enabled() ? 
apic_id : apic_id << 8; + this_cpu_write(posted_msi_pi_desc.ndst, destination); +} + +/* + * De-multiplexing posted interrupts is on the performance path, the code + * below is written to optimize the cache performance based on the following + * considerations: + * 1.Posted interrupt descriptor (PID) fits in a cache line that is frequently + * accessed by both CPU and IOMMU. + * 2.During posted MSI processing, the CPU needs to do 64-bit read and xchg + * for checking and clearing posted interrupt request (PIR), a 256 bit field + * within the PID. + * 3.On the other side, the IOMMU does atomic swaps of the entire PID cache + * line when posting interrupts and setting control bits. + * 4.The CPU can access the cache line a magnitude faster than the IOMMU. + * 5.Each time the IOMMU does interrupt posting to the PIR will evict the PID + * cache line. The cache line states after each operation are as follows: + * CPU IOMMU PID Cache line state + * --------------------------------------------------------------- + *...read64 exclusive + *...lock xchg64 modified + *... post/atomic swap invalid + *...------------------------------------------------------------- + * + * To reduce L1 data cache miss, it is important to avoid contention with + * IOMMU's interrupt posting/atomic swap. Therefore, a copy of PIR is used + * to dispatch interrupt handlers. + * + * In addition, the code is trying to keep the cache line state consistent + * as much as possible. e.g. when making a copy and clearing the PIR + * (assuming non-zero PIR bits are present in the entire PIR), it does: + * read, read, read, read, xchg, xchg, xchg, xchg + * instead of: + * read, xchg, read, xchg, read, xchg, read, xchg + */ +static __always_inline bool handle_pending_pir(u64 *pir, struct pt_regs *regs) +{ + int i, vec = FIRST_EXTERNAL_VECTOR; + unsigned long pir_copy[4]; + bool handled = false; + + for (i = 0; i < 4; i++) + pir_copy[i] = pir[i]; + + for (i = 0; i < 4; i++) { + if (!pir_copy[i]) + continue; + + pir_copy[i] = arch_xchg(&pir[i], 0); + handled = true; + } + + if (handled) { + for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR) + call_irq_handler(vec, regs); + } + + return handled; +} + +/* + * Performance data shows that 3 is good enough to harvest 90+% of the benefit + * on high IRQ rate workload. + */ +#define MAX_POSTED_MSI_COALESCING_LOOP 3 + +/* + * For MSIs that are delivered as posted interrupts, the CPU notifications + * can be coalesced if the MSIs arrive in high frequency bursts. + */ +DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct pi_desc *pid; + int i = 0; + + pid = this_cpu_ptr(&posted_msi_pi_desc); + + inc_irq_stat(posted_msi_notification_count); + irq_enter(); + + /* + * Max coalescing count includes the extra round of handle_pending_pir + * after clearing the outstanding notification bit. Hence, at most + * MAX_POSTED_MSI_COALESCING_LOOP - 1 loops are executed here. + */ + while (++i < MAX_POSTED_MSI_COALESCING_LOOP) { + if (!handle_pending_pir(pid->pir64, regs)) + break; + } + + /* + * Clear outstanding notification bit to allow new IRQ notifications, + * do this last to maximize the window of interrupt coalescing. + */ + pi_clear_on(pid); + + /* + * There could be a race of PI notification and the clearing of ON bit, + * process PIR bits one last time such that handling the new interrupts + * are not delayed until the next IRQ. 
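The copy-then-exchange sequence in handle_pending_pir() above is what the cache-line commentary is describing: plain reads of all four PIR words first, atomic exchanges only for the words that actually have bits set. A portable sketch of the same pattern, ours, using C11 atomics in place of arch_xchg():

/*
 * Sketch only: harvest a 256-bit posted-interrupt request field by reading
 * all words first and exchanging only the non-zero ones, so the shared
 * cache line is dirtied as little and as late as possible.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool harvest_pir(_Atomic uint64_t pir[4], uint64_t out[4])
{
	bool handled = false;
	int i;

	for (i = 0; i < 4; i++)		/* cheap shared reads */
		out[i] = atomic_load_explicit(&pir[i], memory_order_relaxed);

	for (i = 0; i < 4; i++) {
		if (!out[i])
			continue;
		out[i] = atomic_exchange(&pir[i], 0);	/* claim the bits */
		handled = true;
	}
	return handled;
}
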
+ */ + handle_pending_pir(pid->pir64, regs); + + apic_eoi(); + irq_exit(); + set_irq_regs(old_regs); +} +#endif /* X86_POSTED_MSI */ #ifdef CONFIG_HOTPLUG_CPU /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { - unsigned int irr, vector; + unsigned int vector; struct irq_desc *desc; struct irq_data *data; struct irq_chip *chip; @@ -366,8 +511,7 @@ void fixup_irqs(void) if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) continue; - irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - if (irr & (1 << (vector % 32))) { + if (is_vector_pending(vector)) { desc = __this_cpu_read(vector_irq[vector]); raw_spin_lock(&desc->lock); diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 995f94467101..3342ed58e168 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -648,7 +648,7 @@ static u64 __init get_secrets_page(void) static u64 __init get_snp_jump_table_addr(void) { - struct snp_secrets_page_layout *layout; + struct snp_secrets_page *secrets; void __iomem *mem; u64 pa, addr; @@ -662,9 +662,9 @@ static u64 __init get_snp_jump_table_addr(void) return 0; } - layout = (__force struct snp_secrets_page_layout *)mem; + secrets = (__force struct snp_secrets_page *)mem; - addr = layout->os_area.ap_jump_table_pa; + addr = secrets->os_area.ap_jump_table_pa; iounmap(mem); return addr; diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 1123ef3ccf90..4334033658ed 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -193,11 +193,9 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) cur->warned = false; /* - * If a non-zero TSC value for socket 0 may be valid then the default - * adjusted value cannot assumed to be zero either. + * The default adjust value cannot be assumed to be zero on any socket. */ - if (tsc_async_resets) - cur->adjusted = bootval; + cur->adjusted = bootval; /* * Check whether this CPU is the first in a package to come up. In diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index af662312fd07..ec08fa3caf43 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -107,7 +107,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) * handle task migration (@cpu != vcpu->cpu). 
*/ new.ndst = dest; - new.sn = 0; + __pi_clear_sn(&new); /* * Restore the notification vector; in the blocking case, the @@ -157,7 +157,7 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu) &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu)); raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); - WARN(pi_desc->sn, "PI descriptor SN field set before blocking"); + WARN(pi_test_sn(pi_desc), "PI descriptor SN field set before blocking"); old.control = READ_ONCE(pi_desc->control); do { diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h index 26992076552e..6b2a0226257e 100644 --- a/arch/x86/kvm/vmx/posted_intr.h +++ b/arch/x86/kvm/vmx/posted_intr.h @@ -1,98 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KVM_X86_VMX_POSTED_INTR_H #define __KVM_X86_VMX_POSTED_INTR_H - -#define POSTED_INTR_ON 0 -#define POSTED_INTR_SN 1 - -#define PID_TABLE_ENTRY_VALID 1 - -/* Posted-Interrupt Descriptor */ -struct pi_desc { - u32 pir[8]; /* Posted interrupt requested */ - union { - struct { - /* bit 256 - Outstanding Notification */ - u16 on : 1, - /* bit 257 - Suppress Notification */ - sn : 1, - /* bit 271:258 - Reserved */ - rsvd_1 : 14; - /* bit 279:272 - Notification Vector */ - u8 nv; - /* bit 287:280 - Reserved */ - u8 rsvd_2; - /* bit 319:288 - Notification Destination */ - u32 ndst; - }; - u64 control; - }; - u32 rsvd[6]; -} __aligned(64); - -static inline bool pi_test_and_set_on(struct pi_desc *pi_desc) -{ - return test_and_set_bit(POSTED_INTR_ON, - (unsigned long *)&pi_desc->control); -} - -static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc) -{ - return test_and_clear_bit(POSTED_INTR_ON, - (unsigned long *)&pi_desc->control); -} - -static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc) -{ - return test_and_clear_bit(POSTED_INTR_SN, - (unsigned long *)&pi_desc->control); -} - -static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) -{ - return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); -} - -static inline bool pi_is_pir_empty(struct pi_desc *pi_desc) -{ - return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS); -} - -static inline void pi_set_sn(struct pi_desc *pi_desc) -{ - set_bit(POSTED_INTR_SN, - (unsigned long *)&pi_desc->control); -} - -static inline void pi_set_on(struct pi_desc *pi_desc) -{ - set_bit(POSTED_INTR_ON, - (unsigned long *)&pi_desc->control); -} - -static inline void pi_clear_on(struct pi_desc *pi_desc) -{ - clear_bit(POSTED_INTR_ON, - (unsigned long *)&pi_desc->control); -} - -static inline void pi_clear_sn(struct pi_desc *pi_desc) -{ - clear_bit(POSTED_INTR_SN, - (unsigned long *)&pi_desc->control); -} - -static inline bool pi_test_on(struct pi_desc *pi_desc) -{ - return test_bit(POSTED_INTR_ON, - (unsigned long *)&pi_desc->control); -} - -static inline bool pi_test_sn(struct pi_desc *pi_desc) -{ - return test_bit(POSTED_INTR_SN, - (unsigned long *)&pi_desc->control); -} +#include <asm/posted_intr.h> void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu); void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 22411f4aff53..becefaf95cab 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -70,6 +70,7 @@ #include "x86.h" #include "smm.h" #include "vmx_onhyperv.h" +#include "posted_intr.h" MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -4844,7 +4845,7 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu) * or POSTED_INTR_WAKEUP_VECTOR. 
*/ vmx->pi_desc.nv = POSTED_INTR_VECTOR; - vmx->pi_desc.sn = 1; + __pi_set_sn(&vmx->pi_desc); } static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 90f9e4434646..7e483366b31e 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -7,10 +7,10 @@ #include <asm/kvm.h> #include <asm/intel_pt.h> #include <asm/perf_event.h> +#include <asm/posted_intr.h> #include "capabilities.h" #include "../kvm_cache_regs.h" -#include "posted_intr.h" #include "vmcs.h" #include "vmx_ops.h" #include "../cpuid.h" diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 65e9a6e391c0..ce84ba86e69e 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -929,6 +929,8 @@ int memory_add_physaddr_to_nid(u64 start) } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +#endif + static int __init cmp_memblk(const void *a, const void *b) { const struct numa_memblk *ma = *(const struct numa_memblk **)a; @@ -1001,5 +1003,3 @@ int __init numa_fill_memblks(u64 start, u64 end) } return 0; } - -#endif diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 59cbc94b6e69..5159c7a22922 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -816,9 +816,10 @@ done: static void emit_mov_imm64(u8 **pprog, u32 dst_reg, const u32 imm32_hi, const u32 imm32_lo) { + u64 imm64 = ((u64)imm32_hi << 32) | (u32)imm32_lo; u8 *prog = *pprog; - if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { + if (is_uimm32(imm64)) { /* * For emitting plain u32, where sign bit must not be * propagated LLVM tends to load imm64 over mov32 @@ -826,6 +827,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg, * 'mov %eax, imm32' instead. */ emit_mov_imm32(&prog, false, dst_reg, imm32_lo); + } else if (is_simm32(imm64)) { + emit_mov_imm32(&prog, true, dst_reg, imm32_lo); } else { /* movabsq rax, imm64 */ EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); @@ -1169,6 +1172,54 @@ static int emit_atomic(u8 **pprog, u8 atomic_op, return 0; } +static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size, + u32 dst_reg, u32 src_reg, u32 index_reg, int off) +{ + u8 *prog = *pprog; + + EMIT1(0xF0); /* lock prefix */ + switch (size) { + case BPF_W: + EMIT1(add_3mod(0x40, dst_reg, src_reg, index_reg)); + break; + case BPF_DW: + EMIT1(add_3mod(0x48, dst_reg, src_reg, index_reg)); + break; + default: + pr_err("bpf_jit: 1 and 2 byte atomics are not supported\n"); + return -EFAULT; + } + + /* emit opcode */ + switch (atomic_op) { + case BPF_ADD: + case BPF_AND: + case BPF_OR: + case BPF_XOR: + /* lock *(u32/u64*)(dst_reg + idx_reg + off) <op>= src_reg */ + EMIT1(simple_alu_opcodes[atomic_op]); + break; + case BPF_ADD | BPF_FETCH: + /* src_reg = atomic_fetch_add(dst_reg + idx_reg + off, src_reg); */ + EMIT2(0x0F, 0xC1); + break; + case BPF_XCHG: + /* src_reg = atomic_xchg(dst_reg + idx_reg + off, src_reg); */ + EMIT1(0x87); + break; + case BPF_CMPXCHG: + /* r0 = atomic_cmpxchg(dst_reg + idx_reg + off, r0, src_reg); */ + EMIT2(0x0F, 0xB1); + break; + default: + pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op); + return -EFAULT; + } + emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off); + *pprog = prog; + return 0; +} + #define DONT_CLEAR 1 bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) @@ -1351,8 +1402,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image break; case BPF_ALU64 | BPF_MOV | BPF_X: - if (insn->off == BPF_ADDR_SPACE_CAST && - 
insn->imm == 1U << 16) { + if (insn_is_cast_user(insn)) { if (dst_reg != src_reg) /* 32-bit mov */ emit_mov_reg(&prog, false, dst_reg, src_reg); @@ -1383,6 +1433,16 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image maybe_emit_mod(&prog, AUX_REG, dst_reg, true); EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg)); break; + } else if (insn_is_mov_percpu_addr(insn)) { + /* mov <dst>, <src> (if necessary) */ + EMIT_mov(dst_reg, src_reg); +#ifdef CONFIG_SMP + /* add <dst>, gs:[<off>] */ + EMIT2(0x65, add_1mod(0x48, dst_reg)); + EMIT3(0x03, add_2reg(0x04, 0, dst_reg), 0x25); + EMIT((u32)(unsigned long)&this_cpu_off, 4); +#endif + break; } fallthrough; case BPF_ALU | BPF_MOV | BPF_X: @@ -1963,6 +2023,15 @@ populate_extable: return err; break; + case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: + case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: + start_of_ldx = prog; + err = emit_atomic_index(&prog, insn->imm, BPF_SIZE(insn->code), + dst_reg, src_reg, X86_REG_R12, insn->off); + if (err) + return err; + goto populate_extable; + /* call */ case BPF_JMP | BPF_CALL: { u8 *ip = image + addrs[i - 1]; @@ -2994,12 +3063,9 @@ void arch_free_bpf_trampoline(void *image, unsigned int size) bpf_prog_pack_free(image, size); } -void arch_protect_bpf_trampoline(void *image, unsigned int size) -{ -} - -void arch_unprotect_bpf_trampoline(void *image, unsigned int size) +int arch_protect_bpf_trampoline(void *image, unsigned int size) { + return 0; } int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, @@ -3359,6 +3425,11 @@ bool bpf_jit_supports_subprog_tailcalls(void) return true; } +bool bpf_jit_supports_percpu_insn(void) +{ + return true; +} + void bpf_jit_free(struct bpf_prog *prog) { if (prog->jited) { @@ -3462,6 +3533,21 @@ bool bpf_jit_supports_arena(void) return true; } +bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) +{ + if (!in_arena) + return true; + switch (insn->code) { + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (insn->imm == (BPF_AND | BPF_FETCH) || + insn->imm == (BPF_OR | BPF_FETCH) || + insn->imm == (BPF_XOR | BPF_FETCH)) + return false; + } + return true; +} + bool bpf_jit_supports_ptr_xchg(void) { return true; diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index c10083a8e68e..de0f9e5f9f73 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -2600,8 +2600,7 @@ out_image: if (bpf_jit_enable > 1) bpf_jit_dump(prog->len, proglen, pass + 1, image); - if (image) { - bpf_jit_binary_lock_ro(header); + if (image && !bpf_jit_binary_lock_ro(header)) { prog->bpf_func = (void *)image; prog->jited = 1; prog->jited_len = proglen; |
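
In the arch/x86/kernel/irq.c hunk, handle_pending_pir() first snapshots the 256-bit PIR with four plain 64-bit reads and only then clears the non-zero words with xchg, and sysvec_posted_msi_notification() keeps calling it to coalesce bursts. The stand-alone user-space sketch below models that ordering; drain_pir(), fake_xchg() and the printf() dispatch are illustrative stand-ins, not kernel interfaces.

/*
 * User-space model of the "copy the PIR, then clear only the non-zero words"
 * scheme described in the irq.c comment.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PIR_WORDS 4			/* 4 x 64 bits = 256 posted-interrupt bits */

/* Stand-in for arch_xchg(): atomically swap in val, return the old value. */
static uint64_t fake_xchg(uint64_t *p, uint64_t val)
{
	return __atomic_exchange_n(p, val, __ATOMIC_SEQ_CST);
}

static bool drain_pir(uint64_t *pir)
{
	uint64_t pir_copy[PIR_WORDS];
	bool handled = false;
	int i;

	/* Pass 1: plain 64-bit reads, no atomics, to snapshot the PIR. */
	for (i = 0; i < PIR_WORDS; i++)
		pir_copy[i] = pir[i];

	/* Pass 2: only words that had bits set pay for an atomic xchg. */
	for (i = 0; i < PIR_WORDS; i++) {
		if (!pir_copy[i])
			continue;
		pir_copy[i] = fake_xchg(&pir[i], 0);
		handled = true;
	}

	/* Dispatch from the private copy, not from the contended PIR. */
	if (handled) {
		for (i = 0; i < PIR_WORDS * 64; i++) {
			if (pir_copy[i / 64] & (1ULL << (i % 64)))
				printf("dispatch vector %d\n", i);
		}
	}

	return handled;
}

int main(void)
{
	uint64_t pir[PIR_WORDS] = { 0, 1ULL << 3, 0, 0 };	/* vector 67 pending */

	/* Like the notification handler: keep draining while work shows up. */
	while (drain_pir(pir))
		;
	return 0;
}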
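The fixup_irqs() hunk replaces an open-coded read of the local APIC IRR with is_vector_pending(). The sketch below only models what the removed lines computed; it is an assumption (not shown in this hunk) that is_vector_pending() additionally covers interrupts parked in the posted-MSI PIR when CONFIG_X86_POSTED_MSI is enabled. fake_apic_read() and irr_vector_pending() are invented names for illustration.

/* Model of the removed IRR test: the IRR is eight 32-bit registers spaced
 * 0x10 apart, so vector N lives in register N/32 at bit N%32.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define APIC_IRR 0x200	/* offset of the first IRR register */

/* Stand-in for apic_read(); pretend only vector 0x41 is pending. */
static uint32_t fake_apic_read(uint32_t reg)
{
	return reg == APIC_IRR + (0x41 / 32) * 0x10 ? 1u << (0x41 % 32) : 0;
}

static bool irr_vector_pending(unsigned int vector)
{
	uint32_t irr = fake_apic_read(APIC_IRR + (vector / 32) * 0x10);

	return irr & (1u << (vector % 32));
}

int main(void)
{
	printf("vector 0x41 pending: %d\n", irr_vector_pending(0x41));
	printf("vector 0x42 pending: %d\n", irr_vector_pending(0x42));
	return 0;
}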
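Several KVM hunks stop touching the posted-interrupt descriptor's SN bit-field directly (pi_desc.sn = 1, WARN(pi_desc->sn, ...)) and use the shared helpers from asm/posted_intr.h instead (__pi_set_sn(), pi_test_sn()). The model below, based on the layout removed from arch/x86/kvm/vmx/posted_intr.h, shows why bit 1 of the 64-bit control word is the same storage as the sn bit-field; pi_desc_model and the two helpers are illustrative only (the kernel helpers use set_bit()/test_bit(), and the real struct carries a 64-byte alignment attribute omitted here). Bit-field layout is assumed to be the usual little-endian x86 one.

/*
 * The PIR occupies descriptor bits 0-255, so within the following 64-bit
 * control word ON is bit 0 (descriptor bit 256) and SN is bit 1 (bit 257).
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define POSTED_INTR_ON	0	/* Outstanding Notification */
#define POSTED_INTR_SN	1	/* Suppress Notification */

struct pi_desc_model {
	uint32_t pir[8];		/* posted interrupt requested, 256 bits */
	union {
		struct {
			uint16_t on:1, sn:1, rsvd_1:14;
			uint8_t  nv;	/* notification vector */
			uint8_t  rsvd_2;
			uint32_t ndst;	/* notification destination */
		};
		uint64_t control;
	};
	uint32_t rsvd[6];
};

static void model_pi_set_sn(struct pi_desc_model *pid)
{
	pid->control |= 1ULL << POSTED_INTR_SN;
}

static bool model_pi_test_sn(const struct pi_desc_model *pid)
{
	return pid->control & (1ULL << POSTED_INTR_SN);
}

int main(void)
{
	struct pi_desc_model pid = { 0 };

	model_pi_set_sn(&pid);
	printf("sn bit-field = %u, SN test via control word = %d\n",
	       pid.sn, model_pi_test_sn(&pid));
	return 0;
}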
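The emit_mov_imm64() change in arch/x86/net/bpf_jit_comp.c adds a middle case: a 64-bit immediate that is not a valid zero-extended 32-bit value but is a valid sign-extended one can be loaded with a sign-extending 32-bit mov instead of a 10-byte movabs. A small sketch of that three-way classification follows; fits_uimm32()/fits_simm32() are local stand-ins for is_uimm32()/is_simm32(), and the instruction forms in the strings are the usual x86-64 encodings, not emitted here.

/* Classify a 64-bit immediate the way emit_mov_imm64() does. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool fits_uimm32(uint64_t v)	/* usable as a zero-extended imm32 */
{
	return v == (uint32_t)v;
}

static bool fits_simm32(uint64_t v)	/* usable as a sign-extended imm32 */
{
	return v == (uint64_t)(int64_t)(int32_t)v;
}

static const char *pick_mov(uint64_t imm64)
{
	if (fits_uimm32(imm64))
		return "mov r32, imm32      (zero-extends into the full register)";
	if (fits_simm32(imm64))
		return "mov r/m64, imm32    (REX.W + C7, sign-extends)";
	return "movabs r64, imm64   (10-byte full immediate)";
}

int main(void)
{
	const uint64_t samples[] = {
		0x12345678,		/* fits as unsigned 32-bit */
		0xffffffffffff0000ull,	/* only fits as sign-extended 32-bit */
		0x123456789abcdef0ull,	/* needs the full 64-bit form */
	};

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("%#018llx -> %s\n",
		       (unsigned long long)samples[i], pick_mov(samples[i]));
	return 0;
}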