diff options
Diffstat (limited to 'arch/arm64')
-rw-r--r-- | arch/arm64/boot/dts/freescale/fsl-lx2160a-clearfog-itx.dtsi | 8 | ||||
-rw-r--r-- | arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts | 10 | ||||
-rw-r--r-- | arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts | 16 | ||||
-rw-r--r-- | arch/arm64/boot/dts/marvell/armada-7040-mochabin.dts | 16 | ||||
-rw-r--r-- | arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts | 4 | ||||
-rw-r--r-- | arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi | 24 | ||||
-rw-r--r-- | arch/arm64/boot/dts/marvell/armada-8040-puzzle-m801.dts | 16 | ||||
-rw-r--r-- | arch/arm64/boot/dts/marvell/cn9130-crb.dtsi | 6 | ||||
-rw-r--r-- | arch/arm64/boot/dts/marvell/cn9130-db.dtsi | 8 | ||||
-rw-r--r-- | arch/arm64/boot/dts/marvell/cn9131-db.dtsi | 8 | ||||
-rw-r--r-- | arch/arm64/boot/dts/marvell/cn9132-db.dtsi | 8 | ||||
-rw-r--r-- | arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 48 | ||||
-rw-r--r-- | arch/arm64/include/asm/insn.h | 3 | ||||
-rw-r--r-- | arch/arm64/lib/insn.c | 30 | ||||
-rw-r--r-- | arch/arm64/net/bpf_jit.h | 7 | ||||
-rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 724 |
16 files changed, 852 insertions, 84 deletions
diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a-clearfog-itx.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a-clearfog-itx.dtsi index 41702e7386e3..a7dcbecc1f41 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a-clearfog-itx.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a-clearfog-itx.dtsi @@ -34,28 +34,28 @@ sfp0: sfp-0 { compatible = "sff,sfp"; i2c-bus = <&sfp0_i2c>; - mod-def0-gpio = <&gpio2 0 GPIO_ACTIVE_LOW>; + mod-def0-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; maximum-power-milliwatt = <2000>; }; sfp1: sfp-1 { compatible = "sff,sfp"; i2c-bus = <&sfp1_i2c>; - mod-def0-gpio = <&gpio2 9 GPIO_ACTIVE_LOW>; + mod-def0-gpios = <&gpio2 9 GPIO_ACTIVE_LOW>; maximum-power-milliwatt = <2000>; }; sfp2: sfp-2 { compatible = "sff,sfp"; i2c-bus = <&sfp2_i2c>; - mod-def0-gpio = <&gpio2 10 GPIO_ACTIVE_LOW>; + mod-def0-gpios = <&gpio2 10 GPIO_ACTIVE_LOW>; maximum-power-milliwatt = <2000>; }; sfp3: sfp-3 { compatible = "sff,sfp"; i2c-bus = <&sfp3_i2c>; - mod-def0-gpio = <&gpio2 11 GPIO_ACTIVE_LOW>; + mod-def0-gpios = <&gpio2 11 GPIO_ACTIVE_LOW>; maximum-power-milliwatt = <2000>; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index de8d0cfa4cb4..ada164d423f3 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -100,11 +100,11 @@ sfp: sfp { compatible = "sff,sfp"; i2c-bus = <&i2c0>; - los-gpio = <&moxtet_sfp 0 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&moxtet_sfp 1 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&moxtet_sfp 2 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&moxtet_sfp 4 GPIO_ACTIVE_HIGH>; - rate-select0-gpio = <&moxtet_sfp 5 GPIO_ACTIVE_HIGH>; + los-gpios = <&moxtet_sfp 0 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&moxtet_sfp 1 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&moxtet_sfp 2 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&moxtet_sfp 4 GPIO_ACTIVE_HIGH>; + rate-select0-gpios = <&moxtet_sfp 5 GPIO_ACTIVE_HIGH>; maximum-power-milliwatt = <3000>; /* enabled by U-Boot if SFP module is present */ diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts index a35317d24d6c..b20c8e7d923b 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts @@ -65,20 +65,20 @@ sfp_eth0: sfp-eth0 { compatible = "sff,sfp"; i2c-bus = <&i2c0>; - los-gpio = <&gpiosb 2 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&gpiosb 3 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&gpiosb 4 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&gpiosb 5 GPIO_ACTIVE_HIGH>; + los-gpios = <&gpiosb 2 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&gpiosb 3 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&gpiosb 4 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&gpiosb 5 GPIO_ACTIVE_HIGH>; maximum-power-milliwatt = <3000>; }; sfp_eth1: sfp-eth1 { compatible = "sff,sfp"; i2c-bus = <&i2c1>; - los-gpio = <&gpiosb 7 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&gpiosb 8 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&gpiosb 9 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&gpiosb 10 GPIO_ACTIVE_HIGH>; + los-gpios = <&gpiosb 7 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&gpiosb 8 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&gpiosb 9 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&gpiosb 10 GPIO_ACTIVE_HIGH>; maximum-power-milliwatt = <3000>; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-7040-mochabin.dts b/arch/arm64/boot/dts/marvell/armada-7040-mochabin.dts index b9ba7c452a77..7ca71f2d7afb 100644 --- a/arch/arm64/boot/dts/marvell/armada-7040-mochabin.dts +++ b/arch/arm64/boot/dts/marvell/armada-7040-mochabin.dts @@ -34,20 +34,20 @@ sfp_eth0: sfp-eth0 { compatible = "sff,sfp"; i2c-bus = <&cp0_i2c1>; - los-gpio = <&sfp_gpio 3 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&sfp_gpio 2 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&sfp_gpio 1 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&sfp_gpio 0 GPIO_ACTIVE_HIGH>; + los-gpios = <&sfp_gpio 3 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&sfp_gpio 2 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&sfp_gpio 1 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&sfp_gpio 0 GPIO_ACTIVE_HIGH>; }; /* SFP 1G */ sfp_eth2: sfp-eth2 { compatible = "sff,sfp"; i2c-bus = <&cp0_i2c0>; - los-gpio = <&sfp_gpio 7 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&sfp_gpio 6 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&sfp_gpio 5 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&sfp_gpio 4 GPIO_ACTIVE_HIGH>; + los-gpios = <&sfp_gpio 7 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&sfp_gpio 6 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&sfp_gpio 5 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&sfp_gpio 4 GPIO_ACTIVE_HIGH>; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts index 15f6ca4df121..4125202028c8 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts +++ b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts @@ -64,8 +64,8 @@ sfp_cp0_eth0: sfp-cp0-eth0 { compatible = "sff,sfp"; i2c-bus = <&cp0_i2c1>; - mod-def0-gpio = <&cp0_gpio2 17 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&cp1_gpio1 29 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&cp0_gpio2 17 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&cp1_gpio1 29 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&cp0_sfp_present_pins &cp1_sfp_tx_disable_pins>; maximum-power-milliwatt = <2000>; diff --git a/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi b/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi index c0389dd17340..ca1aeb69a892 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi @@ -65,10 +65,10 @@ /* CON15,16 - CPM lane 4 */ compatible = "sff,sfp"; i2c-bus = <&sfpp0_i2c>; - los-gpio = <&cp1_gpio1 28 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&cp1_gpio1 27 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&cp1_gpio1 29 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&cp1_gpio1 26 GPIO_ACTIVE_HIGH>; + los-gpios = <&cp1_gpio1 28 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&cp1_gpio1 27 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&cp1_gpio1 29 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&cp1_gpio1 26 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&cp1_sfpp0_pins>; maximum-power-milliwatt = <2000>; @@ -78,10 +78,10 @@ /* CON17,18 - CPS lane 4 */ compatible = "sff,sfp"; i2c-bus = <&sfpp1_i2c>; - los-gpio = <&cp1_gpio1 8 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&cp1_gpio1 11 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&cp1_gpio1 10 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&cp0_gpio2 30 GPIO_ACTIVE_HIGH>; + los-gpios = <&cp1_gpio1 8 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&cp1_gpio1 11 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&cp1_gpio1 10 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&cp0_gpio2 30 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&cp1_sfpp1_pins &cp0_sfpp1_pins>; maximum-power-milliwatt = <2000>; @@ -91,10 +91,10 @@ /* CON13,14 - CPS lane 5 */ compatible = "sff,sfp"; i2c-bus = <&sfp_1g_i2c>; - los-gpio = <&cp0_gpio2 22 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&cp0_gpio2 21 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&cp1_gpio1 24 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&cp0_gpio2 19 GPIO_ACTIVE_HIGH>; + los-gpios = <&cp0_gpio2 22 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&cp0_gpio2 21 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&cp1_gpio1 24 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&cp0_gpio2 19 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&cp0_sfp_1g_pins &cp1_sfp_1g_pins>; maximum-power-milliwatt = <2000>; diff --git a/arch/arm64/boot/dts/marvell/armada-8040-puzzle-m801.dts b/arch/arm64/boot/dts/marvell/armada-8040-puzzle-m801.dts index cf868e0bbb9c..eb0473503936 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-puzzle-m801.dts +++ b/arch/arm64/boot/dts/marvell/armada-8040-puzzle-m801.dts @@ -67,20 +67,20 @@ sfp_cp0_eth0: sfp-cp0-eth0 { compatible = "sff,sfp"; i2c-bus = <&sfpplus0_i2c>; - los-gpio = <&sfpplus_gpio 11 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&sfpplus_gpio 10 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&sfpplus_gpio 9 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&sfpplus_gpio 8 GPIO_ACTIVE_HIGH>; + los-gpios = <&sfpplus_gpio 11 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&sfpplus_gpio 10 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&sfpplus_gpio 9 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&sfpplus_gpio 8 GPIO_ACTIVE_HIGH>; maximum-power-milliwatt = <3000>; }; sfp_cp1_eth0: sfp-cp1-eth0 { compatible = "sff,sfp"; i2c-bus = <&sfpplus1_i2c>; - los-gpio = <&sfpplus_gpio 3 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&sfpplus_gpio 2 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&sfpplus_gpio 1 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&sfpplus_gpio 0 GPIO_ACTIVE_HIGH>; + los-gpios = <&sfpplus_gpio 3 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&sfpplus_gpio 2 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&sfpplus_gpio 1 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&sfpplus_gpio 0 GPIO_ACTIVE_HIGH>; maximum-power-milliwatt = <3000>; }; diff --git a/arch/arm64/boot/dts/marvell/cn9130-crb.dtsi b/arch/arm64/boot/dts/marvell/cn9130-crb.dtsi index 1acd746284dc..8e4ec243fb8f 100644 --- a/arch/arm64/boot/dts/marvell/cn9130-crb.dtsi +++ b/arch/arm64/boot/dts/marvell/cn9130-crb.dtsi @@ -78,9 +78,9 @@ compatible = "sff,sfp"; i2c-bus = <&cp0_i2c1>; mod-def0-gpios = <&expander0 3 GPIO_ACTIVE_LOW>; - los-gpio = <&expander0 15 GPIO_ACTIVE_HIGH>; - tx-disable-gpio = <&expander0 2 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&cp0_gpio1 24 GPIO_ACTIVE_HIGH>; + los-gpios = <&expander0 15 GPIO_ACTIVE_HIGH>; + tx-disable-gpios = <&expander0 2 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&cp0_gpio1 24 GPIO_ACTIVE_HIGH>; maximum-power-milliwatt = <3000>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/marvell/cn9130-db.dtsi b/arch/arm64/boot/dts/marvell/cn9130-db.dtsi index f58402eb9536..c7de1ea0d470 100644 --- a/arch/arm64/boot/dts/marvell/cn9130-db.dtsi +++ b/arch/arm64/boot/dts/marvell/cn9130-db.dtsi @@ -90,10 +90,10 @@ cp0_sfp_eth0: sfp-eth@0 { compatible = "sff,sfp"; i2c-bus = <&cp0_sfpp0_i2c>; - los-gpio = <&cp0_module_expander1 11 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&cp0_module_expander1 10 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&cp0_module_expander1 9 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&cp0_module_expander1 8 GPIO_ACTIVE_HIGH>; + los-gpios = <&cp0_module_expander1 11 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&cp0_module_expander1 10 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&cp0_module_expander1 9 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&cp0_module_expander1 8 GPIO_ACTIVE_HIGH>; /* * SFP cages are unconnected on early PCBs because of an the I2C * lanes not being connected. Prevent the port for being diff --git a/arch/arm64/boot/dts/marvell/cn9131-db.dtsi b/arch/arm64/boot/dts/marvell/cn9131-db.dtsi index b7fc241a228c..ff8422fae31b 100644 --- a/arch/arm64/boot/dts/marvell/cn9131-db.dtsi +++ b/arch/arm64/boot/dts/marvell/cn9131-db.dtsi @@ -37,10 +37,10 @@ cp1_sfp_eth1: sfp-eth1 { compatible = "sff,sfp"; i2c-bus = <&cp1_i2c0>; - los-gpio = <&cp1_gpio1 11 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&cp1_gpio1 10 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&cp1_gpio1 9 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&cp1_gpio1 8 GPIO_ACTIVE_HIGH>; + los-gpios = <&cp1_gpio1 11 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&cp1_gpio1 10 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&cp1_gpio1 9 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&cp1_gpio1 8 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&cp1_sfp_pins>; /* diff --git a/arch/arm64/boot/dts/marvell/cn9132-db.dtsi b/arch/arm64/boot/dts/marvell/cn9132-db.dtsi index 3f1795fb4fe7..512a4fa2861e 100644 --- a/arch/arm64/boot/dts/marvell/cn9132-db.dtsi +++ b/arch/arm64/boot/dts/marvell/cn9132-db.dtsi @@ -57,10 +57,10 @@ cp2_sfp_eth0: sfp-eth0 { compatible = "sff,sfp"; i2c-bus = <&cp2_sfpp0_i2c>; - los-gpio = <&cp2_module_expander1 11 GPIO_ACTIVE_HIGH>; - mod-def0-gpio = <&cp2_module_expander1 10 GPIO_ACTIVE_LOW>; - tx-disable-gpio = <&cp2_module_expander1 9 GPIO_ACTIVE_HIGH>; - tx-fault-gpio = <&cp2_module_expander1 8 GPIO_ACTIVE_HIGH>; + los-gpios = <&cp2_module_expander1 11 GPIO_ACTIVE_HIGH>; + mod-def0-gpios = <&cp2_module_expander1 10 GPIO_ACTIVE_LOW>; + tx-disable-gpios = <&cp2_module_expander1 9 GPIO_ACTIVE_HIGH>; + tx-fault-gpios = <&cp2_module_expander1 8 GPIO_ACTIVE_HIGH>; /* * SFP cages are unconnected on early PCBs because of an the I2C * lanes not being connected. Prevent the port for being diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index 1d3ffbf3cde8..5e34bd0b214d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -458,6 +458,54 @@ status = "okay"; }; +&mdio0 { + #address-cells = <1>; + #size-cells = <0>; + + switch@0 { + compatible = "mediatek,mt7531"; + reg = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@1 { + reg = <1>; + label = "lan0"; + }; + + port@2 { + reg = <2>; + label = "lan1"; + }; + + port@3 { + reg = <3>; + label = "lan2"; + }; + + port@4 { + reg = <4>; + label = "lan3"; + }; + + port@5 { + reg = <5>; + label = "cpu"; + ethernet = <&gmac0>; + phy-mode = "rgmii"; + + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + }; + }; +}; + &mdio1 { rgmii_phy1: ethernet-phy@0 { compatible = "ethernet-phy-ieee802.3-c22"; diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 6aa2dc836db1..834bff720582 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -510,6 +510,9 @@ u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, unsigned int imm, enum aarch64_insn_size_type size, enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_load_literal(unsigned long pc, unsigned long addr, + enum aarch64_insn_register reg, + bool is64bit); u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, enum aarch64_insn_register reg2, enum aarch64_insn_register base, diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index 695d7368fadc..49e972beeac7 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -323,7 +323,7 @@ static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type, return insn; } -static inline long branch_imm_common(unsigned long pc, unsigned long addr, +static inline long label_imm_common(unsigned long pc, unsigned long addr, long range) { long offset; @@ -354,7 +354,7 @@ u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, * ARM64 virtual address arrangement guarantees all kernel and module * texts are within +/-128M. */ - offset = branch_imm_common(pc, addr, SZ_128M); + offset = label_imm_common(pc, addr, SZ_128M); if (offset >= SZ_128M) return AARCH64_BREAK_FAULT; @@ -382,7 +382,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, u32 insn; long offset; - offset = branch_imm_common(pc, addr, SZ_1M); + offset = label_imm_common(pc, addr, SZ_1M); if (offset >= SZ_1M) return AARCH64_BREAK_FAULT; @@ -421,7 +421,7 @@ u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, u32 insn; long offset; - offset = branch_imm_common(pc, addr, SZ_1M); + offset = label_imm_common(pc, addr, SZ_1M); insn = aarch64_insn_get_bcond_value(); @@ -543,6 +543,28 @@ u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm); } +u32 aarch64_insn_gen_load_literal(unsigned long pc, unsigned long addr, + enum aarch64_insn_register reg, + bool is64bit) +{ + u32 insn; + long offset; + + offset = label_imm_common(pc, addr, SZ_1M); + if (offset >= SZ_1M) + return AARCH64_BREAK_FAULT; + + insn = aarch64_insn_get_ldr_lit_value(); + + if (is64bit) + insn |= BIT(30); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn, + offset >> 2); +} + u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, enum aarch64_insn_register reg2, enum aarch64_insn_register base, diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 194c95ccc1cf..a6acb94ea3d6 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -80,6 +80,12 @@ #define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE) #define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD) +/* LDR (literal) */ +#define A64_LDR32LIT(Wt, offset) \ + aarch64_insn_gen_load_literal(0, offset, Wt, false) +#define A64_LDR64LIT(Xt, offset) \ + aarch64_insn_gen_load_literal(0, offset, Xt, true) + /* Load/store register pair */ #define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \ aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \ @@ -270,6 +276,7 @@ #define A64_BTI_C A64_HINT(AARCH64_INSN_HINT_BTIC) #define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ) #define A64_BTI_JC A64_HINT(AARCH64_INSN_HINT_BTIJC) +#define A64_NOP A64_HINT(AARCH64_INSN_HINT_NOP) /* DMB */ #define A64_DMB_ISH aarch64_insn_gen_dmb(AARCH64_INSN_MB_ISH) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 42f2e9a8616c..7ca8779ae34f 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -10,6 +10,7 @@ #include <linux/bitfield.h> #include <linux/bpf.h> #include <linux/filter.h> +#include <linux/memory.h> #include <linux/printk.h> #include <linux/slab.h> @@ -18,6 +19,7 @@ #include <asm/cacheflush.h> #include <asm/debug-monitors.h> #include <asm/insn.h> +#include <asm/patching.h> #include <asm/set_memory.h> #include "bpf_jit.h" @@ -78,6 +80,15 @@ struct jit_ctx { int fpb_offset; }; +struct bpf_plt { + u32 insn_ldr; /* load target */ + u32 insn_br; /* branch to target */ + u64 target; /* target value */ +}; + +#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target) +#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target) + static inline void emit(const u32 insn, struct jit_ctx *ctx) { if (ctx->image != NULL) @@ -140,6 +151,12 @@ static inline void emit_a64_mov_i64(const int reg, const u64 val, } } +static inline void emit_bti(u32 insn, struct jit_ctx *ctx) +{ + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + emit(insn, ctx); +} + /* * Kernel addresses in the vmalloc space use at most 48 bits, and the * remaining bits are guaranteed to be 0x1. So we can compose the address @@ -159,6 +176,14 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val, } } +static inline void emit_call(u64 target, struct jit_ctx *ctx) +{ + u8 tmp = bpf2a64[TMP_REG_1]; + + emit_addr_mov_i64(tmp, target, ctx); + emit(A64_BLR(tmp), ctx); +} + static inline int bpf2a64_offset(int bpf_insn, int off, const struct jit_ctx *ctx) { @@ -235,17 +260,35 @@ static bool is_lsi_offset(int offset, int scale) return true; } +/* generated prologue: + * bti c // if CONFIG_ARM64_BTI_KERNEL + * mov x9, lr + * nop // POKE_OFFSET + * paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL + * stp x29, lr, [sp, #-16]! + * mov x29, sp + * stp x19, x20, [sp, #-16]! + * stp x21, x22, [sp, #-16]! + * stp x25, x26, [sp, #-16]! + * stp x27, x28, [sp, #-16]! + * mov x25, sp + * mov tcc, #0 + * // PROLOGUE_OFFSET + */ + +#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0) +#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0) + +/* Offset of nop instruction in bpf prog entry to be poked */ +#define POKE_OFFSET (BTI_INSNS + 1) + /* Tail call offset to jump into */ -#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) || \ - IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) -#define PROLOGUE_OFFSET 9 -#else -#define PROLOGUE_OFFSET 8 -#endif +#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8) static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { const struct bpf_prog *prog = ctx->prog; + const bool is_main_prog = prog->aux->func_idx == 0; const u8 r6 = bpf2a64[BPF_REG_6]; const u8 r7 = bpf2a64[BPF_REG_7]; const u8 r8 = bpf2a64[BPF_REG_8]; @@ -279,12 +322,14 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */ + emit_bti(A64_BTI_C, ctx); + + emit(A64_MOV(1, A64_R(9), A64_LR), ctx); + emit(A64_NOP, ctx); + /* Sign lr */ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) emit(A64_PACIASP, ctx); - /* BTI landing pad */ - else if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) - emit(A64_BTI_C, ctx); /* Save FP and LR registers to stay align with ARM64 AAPCS */ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); @@ -299,7 +344,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) /* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); - if (!ebpf_from_cbpf) { + if (!ebpf_from_cbpf && is_main_prog) { /* Initialize tail_call_cnt */ emit(A64_MOVZ(1, tcc, 0, 0), ctx); @@ -311,8 +356,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) } /* BTI landing pad for the tail call, done with a BR */ - if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) - emit(A64_BTI_J, ctx); + emit_bti(A64_BTI_J, ctx); } emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx); @@ -556,6 +600,53 @@ static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) return 0; } +void dummy_tramp(void); + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .global dummy_tramp\n" +" .type dummy_tramp, %function\n" +"dummy_tramp:" +#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) +" bti j\n" /* dummy_tramp is called via "br x10" */ +#endif +" mov x10, x30\n" +" mov x30, x9\n" +" ret x10\n" +" .size dummy_tramp, .-dummy_tramp\n" +" .popsection\n" +); + +/* build a plt initialized like this: + * + * plt: + * ldr tmp, target + * br tmp + * target: + * .quad dummy_tramp + * + * when a long jump trampoline is attached, target is filled with the + * trampoline address, and when the trampoline is removed, target is + * restored to dummy_tramp address. + */ +static void build_plt(struct jit_ctx *ctx) +{ + const u8 tmp = bpf2a64[TMP_REG_1]; + struct bpf_plt *plt = NULL; + + /* make sure target is 64-bit aligned */ + if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2) + emit(A64_NOP, ctx); + + plt = (struct bpf_plt *)(ctx->image + ctx->idx); + /* plt is called via bl, no BTI needed here */ + emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx); + emit(A64_BR(tmp), ctx); + + if (ctx->image) + plt->target = (u64)&dummy_tramp; +} + static void build_epilogue(struct jit_ctx *ctx) { const u8 r0 = bpf2a64[BPF_REG_0]; @@ -990,8 +1081,7 @@ emit_cond_jmp: &func_addr, &func_addr_fixed); if (ret < 0) return ret; - emit_addr_mov_i64(tmp, func_addr, ctx); - emit(A64_BLR(tmp), ctx); + emit_call(func_addr, ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); break; } @@ -1335,6 +1425,13 @@ static int validate_code(struct jit_ctx *ctx) if (a64_insn == AARCH64_BREAK_FAULT) return -1; } + return 0; +} + +static int validate_ctx(struct jit_ctx *ctx) +{ + if (validate_code(ctx)) + return -1; if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) return -1; @@ -1355,7 +1452,7 @@ struct arm64_jit_data { struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { - int image_size, prog_size, extable_size; + int image_size, prog_size, extable_size, extable_align, extable_offset; struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; struct arm64_jit_data *jit_data; @@ -1425,13 +1522,17 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); + build_plt(&ctx); + extable_align = __alignof__(struct exception_table_entry); extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry); /* Now we know the actual image size. */ prog_size = sizeof(u32) * ctx.idx; - image_size = prog_size + extable_size; + /* also allocate space for plt target */ + extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); + image_size = extable_offset + extable_size; header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); if (header == NULL) { @@ -1443,7 +1544,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.image = (__le32 *)image_ptr; if (extable_size) - prog->aux->extable = (void *)image_ptr + prog_size; + prog->aux->extable = (void *)image_ptr + extable_offset; skip_init_ctx: ctx.idx = 0; ctx.exentry_idx = 0; @@ -1457,9 +1558,10 @@ skip_init_ctx: } build_epilogue(&ctx); + build_plt(&ctx); /* 3. Extra pass to validate JITed code. */ - if (validate_code(&ctx)) { + if (validate_ctx(&ctx)) { bpf_jit_binary_free(header); prog = orig_prog; goto out_off; @@ -1530,3 +1632,589 @@ void bpf_jit_free_exec(void *addr) { return vfree(addr); } + +/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ +bool bpf_jit_supports_subprog_tailcalls(void) +{ + return true; +} + +static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, + int args_off, int retval_off, int run_ctx_off, + bool save_ret) +{ + u32 *branch; + u64 enter_prog; + u64 exit_prog; + struct bpf_prog *p = l->link.prog; + int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); + + if (p->aux->sleepable) { + enter_prog = (u64)__bpf_prog_enter_sleepable; + exit_prog = (u64)__bpf_prog_exit_sleepable; + } else { + enter_prog = (u64)__bpf_prog_enter; + exit_prog = (u64)__bpf_prog_exit; + } + + if (l->cookie == 0) { + /* if cookie is zero, one instruction is enough to store it */ + emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx); + } else { + emit_a64_mov_i64(A64_R(10), l->cookie, ctx); + emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off), + ctx); + } + + /* save p to callee saved register x19 to avoid loading p with mov_i64 + * each time. + */ + emit_addr_mov_i64(A64_R(19), (const u64)p, ctx); + + /* arg1: prog */ + emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx); + /* arg2: &run_ctx */ + emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx); + + emit_call(enter_prog, ctx); + + /* if (__bpf_prog_enter(prog) == 0) + * goto skip_exec_of_prog; + */ + branch = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + + /* save return value to callee saved register x20 */ + emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx); + + emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx); + if (!p->jited) + emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx); + + emit_call((const u64)p->bpf_func, ctx); + + if (save_ret) + emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); + + if (ctx->image) { + int offset = &ctx->image[ctx->idx] - branch; + *branch = A64_CBZ(1, A64_R(0), offset); + } + + /* arg1: prog */ + emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx); + /* arg2: start time */ + emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx); + /* arg3: &run_ctx */ + emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx); + + emit_call(exit_prog, ctx); +} + +static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, + int args_off, int retval_off, int run_ctx_off, + u32 **branches) +{ + int i; + + /* The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx); + for (i = 0; i < tl->nr_links; i++) { + invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, + run_ctx_off, true); + /* if (*(u64 *)(sp + retval_off) != 0) + * goto do_fexit; + */ + emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx); + /* Save the location of branch, and generate a nop. + * This nop will be replaced with a cbnz later. + */ + branches[i] = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + } +} + +static void save_args(struct jit_ctx *ctx, int args_off, int nargs) +{ + int i; + + for (i = 0; i < nargs; i++) { + emit(A64_STR64I(i, A64_SP, args_off), ctx); + args_off += 8; + } +} + +static void restore_args(struct jit_ctx *ctx, int args_off, int nargs) +{ + int i; + + for (i = 0; i < nargs; i++) { + emit(A64_LDR64I(i, A64_SP, args_off), ctx); + args_off += 8; + } +} + +/* Based on the x86's implementation of arch_prepare_bpf_trampoline(). + * + * bpf prog and function entry before bpf trampoline hooked: + * mov x9, lr + * nop + * + * bpf prog and function entry after bpf trampoline hooked: + * mov x9, lr + * bl <bpf_trampoline or plt> + * + */ +static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, + struct bpf_tramp_links *tlinks, void *orig_call, + int nargs, u32 flags) +{ + int i; + int stack_size; + int retaddr_off; + int regs_off; + int retval_off; + int args_off; + int nargs_off; + int ip_off; + int run_ctx_off; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + bool save_ret; + u32 **branches = NULL; + + /* trampoline stack layout: + * [ parent ip ] + * [ FP ] + * SP + retaddr_off [ self ip ] + * [ FP ] + * + * [ padding ] align SP to multiples of 16 + * + * [ x20 ] callee saved reg x20 + * SP + regs_off [ x19 ] callee saved reg x19 + * + * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or + * BPF_TRAMP_F_RET_FENTRY_RET + * + * [ argN ] + * [ ... ] + * SP + args_off [ arg1 ] + * + * SP + nargs_off [ args count ] + * + * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag + * + * SP + run_ctx_off [ bpf_tramp_run_ctx ] + */ + + stack_size = 0; + run_ctx_off = stack_size; + /* room for bpf_tramp_run_ctx */ + stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); + + ip_off = stack_size; + /* room for IP address argument */ + if (flags & BPF_TRAMP_F_IP_ARG) + stack_size += 8; + + nargs_off = stack_size; + /* room for args count */ + stack_size += 8; + + args_off = stack_size; + /* room for args */ + stack_size += nargs * 8; + + /* room for return value */ + retval_off = stack_size; + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + stack_size += 8; + + /* room for callee saved registers, currently x19 and x20 are used */ + regs_off = stack_size; + stack_size += 16; + + /* round up to multiples of 16 to avoid SPAlignmentFault */ + stack_size = round_up(stack_size, 16); + + /* return address locates above FP */ + retaddr_off = stack_size + 8; + + /* bpf trampoline may be invoked by 3 instruction types: + * 1. bl, attached to bpf prog or kernel function via short jump + * 2. br, attached to bpf prog or kernel function via long jump + * 3. blr, working as a function pointer, used by struct_ops. + * So BTI_JC should used here to support both br and blr. + */ + emit_bti(A64_BTI_JC, ctx); + + /* frame for parent function */ + emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* frame for patched function */ + emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* allocate stack space */ + emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx); + + if (flags & BPF_TRAMP_F_IP_ARG) { + /* save ip address of the traced function */ + emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx); + emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx); + } + + /* save args count*/ + emit(A64_MOVZ(1, A64_R(10), nargs, 0), ctx); + emit(A64_STR64I(A64_R(10), A64_SP, nargs_off), ctx); + + /* save args */ + save_args(ctx, args_off, nargs); + + /* save callee saved registers */ + emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx); + emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_call((const u64)__bpf_tramp_enter, ctx); + } + + for (i = 0; i < fentry->nr_links; i++) + invoke_bpf_prog(ctx, fentry->links[i], args_off, + retval_off, run_ctx_off, + flags & BPF_TRAMP_F_RET_FENTRY_RET); + + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), + GFP_KERNEL); + if (!branches) + return -ENOMEM; + + invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, + run_ctx_off, branches); + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + restore_args(ctx, args_off, nargs); + /* call original func */ + emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); + emit(A64_BLR(A64_R(10)), ctx); + /* store return value */ + emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); + /* reserve a nop for bpf_tramp_image_put */ + im->ip_after_call = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + } + + /* update the branches saved in invoke_bpf_mod_ret with cbnz */ + for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { + int offset = &ctx->image[ctx->idx] - branches[i]; + *branches[i] = A64_CBNZ(1, A64_R(10), offset); + } + + for (i = 0; i < fexit->nr_links; i++) + invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, + run_ctx_off, false); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = ctx->image + ctx->idx; + emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_call((const u64)__bpf_tramp_exit, ctx); + } + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + restore_args(ctx, args_off, nargs); + + /* restore callee saved register x19 and x20 */ + emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx); + emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx); + + if (save_ret) + emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx); + + /* reset SP */ + emit(A64_MOV(1, A64_SP, A64_FP), ctx); + + /* pop frames */ + emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) { + /* skip patched function, return to parent */ + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(9)), ctx); + } else { + /* return to patched function */ + emit(A64_MOV(1, A64_R(10), A64_LR), ctx); + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(10)), ctx); + } + + if (ctx->image) + bpf_flush_icache(ctx->image, ctx->image + ctx->idx); + + kfree(branches); + + return ctx->idx; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, + void *image_end, const struct btf_func_model *m, + u32 flags, struct bpf_tramp_links *tlinks, + void *orig_call) +{ + int ret; + int nargs = m->nr_args; + int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE; + struct jit_ctx ctx = { + .image = NULL, + .idx = 0, + }; + + /* the first 8 arguments are passed by registers */ + if (nargs > 8) + return -ENOTSUPP; + + ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags); + if (ret < 0) + return ret; + + if (ret > max_insns) + return -EFBIG; + + ctx.image = image; + ctx.idx = 0; + + jit_fill_hole(image, (unsigned int)(image_end - image)); + ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags); + + if (ret > 0 && validate_code(&ctx) < 0) + ret = -EINVAL; + + if (ret > 0) + ret *= AARCH64_INSN_SIZE; + + return ret; +} + +static bool is_long_jump(void *ip, void *target) +{ + long offset; + + /* NULL target means this is a NOP */ + if (!target) + return false; + + offset = (long)target - (long)ip; + return offset < -SZ_128M || offset >= SZ_128M; +} + +static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, + void *addr, void *plt, u32 *insn) +{ + void *target; + + if (!addr) { + *insn = aarch64_insn_gen_nop(); + return 0; + } + + if (is_long_jump(ip, addr)) + target = plt; + else + target = addr; + + *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, + (unsigned long)target, + type); + + return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; +} + +/* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf + * trampoline with the branch instruction from @ip to @new_addr. If @old_addr + * or @new_addr is NULL, the old or new instruction is NOP. + * + * When @ip is the bpf prog entry, a bpf trampoline is being attached or + * detached. Since bpf trampoline and bpf prog are allocated separately with + * vmalloc, the address distance may exceed 128MB, the maximum branch range. + * So long jump should be handled. + * + * When a bpf prog is constructed, a plt pointing to empty trampoline + * dummy_tramp is placed at the end: + * + * bpf_prog: + * mov x9, lr + * nop // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad dummy_tramp // plt target + * + * This is also the state when no trampoline is attached. + * + * When a short-jump bpf trampoline is attached, the patchsite is patched + * to a bl instruction to the trampoline directly: + * + * bpf_prog: + * mov x9, lr + * bl <short-jump bpf trampoline address> // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad dummy_tramp // plt target + * + * When a long-jump bpf trampoline is attached, the plt target is filled with + * the trampoline address and the patchsite is patched to a bl instruction to + * the plt: + * + * bpf_prog: + * mov x9, lr + * bl plt // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad <long-jump bpf trampoline address> // plt target + * + * The dummy_tramp is used to prevent another CPU from jumping to unknown + * locations during the patching process, making the patching process easier. + */ +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + int ret; + u32 old_insn; + u32 new_insn; + u32 replaced; + struct bpf_plt *plt = NULL; + unsigned long size = 0UL; + unsigned long offset = ~0UL; + enum aarch64_insn_branch_type branch_type; + char namebuf[KSYM_NAME_LEN]; + void *image = NULL; + u64 plt_target = 0ULL; + bool poking_bpf_entry; + + if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) + /* Only poking bpf text is supported. Since kernel function + * entry is set up by ftrace, we reply on ftrace to poke kernel + * functions. + */ + return -ENOTSUPP; + + image = ip - offset; + /* zero offset means we're poking bpf prog entry */ + poking_bpf_entry = (offset == 0UL); + + /* bpf prog entry, find plt and the real patchsite */ + if (poking_bpf_entry) { + /* plt locates at the end of bpf prog */ + plt = image + size - PLT_TARGET_OFFSET; + + /* skip to the nop instruction in bpf prog entry: + * bti c // if BTI enabled + * mov x9, x30 + * nop + */ + ip = image + POKE_OFFSET * AARCH64_INSN_SIZE; + } + + /* long jump is only possible at bpf prog entry */ + if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) && + !poking_bpf_entry)) + return -EINVAL; + + if (poke_type == BPF_MOD_CALL) + branch_type = AARCH64_INSN_BRANCH_LINK; + else + branch_type = AARCH64_INSN_BRANCH_NOLINK; + + if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0) + return -EFAULT; + + if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0) + return -EFAULT; + + if (is_long_jump(ip, new_addr)) + plt_target = (u64)new_addr; + else if (is_long_jump(ip, old_addr)) + /* if the old target is a long jump and the new target is not, + * restore the plt target to dummy_tramp, so there is always a + * legal and harmless address stored in plt target, and we'll + * never jump from plt to an unknown place. + */ + plt_target = (u64)&dummy_tramp; + + if (plt_target) { + /* non-zero plt_target indicates we're patching a bpf prog, + * which is read only. + */ + if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1)) + return -EFAULT; + WRITE_ONCE(plt->target, plt_target); + set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1); + /* since plt target points to either the new trampoline + * or dummy_tramp, even if another CPU reads the old plt + * target value before fetching the bl instruction to plt, + * it will be brought back by dummy_tramp, so no barrier is + * required here. + */ + } + + /* if the old target and the new target are both long jumps, no + * patching is required + */ + if (old_insn == new_insn) + return 0; + + mutex_lock(&text_mutex); + if (aarch64_insn_read(ip, &replaced)) { + ret = -EFAULT; + goto out; + } + + if (replaced != old_insn) { + ret = -EFAULT; + goto out; + } + + /* We call aarch64_insn_patch_text_nosync() to replace instruction + * atomically, so no other CPUs will fetch a half-new and half-old + * instruction. But there is chance that another CPU executes the + * old instruction after the patching operation finishes (e.g., + * pipeline not flushed, or icache not synchronized yet). + * + * 1. when a new trampoline is attached, it is not a problem for + * different CPUs to jump to different trampolines temporarily. + * + * 2. when an old trampoline is freed, we should wait for all other + * CPUs to exit the trampoline and make sure the trampoline is no + * longer reachable, since bpf_tramp_image_put() function already + * uses percpu_ref and task-based rcu to do the sync, no need to call + * the sync version here, see bpf_tramp_image_put() for details. + */ + ret = aarch64_insn_patch_text_nosync(ip, new_insn); +out: + mutex_unlock(&text_mutex); + + return ret; +} |