diff options
Diffstat (limited to 'arch/arm')
62 files changed, 1102 insertions, 1231 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 843edfd000be..f69613fd4e68 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -9,6 +9,7 @@ config ARM select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV + select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PTE_SPECIAL if ARM_LPAE select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_SET_MEMORY @@ -298,10 +299,6 @@ config PGTABLE_LEVELS default 3 if ARM_LPAE default 2 -source "init/Kconfig" - -source "kernel/Kconfig.freezer" - menu "System Type" config MMU @@ -337,8 +334,8 @@ config ARCH_MULTIPLATFORM select TIMER_OF select COMMON_CLK select GENERIC_CLOCKEVENTS + select GENERIC_IRQ_MULTI_HANDLER select MIGHT_HAVE_PCI - select MULTI_IRQ_HANDLER select PCI_DOMAINS if PCI select SPARSE_IRQ select USE_OF @@ -465,9 +462,9 @@ config ARCH_DOVE bool "Marvell Dove" select CPU_PJ4 select GENERIC_CLOCKEVENTS + select GENERIC_IRQ_MULTI_HANDLER select GPIOLIB select MIGHT_HAVE_PCI - select MULTI_IRQ_HANDLER select MVEBU_MBUS select PINCTRL select PINCTRL_DOVE @@ -512,8 +509,8 @@ config ARCH_LPC32XX select COMMON_CLK select CPU_ARM926T select GENERIC_CLOCKEVENTS + select GENERIC_IRQ_MULTI_HANDLER select GPIOLIB - select MULTI_IRQ_HANDLER select SPARSE_IRQ select USE_OF help @@ -532,11 +529,11 @@ config ARCH_PXA select TIMER_OF select CPU_XSCALE if !CPU_XSC3 select GENERIC_CLOCKEVENTS + select GENERIC_IRQ_MULTI_HANDLER select GPIO_PXA select GPIOLIB select HAVE_IDE select IRQ_DOMAIN - select MULTI_IRQ_HANDLER select PLAT_PXA select SPARSE_IRQ help @@ -572,11 +569,11 @@ config ARCH_SA1100 select CPU_FREQ select CPU_SA1100 select GENERIC_CLOCKEVENTS + select GENERIC_IRQ_MULTI_HANDLER select GPIOLIB select HAVE_IDE select IRQ_DOMAIN select ISA - select MULTI_IRQ_HANDLER select NEED_MACH_MEMORY_H select SPARSE_IRQ help @@ -590,10 +587,10 @@ config ARCH_S3C24XX select GENERIC_CLOCKEVENTS select GPIO_SAMSUNG select GPIOLIB + select GENERIC_IRQ_MULTI_HANDLER select HAVE_S3C2410_I2C if I2C select HAVE_S3C2410_WATCHDOG if WATCHDOG select HAVE_S3C_RTC if RTC_CLASS - select MULTI_IRQ_HANDLER select NEED_MACH_IO_H select SAMSUNG_ATAGS select USE_OF @@ -627,10 +624,10 @@ config ARCH_OMAP1 select CLKSRC_MMIO select GENERIC_CLOCKEVENTS select GENERIC_IRQ_CHIP + select GENERIC_IRQ_MULTI_HANDLER select GPIOLIB select HAVE_IDE select IRQ_DOMAIN - select MULTI_IRQ_HANDLER select NEED_MACH_IO_H if PCCARD select NEED_MACH_MEMORY_H select SPARSE_IRQ @@ -921,11 +918,6 @@ config IWMMXT Enable support for iWMMXt context switching at run time if running on a CPU that supports it. -config MULTI_IRQ_HANDLER - bool - help - Allow each machine to specify it's own IRQ handler at run time. - if !MMU source "arch/arm/Kconfig-nommu" endif @@ -1485,8 +1477,6 @@ config ARCH_NR_GPIO If unsure, leave the default value. -source kernel/Kconfig.preempt - config HZ_FIXED int default 200 if ARCH_EBSA110 @@ -1721,8 +1711,6 @@ config ARM_MODULE_PLTS Disabling this is usually safe for small single-platform configurations. If unsure, say y. -source "mm/Kconfig" - config FORCE_MAX_ZONEORDER int "Maximum zone order" default "12" if SOC_AM33XX @@ -2175,12 +2163,6 @@ config KERNEL_MODE_NEON endmenu -menu "Userspace binary formats" - -source "fs/Kconfig.binfmt" - -endmenu - menu "Power management options" source "kernel/power/Kconfig" @@ -2201,23 +2183,10 @@ config ARCH_HIBERNATION_POSSIBLE endmenu -source "net/Kconfig" - -source "drivers/Kconfig" - source "drivers/firmware/Kconfig" -source "fs/Kconfig" - -source "arch/arm/Kconfig.debug" - -source "security/Kconfig" - -source "crypto/Kconfig" if CRYPTO source "arch/arm/crypto/Kconfig" endif -source "lib/Kconfig" - source "arch/arm/kvm/Kconfig" diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 693f84392f1b..b48dc083d1b1 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" - -source "lib/Kconfig.debug" config ARM_PTDUMP_CORE def_bool n @@ -1863,5 +1860,3 @@ config PID_IN_CONTEXTIDR are planning to use hardware trace tools with this kernel. source "drivers/hwtracing/coresight/Kconfig" - -endmenu diff --git a/arch/arm/Makefile b/arch/arm/Makefile index fc26c3d7b9b6..e7d703d8fac3 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -10,9 +10,6 @@ # # Copyright (C) 1995-2001 by Russell King -# Ensure linker flags are correct -LDFLAGS := - LDFLAGS_vmlinux :=-p --no-undefined -X --pic-veneer ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 @@ -46,12 +43,12 @@ ifeq ($(CONFIG_CPU_BIG_ENDIAN),y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__ARMEB__ AS += -EB -LD += -EB +LDFLAGS += -EB else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__ARMEL__ AS += -EL -LD += -EL +LDFLAGS += -EL endif # diff --git a/arch/arm/boot/dts/gemini-dlink-dir-685.dts b/arch/arm/boot/dts/gemini-dlink-dir-685.dts index fb5c954ab95a..6f258b50eb44 100644 --- a/arch/arm/boot/dts/gemini-dlink-dir-685.dts +++ b/arch/arm/boot/dts/gemini-dlink-dir-685.dts @@ -156,6 +156,100 @@ }; }; + /* This is a RealTek RTL8366RB switch and PHY using SMI over GPIO */ + switch { + compatible = "realtek,rtl8366rb"; + /* 22 = MDIO (has input reads), 21 = MDC (clock, output only) */ + mdc-gpios = <&gpio0 21 GPIO_ACTIVE_HIGH>; + mdio-gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio0 14 GPIO_ACTIVE_LOW>; + realtek,disable-leds; + + switch_intc: interrupt-controller { + /* GPIO 15 provides the interrupt */ + interrupt-parent = <&gpio0>; + interrupts = <15 IRQ_TYPE_LEVEL_LOW>; + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <1>; + }; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "lan0"; + phy-handle = <&phy0>; + }; + port@1 { + reg = <1>; + label = "lan1"; + phy-handle = <&phy1>; + }; + port@2 { + reg = <2>; + label = "lan2"; + phy-handle = <&phy2>; + }; + port@3 { + reg = <3>; + label = "lan3"; + phy-handle = <&phy3>; + }; + port@4 { + reg = <4>; + label = "wan"; + phy-handle = <&phy4>; + }; + rtl8366rb_cpu_port: port@5 { + reg = <5>; + label = "cpu"; + ethernet = <&gmac0>; + phy-mode = "rgmii"; + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + + }; + + mdio { + compatible = "realtek,smi-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + phy0: phy@0 { + reg = <0>; + interrupt-parent = <&switch_intc>; + interrupts = <0>; + }; + phy1: phy@1 { + reg = <1>; + interrupt-parent = <&switch_intc>; + interrupts = <1>; + }; + phy2: phy@2 { + reg = <2>; + interrupt-parent = <&switch_intc>; + interrupts = <2>; + }; + phy3: phy@3 { + reg = <3>; + interrupt-parent = <&switch_intc>; + interrupts = <3>; + }; + phy4: phy@4 { + reg = <4>; + interrupt-parent = <&switch_intc>; + interrupts = <12>; + }; + }; + }; + soc { flash@30000000 { /* @@ -223,10 +317,12 @@ * gpio0bgrp cover line 7 used by WPS LED * gpio0cgrp cover line 8, 13 used by keys * and 11, 12 used by the HD LEDs + * and line 14, 15 used by RTL8366 + * RESET and phy ready * gpio0egrp cover line 16 used by VDISP * gpio0fgrp cover line 17 used by TK IRQ * gpio0ggrp cover line 20 used by panel CS - * gpio0hgrp cover line 21,22 used by RTL8366RB + * gpio0hgrp cover line 21,22 used by RTL8366RB MDIO */ gpio0_default_pins: pinctrl-gpio0 { mux { @@ -250,6 +346,32 @@ groups = "gpio1bgrp"; }; }; + pinctrl-gmii { + mux { + function = "gmii"; + groups = "gmii_gmac0_grp"; + }; + conf0 { + pins = "V8 GMAC0 RXDV", "T10 GMAC1 RXDV", + "Y7 GMAC0 RXC", "Y11 GMAC1 RXC", + "T8 GMAC0 TXEN", "W11 GMAC1 TXEN", + "U8 GMAC0 TXC", "V11 GMAC1 TXC", + "W8 GMAC0 RXD0", "V9 GMAC0 RXD1", + "Y8 GMAC0 RXD2", "U9 GMAC0 RXD3", + "T7 GMAC0 TXD0", "U6 GMAC0 TXD1", + "V7 GMAC0 TXD2", "U7 GMAC0 TXD3", + "Y12 GMAC1 RXD0", "V12 GMAC1 RXD1", + "T11 GMAC1 RXD2", "W12 GMAC1 RXD3", + "U10 GMAC1 TXD0", "Y10 GMAC1 TXD1", + "W10 GMAC1 TXD2", "T9 GMAC1 TXD3"; + skew-delay = <7>; + }; + /* Set up drive strength on GMAC0 to 16 mA */ + conf1 { + groups = "gmii_gmac0_grp"; + drive-strength = <16>; + }; + }; }; }; @@ -291,6 +413,22 @@ <0x6000 0 0 4 &pci_intc 2>; }; + ethernet@60000000 { + status = "okay"; + + ethernet-port@0 { + phy-mode = "rgmii"; + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + ethernet-port@1 { + /* Not used in this platform */ + }; + }; + ata@63000000 { status = "okay"; }; diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S index 3fecb2124c35..451a849ad518 100644 --- a/arch/arm/crypto/chacha20-neon-core.S +++ b/arch/arm/crypto/chacha20-neon-core.S @@ -51,9 +51,8 @@ ENTRY(chacha20_block_xor_neon) .Ldoubleround: // x0 += x1, x3 = rotl32(x3 ^ x0, 16) vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #16 - vsri.u32 q3, q4, #16 + veor q3, q3, q0 + vrev32.16 q3, q3 // x2 += x3, x1 = rotl32(x1 ^ x2, 12) vadd.i32 q2, q2, q3 @@ -82,9 +81,8 @@ ENTRY(chacha20_block_xor_neon) // x0 += x1, x3 = rotl32(x3 ^ x0, 16) vadd.i32 q0, q0, q1 - veor q4, q3, q0 - vshl.u32 q3, q4, #16 - vsri.u32 q3, q4, #16 + veor q3, q3, q0 + vrev32.16 q3, q3 // x2 += x3, x1 = rotl32(x1 ^ x2, 12) vadd.i32 q2, q2, q3 diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c index d9bb52cae2ac..8930fc4e7c22 100644 --- a/arch/arm/crypto/ghash-ce-glue.c +++ b/arch/arm/crypto/ghash-ce-glue.c @@ -152,7 +152,7 @@ static struct shash_alg ghash_alg = { .cra_name = "__ghash", .cra_driver_name = "__driver-ghash-ce", .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL, + .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = GHASH_BLOCK_SIZE, .cra_ctxsize = sizeof(struct ghash_key), .cra_module = THIS_MODULE, @@ -308,9 +308,8 @@ static struct ahash_alg ghash_async_alg = { .cra_name = "ghash", .cra_driver_name = "ghash-ce", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = GHASH_BLOCK_SIZE, - .cra_type = &crypto_ahash_type, .cra_ctxsize = sizeof(struct ghash_async_ctx), .cra_module = THIS_MODULE, .cra_init = ghash_async_init_tfm, diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c index 555f72b5e659..b732522e20f8 100644 --- a/arch/arm/crypto/sha1-ce-glue.c +++ b/arch/arm/crypto/sha1-ce-glue.c @@ -75,7 +75,6 @@ static struct shash_alg alg = { .cra_name = "sha1", .cra_driver_name = "sha1-ce", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index 6fc73bf8766d..98ab8239f919 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -67,7 +67,6 @@ static struct shash_alg alg = { .cra_name = "sha1", .cra_driver_name= "sha1-asm", .cra_priority = 150, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c index 4e22f122f966..d15e0ea2c95e 100644 --- a/arch/arm/crypto/sha1_neon_glue.c +++ b/arch/arm/crypto/sha1_neon_glue.c @@ -83,7 +83,6 @@ static struct shash_alg alg = { .cra_name = "sha1", .cra_driver_name = "sha1-neon", .cra_priority = 250, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c index df4dcef054ae..1211a5c129fc 100644 --- a/arch/arm/crypto/sha2-ce-glue.c +++ b/arch/arm/crypto/sha2-ce-glue.c @@ -78,7 +78,6 @@ static struct shash_alg algs[] = { { .cra_name = "sha224", .cra_driver_name = "sha224-ce", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -93,7 +92,6 @@ static struct shash_alg algs[] = { { .cra_name = "sha256", .cra_driver_name = "sha256-ce", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c index a84e869ef900..bf8ccff2c9d0 100644 --- a/arch/arm/crypto/sha256_glue.c +++ b/arch/arm/crypto/sha256_glue.c @@ -71,7 +71,6 @@ static struct shash_alg algs[] = { { .cra_name = "sha256", .cra_driver_name = "sha256-asm", .cra_priority = 150, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -86,7 +85,6 @@ static struct shash_alg algs[] = { { .cra_name = "sha224", .cra_driver_name = "sha224-asm", .cra_priority = 150, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA224_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c index 39ccd658817e..9bbee56fbdc8 100644 --- a/arch/arm/crypto/sha256_neon_glue.c +++ b/arch/arm/crypto/sha256_neon_glue.c @@ -79,7 +79,6 @@ struct shash_alg sha256_neon_algs[] = { { .cra_name = "sha256", .cra_driver_name = "sha256-neon", .cra_priority = 250, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -94,7 +93,6 @@ struct shash_alg sha256_neon_algs[] = { { .cra_name = "sha224", .cra_driver_name = "sha224-neon", .cra_priority = 250, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA224_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c index 269a394e4a53..86540cd4a6fa 100644 --- a/arch/arm/crypto/sha512-glue.c +++ b/arch/arm/crypto/sha512-glue.c @@ -63,7 +63,6 @@ static struct shash_alg sha512_arm_algs[] = { { .cra_name = "sha384", .cra_driver_name = "sha384-arm", .cra_priority = 250, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -78,7 +77,6 @@ static struct shash_alg sha512_arm_algs[] = { { .cra_name = "sha512", .cra_driver_name = "sha512-arm", .cra_priority = 250, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c index 32693684a3ab..8a5642b41fd6 100644 --- a/arch/arm/crypto/sha512-neon-glue.c +++ b/arch/arm/crypto/sha512-neon-glue.c @@ -75,7 +75,6 @@ struct shash_alg sha512_neon_algs[] = { { .cra_name = "sha384", .cra_driver_name = "sha384-neon", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_module = THIS_MODULE, @@ -91,7 +90,6 @@ struct shash_alg sha512_neon_algs[] = { { .cra_name = "sha512", .cra_driver_name = "sha512-neon", .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 0cd4dccbae78..b17ee03d280b 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -460,6 +460,10 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) adds \tmp, \addr, #\size - 1 sbcccs \tmp, \tmp, \limit bcs \bad +#ifdef CONFIG_CPU_SPECTRE + movcs \addr, #0 + csdb +#endif #endif .endm diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 66d0e215a773..f74756641410 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -130,7 +130,7 @@ static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new) } #define atomic_cmpxchg_relaxed atomic_cmpxchg_relaxed -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) { int oldval, newval; unsigned long tmp; @@ -156,6 +156,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) return oldval; } +#define atomic_fetch_add_unless atomic_fetch_add_unless #else /* ARM_ARCH_6 */ @@ -215,15 +216,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new) return ret; } -static inline int __atomic_add_unless(atomic_t *v, int a, int u) -{ - int c, old; - - c = atomic_read(v); - while (c != u && (old = atomic_cmpxchg((v), c, c + a)) != c) - c = old; - return c; -} +#define atomic_fetch_andnot atomic_fetch_andnot #endif /* __LINUX_ARM_ARCH__ */ @@ -254,17 +247,6 @@ ATOMIC_OPS(xor, ^=, eor) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) -#define atomic_inc(v) atomic_add(1, v) -#define atomic_dec(v) atomic_sub(1, v) - -#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) -#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) -#define atomic_inc_return_relaxed(v) (atomic_add_return_relaxed(1, v)) -#define atomic_dec_return_relaxed(v) (atomic_sub_return_relaxed(1, v)) -#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) - -#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0) - #ifndef CONFIG_GENERIC_ATOMIC64 typedef struct { long long counter; @@ -494,12 +476,13 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) return result; } +#define atomic64_dec_if_positive atomic64_dec_if_positive -static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) +static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a, + long long u) { - long long val; + long long oldval, newval; unsigned long tmp; - int ret = 1; smp_mb(); prefetchw(&v->counter); @@ -508,33 +491,23 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) "1: ldrexd %0, %H0, [%4]\n" " teq %0, %5\n" " teqeq %H0, %H5\n" -" moveq %1, #0\n" " beq 2f\n" -" adds %Q0, %Q0, %Q6\n" -" adc %R0, %R0, %R6\n" -" strexd %2, %0, %H0, [%4]\n" +" adds %Q1, %Q0, %Q6\n" +" adc %R1, %R0, %R6\n" +" strexd %2, %1, %H1, [%4]\n" " teq %2, #0\n" " bne 1b\n" "2:" - : "=&r" (val), "+r" (ret), "=&r" (tmp), "+Qo" (v->counter) + : "=&r" (oldval), "=&r" (newval), "=&r" (tmp), "+Qo" (v->counter) : "r" (&v->counter), "r" (u), "r" (a) : "cc"); - if (ret) + if (oldval != u) smp_mb(); - return ret; + return oldval; } - -#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) -#define atomic64_inc(v) atomic64_add(1LL, (v)) -#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1LL, (v)) -#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) -#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) -#define atomic64_dec(v) atomic64_sub(1LL, (v)) -#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1LL, (v)) -#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) +#define atomic64_fetch_add_unless atomic64_fetch_add_unless #endif /* !CONFIG_GENERIC_ATOMIC64 */ #endif diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index 4cab9bb823fb..c92e42a5c8f7 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -215,7 +215,6 @@ extern int _find_next_bit_be(const unsigned long *p, int size, int offset); #if __LINUX_ARM_ARCH__ < 5 -#include <asm-generic/bitops/ffz.h> #include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/__ffs.h> #include <asm-generic/bitops/fls.h> @@ -223,93 +222,20 @@ extern int _find_next_bit_be(const unsigned long *p, int size, int offset); #else -static inline int constant_fls(int x) -{ - int r = 32; - - if (!x) - return 0; - if (!(x & 0xffff0000u)) { - x <<= 16; - r -= 16; - } - if (!(x & 0xff000000u)) { - x <<= 8; - r -= 8; - } - if (!(x & 0xf0000000u)) { - x <<= 4; - r -= 4; - } - if (!(x & 0xc0000000u)) { - x <<= 2; - r -= 2; - } - if (!(x & 0x80000000u)) { - x <<= 1; - r -= 1; - } - return r; -} - -/* - * On ARMv5 and above those functions can be implemented around the - * clz instruction for much better code efficiency. __clz returns - * the number of leading zeros, zero input will return 32, and - * 0x80000000 will return 0. - */ -static inline unsigned int __clz(unsigned int x) -{ - unsigned int ret; - - asm("clz\t%0, %1" : "=r" (ret) : "r" (x)); - - return ret; -} - -/* - * fls() returns zero if the input is zero, otherwise returns the bit - * position of the last set bit, where the LSB is 1 and MSB is 32. - */ -static inline int fls(int x) -{ - if (__builtin_constant_p(x)) - return constant_fls(x); - - return 32 - __clz(x); -} - -/* - * __fls() returns the bit position of the last bit set, where the - * LSB is 0 and MSB is 31. Zero input is undefined. - */ -static inline unsigned long __fls(unsigned long x) -{ - return fls(x) - 1; -} - -/* - * ffs() returns zero if the input was zero, otherwise returns the bit - * position of the first set bit, where the LSB is 1 and MSB is 32. - */ -static inline int ffs(int x) -{ - return fls(x & -x); -} - /* - * __ffs() returns the bit position of the first bit set, where the - * LSB is 0 and MSB is 31. Zero input is undefined. + * On ARMv5 and above, the gcc built-ins may rely on the clz instruction + * and produce optimal inlined code in all cases. On ARMv7 it is even + * better by also using the rbit instruction. */ -static inline unsigned long __ffs(unsigned long x) -{ - return ffs(x) - 1; -} - -#define ffz(x) __ffs( ~(x) ) +#include <asm-generic/bitops/builtin-__fls.h> +#include <asm-generic/bitops/builtin-__ffs.h> +#include <asm-generic/bitops/builtin-fls.h> +#include <asm-generic/bitops/builtin-ffs.h> #endif +#include <asm-generic/bitops/ffz.h> + #include <asm-generic/bitops/fls64.h> #include <asm-generic/bitops/sched.h> diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 17f1f1a814ff..38badaae8d9d 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -58,6 +58,9 @@ void efi_virtmap_unload(void); #define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) #define efi_is_64bit() (false) +#define efi_table_attr(table, attr, instance) \ + ((table##_t *)instance)->attr + #define efi_call_proto(protocol, f, instance, ...) \ ((protocol##_t *)instance)->f(instance, ##__VA_ARGS__) diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index e46e4e7bdba3..ac54c06764e6 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -111,14 +111,17 @@ static inline void decode_ctrl_reg(u32 reg, asm volatile("mcr p14, 0, %0, " #N "," #M ", " #OP2 : : "r" (VAL));\ } while (0) +struct perf_event_attr; struct notifier_block; struct perf_event; struct pmu; extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, int *gen_len, int *gen_type); -extern int arch_check_bp_in_kernelspace(struct perf_event *bp); -extern int arch_validate_hwbkpt_settings(struct perf_event *bp); +extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw); +extern int hw_breakpoint_arch_parse(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, unsigned long val, void *data); diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index b6f319606e30..c883fcbe93b6 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -31,11 +31,6 @@ extern void asm_do_IRQ(unsigned int, struct pt_regs *); void handle_IRQ(unsigned int, struct pt_regs *); void init_IRQ(void); -#ifdef CONFIG_MULTI_IRQ_HANDLER -extern void (*handle_arch_irq)(struct pt_regs *); -extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); -#endif - #ifdef CONFIG_SMP extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self); diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 59655459da59..82290f212d8e 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -44,8 +44,6 @@ struct prev_kprobe { struct kprobe_ctlblk { unsigned int kprobe_status; struct prev_kprobe prev_kprobe; - struct pt_regs jprobe_saved_regs; - char jprobes_stack[MAX_STACK_SIZE]; }; void arch_remove_kprobe(struct kprobe *); diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 6493bd479ddc..fe2fb1ddd771 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -26,13 +26,13 @@ #include <asm/cputype.h> /* arm64 compatibility macros */ -#define COMPAT_PSR_MODE_ABT ABT_MODE -#define COMPAT_PSR_MODE_UND UND_MODE -#define COMPAT_PSR_T_BIT PSR_T_BIT -#define COMPAT_PSR_I_BIT PSR_I_BIT -#define COMPAT_PSR_A_BIT PSR_A_BIT -#define COMPAT_PSR_E_BIT PSR_E_BIT -#define COMPAT_PSR_IT_MASK PSR_IT_MASK +#define PSR_AA32_MODE_ABT ABT_MODE +#define PSR_AA32_MODE_UND UND_MODE +#define PSR_AA32_T_BIT PSR_T_BIT +#define PSR_AA32_I_BIT PSR_I_BIT +#define PSR_AA32_A_BIT PSR_A_BIT +#define PSR_AA32_E_BIT PSR_E_BIT +#define PSR_AA32_IT_MASK PSR_IT_MASK unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 5c1ad11aa392..bb8851208e17 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -59,7 +59,7 @@ struct machine_desc { void (*init_time)(void); void (*init_machine)(void); void (*init_late)(void); -#ifdef CONFIG_MULTI_IRQ_HANDLER +#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER void (*handle_irq)(struct pt_regs *); #endif void (*restart)(enum reboot_mode, const char *); diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h index 0f79e4dec7f9..4ac3a019a46f 100644 --- a/arch/arm/include/asm/mach/time.h +++ b/arch/arm/include/asm/mach/time.h @@ -13,7 +13,6 @@ extern void timer_tick(void); typedef void (*clock_access_fn)(struct timespec64 *); -extern int register_persistent_clock(clock_access_fn read_boot, - clock_access_fn read_persistent); +extern int register_persistent_clock(clock_access_fn read_persistent); #endif diff --git a/arch/arm/include/asm/probes.h b/arch/arm/include/asm/probes.h index 1e5b9bb92270..991c9127c650 100644 --- a/arch/arm/include/asm/probes.h +++ b/arch/arm/include/asm/probes.h @@ -51,7 +51,6 @@ struct arch_probes_insn { * We assume one instruction can consume at most 64 bytes stack, which is * 'push {r0-r15}'. Instructions consume more or unknown stack space like * 'str r0, [sp, #-80]' and 'str r0, [sp, r1]' should be prohibit to probe. - * Both kprobe and jprobe use this macro. */ #define MAX_STACK_SIZE 64 diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index e71cc35de163..9b37b6ab27fe 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -123,8 +123,8 @@ struct user_vfp_exc; extern int vfp_preserve_user_clear_hwstate(struct user_vfp __user *, struct user_vfp_exc __user *); -extern int vfp_restore_user_hwstate(struct user_vfp __user *, - struct user_vfp_exc __user *); +extern int vfp_restore_user_hwstate(struct user_vfp *, + struct user_vfp_exc *); #endif /* diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index d5562f9ce600..f854148c8d7c 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -292,5 +292,13 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, { } +static inline void tlb_flush_remove_tables(struct mm_struct *mm) +{ +} + +static inline void tlb_flush_remove_tables_local(void *arg) +{ +} + #endif /* CONFIG_MMU */ #endif diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 3d614e90c19f..5451e1f05a19 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -85,6 +85,13 @@ static inline void set_fs(mm_segment_t fs) flag; }) /* + * This is a type: either unsigned long, if the argument fits into + * that type, or otherwise unsigned long long. + */ +#define __inttype(x) \ + __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) + +/* * Single-value transfer routines. They automatically use the right * size if we just have the right pointer type. Note that the functions * which read from user space (*get_*) need to take care not to leak @@ -153,7 +160,7 @@ extern int __get_user_64t_4(void *); ({ \ unsigned long __limit = current_thread_info()->addr_limit - 1; \ register typeof(*(p)) __user *__p asm("r0") = (p); \ - register typeof(x) __r2 asm("r2"); \ + register __inttype(x) __r2 asm("r2"); \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ unsigned int __ua_flags = uaccess_save_and_enable(); \ @@ -243,6 +250,16 @@ static inline void set_fs(mm_segment_t fs) #define user_addr_max() \ (uaccess_kernel() ? ~0UL : get_fs()) +#ifdef CONFIG_CPU_SPECTRE +/* + * When mitigating Spectre variant 1, it is not worth fixing the non- + * verifying accessors, because we need to add verification of the + * address space there. Force these to use the standard get_user() + * version instead. + */ +#define __get_user(x, ptr) get_user(x, ptr) +#else + /* * The "__xxx" versions of the user access functions do not verify the * address space - it must have been done previously with a separate @@ -259,12 +276,6 @@ static inline void set_fs(mm_segment_t fs) __gu_err; \ }) -#define __get_user_error(x, ptr, err) \ -({ \ - __get_user_err((x), (ptr), err); \ - (void) 0; \ -}) - #define __get_user_err(x, ptr, err) \ do { \ unsigned long __gu_addr = (unsigned long)(ptr); \ @@ -324,6 +335,7 @@ do { \ #define __get_user_asm_word(x, addr, err) \ __get_user_asm(x, addr, err, ldr) +#endif #define __put_user_switch(x, ptr, __err, __fn) \ diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 179a9f6bd1e3..e85a3af9ddeb 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -22,7 +22,7 @@ #include <asm/glue-df.h> #include <asm/glue-pf.h> #include <asm/vfpmacros.h> -#ifndef CONFIG_MULTI_IRQ_HANDLER +#ifndef CONFIG_GENERIC_IRQ_MULTI_HANDLER #include <mach/entry-macro.S> #endif #include <asm/thread_notify.h> @@ -39,7 +39,7 @@ * Interrupt handling. */ .macro irq_handler -#ifdef CONFIG_MULTI_IRQ_HANDLER +#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER ldr r1, =handle_arch_irq mov r0, sp badr lr, 9997f @@ -1226,9 +1226,3 @@ vector_addrexcptn: .globl cr_alignment cr_alignment: .space 4 - -#ifdef CONFIG_MULTI_IRQ_HANDLER - .globl handle_arch_irq -handle_arch_irq: - .space 4 -#endif diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 106a1466518d..746565a876dc 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -48,6 +48,7 @@ saved_pc .req lr * from those features make this path too inefficient. */ ret_fast_syscall: +__ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts @@ -78,6 +79,7 @@ fast_work_pending: * call. */ ret_fast_syscall: +__ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 @@ -255,7 +257,7 @@ local_restart: tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls? bne __sys_trace - invoke_syscall tbl, scno, r10, ret_fast_syscall + invoke_syscall tbl, scno, r10, __ret_fast_syscall add r1, sp, #S_OFF 2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 7a9b86978ee1..ec29de250076 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -53,7 +53,11 @@ ENTRY(stext) THUMB(1: ) #endif - setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode +#ifdef CONFIG_ARM_VIRT_EXT + bl __hyp_stub_install +#endif + @ ensure svc mode and all interrupts masked + safe_svcmode_maskall r9 @ and irqs disabled #if defined(CONFIG_CPU_CP15) mrc p15, 0, r9, c0, c0 @ get processor id @@ -89,7 +93,11 @@ ENTRY(secondary_startup) * the processor type - there is no need to check the machine type * as it has already been validated by the primary processor. */ - setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 +#ifdef CONFIG_ARM_VIRT_EXT + bl __hyp_stub_install_secondary +#endif + safe_svcmode_maskall r9 + #ifndef CONFIG_CPU_CP15 ldr r9, =CONFIG_PROCESSOR_ID #else diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 629e25152c0d..1d5fbf1d1c67 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -456,14 +456,13 @@ static int get_hbp_len(u8 hbp_len) /* * Check whether bp virtual address is in kernel space. */ -int arch_check_bp_in_kernelspace(struct perf_event *bp) +int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw) { unsigned int len; unsigned long va; - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - va = info->address; - len = get_hbp_len(info->ctrl.len); + va = hw->address; + len = get_hbp_len(hw->ctrl.len); return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); } @@ -518,42 +517,42 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, /* * Construct an arch_hw_breakpoint from a perf_event. */ -static int arch_build_bp_info(struct perf_event *bp) +static int arch_build_bp_info(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - /* Type */ - switch (bp->attr.bp_type) { + switch (attr->bp_type) { case HW_BREAKPOINT_X: - info->ctrl.type = ARM_BREAKPOINT_EXECUTE; + hw->ctrl.type = ARM_BREAKPOINT_EXECUTE; break; case HW_BREAKPOINT_R: - info->ctrl.type = ARM_BREAKPOINT_LOAD; + hw->ctrl.type = ARM_BREAKPOINT_LOAD; break; case HW_BREAKPOINT_W: - info->ctrl.type = ARM_BREAKPOINT_STORE; + hw->ctrl.type = ARM_BREAKPOINT_STORE; break; case HW_BREAKPOINT_RW: - info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE; + hw->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE; break; default: return -EINVAL; } /* Len */ - switch (bp->attr.bp_len) { + switch (attr->bp_len) { case HW_BREAKPOINT_LEN_1: - info->ctrl.len = ARM_BREAKPOINT_LEN_1; + hw->ctrl.len = ARM_BREAKPOINT_LEN_1; break; case HW_BREAKPOINT_LEN_2: - info->ctrl.len = ARM_BREAKPOINT_LEN_2; + hw->ctrl.len = ARM_BREAKPOINT_LEN_2; break; case HW_BREAKPOINT_LEN_4: - info->ctrl.len = ARM_BREAKPOINT_LEN_4; + hw->ctrl.len = ARM_BREAKPOINT_LEN_4; break; case HW_BREAKPOINT_LEN_8: - info->ctrl.len = ARM_BREAKPOINT_LEN_8; - if ((info->ctrl.type != ARM_BREAKPOINT_EXECUTE) + hw->ctrl.len = ARM_BREAKPOINT_LEN_8; + if ((hw->ctrl.type != ARM_BREAKPOINT_EXECUTE) && max_watchpoint_len >= 8) break; default: @@ -566,24 +565,24 @@ static int arch_build_bp_info(struct perf_event *bp) * by the hardware and must be aligned to the appropriate number of * bytes. */ - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE && - info->ctrl.len != ARM_BREAKPOINT_LEN_2 && - info->ctrl.len != ARM_BREAKPOINT_LEN_4) + if (hw->ctrl.type == ARM_BREAKPOINT_EXECUTE && + hw->ctrl.len != ARM_BREAKPOINT_LEN_2 && + hw->ctrl.len != ARM_BREAKPOINT_LEN_4) return -EINVAL; /* Address */ - info->address = bp->attr.bp_addr; + hw->address = attr->bp_addr; /* Privilege */ - info->ctrl.privilege = ARM_BREAKPOINT_USER; - if (arch_check_bp_in_kernelspace(bp)) - info->ctrl.privilege |= ARM_BREAKPOINT_PRIV; + hw->ctrl.privilege = ARM_BREAKPOINT_USER; + if (arch_check_bp_in_kernelspace(hw)) + hw->ctrl.privilege |= ARM_BREAKPOINT_PRIV; /* Enabled? */ - info->ctrl.enabled = !bp->attr.disabled; + hw->ctrl.enabled = !attr->disabled; /* Mismatch */ - info->ctrl.mismatch = 0; + hw->ctrl.mismatch = 0; return 0; } @@ -591,9 +590,10 @@ static int arch_build_bp_info(struct perf_event *bp) /* * Validate the arch-specific HW Breakpoint register settings. */ -int arch_validate_hwbkpt_settings(struct perf_event *bp) +int hw_breakpoint_arch_parse(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); int ret = 0; u32 offset, alignment_mask = 0x3; @@ -602,14 +602,14 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) return -ENODEV; /* Build the arch_hw_breakpoint. */ - ret = arch_build_bp_info(bp); + ret = arch_build_bp_info(bp, attr, hw); if (ret) goto out; /* Check address alignment. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_8) alignment_mask = 0x7; - offset = info->address & alignment_mask; + offset = hw->address & alignment_mask; switch (offset) { case 0: /* Aligned */ @@ -617,19 +617,19 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) case 1: case 2: /* Allow halfword watchpoints and breakpoints. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2) break; case 3: /* Allow single byte watchpoint. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1) break; default: ret = -EINVAL; goto out; } - info->address &= ~alignment_mask; - info->ctrl.len <<= offset; + hw->address &= ~alignment_mask; + hw->ctrl.len <<= offset; if (is_default_overflow_handler(bp)) { /* @@ -640,7 +640,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) return -EINVAL; /* We don't allow mismatch breakpoints in kernel space. */ - if (arch_check_bp_in_kernelspace(bp)) + if (arch_check_bp_in_kernelspace(hw)) return -EPERM; /* @@ -655,8 +655,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * reports them. */ if (!debug_exception_updates_fsr() && - (info->ctrl.type == ARM_BREAKPOINT_LOAD || - info->ctrl.type == ARM_BREAKPOINT_STORE)) + (hw->ctrl.type == ARM_BREAKPOINT_LOAD || + hw->ctrl.type == ARM_BREAKPOINT_STORE)) return -EINVAL; } diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index ece04a457486..9908dacf9229 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -102,16 +102,6 @@ void __init init_IRQ(void) uniphier_cache_init(); } -#ifdef CONFIG_MULTI_IRQ_HANDLER -void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) -{ - if (handle_arch_irq) - return; - - handle_arch_irq = handle_irq; -} -#endif - #ifdef CONFIG_SPARSE_IRQ int __init arch_probe_nr_irqs(void) { diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index be42c4f66a40..1ae99deeec54 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -233,7 +233,7 @@ armv6_pmcr_counter_has_overflowed(unsigned long pmcr, return ret; } -static inline u32 armv6pmu_read_counter(struct perf_event *event) +static inline u64 armv6pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -251,7 +251,7 @@ static inline u32 armv6pmu_read_counter(struct perf_event *event) return value; } -static inline void armv6pmu_write_counter(struct perf_event *event, u32 value) +static inline void armv6pmu_write_counter(struct perf_event *event, u64 value) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -411,6 +411,12 @@ armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, } } +static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + static void armv6pmu_disable_event(struct perf_event *event) { unsigned long val, mask, evt, flags; @@ -491,11 +497,11 @@ static void armv6pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv6pmu_read_counter; cpu_pmu->write_counter = armv6pmu_write_counter; cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx; cpu_pmu->start = armv6pmu_start; cpu_pmu->stop = armv6pmu_stop; cpu_pmu->map_event = armv6_map_event; cpu_pmu->num_events = 3; - cpu_pmu->max_period = (1LLU << 32) - 1; } static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu) @@ -542,11 +548,11 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv6pmu_read_counter; cpu_pmu->write_counter = armv6pmu_write_counter; cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx; cpu_pmu->start = armv6pmu_start; cpu_pmu->stop = armv6pmu_stop; cpu_pmu->map_event = armv6mpcore_map_event; cpu_pmu->num_events = 3; - cpu_pmu->max_period = (1LLU << 32) - 1; return 0; } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 57f01e059f39..a4fb0f8b8f84 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -743,7 +743,7 @@ static inline void armv7_pmnc_select_counter(int idx) isb(); } -static inline u32 armv7pmu_read_counter(struct perf_event *event) +static inline u64 armv7pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -763,7 +763,7 @@ static inline u32 armv7pmu_read_counter(struct perf_event *event) return value; } -static inline void armv7pmu_write_counter(struct perf_event *event, u32 value) +static inline void armv7pmu_write_counter(struct perf_event *event, u64 value) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -1058,6 +1058,12 @@ static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, return -EAGAIN; } +static void armv7pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + /* * Add an event filter to a given event. This will only work for PMUv2 PMUs. */ @@ -1167,10 +1173,10 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv7pmu_read_counter; cpu_pmu->write_counter = armv7pmu_write_counter; cpu_pmu->get_event_idx = armv7pmu_get_event_idx; + cpu_pmu->clear_event_idx = armv7pmu_clear_event_idx; cpu_pmu->start = armv7pmu_start; cpu_pmu->stop = armv7pmu_stop; cpu_pmu->reset = armv7pmu_reset; - cpu_pmu->max_period = (1LLU << 32) - 1; }; static void armv7_read_num_pmnc_events(void *info) @@ -1638,6 +1644,7 @@ static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, bool venum_event = EVENT_VENUM(hwc->config_base); bool krait_event = EVENT_CPU(hwc->config_base); + armv7pmu_clear_event_idx(cpuc, event); if (venum_event || krait_event) { bit = krait_event_to_bit(event, region, group); clear_bit(bit, cpuc->used_mask); @@ -1967,6 +1974,7 @@ static void scorpion_pmu_clear_event_idx(struct pmu_hw_events *cpuc, bool venum_event = EVENT_VENUM(hwc->config_base); bool scorpion_event = EVENT_CPU(hwc->config_base); + armv7pmu_clear_event_idx(cpuc, event); if (venum_event || scorpion_event) { bit = scorpion_event_to_bit(event, region, group); clear_bit(bit, cpuc->used_mask); @@ -2030,6 +2038,7 @@ static struct platform_driver armv7_pmu_driver = { .driver = { .name = "armv7-pmu", .of_match_table = armv7_pmu_of_device_ids, + .suppress_bind_attrs = true, }, .probe = armv7_pmu_device_probe, }; diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 88d1a76f5367..f6cdcacfb96d 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -292,6 +292,12 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, } } +static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + static void xscale1pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags, val; @@ -316,7 +322,7 @@ static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline u32 xscale1pmu_read_counter(struct perf_event *event) +static inline u64 xscale1pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -337,7 +343,7 @@ static inline u32 xscale1pmu_read_counter(struct perf_event *event) return val; } -static inline void xscale1pmu_write_counter(struct perf_event *event, u32 val) +static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -370,11 +376,11 @@ static int xscale1pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = xscale1pmu_read_counter; cpu_pmu->write_counter = xscale1pmu_write_counter; cpu_pmu->get_event_idx = xscale1pmu_get_event_idx; + cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; cpu_pmu->start = xscale1pmu_start; cpu_pmu->stop = xscale1pmu_stop; cpu_pmu->map_event = xscale_map_event; cpu_pmu->num_events = 3; - cpu_pmu->max_period = (1LLU << 32) - 1; return 0; } @@ -679,7 +685,7 @@ static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static inline u32 xscale2pmu_read_counter(struct perf_event *event) +static inline u64 xscale2pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -706,7 +712,7 @@ static inline u32 xscale2pmu_read_counter(struct perf_event *event) return val; } -static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val) +static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val) { struct hw_perf_event *hwc = &event->hw; int counter = hwc->idx; @@ -739,11 +745,11 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = xscale2pmu_read_counter; cpu_pmu->write_counter = xscale2pmu_write_counter; cpu_pmu->get_event_idx = xscale2pmu_get_event_idx; + cpu_pmu->clear_event_idx = xscalepmu_clear_event_idx; cpu_pmu->start = xscale2pmu_start; cpu_pmu->stop = xscale2pmu_stop; cpu_pmu->map_event = xscale_map_event; cpu_pmu->num_events = 5; - cpu_pmu->max_period = (1LLU << 32) - 1; return 0; } diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 35ca494c028c..4c249cb261f3 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1145,7 +1145,7 @@ void __init setup_arch(char **cmdline_p) reserve_crashkernel(); -#ifdef CONFIG_MULTI_IRQ_HANDLER +#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER handle_arch_irq = mdesc->handle_irq; #endif diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index dec130e7078c..b8f766cf3a90 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -150,22 +150,18 @@ static int preserve_vfp_context(struct vfp_sigframe __user *frame) static int restore_vfp_context(char __user **auxp) { - struct vfp_sigframe __user *frame = - (struct vfp_sigframe __user *)*auxp; - unsigned long magic; - unsigned long size; - int err = 0; - - __get_user_error(magic, &frame->magic, err); - __get_user_error(size, &frame->size, err); + struct vfp_sigframe frame; + int err; + err = __copy_from_user(&frame, *auxp, sizeof(frame)); if (err) - return -EFAULT; - if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) + return err; + + if (frame.magic != VFP_MAGIC || frame.size != VFP_STORAGE_SIZE) return -EINVAL; - *auxp += size; - return vfp_restore_user_hwstate(&frame->ufp, &frame->ufp_exc); + *auxp += sizeof(frame); + return vfp_restore_user_hwstate(&frame.ufp, &frame.ufp_exc); } #endif @@ -176,6 +172,7 @@ static int restore_vfp_context(char __user **auxp) static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) { + struct sigcontext context; char __user *aux; sigset_t set; int err; @@ -184,23 +181,26 @@ static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) if (err == 0) set_current_blocked(&set); - __get_user_error(regs->ARM_r0, &sf->uc.uc_mcontext.arm_r0, err); - __get_user_error(regs->ARM_r1, &sf->uc.uc_mcontext.arm_r1, err); - __get_user_error(regs->ARM_r2, &sf->uc.uc_mcontext.arm_r2, err); - __get_user_error(regs->ARM_r3, &sf->uc.uc_mcontext.arm_r3, err); - __get_user_error(regs->ARM_r4, &sf->uc.uc_mcontext.arm_r4, err); - __get_user_error(regs->ARM_r5, &sf->uc.uc_mcontext.arm_r5, err); - __get_user_error(regs->ARM_r6, &sf->uc.uc_mcontext.arm_r6, err); - __get_user_error(regs->ARM_r7, &sf->uc.uc_mcontext.arm_r7, err); - __get_user_error(regs->ARM_r8, &sf->uc.uc_mcontext.arm_r8, err); - __get_user_error(regs->ARM_r9, &sf->uc.uc_mcontext.arm_r9, err); - __get_user_error(regs->ARM_r10, &sf->uc.uc_mcontext.arm_r10, err); - __get_user_error(regs->ARM_fp, &sf->uc.uc_mcontext.arm_fp, err); - __get_user_error(regs->ARM_ip, &sf->uc.uc_mcontext.arm_ip, err); - __get_user_error(regs->ARM_sp, &sf->uc.uc_mcontext.arm_sp, err); - __get_user_error(regs->ARM_lr, &sf->uc.uc_mcontext.arm_lr, err); - __get_user_error(regs->ARM_pc, &sf->uc.uc_mcontext.arm_pc, err); - __get_user_error(regs->ARM_cpsr, &sf->uc.uc_mcontext.arm_cpsr, err); + err |= __copy_from_user(&context, &sf->uc.uc_mcontext, sizeof(context)); + if (err == 0) { + regs->ARM_r0 = context.arm_r0; + regs->ARM_r1 = context.arm_r1; + regs->ARM_r2 = context.arm_r2; + regs->ARM_r3 = context.arm_r3; + regs->ARM_r4 = context.arm_r4; + regs->ARM_r5 = context.arm_r5; + regs->ARM_r6 = context.arm_r6; + regs->ARM_r7 = context.arm_r7; + regs->ARM_r8 = context.arm_r8; + regs->ARM_r9 = context.arm_r9; + regs->ARM_r10 = context.arm_r10; + regs->ARM_fp = context.arm_fp; + regs->ARM_ip = context.arm_ip; + regs->ARM_sp = context.arm_sp; + regs->ARM_lr = context.arm_lr; + regs->ARM_pc = context.arm_pc; + regs->ARM_cpsr = context.arm_cpsr; + } err |= !valid_user_regs(regs); diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 1df21a61e379..f0dd4b6ebb63 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -329,9 +329,11 @@ asmlinkage long sys_oabi_semtimedop(int semid, return -ENOMEM; err = 0; for (i = 0; i < nsops; i++) { - __get_user_error(sops[i].sem_num, &tsops->sem_num, err); - __get_user_error(sops[i].sem_op, &tsops->sem_op, err); - __get_user_error(sops[i].sem_flg, &tsops->sem_flg, err); + struct oabi_sembuf osb; + err |= __copy_from_user(&osb, tsops, sizeof(osb)); + sops[i].sem_num = osb.sem_num; + sops[i].sem_op = osb.sem_op; + sops[i].sem_flg = osb.sem_flg; tsops++; } if (timeout) { diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index cf2701cb0de8..078b259ead4e 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -83,29 +83,18 @@ static void dummy_clock_access(struct timespec64 *ts) } static clock_access_fn __read_persistent_clock = dummy_clock_access; -static clock_access_fn __read_boot_clock = dummy_clock_access; void read_persistent_clock64(struct timespec64 *ts) { __read_persistent_clock(ts); } -void read_boot_clock64(struct timespec64 *ts) -{ - __read_boot_clock(ts); -} - -int __init register_persistent_clock(clock_access_fn read_boot, - clock_access_fn read_persistent) +int __init register_persistent_clock(clock_access_fn read_persistent) { /* Only allow the clockaccess functions to be registered once */ - if (__read_persistent_clock == dummy_clock_access && - __read_boot_clock == dummy_clock_access) { - if (read_boot) - __read_boot_clock = read_boot; + if (__read_persistent_clock == dummy_clock_access) { if (read_persistent) __read_persistent_clock = read_persistent; - return 0; } diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 7a4b06049001..a826df3d3814 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -90,6 +90,15 @@ .text ENTRY(arm_copy_from_user) +#ifdef CONFIG_CPU_SPECTRE + get_thread_info r3 + ldr r3, [r3, #TI_ADDR_LIMIT] + adds ip, r1, r2 @ ip=addr+size + sub r3, r3, #1 @ addr_limit - 1 + cmpcc ip, r3 @ if (addr+size > addr_limit - 1) + movcs r1, #0 @ addr = NULL + csdb +#endif #include "copy_template.S" diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile index 4ea93c9df77b..7415f181907b 100644 --- a/arch/arm/mach-at91/Makefile +++ b/arch/arm/mach-at91/Makefile @@ -19,31 +19,6 @@ ifeq ($(CONFIG_PM_DEBUG),y) CFLAGS_pm.o += -DDEBUG endif -# Default sed regexp - multiline due to syntax constraints -define sed-y - "/^->/{s:->#\(.*\):/* \1 */:; \ - s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \ - s:->::; p;}" -endef - -# Use filechk to avoid rebuilds when a header changes, but the resulting file -# does not -define filechk_offsets - (set -e; \ - echo "#ifndef $2"; \ - echo "#define $2"; \ - echo "/*"; \ - echo " * DO NOT MODIFY."; \ - echo " *"; \ - echo " * This file was generated by Kbuild"; \ - echo " */"; \ - echo ""; \ - sed -ne $(sed-y); \ - echo ""; \ - echo "#endif" ) -endef - arch/arm/mach-at91/pm_data-offsets.s: arch/arm/mach-at91/pm_data-offsets.c $(call if_changed_dep,cc_s_c) diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index d3db306a5a70..f3384e3a675d 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -272,7 +272,7 @@ static int exynos5420_cpu_suspend(unsigned long arg) static void exynos_pm_set_wakeup_mask(void) { /* Set wake-up mask registers */ - pmu_raw_writel(exynos_get_eint_wake_mask(), S5P_EINT_WAKEUP_MASK); + pmu_raw_writel(exynos_get_eint_wake_mask(), EXYNOS_EINT_WAKEUP_MASK); pmu_raw_writel(exynos_irqwake_intmask & ~(1 << 31), S5P_WAKEUP_MASK); } diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c index f4f8f23bda8c..af46d2182533 100644 --- a/arch/arm/mach-pxa/balloon3.c +++ b/arch/arm/mach-pxa/balloon3.c @@ -688,7 +688,6 @@ struct platform_nand_data balloon3_nand_pdata = { .chip_delay = 50, }, .ctrl = { - .hwcontrol = 0, .dev_ready = balloon3_nand_dev_ready, .select_chip = balloon3_nand_select_chip, .cmd_ctrl = balloon3_nand_cmd_ctl, diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c index d7c9a8476d57..5a16ea74e28a 100644 --- a/arch/arm/mach-pxa/devices.c +++ b/arch/arm/mach-pxa/devices.c @@ -4,6 +4,7 @@ #include <linux/init.h> #include <linux/platform_device.h> #include <linux/dma-mapping.h> +#include <linux/dmaengine.h> #include <linux/spi/pxa2xx_spi.h> #include <linux/platform_data/i2c-pxa.h> @@ -59,16 +60,6 @@ static struct resource pxamci_resources[] = { .end = IRQ_MMC, .flags = IORESOURCE_IRQ, }, - [2] = { - .start = 21, - .end = 21, - .flags = IORESOURCE_DMA, - }, - [3] = { - .start = 22, - .end = 22, - .flags = IORESOURCE_DMA, - }, }; static u64 pxamci_dmamask = 0xffffffffUL; @@ -406,16 +397,6 @@ static struct resource pxa_ir_resources[] = { .end = 0x40700023, .flags = IORESOURCE_MEM, }, - [5] = { - .start = 17, - .end = 17, - .flags = IORESOURCE_DMA, - }, - [6] = { - .start = 18, - .end = 18, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa_device_ficp = { @@ -544,18 +525,6 @@ static struct resource pxa25x_resource_ssp[] = { .end = IRQ_SSP, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 13, - .end = 13, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 14, - .end = 14, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa25x_device_ssp = { @@ -582,18 +551,6 @@ static struct resource pxa25x_resource_nssp[] = { .end = IRQ_NSSP, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 15, - .end = 15, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 16, - .end = 16, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa25x_device_nssp = { @@ -620,18 +577,6 @@ static struct resource pxa25x_resource_assp[] = { .end = IRQ_ASSP, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 23, - .end = 23, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 24, - .end = 24, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa25x_device_assp = { @@ -750,18 +695,6 @@ static struct resource pxa27x_resource_ssp1[] = { .end = IRQ_SSP, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 13, - .end = 13, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 14, - .end = 14, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa27x_device_ssp1 = { @@ -788,18 +721,6 @@ static struct resource pxa27x_resource_ssp2[] = { .end = IRQ_SSP2, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 15, - .end = 15, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 16, - .end = 16, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa27x_device_ssp2 = { @@ -826,18 +747,6 @@ static struct resource pxa27x_resource_ssp3[] = { .end = IRQ_SSP3, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 66, - .end = 66, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 67, - .end = 67, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa27x_device_ssp3 = { @@ -894,16 +803,6 @@ static struct resource pxa3xx_resources_mci2[] = { .end = IRQ_MMC2, .flags = IORESOURCE_IRQ, }, - [2] = { - .start = 93, - .end = 93, - .flags = IORESOURCE_DMA, - }, - [3] = { - .start = 94, - .end = 94, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa3xx_device_mci2 = { @@ -933,16 +832,6 @@ static struct resource pxa3xx_resources_mci3[] = { .end = IRQ_MMC3, .flags = IORESOURCE_IRQ, }, - [2] = { - .start = 100, - .end = 100, - .flags = IORESOURCE_DMA, - }, - [3] = { - .start = 101, - .end = 101, - .flags = IORESOURCE_DMA, - }, }; struct platform_device pxa3xx_device_mci3 = { @@ -1020,18 +909,6 @@ static struct resource pxa3xx_resources_nand[] = { .end = IRQ_NAND, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for Data DMA */ - .start = 97, - .end = 97, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for Command DMA */ - .start = 99, - .end = 99, - .flags = IORESOURCE_DMA, - }, }; static u64 pxa3xx_nand_dma_mask = DMA_BIT_MASK(32); @@ -1065,18 +942,6 @@ static struct resource pxa3xx_resource_ssp4[] = { .end = IRQ_SSP4, .flags = IORESOURCE_IRQ, }, - [2] = { - /* DRCMR for RX */ - .start = 2, - .end = 2, - .flags = IORESOURCE_DMA, - }, - [3] = { - /* DRCMR for TX */ - .start = 3, - .end = 3, - .flags = IORESOURCE_DMA, - }, }; /* @@ -1202,11 +1067,6 @@ void __init pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info) platform_device_add(pd); } -static struct mmp_dma_platdata pxa_dma_pdata = { - .dma_channels = 0, - .nb_requestors = 0, -}; - static struct resource pxa_dma_resource[] = { [0] = { .start = 0x40000000, @@ -1233,9 +1093,7 @@ static struct platform_device pxa2xx_pxa_dma = { .resource = pxa_dma_resource, }; -void __init pxa2xx_set_dmac_info(int nb_channels, int nb_requestors) +void __init pxa2xx_set_dmac_info(struct mmp_dma_platdata *dma_pdata) { - pxa_dma_pdata.dma_channels = nb_channels; - pxa_dma_pdata.nb_requestors = nb_requestors; - pxa_register_device(&pxa2xx_pxa_dma, &pxa_dma_pdata); + pxa_register_device(&pxa2xx_pxa_dma, dma_pdata); } diff --git a/arch/arm/mach-pxa/devices.h b/arch/arm/mach-pxa/devices.h index 11263f7c455b..498b07bc6a3e 100644 --- a/arch/arm/mach-pxa/devices.h +++ b/arch/arm/mach-pxa/devices.h @@ -1,4 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#define PDMA_FILTER_PARAM(_prio, _requestor) (&(struct pxad_param) { \ + .prio = PXAD_PRIO_##_prio, .drcmr = _requestor }) +struct mmp_dma_platdata; + extern struct platform_device pxa_device_pmu; extern struct platform_device pxa_device_mci; extern struct platform_device pxa3xx_device_mci2; @@ -55,7 +59,7 @@ extern struct platform_device pxa3xx_device_gpio; extern struct platform_device pxa93x_device_gpio; void __init pxa_register_device(struct platform_device *dev, void *data); -void __init pxa2xx_set_dmac_info(int nb_channels, int nb_requestors); +void __init pxa2xx_set_dmac_info(struct mmp_dma_platdata *dma_pdata); struct i2c_pxa_platform_data; extern void pxa_set_i2c_info(struct i2c_pxa_platform_data *info); diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 49022ad338e9..29be04c6cc48 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -346,7 +346,6 @@ struct platform_nand_data em_x270_nand_platdata = { .chip_delay = 20, }, .ctrl = { - .hwcontrol = 0, .dev_ready = em_x270_nand_device_ready, .select_chip = 0, .cmd_ctrl = em_x270_nand_cmd_ctl, diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c index ba431fad5c47..ab8808ce7e21 100644 --- a/arch/arm/mach-pxa/pxa25x.c +++ b/arch/arm/mach-pxa/pxa25x.c @@ -16,6 +16,8 @@ * initialization stuff for PXA machines which can be overridden later if * need be. */ +#include <linux/dmaengine.h> +#include <linux/dma/pxa-dma.h> #include <linux/gpio.h> #include <linux/gpio-pxa.h> #include <linux/module.h> @@ -26,6 +28,7 @@ #include <linux/syscore_ops.h> #include <linux/irq.h> #include <linux/irqchip.h> +#include <linux/platform_data/mmp_dma.h> #include <asm/mach/map.h> #include <asm/suspend.h> @@ -201,6 +204,39 @@ static struct platform_device *pxa25x_devices[] __initdata = { &pxa_device_asoc_platform, }; +static const struct dma_slave_map pxa25x_slave_map[] = { + /* PXA25x, PXA27x and PXA3xx common entries */ + { "pxa2xx-ac97", "pcm_pcm_mic_mono", PDMA_FILTER_PARAM(LOWEST, 8) }, + { "pxa2xx-ac97", "pcm_pcm_aux_mono_in", PDMA_FILTER_PARAM(LOWEST, 9) }, + { "pxa2xx-ac97", "pcm_pcm_aux_mono_out", + PDMA_FILTER_PARAM(LOWEST, 10) }, + { "pxa2xx-ac97", "pcm_pcm_stereo_in", PDMA_FILTER_PARAM(LOWEST, 11) }, + { "pxa2xx-ac97", "pcm_pcm_stereo_out", PDMA_FILTER_PARAM(LOWEST, 12) }, + { "pxa-ssp-dai.1", "rx", PDMA_FILTER_PARAM(LOWEST, 13) }, + { "pxa-ssp-dai.1", "tx", PDMA_FILTER_PARAM(LOWEST, 14) }, + { "pxa-ssp-dai.2", "rx", PDMA_FILTER_PARAM(LOWEST, 15) }, + { "pxa-ssp-dai.2", "tx", PDMA_FILTER_PARAM(LOWEST, 16) }, + { "pxa2xx-ir", "rx", PDMA_FILTER_PARAM(LOWEST, 17) }, + { "pxa2xx-ir", "tx", PDMA_FILTER_PARAM(LOWEST, 18) }, + { "pxa2xx-mci.0", "rx", PDMA_FILTER_PARAM(LOWEST, 21) }, + { "pxa2xx-mci.0", "tx", PDMA_FILTER_PARAM(LOWEST, 22) }, + + /* PXA25x specific map */ + { "pxa25x-ssp.0", "rx", PDMA_FILTER_PARAM(LOWEST, 13) }, + { "pxa25x-ssp.0", "tx", PDMA_FILTER_PARAM(LOWEST, 14) }, + { "pxa25x-nssp.1", "rx", PDMA_FILTER_PARAM(LOWEST, 15) }, + { "pxa25x-nssp.1", "tx", PDMA_FILTER_PARAM(LOWEST, 16) }, + { "pxa25x-nssp.2", "rx", PDMA_FILTER_PARAM(LOWEST, 23) }, + { "pxa25x-nssp.2", "tx", PDMA_FILTER_PARAM(LOWEST, 24) }, +}; + +static struct mmp_dma_platdata pxa25x_dma_pdata = { + .dma_channels = 16, + .nb_requestors = 40, + .slave_map = pxa25x_slave_map, + .slave_map_cnt = ARRAY_SIZE(pxa25x_slave_map), +}; + static int __init pxa25x_init(void) { int ret = 0; @@ -215,7 +251,7 @@ static int __init pxa25x_init(void) register_syscore_ops(&pxa2xx_mfp_syscore_ops); if (!of_have_populated_dt()) { - pxa2xx_set_dmac_info(16, 40); + pxa2xx_set_dmac_info(&pxa25x_dma_pdata); pxa_register_device(&pxa25x_device_gpio, &pxa25x_gpio_info); ret = platform_add_devices(pxa25x_devices, ARRAY_SIZE(pxa25x_devices)); diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index 0c06f383ad52..5a8990a9313d 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -11,6 +11,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/dmaengine.h> +#include <linux/dma/pxa-dma.h> #include <linux/gpio.h> #include <linux/gpio-pxa.h> #include <linux/module.h> @@ -23,6 +25,7 @@ #include <linux/io.h> #include <linux/irq.h> #include <linux/platform_data/i2c-pxa.h> +#include <linux/platform_data/mmp_dma.h> #include <asm/mach/map.h> #include <mach/hardware.h> @@ -297,6 +300,40 @@ static struct platform_device *devices[] __initdata = { &pxa27x_device_pwm1, }; +static const struct dma_slave_map pxa27x_slave_map[] = { + /* PXA25x, PXA27x and PXA3xx common entries */ + { "pxa2xx-ac97", "pcm_pcm_mic_mono", PDMA_FILTER_PARAM(LOWEST, 8) }, + { "pxa2xx-ac97", "pcm_pcm_aux_mono_in", PDMA_FILTER_PARAM(LOWEST, 9) }, + { "pxa2xx-ac97", "pcm_pcm_aux_mono_out", + PDMA_FILTER_PARAM(LOWEST, 10) }, + { "pxa2xx-ac97", "pcm_pcm_stereo_in", PDMA_FILTER_PARAM(LOWEST, 11) }, + { "pxa2xx-ac97", "pcm_pcm_stereo_out", PDMA_FILTER_PARAM(LOWEST, 12) }, + { "pxa-ssp-dai.0", "rx", PDMA_FILTER_PARAM(LOWEST, 13) }, + { "pxa-ssp-dai.0", "tx", PDMA_FILTER_PARAM(LOWEST, 14) }, + { "pxa-ssp-dai.1", "rx", PDMA_FILTER_PARAM(LOWEST, 15) }, + { "pxa-ssp-dai.1", "tx", PDMA_FILTER_PARAM(LOWEST, 16) }, + { "pxa2xx-ir", "rx", PDMA_FILTER_PARAM(LOWEST, 17) }, + { "pxa2xx-ir", "tx", PDMA_FILTER_PARAM(LOWEST, 18) }, + { "pxa2xx-mci.0", "rx", PDMA_FILTER_PARAM(LOWEST, 21) }, + { "pxa2xx-mci.0", "tx", PDMA_FILTER_PARAM(LOWEST, 22) }, + { "pxa-ssp-dai.2", "rx", PDMA_FILTER_PARAM(LOWEST, 66) }, + { "pxa-ssp-dai.2", "tx", PDMA_FILTER_PARAM(LOWEST, 67) }, + + /* PXA27x specific map */ + { "pxa2xx-i2s", "rx", PDMA_FILTER_PARAM(LOWEST, 2) }, + { "pxa2xx-i2s", "tx", PDMA_FILTER_PARAM(LOWEST, 3) }, + { "pxa27x-camera.0", "CI_Y", PDMA_FILTER_PARAM(HIGHEST, 68) }, + { "pxa27x-camera.0", "CI_U", PDMA_FILTER_PARAM(HIGHEST, 69) }, + { "pxa27x-camera.0", "CI_V", PDMA_FILTER_PARAM(HIGHEST, 70) }, +}; + +static struct mmp_dma_platdata pxa27x_dma_pdata = { + .dma_channels = 32, + .nb_requestors = 75, + .slave_map = pxa27x_slave_map, + .slave_map_cnt = ARRAY_SIZE(pxa27x_slave_map), +}; + static int __init pxa27x_init(void) { int ret = 0; @@ -313,7 +350,7 @@ static int __init pxa27x_init(void) if (!of_have_populated_dt()) { pxa_register_device(&pxa27x_device_gpio, &pxa27x_gpio_info); - pxa2xx_set_dmac_info(32, 75); + pxa2xx_set_dmac_info(&pxa27x_dma_pdata); ret = platform_add_devices(devices, ARRAY_SIZE(devices)); } diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c index 8c64f93b669b..df9c8970adcf 100644 --- a/arch/arm/mach-pxa/pxa3xx.c +++ b/arch/arm/mach-pxa/pxa3xx.c @@ -12,6 +12,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/dmaengine.h> +#include <linux/dma/pxa-dma.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -24,6 +26,7 @@ #include <linux/of.h> #include <linux/syscore_ops.h> #include <linux/platform_data/i2c-pxa.h> +#include <linux/platform_data/mmp_dma.h> #include <asm/mach/map.h> #include <asm/suspend.h> @@ -421,6 +424,42 @@ static struct platform_device *devices[] __initdata = { &pxa27x_device_pwm1, }; +static const struct dma_slave_map pxa3xx_slave_map[] = { + /* PXA25x, PXA27x and PXA3xx common entries */ + { "pxa2xx-ac97", "pcm_pcm_mic_mono", PDMA_FILTER_PARAM(LOWEST, 8) }, + { "pxa2xx-ac97", "pcm_pcm_aux_mono_in", PDMA_FILTER_PARAM(LOWEST, 9) }, + { "pxa2xx-ac97", "pcm_pcm_aux_mono_out", + PDMA_FILTER_PARAM(LOWEST, 10) }, + { "pxa2xx-ac97", "pcm_pcm_stereo_in", PDMA_FILTER_PARAM(LOWEST, 11) }, + { "pxa2xx-ac97", "pcm_pcm_stereo_out", PDMA_FILTER_PARAM(LOWEST, 12) }, + { "pxa-ssp-dai.0", "rx", PDMA_FILTER_PARAM(LOWEST, 13) }, + { "pxa-ssp-dai.0", "tx", PDMA_FILTER_PARAM(LOWEST, 14) }, + { "pxa-ssp-dai.1", "rx", PDMA_FILTER_PARAM(LOWEST, 15) }, + { "pxa-ssp-dai.1", "tx", PDMA_FILTER_PARAM(LOWEST, 16) }, + { "pxa2xx-ir", "rx", PDMA_FILTER_PARAM(LOWEST, 17) }, + { "pxa2xx-ir", "tx", PDMA_FILTER_PARAM(LOWEST, 18) }, + { "pxa2xx-mci.0", "rx", PDMA_FILTER_PARAM(LOWEST, 21) }, + { "pxa2xx-mci.0", "tx", PDMA_FILTER_PARAM(LOWEST, 22) }, + { "pxa-ssp-dai.2", "rx", PDMA_FILTER_PARAM(LOWEST, 66) }, + { "pxa-ssp-dai.2", "tx", PDMA_FILTER_PARAM(LOWEST, 67) }, + + /* PXA3xx specific map */ + { "pxa-ssp-dai.3", "rx", PDMA_FILTER_PARAM(LOWEST, 2) }, + { "pxa-ssp-dai.3", "tx", PDMA_FILTER_PARAM(LOWEST, 3) }, + { "pxa2xx-mci.1", "rx", PDMA_FILTER_PARAM(LOWEST, 93) }, + { "pxa2xx-mci.1", "tx", PDMA_FILTER_PARAM(LOWEST, 94) }, + { "pxa3xx-nand", "data", PDMA_FILTER_PARAM(LOWEST, 97) }, + { "pxa2xx-mci.2", "rx", PDMA_FILTER_PARAM(LOWEST, 100) }, + { "pxa2xx-mci.2", "tx", PDMA_FILTER_PARAM(LOWEST, 101) }, +}; + +static struct mmp_dma_platdata pxa3xx_dma_pdata = { + .dma_channels = 32, + .nb_requestors = 100, + .slave_map = pxa3xx_slave_map, + .slave_map_cnt = ARRAY_SIZE(pxa3xx_slave_map), +}; + static int __init pxa3xx_init(void) { int ret = 0; @@ -456,7 +495,7 @@ static int __init pxa3xx_init(void) if (of_have_populated_dt()) return 0; - pxa2xx_set_dmac_info(32, 100); + pxa2xx_set_dmac_info(&pxa3xx_dma_pdata); ret = platform_add_devices(devices, ARRAY_SIZE(devices)); if (ret) return ret; diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index 8db62cc54a6a..04b2f22c2739 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -212,7 +212,7 @@ static DEFINE_MUTEX(ecard_mutex); */ static void ecard_init_pgtables(struct mm_struct *mm) { - struct vm_area_struct vma; + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, VM_EXEC); /* We want to set up the page tables for the following mapping: * Virtual Physical @@ -237,9 +237,6 @@ static void ecard_init_pgtables(struct mm_struct *mm) memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (EASI_SIZE / PGDIR_SIZE)); - vma_init(&vma, mm); - vma.vm_flags = VM_EXEC; - flush_tlb_range(&vma, IO_START, IO_START + IO_SIZE); flush_tlb_range(&vma, EASI_START, EASI_START + EASI_SIZE); } diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 96a7b6cf459b..b169e580bf82 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -702,7 +702,6 @@ config ARM_THUMBEE config ARM_VIRT_EXT bool - depends on MMU default y if CPU_V7 help Enable the kernel to make use of the ARM Virtualization diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 5dd6c58d653b..7d67c70bbded 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -53,7 +53,8 @@ static inline bool security_extensions_enabled(void) { /* Check CPUID Identification Scheme before ID_PFR1 read */ if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) - return !!cpuid_feature_extract(CPUID_EXT_PFR1, 4); + return cpuid_feature_extract(CPUID_EXT_PFR1, 4) || + cpuid_feature_extract(CPUID_EXT_PFR1, 20); return 0; } diff --git a/arch/arm/mm/tcm.h b/arch/arm/mm/tcm.h index 8015ad434a40..24101925fe64 100644 --- a/arch/arm/mm/tcm.h +++ b/arch/arm/mm/tcm.h @@ -11,7 +11,7 @@ void __init tcm_init(void); #else /* No TCM support, just blank inlines to be optimized out */ -inline void tcm_init(void) +static inline void tcm_init(void) { } #endif diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index f6a62ae44a65..25b3ee85066e 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -22,6 +22,7 @@ #include <asm/cacheflush.h> #include <asm/hwcap.h> #include <asm/opcodes.h> +#include <asm/system_info.h> #include "bpf_jit_32.h" @@ -47,32 +48,73 @@ * The callee saved registers depends on whether frame pointers are enabled. * With frame pointers (to be compliant with the ABI): * - * high - * original ARM_SP => +------------------+ \ - * | pc | | - * current ARM_FP => +------------------+ } callee saved registers - * |r4-r8,r10,fp,ip,lr| | - * +------------------+ / - * low + * high + * original ARM_SP => +--------------+ \ + * | pc | | + * current ARM_FP => +--------------+ } callee saved registers + * |r4-r9,fp,ip,lr| | + * +--------------+ / + * low * * Without frame pointers: * - * high - * original ARM_SP => +------------------+ - * | r4-r8,r10,fp,lr | callee saved registers - * current ARM_FP => +------------------+ - * low + * high + * original ARM_SP => +--------------+ + * | r4-r9,fp,lr | callee saved registers + * current ARM_FP => +--------------+ + * low * * When popping registers off the stack at the end of a BPF function, we * reference them via the current ARM_FP register. */ #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ - 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \ + 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \ 1 << ARM_FP) #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) -#define STACK_OFFSET(k) (k) +enum { + /* Stack layout - these are offsets from (top of stack - 4) */ + BPF_R2_HI, + BPF_R2_LO, + BPF_R3_HI, + BPF_R3_LO, + BPF_R4_HI, + BPF_R4_LO, + BPF_R5_HI, + BPF_R5_LO, + BPF_R7_HI, + BPF_R7_LO, + BPF_R8_HI, + BPF_R8_LO, + BPF_R9_HI, + BPF_R9_LO, + BPF_FP_HI, + BPF_FP_LO, + BPF_TC_HI, + BPF_TC_LO, + BPF_AX_HI, + BPF_AX_LO, + /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, + * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, + * BPF_REG_FP and Tail call counts. + */ + BPF_JIT_SCRATCH_REGS, +}; + +/* + * Negative "register" values indicate the register is stored on the stack + * and are the offset from the top of the eBPF JIT scratch space. + */ +#define STACK_OFFSET(k) (-4 - (k) * 4) +#define SCRATCH_SIZE (BPF_JIT_SCRATCH_REGS * 4) + +#ifdef CONFIG_FRAME_POINTER +#define EBPF_SCRATCH_TO_ARM_FP(x) ((x) - 4 * hweight16(CALLEE_PUSH_MASK) - 4) +#else +#define EBPF_SCRATCH_TO_ARM_FP(x) (x) +#endif + #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ @@ -94,35 +136,35 @@ * scratch memory space and we have to build eBPF 64 bit register from those. * */ -static const u8 bpf2a32[][2] = { +static const s8 bpf2a32[][2] = { /* return value from in-kernel function, and exit value from eBPF */ [BPF_REG_0] = {ARM_R1, ARM_R0}, /* arguments from eBPF program to in-kernel function */ [BPF_REG_1] = {ARM_R3, ARM_R2}, /* Stored on stack scratch space */ - [BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)}, - [BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)}, - [BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)}, - [BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)}, + [BPF_REG_2] = {STACK_OFFSET(BPF_R2_HI), STACK_OFFSET(BPF_R2_LO)}, + [BPF_REG_3] = {STACK_OFFSET(BPF_R3_HI), STACK_OFFSET(BPF_R3_LO)}, + [BPF_REG_4] = {STACK_OFFSET(BPF_R4_HI), STACK_OFFSET(BPF_R4_LO)}, + [BPF_REG_5] = {STACK_OFFSET(BPF_R5_HI), STACK_OFFSET(BPF_R5_LO)}, /* callee saved registers that in-kernel function will preserve */ [BPF_REG_6] = {ARM_R5, ARM_R4}, /* Stored on stack scratch space */ - [BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)}, - [BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)}, - [BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)}, + [BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)}, + [BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)}, + [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, /* Read only Frame Pointer to access Stack */ - [BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)}, + [BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)}, /* Temporary Register for internal BPF JIT, can be used * for constant blindings and others. */ [TMP_REG_1] = {ARM_R7, ARM_R6}, - [TMP_REG_2] = {ARM_R10, ARM_R8}, + [TMP_REG_2] = {ARM_R9, ARM_R8}, /* Tail call count. Stored on stack scratch space. */ - [TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)}, + [TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)}, /* temporary register for blinding constants. * Stored on stack scratch space. */ - [BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)}, + [BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)}, }; #define dst_lo dst[1] @@ -151,6 +193,7 @@ struct jit_ctx { unsigned int idx; unsigned int prologue_bytes; unsigned int epilogue_offset; + unsigned int cpu_architecture; u32 flags; u32 *offsets; u32 *target; @@ -196,9 +239,55 @@ static inline void emit(u32 inst, struct jit_ctx *ctx) } /* + * This is rather horrid, but necessary to convert an integer constant + * to an immediate operand for the opcodes, and be able to detect at + * build time whether the constant can't be converted (iow, usable in + * BUILD_BUG_ON()). + */ +#define imm12val(v, s) (rol32(v, (s)) | (s) << 7) +#define const_imm8m(x) \ + ({ int r; \ + u32 v = (x); \ + if (!(v & ~0x000000ff)) \ + r = imm12val(v, 0); \ + else if (!(v & ~0xc000003f)) \ + r = imm12val(v, 2); \ + else if (!(v & ~0xf000000f)) \ + r = imm12val(v, 4); \ + else if (!(v & ~0xfc000003)) \ + r = imm12val(v, 6); \ + else if (!(v & ~0xff000000)) \ + r = imm12val(v, 8); \ + else if (!(v & ~0x3fc00000)) \ + r = imm12val(v, 10); \ + else if (!(v & ~0x0ff00000)) \ + r = imm12val(v, 12); \ + else if (!(v & ~0x03fc0000)) \ + r = imm12val(v, 14); \ + else if (!(v & ~0x00ff0000)) \ + r = imm12val(v, 16); \ + else if (!(v & ~0x003fc000)) \ + r = imm12val(v, 18); \ + else if (!(v & ~0x000ff000)) \ + r = imm12val(v, 20); \ + else if (!(v & ~0x0003fc00)) \ + r = imm12val(v, 22); \ + else if (!(v & ~0x0000ff00)) \ + r = imm12val(v, 24); \ + else if (!(v & ~0x00003fc0)) \ + r = imm12val(v, 26); \ + else if (!(v & ~0x00000ff0)) \ + r = imm12val(v, 28); \ + else if (!(v & ~0x000003fc)) \ + r = imm12val(v, 30); \ + else \ + r = -1; \ + r; }) + +/* * Checks if immediate value can be converted to imm12(12 bits) value. */ -static int16_t imm8m(u32 x) +static int imm8m(u32 x) { u32 rot; @@ -208,6 +297,38 @@ static int16_t imm8m(u32 x) return -1; } +#define imm8m(x) (__builtin_constant_p(x) ? const_imm8m(x) : imm8m(x)) + +static u32 arm_bpf_ldst_imm12(u32 op, u8 rt, u8 rn, s16 imm12) +{ + op |= rt << 12 | rn << 16; + if (imm12 >= 0) + op |= ARM_INST_LDST__U; + else + imm12 = -imm12; + return op | (imm12 & ARM_INST_LDST__IMM12); +} + +static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) +{ + op |= rt << 12 | rn << 16; + if (imm8 >= 0) + op |= ARM_INST_LDST__U; + else + imm8 = -imm8; + return op | (imm8 & 0xf0) << 4 | (imm8 & 0x0f); +} + +#define ARM_LDR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off) +#define ARM_LDRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off) +#define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off) +#define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) + +#define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) +#define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) +#define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off) +#define ARM_STRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off) + /* * Initializes the JIT space with undefined instructions. */ @@ -227,19 +348,10 @@ static void jit_fill_hole(void *area, unsigned int size) #define STACK_ALIGNMENT 4 #endif -/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, - * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, - * BPF_REG_FP and Tail call counts. - */ -#define SCRATCH_SIZE 80 - /* total stack size used in JITed code */ #define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE) #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) -/* Get the offset of eBPF REGISTERs stored on scratch space. */ -#define STACK_VAR(off) (STACK_SIZE - off) - #if __LINUX_ARM_ARCH__ < 7 static u16 imm_offset(u32 k, struct jit_ctx *ctx) @@ -355,7 +467,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) { - const u8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp = bpf2a32[TMP_REG_1]; #if __LINUX_ARM_ARCH__ == 7 if (elf_hwcap & HWCAP_IDIVA) { @@ -402,44 +514,110 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx); } -/* Checks whether BPF register is on scratch stack space or not. */ -static inline bool is_on_stack(u8 bpf_reg) +/* Is the translated BPF register on stack? */ +static bool is_stacked(s8 reg) +{ + return reg < 0; +} + +/* If a BPF register is on the stack (stk is true), load it to the + * supplied temporary register and return the temporary register + * for subsequent operations, otherwise just use the CPU register. + */ +static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx) +{ + if (is_stacked(reg)) { + emit(ARM_LDR_I(tmp, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); + reg = tmp; + } + return reg; +} + +static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, + struct jit_ctx *ctx) { - static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5, - BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT, - BPF_REG_2, BPF_REG_FP}; - int i, reg_len = sizeof(stack_regs); - - for (i = 0 ; i < reg_len ; i++) { - if (bpf_reg == stack_regs[i]) - return true; + if (is_stacked(reg[1])) { + if (__LINUX_ARM_ARCH__ >= 6 || + ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { + emit(ARM_LDRD_I(tmp[1], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); + } else { + emit(ARM_LDR_I(tmp[1], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); + emit(ARM_LDR_I(tmp[0], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); + } + reg = tmp; + } + return reg; +} + +/* If a BPF register is on the stack (stk is true), save the register + * back to the stack. If the source register is not the same, then + * move it into the correct register. + */ +static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx) +{ + if (is_stacked(reg)) + emit(ARM_STR_I(src, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx); + else if (reg != src) + emit(ARM_MOV_R(reg, src), ctx); +} + +static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, + struct jit_ctx *ctx) +{ + if (is_stacked(reg[1])) { + if (__LINUX_ARM_ARCH__ >= 6 || + ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { + emit(ARM_STRD_I(src[1], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); + } else { + emit(ARM_STR_I(src[1], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); + emit(ARM_STR_I(src[0], ARM_FP, + EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); + } + } else { + if (reg[1] != src[1]) + emit(ARM_MOV_R(reg[1], src[1]), ctx); + if (reg[0] != src[0]) + emit(ARM_MOV_R(reg[0], src[0]), ctx); } - return false; } -static inline void emit_a32_mov_i(const u8 dst, const u32 val, - bool dstk, struct jit_ctx *ctx) +static inline void emit_a32_mov_i(const s8 dst, const u32 val, + struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp = bpf2a32[TMP_REG_1]; - if (dstk) { + if (is_stacked(dst)) { emit_mov_i(tmp[1], val, ctx); - emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx); + arm_bpf_put_reg32(dst, tmp[1], ctx); } else { emit_mov_i(dst, val, ctx); } } +static void emit_a32_mov_i64(const s8 dst[], u64 val, struct jit_ctx *ctx) +{ + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *rd = is_stacked(dst_lo) ? tmp : dst; + + emit_mov_i(rd[1], (u32)val, ctx); + emit_mov_i(rd[0], val >> 32, ctx); + + arm_bpf_put_reg64(dst, rd, ctx); +} + /* Sign extended move */ -static inline void emit_a32_mov_i64(const bool is64, const u8 dst[], - const u32 val, bool dstk, - struct jit_ctx *ctx) { - u32 hi = 0; +static inline void emit_a32_mov_se_i64(const bool is64, const s8 dst[], + const u32 val, struct jit_ctx *ctx) { + u64 val64 = val; if (is64 && (val & (1<<31))) - hi = (u32)~0; - emit_a32_mov_i(dst_lo, val, dstk, ctx); - emit_a32_mov_i(dst_hi, hi, dstk, ctx); + val64 |= 0xffffffff00000000ULL; + emit_a32_mov_i64(dst, val64, ctx); } static inline void emit_a32_add_r(const u8 dst, const u8 src, @@ -521,75 +699,94 @@ static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, /* ALU operation (32 bit) * dst = dst (op) src */ -static inline void emit_a32_alu_r(const u8 dst, const u8 src, - bool dstk, bool sstk, +static inline void emit_a32_alu_r(const s8 dst, const s8 src, struct jit_ctx *ctx, const bool is64, const bool hi, const u8 op) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rn = sstk ? tmp[1] : src; - - if (sstk) - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx); + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rn, rd; + rn = arm_bpf_get_reg32(src, tmp[1], ctx); + rd = arm_bpf_get_reg32(dst, tmp[0], ctx); /* ALU operation */ - if (dstk) { - emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx); - emit_alu_r(tmp[0], rn, is64, hi, op, ctx); - emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx); - } else { - emit_alu_r(dst, rn, is64, hi, op, ctx); - } + emit_alu_r(rd, rn, is64, hi, op, ctx); + arm_bpf_put_reg32(dst, rd, ctx); } /* ALU operation (64 bit) */ -static inline void emit_a32_alu_r64(const bool is64, const u8 dst[], - const u8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx, +static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], + const s8 src[], struct jit_ctx *ctx, const u8 op) { - emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op); - if (is64) - emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op); - else - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *rd; + + rd = arm_bpf_get_reg64(dst, tmp, ctx); + if (is64) { + const s8 *rs; + + rs = arm_bpf_get_reg64(src, tmp2, ctx); + + /* ALU operation */ + emit_alu_r(rd[1], rs[1], true, false, op, ctx); + emit_alu_r(rd[0], rs[0], true, true, op, ctx); + } else { + s8 rs; + + rs = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + + /* ALU operation */ + emit_alu_r(rd[1], rs, true, false, op, ctx); + emit_a32_mov_i(rd[0], 0, ctx); + } + + arm_bpf_put_reg64(dst, rd, ctx); } -/* dst = imm (4 bytes)*/ -static inline void emit_a32_mov_r(const u8 dst, const u8 src, - bool dstk, bool sstk, +/* dst = src (4 bytes)*/ +static inline void emit_a32_mov_r(const s8 dst, const s8 src, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rt = sstk ? tmp[0] : src; + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rt; - if (sstk) - emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx); - if (dstk) - emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx); - else - emit(ARM_MOV_R(dst, rt), ctx); + rt = arm_bpf_get_reg32(src, tmp[0], ctx); + arm_bpf_put_reg32(dst, rt, ctx); } /* dst = src */ -static inline void emit_a32_mov_r64(const bool is64, const u8 dst[], - const u8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { - emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx); - if (is64) { +static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], + const s8 src[], + struct jit_ctx *ctx) { + if (!is64) { + emit_a32_mov_r(dst_lo, src_lo, ctx); + /* Zero out high 4 bytes */ + emit_a32_mov_i(dst_hi, 0, ctx); + } else if (__LINUX_ARM_ARCH__ < 6 && + ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { /* complete 8 byte move */ - emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx); + emit_a32_mov_r(dst_lo, src_lo, ctx); + emit_a32_mov_r(dst_hi, src_hi, ctx); + } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { + const u8 *tmp = bpf2a32[TMP_REG_1]; + + emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); + emit(ARM_STRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); + } else if (is_stacked(src_lo)) { + emit(ARM_LDRD_I(dst[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); + } else if (is_stacked(dst_lo)) { + emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); } else { - /* Zero out high 4 bytes */ - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit(ARM_MOV_R(dst[0], src[0]), ctx); + emit(ARM_MOV_R(dst[1], src[1]), ctx); } } /* Shift operations */ -static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk, +static inline void emit_a32_alu_i(const s8 dst, const u32 val, struct jit_ctx *ctx, const u8 op) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rd = dstk ? tmp[0] : dst; + const s8 *tmp = bpf2a32[TMP_REG_1]; + s8 rd; - if (dstk) - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); + rd = arm_bpf_get_reg32(dst, tmp[0], ctx); /* Do shift operation */ switch (op) { @@ -604,303 +801,245 @@ static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk, break; } - if (dstk) - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); + arm_bpf_put_reg32(dst, rd, ctx); } /* dst = ~dst (64 bit) */ -static inline void emit_a32_neg64(const u8 dst[], bool dstk, +static inline void emit_a32_neg64(const s8 dst[], struct jit_ctx *ctx){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rd = dstk ? tmp[1] : dst[1]; - u8 rm = dstk ? tmp[0] : dst[0]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *rd; /* Setup Operand */ - if (dstk) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do Negate Operation */ - emit(ARM_RSBS_I(rd, rd, 0), ctx); - emit(ARM_RSC_I(rm, rm, 0), ctx); + emit(ARM_RSBS_I(rd[1], rd[1], 0), ctx); + emit(ARM_RSC_I(rd[0], rd[0], 0), ctx); - if (dstk) { - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg64(dst, rd, ctx); } /* dst = dst << src */ -static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; +static inline void emit_a32_lsh_r64(const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *rd; + s8 rt; /* Setup Operands */ - u8 rt = sstk ? tmp2[1] : src_lo; - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; - - if (sstk) - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (dstk) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSH operation */ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); - emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx); - emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); - emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx); - - if (dstk) { - emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); - } else { - emit(ARM_MOV_R(rd, ARM_LR), ctx); - emit(ARM_MOV_R(rm, ARM_IP), ctx); - } + emit(ARM_MOV_SR(ARM_LR, rd[0], SRTYPE_ASL, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[1], SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd[1], SRTYPE_LSR, tmp2[0]), ctx); + emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_ASL, rt), ctx); + + arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); + arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); } /* dst = dst >> src (signed)*/ -static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; +static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *rd; + s8 rt; + /* Setup Operands */ - u8 rt = sstk ? tmp2[1] : src_lo; - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; - - if (sstk) - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (dstk) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do the ARSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); - emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); _emit(ARM_COND_MI, ARM_B(0), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx); - emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx); - if (dstk) { - emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); - } else { - emit(ARM_MOV_R(rd, ARM_LR), ctx); - emit(ARM_MOV_R(rm, ARM_IP), ctx); - } + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx); + emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx); + + arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); + arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); } /* dst = dst >> src */ -static inline void emit_a32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; +static inline void emit_a32_rsh_r64(const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *rd; + s8 rt; + /* Setup Operands */ - u8 rt = sstk ? tmp2[1] : src_lo; - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; - - if (sstk) - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - if (dstk) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rt = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do RSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); - emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); - emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx); - if (dstk) { - emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); - } else { - emit(ARM_MOV_R(rd, ARM_LR), ctx); - emit(ARM_MOV_R(rm, ARM_IP), ctx); - } + emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_LSR, tmp2[0]), ctx); + emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_LSR, rt), ctx); + + arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); + arm_bpf_put_reg32(dst_hi, ARM_IP, ctx); } /* dst = dst << val */ -static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk, - const u32 val, struct jit_ctx *ctx){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; - /* Setup operands */ - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; +static inline void emit_a32_lsh_i64(const s8 dst[], + const u32 val, struct jit_ctx *ctx){ + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *rd; - if (dstk) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + /* Setup operands */ + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSH operation */ if (val < 32) { - emit(ARM_MOV_SI(tmp2[0], rm, SRTYPE_ASL, val), ctx); - emit(ARM_ORR_SI(rm, tmp2[0], rd, SRTYPE_LSR, 32 - val), ctx); - emit(ARM_MOV_SI(rd, rd, SRTYPE_ASL, val), ctx); + emit(ARM_MOV_SI(tmp2[0], rd[0], SRTYPE_ASL, val), ctx); + emit(ARM_ORR_SI(rd[0], tmp2[0], rd[1], SRTYPE_LSR, 32 - val), ctx); + emit(ARM_MOV_SI(rd[1], rd[1], SRTYPE_ASL, val), ctx); } else { if (val == 32) - emit(ARM_MOV_R(rm, rd), ctx); + emit(ARM_MOV_R(rd[0], rd[1]), ctx); else - emit(ARM_MOV_SI(rm, rd, SRTYPE_ASL, val - 32), ctx); - emit(ARM_EOR_R(rd, rd, rd), ctx); + emit(ARM_MOV_SI(rd[0], rd[1], SRTYPE_ASL, val - 32), ctx); + emit(ARM_EOR_R(rd[1], rd[1], rd[1]), ctx); } - if (dstk) { - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg64(dst, rd, ctx); } /* dst = dst >> val */ -static inline void emit_a32_rsh_i64(const u8 dst[], bool dstk, +static inline void emit_a32_rsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; - /* Setup operands */ - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *rd; - if (dstk) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + /* Setup operands */ + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSR operation */ if (val < 32) { - emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); - emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); - emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx); + emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); + emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); + emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx); } else if (val == 32) { - emit(ARM_MOV_R(rd, rm), ctx); - emit(ARM_MOV_I(rm, 0), ctx); + emit(ARM_MOV_R(rd[1], rd[0]), ctx); + emit(ARM_MOV_I(rd[0], 0), ctx); } else { - emit(ARM_MOV_SI(rd, rm, SRTYPE_LSR, val - 32), ctx); - emit(ARM_MOV_I(rm, 0), ctx); + emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_LSR, val - 32), ctx); + emit(ARM_MOV_I(rd[0], 0), ctx); } - if (dstk) { - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg64(dst, rd, ctx); } /* dst = dst >> val (signed) */ -static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk, +static inline void emit_a32_arsh_i64(const s8 dst[], const u32 val, struct jit_ctx *ctx){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; - /* Setup operands */ - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; - - if (dstk) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *rd; + + /* Setup operands */ + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do ARSH operation */ if (val < 32) { - emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); - emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); - emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx); + emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); + emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); + emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx); } else if (val == 32) { - emit(ARM_MOV_R(rd, rm), ctx); - emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx); + emit(ARM_MOV_R(rd[1], rd[0]), ctx); + emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); } else { - emit(ARM_MOV_SI(rd, rm, SRTYPE_ASR, val - 32), ctx); - emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx); + emit(ARM_MOV_SI(rd[1], rd[0], SRTYPE_ASR, val - 32), ctx); + emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, 31), ctx); } - if (dstk) { - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg64(dst, rd, ctx); } -static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk, - bool sstk, struct jit_ctx *ctx) { - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; +static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], + struct jit_ctx *ctx) { + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *rd, *rt; + /* Setup operands for multiplication */ - u8 rd = dstk ? tmp[1] : dst_lo; - u8 rm = dstk ? tmp[0] : dst_hi; - u8 rt = sstk ? tmp2[1] : src_lo; - u8 rn = sstk ? tmp2[0] : src_hi; - - if (dstk) { - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } - if (sstk) { - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx); - } + rd = arm_bpf_get_reg64(dst, tmp, ctx); + rt = arm_bpf_get_reg64(src, tmp2, ctx); /* Do Multiplication */ - emit(ARM_MUL(ARM_IP, rd, rn), ctx); - emit(ARM_MUL(ARM_LR, rm, rt), ctx); + emit(ARM_MUL(ARM_IP, rd[1], rt[0]), ctx); + emit(ARM_MUL(ARM_LR, rd[0], rt[1]), ctx); emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); - emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); - emit(ARM_ADD_R(rm, ARM_LR, rm), ctx); - if (dstk) { - emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); - } else { - emit(ARM_MOV_R(rd, ARM_IP), ctx); - } + emit(ARM_UMULL(ARM_IP, rd[0], rd[1], rt[1]), ctx); + emit(ARM_ADD_R(rd[0], ARM_LR, rd[0]), ctx); + + arm_bpf_put_reg32(dst_lo, ARM_IP, ctx); + arm_bpf_put_reg32(dst_hi, rd[0], ctx); } /* *(size *)(dst + off) = src */ -static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, - const s32 off, struct jit_ctx *ctx, const u8 sz){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rd = dstk ? tmp[1] : dst; - - if (dstk) - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); - if (off) { - emit_a32_mov_i(tmp[0], off, false, ctx); - emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx); +static inline void emit_str_r(const s8 dst, const s8 src[], + s32 off, struct jit_ctx *ctx, const u8 sz){ + const s8 *tmp = bpf2a32[TMP_REG_1]; + s32 off_max; + s8 rd; + + rd = arm_bpf_get_reg32(dst, tmp[1], ctx); + + if (sz == BPF_H) + off_max = 0xff; + else + off_max = 0xfff; + + if (off < 0 || off > off_max) { + emit_a32_mov_i(tmp[0], off, ctx); + emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx); rd = tmp[0]; + off = 0; } switch (sz) { - case BPF_W: - /* Store a Word */ - emit(ARM_STR_I(src, rd, 0), ctx); + case BPF_B: + /* Store a Byte */ + emit(ARM_STRB_I(src_lo, rd, off), ctx); break; case BPF_H: /* Store a HalfWord */ - emit(ARM_STRH_I(src, rd, 0), ctx); + emit(ARM_STRH_I(src_lo, rd, off), ctx); break; - case BPF_B: - /* Store a Byte */ - emit(ARM_STRB_I(src, rd, 0), ctx); + case BPF_W: + /* Store a Word */ + emit(ARM_STR_I(src_lo, rd, off), ctx); + break; + case BPF_DW: + /* Store a Double Word */ + emit(ARM_STR_I(src_lo, rd, off), ctx); + emit(ARM_STR_I(src_hi, rd, off + 4), ctx); break; } } /* dst = *(size*)(src + off) */ -static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, +static inline void emit_ldx_r(const s8 dst[], const s8 src, s32 off, struct jit_ctx *ctx, const u8 sz){ - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *rd = dstk ? tmp : dst; - u8 rm = src; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *rd = is_stacked(dst_lo) ? tmp : dst; + s8 rm = src; s32 off_max; if (sz == BPF_H) @@ -909,7 +1048,7 @@ static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, off_max = 0xfff; if (off < 0 || off > off_max) { - emit_a32_mov_i(tmp[0], off, false, ctx); + emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0]; off = 0; @@ -921,17 +1060,17 @@ static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, case BPF_B: /* Load a Byte */ emit(ARM_LDRB_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, dstk, ctx); + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_H: /* Load a HalfWord */ emit(ARM_LDRH_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, dstk, ctx); + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_W: /* Load a Word */ emit(ARM_LDR_I(rd[1], rm, off), ctx); - emit_a32_mov_i(dst[0], 0, dstk, ctx); + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_DW: /* Load a Double Word */ @@ -939,10 +1078,7 @@ static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); break; } - if (dstk) - emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx); - if (dstk && sz == BPF_DW) - emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx); + arm_bpf_put_reg64(dst, rd, ctx); } /* Arithmatic Operation */ @@ -981,64 +1117,66 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) { /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ - const u8 *r2 = bpf2a32[BPF_REG_2]; - const u8 *r3 = bpf2a32[BPF_REG_3]; - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; - const u8 *tcc = bpf2a32[TCALL_CNT]; + const s8 *r2 = bpf2a32[BPF_REG_2]; + const s8 *r3 = bpf2a32[BPF_REG_3]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tcc = bpf2a32[TCALL_CNT]; + const s8 *tc; const int idx0 = ctx->idx; #define cur_offset (ctx->idx - idx0) #define jmp_offset (out_offset - (cur_offset) - 2) - u32 off, lo, hi; + u32 lo, hi; + s8 r_array, r_index; + int off; /* if (index >= array->map.max_entries) * goto out; */ + BUILD_BUG_ON(offsetof(struct bpf_array, map.max_entries) > + ARM_INST_LDST__IMM12); off = offsetof(struct bpf_array, map.max_entries); - /* array->map.max_entries */ - emit_a32_mov_i(tmp[1], off, false, ctx); - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); - emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx); + r_array = arm_bpf_get_reg32(r2[1], tmp2[0], ctx); /* index is 32-bit for arrays */ - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); + r_index = arm_bpf_get_reg32(r3[1], tmp2[1], ctx); + /* array->map.max_entries */ + emit(ARM_LDR_I(tmp[1], r_array, off), ctx); /* index >= array->map.max_entries */ - emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx); + emit(ARM_CMP_R(r_index, tmp[1]), ctx); _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); + /* tmp2[0] = array, tmp2[1] = index */ + /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; * tail_call_cnt++; */ lo = (u32)MAX_TAIL_CALL_CNT; hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32); - emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx); - emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx); - emit(ARM_CMP_I(tmp[0], hi), ctx); - _emit(ARM_COND_EQ, ARM_CMP_I(tmp[1], lo), ctx); + tc = arm_bpf_get_reg64(tcc, tmp, ctx); + emit(ARM_CMP_I(tc[0], hi), ctx); + _emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx); _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); - emit(ARM_ADDS_I(tmp[1], tmp[1], 1), ctx); - emit(ARM_ADC_I(tmp[0], tmp[0], 0), ctx); - emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx); - emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx); + emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx); + emit(ARM_ADC_I(tc[0], tc[0], 0), ctx); + arm_bpf_put_reg64(tcc, tmp, ctx); /* prog = array->ptrs[index] * if (prog == NULL) * goto out; */ - off = offsetof(struct bpf_array, ptrs); - emit_a32_mov_i(tmp[1], off, false, ctx); - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); - emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx); - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); - emit(ARM_MOV_SI(tmp[0], tmp2[1], SRTYPE_ASL, 2), ctx); - emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx); + BUILD_BUG_ON(imm8m(offsetof(struct bpf_array, ptrs)) < 0); + off = imm8m(offsetof(struct bpf_array, ptrs)); + emit(ARM_ADD_I(tmp[1], r_array, off), ctx); + emit(ARM_LDR_R_SI(tmp[1], tmp[1], r_index, SRTYPE_ASL, 2), ctx); emit(ARM_CMP_I(tmp[1], 0), ctx); _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); /* goto *(prog->bpf_func + prologue_size); */ + BUILD_BUG_ON(offsetof(struct bpf_prog, bpf_func) > + ARM_INST_LDST__IMM12); off = offsetof(struct bpf_prog, bpf_func); - emit_a32_mov_i(tmp2[1], off, false, ctx); - emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx); + emit(ARM_LDR_I(tmp[1], tmp[1], off), ctx); emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); emit_bx_r(tmp[1], ctx); @@ -1059,7 +1197,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx) { #if __LINUX_ARM_ARCH__ < 6 - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx); @@ -1074,7 +1212,7 @@ static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx) static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) { #if __LINUX_ARM_ARCH__ < 6 - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx); emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx); @@ -1094,28 +1232,27 @@ static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx) } // push the scratch stack register on top of the stack -static inline void emit_push_r64(const u8 src[], const u8 shift, - struct jit_ctx *ctx) +static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx) { - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *rt; u16 reg_set = 0; - emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx); - emit(ARM_LDR_I(tmp2[0], ARM_SP, STACK_VAR(src[0]+shift)), ctx); + rt = arm_bpf_get_reg64(src, tmp2, ctx); - reg_set = (1 << tmp2[1]) | (1 << tmp2[0]); + reg_set = (1 << rt[1]) | (1 << rt[0]); emit(ARM_PUSH(reg_set), ctx); } static void build_prologue(struct jit_ctx *ctx) { - const u8 r0 = bpf2a32[BPF_REG_0][1]; - const u8 r2 = bpf2a32[BPF_REG_1][1]; - const u8 r3 = bpf2a32[BPF_REG_1][0]; - const u8 r4 = bpf2a32[BPF_REG_6][1]; - const u8 fplo = bpf2a32[BPF_REG_FP][1]; - const u8 fphi = bpf2a32[BPF_REG_FP][0]; - const u8 *tcc = bpf2a32[TCALL_CNT]; + const s8 r0 = bpf2a32[BPF_REG_0][1]; + const s8 r2 = bpf2a32[BPF_REG_1][1]; + const s8 r3 = bpf2a32[BPF_REG_1][0]; + const s8 r4 = bpf2a32[BPF_REG_6][1]; + const s8 fplo = bpf2a32[BPF_REG_FP][1]; + const s8 fphi = bpf2a32[BPF_REG_FP][0]; + const s8 *tcc = bpf2a32[TCALL_CNT]; /* Save callee saved registers. */ #ifdef CONFIG_FRAME_POINTER @@ -1136,8 +1273,8 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); /* Set up BPF prog stack base register */ - emit_a32_mov_r(fplo, ARM_IP, true, false, ctx); - emit_a32_mov_i(fphi, 0, true, ctx); + emit_a32_mov_r(fplo, ARM_IP, ctx); + emit_a32_mov_i(fphi, 0, ctx); /* mov r4, 0 */ emit(ARM_MOV_I(r4, 0), ctx); @@ -1146,8 +1283,8 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_MOV_R(r3, r4), ctx); emit(ARM_MOV_R(r2, r0), ctx); /* Initialize Tail Count */ - emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[0])), ctx); - emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[1])), ctx); + emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx); + emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx); /* end of prologue */ } @@ -1178,17 +1315,16 @@ static void build_epilogue(struct jit_ctx *ctx) static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) { const u8 code = insn->code; - const u8 *dst = bpf2a32[insn->dst_reg]; - const u8 *src = bpf2a32[insn->src_reg]; - const u8 *tmp = bpf2a32[TMP_REG_1]; - const u8 *tmp2 = bpf2a32[TMP_REG_2]; + const s8 *dst = bpf2a32[insn->dst_reg]; + const s8 *src = bpf2a32[insn->src_reg]; + const s8 *tmp = bpf2a32[TMP_REG_1]; + const s8 *tmp2 = bpf2a32[TMP_REG_2]; const s16 off = insn->off; const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; const bool is64 = BPF_CLASS(code) == BPF_ALU64; - const bool dstk = is_on_stack(insn->dst_reg); - const bool sstk = is_on_stack(insn->src_reg); - u8 rd, rt, rm, rn; + const s8 *rd, *rs; + s8 rd_lo, rt, rm, rn; s32 jmp_offset; #define check_imm(bits, imm) do { \ @@ -1211,11 +1347,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_MOV | BPF_X: switch (BPF_SRC(code)) { case BPF_X: - emit_a32_mov_r64(is64, dst, src, dstk, sstk, ctx); + emit_a32_mov_r64(is64, dst, src, ctx); break; case BPF_K: /* Sign-extend immediate value to destination reg */ - emit_a32_mov_i64(is64, dst, imm, dstk, ctx); + emit_a32_mov_se_i64(is64, dst, imm, ctx); break; } break; @@ -1255,8 +1391,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_XOR | BPF_X: switch (BPF_SRC(code)) { case BPF_X: - emit_a32_alu_r64(is64, dst, src, dstk, sstk, - ctx, BPF_OP(code)); + emit_a32_alu_r64(is64, dst, src, ctx, BPF_OP(code)); break; case BPF_K: /* Move immediate value to the temporary register @@ -1265,9 +1400,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) * value into temporary reg and then it would be * safe to do the operation on it. */ - emit_a32_mov_i64(is64, tmp2, imm, false, ctx); - emit_a32_alu_r64(is64, dst, tmp2, dstk, false, - ctx, BPF_OP(code)); + emit_a32_mov_se_i64(is64, tmp2, imm, ctx); + emit_a32_alu_r64(is64, dst, tmp2, ctx, BPF_OP(code)); break; } break; @@ -1277,26 +1411,22 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU | BPF_MOD | BPF_X: - rt = src_lo; - rd = dstk ? tmp2[1] : dst_lo; - if (dstk) - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + rd_lo = arm_bpf_get_reg32(dst_lo, tmp2[1], ctx); switch (BPF_SRC(code)) { case BPF_X: - rt = sstk ? tmp2[0] : rt; - if (sstk) - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), - ctx); + rt = arm_bpf_get_reg32(src_lo, tmp2[0], ctx); break; case BPF_K: rt = tmp2[0]; - emit_a32_mov_i(rt, imm, false, ctx); + emit_a32_mov_i(rt, imm, ctx); + break; + default: + rt = src_lo; break; } - emit_udivmod(rd, rd, rt, ctx, BPF_OP(code)); - if (dstk) - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code)); + arm_bpf_put_reg32(dst_lo, rd_lo, ctx); + emit_a32_mov_i(dst_hi, 0, ctx); break; case BPF_ALU64 | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_X: @@ -1310,54 +1440,54 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) if (unlikely(imm > 31)) return -EINVAL; if (imm) - emit_a32_alu_i(dst_lo, imm, dstk, ctx, BPF_OP(code)); - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code)); + emit_a32_mov_i(dst_hi, 0, ctx); break; /* dst = dst << imm */ case BPF_ALU64 | BPF_LSH | BPF_K: if (unlikely(imm > 63)) return -EINVAL; - emit_a32_lsh_i64(dst, dstk, imm, ctx); + emit_a32_lsh_i64(dst, imm, ctx); break; /* dst = dst >> imm */ case BPF_ALU64 | BPF_RSH | BPF_K: if (unlikely(imm > 63)) return -EINVAL; - emit_a32_rsh_i64(dst, dstk, imm, ctx); + emit_a32_rsh_i64(dst, imm, ctx); break; /* dst = dst << src */ case BPF_ALU64 | BPF_LSH | BPF_X: - emit_a32_lsh_r64(dst, src, dstk, sstk, ctx); + emit_a32_lsh_r64(dst, src, ctx); break; /* dst = dst >> src */ case BPF_ALU64 | BPF_RSH | BPF_X: - emit_a32_rsh_r64(dst, src, dstk, sstk, ctx); + emit_a32_rsh_r64(dst, src, ctx); break; /* dst = dst >> src (signed) */ case BPF_ALU64 | BPF_ARSH | BPF_X: - emit_a32_arsh_r64(dst, src, dstk, sstk, ctx); + emit_a32_arsh_r64(dst, src, ctx); break; /* dst = dst >> imm (signed) */ case BPF_ALU64 | BPF_ARSH | BPF_K: if (unlikely(imm > 63)) return -EINVAL; - emit_a32_arsh_i64(dst, dstk, imm, ctx); + emit_a32_arsh_i64(dst, imm, ctx); break; /* dst = ~dst */ case BPF_ALU | BPF_NEG: - emit_a32_alu_i(dst_lo, 0, dstk, ctx, BPF_OP(code)); - emit_a32_mov_i(dst_hi, 0, dstk, ctx); + emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code)); + emit_a32_mov_i(dst_hi, 0, ctx); break; /* dst = ~dst (64 bit) */ case BPF_ALU64 | BPF_NEG: - emit_a32_neg64(dst, dstk, ctx); + emit_a32_neg64(dst, ctx); break; /* dst = dst * src/imm */ case BPF_ALU64 | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_K: switch (BPF_SRC(code)) { case BPF_X: - emit_a32_mul_r64(dst, src, dstk, sstk, ctx); + emit_a32_mul_r64(dst, src, ctx); break; case BPF_K: /* Move immediate value to the temporary register @@ -1366,8 +1496,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) * reg then it would be safe to do the operation * on it. */ - emit_a32_mov_i64(is64, tmp2, imm, false, ctx); - emit_a32_mul_r64(dst, tmp2, dstk, false, ctx); + emit_a32_mov_se_i64(is64, tmp2, imm, ctx); + emit_a32_mul_r64(dst, tmp2, ctx); break; } break; @@ -1375,25 +1505,20 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = htobe(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: case BPF_ALU | BPF_END | BPF_FROM_BE: - rd = dstk ? tmp[0] : dst_hi; - rt = dstk ? tmp[1] : dst_lo; - if (dstk) { - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rd = arm_bpf_get_reg64(dst, tmp, ctx); if (BPF_SRC(code) == BPF_FROM_LE) goto emit_bswap_uxt; switch (imm) { case 16: - emit_rev16(rt, rt, ctx); + emit_rev16(rd[1], rd[1], ctx); goto emit_bswap_uxt; case 32: - emit_rev32(rt, rt, ctx); + emit_rev32(rd[1], rd[1], ctx); goto emit_bswap_uxt; case 64: - emit_rev32(ARM_LR, rt, ctx); - emit_rev32(rt, rd, ctx); - emit(ARM_MOV_R(rd, ARM_LR), ctx); + emit_rev32(ARM_LR, rd[1], ctx); + emit_rev32(rd[1], rd[0], ctx); + emit(ARM_MOV_R(rd[0], ARM_LR), ctx); break; } goto exit; @@ -1402,36 +1527,30 @@ emit_bswap_uxt: case 16: /* zero-extend 16 bits into 64 bits */ #if __LINUX_ARM_ARCH__ < 6 - emit_a32_mov_i(tmp2[1], 0xffff, false, ctx); - emit(ARM_AND_R(rt, rt, tmp2[1]), ctx); + emit_a32_mov_i(tmp2[1], 0xffff, ctx); + emit(ARM_AND_R(rd[1], rd[1], tmp2[1]), ctx); #else /* ARMv6+ */ - emit(ARM_UXTH(rt, rt), ctx); + emit(ARM_UXTH(rd[1], rd[1]), ctx); #endif - emit(ARM_EOR_R(rd, rd, rd), ctx); + emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); break; case 32: /* zero-extend 32 bits into 64 bits */ - emit(ARM_EOR_R(rd, rd, rd), ctx); + emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); break; case 64: /* nop */ break; } exit: - if (dstk) { - emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + arm_bpf_put_reg64(dst, rd, ctx); break; /* dst = imm64 */ case BPF_LD | BPF_IMM | BPF_DW: { - const struct bpf_insn insn1 = insn[1]; - u32 hi, lo = imm; + u64 val = (u32)imm | (u64)insn[1].imm << 32; - hi = insn1.imm; - emit_a32_mov_i(dst_lo, lo, dstk, ctx); - emit_a32_mov_i(dst_hi, hi, dstk, ctx); + emit_a32_mov_i64(dst, val, ctx); return 1; } @@ -1440,10 +1559,8 @@ exit: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_DW: - rn = sstk ? tmp2[1] : src_lo; - if (sstk) - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); - emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); + rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); + emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: @@ -1453,18 +1570,15 @@ exit: switch (BPF_SIZE(code)) { case BPF_DW: /* Sign-extend immediate value into temp reg */ - emit_a32_mov_i64(true, tmp2, imm, false, ctx); - emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, BPF_W); - emit_str_r(dst_lo, tmp2[0], dstk, off+4, ctx, BPF_W); + emit_a32_mov_se_i64(true, tmp2, imm, ctx); break; case BPF_W: case BPF_H: case BPF_B: - emit_a32_mov_i(tmp2[1], imm, false, ctx); - emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, - BPF_SIZE(code)); + emit_a32_mov_i(tmp2[1], imm, ctx); break; } + emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code)); break; /* STX XADD: lock *(u32 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_W: @@ -1476,25 +1590,9 @@ exit: case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_DW: - { - u8 sz = BPF_SIZE(code); - - rn = sstk ? tmp2[1] : src_lo; - rm = sstk ? tmp2[0] : src_hi; - if (sstk) { - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); - } - - /* Store the value */ - if (BPF_SIZE(code) == BPF_DW) { - emit_str_r(dst_lo, rn, dstk, off, ctx, BPF_W); - emit_str_r(dst_lo, rm, dstk, off+4, ctx, BPF_W); - } else { - emit_str_r(dst_lo, rn, dstk, off, ctx, sz); - } + rs = arm_bpf_get_reg64(src, tmp2, ctx); + emit_str_r(dst_lo, rs, off, ctx, BPF_SIZE(code)); break; - } /* PC += off if dst == src */ /* PC += off if dst > src */ /* PC += off if dst >= src */ @@ -1518,12 +1616,8 @@ exit: case BPF_JMP | BPF_JSLT | BPF_X: case BPF_JMP | BPF_JSLE | BPF_X: /* Setup source registers */ - rm = sstk ? tmp2[0] : src_hi; - rn = sstk ? tmp2[1] : src_lo; - if (sstk) { - emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); - emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx); - } + rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); + rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); goto go_jmp; /* PC += off if dst == imm */ /* PC += off if dst > imm */ @@ -1552,18 +1646,13 @@ exit: rm = tmp2[0]; rn = tmp2[1]; /* Sign-extend immediate value */ - emit_a32_mov_i64(true, tmp2, imm, false, ctx); + emit_a32_mov_se_i64(true, tmp2, imm, ctx); go_jmp: /* Setup destination register */ - rd = dstk ? tmp[0] : dst_hi; - rt = dstk ? tmp[1] : dst_lo; - if (dstk) { - emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx); - emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx); - } + rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Check for the condition */ - emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code)); + emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code)); /* Setup JUMP instruction */ jmp_offset = bpf2a32_offset(i+off, i, ctx); @@ -1619,21 +1708,21 @@ go_jmp: /* function call */ case BPF_JMP | BPF_CALL: { - const u8 *r0 = bpf2a32[BPF_REG_0]; - const u8 *r1 = bpf2a32[BPF_REG_1]; - const u8 *r2 = bpf2a32[BPF_REG_2]; - const u8 *r3 = bpf2a32[BPF_REG_3]; - const u8 *r4 = bpf2a32[BPF_REG_4]; - const u8 *r5 = bpf2a32[BPF_REG_5]; + const s8 *r0 = bpf2a32[BPF_REG_0]; + const s8 *r1 = bpf2a32[BPF_REG_1]; + const s8 *r2 = bpf2a32[BPF_REG_2]; + const s8 *r3 = bpf2a32[BPF_REG_3]; + const s8 *r4 = bpf2a32[BPF_REG_4]; + const s8 *r5 = bpf2a32[BPF_REG_5]; const u32 func = (u32)__bpf_call_base + (u32)imm; - emit_a32_mov_r64(true, r0, r1, false, false, ctx); - emit_a32_mov_r64(true, r1, r2, false, true, ctx); - emit_push_r64(r5, 0, ctx); - emit_push_r64(r4, 8, ctx); - emit_push_r64(r3, 16, ctx); + emit_a32_mov_r64(true, r0, r1, ctx); + emit_a32_mov_r64(true, r1, r2, ctx); + emit_push_r64(r5, ctx); + emit_push_r64(r4, ctx); + emit_push_r64(r3, ctx); - emit_a32_mov_i(tmp[1], func, false, ctx); + emit_a32_mov_i(tmp[1], func, ctx); emit_blx_r(tmp[1], ctx); emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean @@ -1745,6 +1834,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; + ctx.cpu_architecture = cpu_architecture(); /* Not able to allocate memory for offsets[] , then * we must fall back to the interpreter diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index d5cf5f6208aa..f4e58bcdaa43 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -77,11 +77,14 @@ #define ARM_INST_EOR_R 0x00200000 #define ARM_INST_EOR_I 0x02200000 -#define ARM_INST_LDRB_I 0x05d00000 +#define ARM_INST_LDST__U 0x00800000 +#define ARM_INST_LDST__IMM12 0x00000fff +#define ARM_INST_LDRB_I 0x05500000 #define ARM_INST_LDRB_R 0x07d00000 -#define ARM_INST_LDRH_I 0x01d000b0 +#define ARM_INST_LDRD_I 0x014000d0 +#define ARM_INST_LDRH_I 0x015000b0 #define ARM_INST_LDRH_R 0x019000b0 -#define ARM_INST_LDR_I 0x05900000 +#define ARM_INST_LDR_I 0x05100000 #define ARM_INST_LDR_R 0x07900000 #define ARM_INST_LDM 0x08900000 @@ -124,9 +127,10 @@ #define ARM_INST_SBC_R 0x00c00000 #define ARM_INST_SBCS_R 0x00d00000 -#define ARM_INST_STR_I 0x05800000 -#define ARM_INST_STRB_I 0x05c00000 -#define ARM_INST_STRH_I 0x01c000b0 +#define ARM_INST_STR_I 0x05000000 +#define ARM_INST_STRB_I 0x05400000 +#define ARM_INST_STRD_I 0x014000f0 +#define ARM_INST_STRH_I 0x014000b0 #define ARM_INST_TST_R 0x01100000 #define ARM_INST_TST_I 0x03100000 @@ -183,17 +187,18 @@ #define ARM_EOR_R(rd, rn, rm) _AL3_R(ARM_INST_EOR, rd, rn, rm) #define ARM_EOR_I(rd, rn, imm) _AL3_I(ARM_INST_EOR, rd, rn, imm) -#define ARM_LDR_I(rt, rn, off) (ARM_INST_LDR_I | (rt) << 12 | (rn) << 16 \ - | ((off) & 0xfff)) -#define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | (rt) << 12 | (rn) << 16 \ +#define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | ARM_INST_LDST__U \ + | (rt) << 12 | (rn) << 16 \ | (rm)) -#define ARM_LDRB_I(rt, rn, off) (ARM_INST_LDRB_I | (rt) << 12 | (rn) << 16 \ - | (off)) -#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | (rt) << 12 | (rn) << 16 \ +#define ARM_LDR_R_SI(rt, rn, rm, type, imm) \ + (ARM_INST_LDR_R | ARM_INST_LDST__U \ + | (rt) << 12 | (rn) << 16 \ + | (imm) << 7 | (type) << 5 | (rm)) +#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | ARM_INST_LDST__U \ + | (rt) << 12 | (rn) << 16 \ | (rm)) -#define ARM_LDRH_I(rt, rn, off) (ARM_INST_LDRH_I | (rt) << 12 | (rn) << 16 \ - | (((off) & 0xf0) << 4) | ((off) & 0xf)) -#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | (rt) << 12 | (rn) << 16 \ +#define ARM_LDRH_R(rt, rn, rm) (ARM_INST_LDRH_R | ARM_INST_LDST__U \ + | (rt) << 12 | (rn) << 16 \ | (rm)) #define ARM_LDM(rn, regs) (ARM_INST_LDM | (rn) << 16 | (regs)) @@ -254,13 +259,6 @@ #define ARM_SUBS_I(rd, rn, imm) _AL3_I(ARM_INST_SUBS, rd, rn, imm) #define ARM_SBC_I(rd, rn, imm) _AL3_I(ARM_INST_SBC, rd, rn, imm) -#define ARM_STR_I(rt, rn, off) (ARM_INST_STR_I | (rt) << 12 | (rn) << 16 \ - | ((off) & 0xfff)) -#define ARM_STRH_I(rt, rn, off) (ARM_INST_STRH_I | (rt) << 12 | (rn) << 16 \ - | (((off) & 0xf0) << 4) | ((off) & 0xf)) -#define ARM_STRB_I(rt, rn, off) (ARM_INST_STRB_I | (rt) << 12 | (rn) << 16 \ - | (((off) & 0xf0) << 4) | ((off) & 0xf)) - #define ARM_TST_R(rn, rm) _AL3_R(ARM_INST_TST, 0, rn, rm) #define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm) diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 2438b96004c1..fcc5bfec8bd1 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -110,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase) } sched_clock_register(omap_32k_read_sched_clock, 32, 32768); - register_persistent_clock(NULL, omap_read_persistent_clock64); + register_persistent_clock(omap_read_persistent_clock64); pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n"); return 0; diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index ba13f793fbce..ed36dcab80f1 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -127,53 +127,6 @@ static int pxa_ssp_probe(struct platform_device *pdev) if (IS_ERR(ssp->clk)) return PTR_ERR(ssp->clk); - if (dev->of_node) { - struct of_phandle_args dma_spec; - struct device_node *np = dev->of_node; - int ret; - - /* - * FIXME: we should allocate the DMA channel from this - * context and pass the channel down to the ssp users. - * For now, we lookup the rx and tx indices manually - */ - - /* rx */ - ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells", - 0, &dma_spec); - - if (ret) { - dev_err(dev, "Can't parse dmas property\n"); - return -ENODEV; - } - ssp->drcmr_rx = dma_spec.args[0]; - of_node_put(dma_spec.np); - - /* tx */ - ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells", - 1, &dma_spec); - if (ret) { - dev_err(dev, "Can't parse dmas property\n"); - return -ENODEV; - } - ssp->drcmr_tx = dma_spec.args[0]; - of_node_put(dma_spec.np); - } else { - res = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (res == NULL) { - dev_err(dev, "no SSP RX DRCMR defined\n"); - return -ENODEV; - } - ssp->drcmr_rx = res->start; - - res = platform_get_resource(pdev, IORESOURCE_DMA, 1); - if (res == NULL) { - dev_err(dev, "no SSP TX DRCMR defined\n"); - return -ENODEV; - } - ssp->drcmr_tx = res->start; - } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) { dev_err(dev, "no memory resource defined\n"); diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index e90cc8a08186..f8bd523d64d1 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -47,9 +47,6 @@ (unsigned long)(addr) + \ (size)) -/* Used as a marker in ARM_pc to note when we're in a jprobe. */ -#define JPROBE_MAGIC_ADDR 0xffffffff - DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -289,8 +286,8 @@ void __kprobes kprobe_handler(struct pt_regs *regs) break; case KPROBE_REENTER: /* A nested probe was hit in FIQ, it is a BUG */ - pr_warn("Unrecoverable kprobe detected at %p.\n", - p->addr); + pr_warn("Unrecoverable kprobe detected.\n"); + dump_kprobe(p); /* fall through */ default: /* impossible cases */ @@ -303,10 +300,10 @@ void __kprobes kprobe_handler(struct pt_regs *regs) /* * If we have no pre-handler or it returned 0, we - * continue with normal processing. If we have a - * pre-handler and it returned non-zero, it prepped - * for calling the break_handler below on re-entry, - * so get out doing nothing more here. + * continue with normal processing. If we have a + * pre-handler and it returned non-zero, it will + * modify the execution path and no need to single + * stepping. Let's just reset current kprobe and exit. */ if (!p->pre_handler || !p->pre_handler(p, regs)) { kcb->kprobe_status = KPROBE_HIT_SS; @@ -315,20 +312,9 @@ void __kprobes kprobe_handler(struct pt_regs *regs) kcb->kprobe_status = KPROBE_HIT_SSDONE; p->post_handler(p, regs, 0); } - reset_current_kprobe(); - } - } - } else if (cur) { - /* We probably hit a jprobe. Call its break handler. */ - if (cur->break_handler && cur->break_handler(cur, regs)) { - kcb->kprobe_status = KPROBE_HIT_SS; - singlestep(cur, regs, kcb); - if (cur->post_handler) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - cur->post_handler(cur, regs, 0); } + reset_current_kprobe(); } - reset_current_kprobe(); } else { /* * The probe was removed and a race is in progress. @@ -521,117 +507,6 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, regs->ARM_lr = (unsigned long)&kretprobe_trampoline; } -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct jprobe *jp = container_of(p, struct jprobe, kp); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - long sp_addr = regs->ARM_sp; - long cpsr; - - kcb->jprobe_saved_regs = *regs; - memcpy(kcb->jprobes_stack, (void *)sp_addr, MIN_STACK_SIZE(sp_addr)); - regs->ARM_pc = (long)jp->entry; - - cpsr = regs->ARM_cpsr | PSR_I_BIT; -#ifdef CONFIG_THUMB2_KERNEL - /* Set correct Thumb state in cpsr */ - if (regs->ARM_pc & 1) - cpsr |= PSR_T_BIT; - else - cpsr &= ~PSR_T_BIT; -#endif - regs->ARM_cpsr = cpsr; - - preempt_disable(); - return 1; -} - -void __kprobes jprobe_return(void) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - __asm__ __volatile__ ( - /* - * Setup an empty pt_regs. Fill SP and PC fields as - * they're needed by longjmp_break_handler. - * - * We allocate some slack between the original SP and start of - * our fabricated regs. To be precise we want to have worst case - * covered which is STMFD with all 16 regs so we allocate 2 * - * sizeof(struct_pt_regs)). - * - * This is to prevent any simulated instruction from writing - * over the regs when they are accessing the stack. - */ -#ifdef CONFIG_THUMB2_KERNEL - "sub r0, %0, %1 \n\t" - "mov sp, r0 \n\t" -#else - "sub sp, %0, %1 \n\t" -#endif - "ldr r0, ="__stringify(JPROBE_MAGIC_ADDR)"\n\t" - "str %0, [sp, %2] \n\t" - "str r0, [sp, %3] \n\t" - "mov r0, sp \n\t" - "bl kprobe_handler \n\t" - - /* - * Return to the context saved by setjmp_pre_handler - * and restored by longjmp_break_handler. - */ -#ifdef CONFIG_THUMB2_KERNEL - "ldr lr, [sp, %2] \n\t" /* lr = saved sp */ - "ldrd r0, r1, [sp, %5] \n\t" /* r0,r1 = saved lr,pc */ - "ldr r2, [sp, %4] \n\t" /* r2 = saved psr */ - "stmdb lr!, {r0, r1, r2} \n\t" /* push saved lr and */ - /* rfe context */ - "ldmia sp, {r0 - r12} \n\t" - "mov sp, lr \n\t" - "ldr lr, [sp], #4 \n\t" - "rfeia sp! \n\t" -#else - "ldr r0, [sp, %4] \n\t" - "msr cpsr_cxsf, r0 \n\t" - "ldmia sp, {r0 - pc} \n\t" -#endif - : - : "r" (kcb->jprobe_saved_regs.ARM_sp), - "I" (sizeof(struct pt_regs) * 2), - "J" (offsetof(struct pt_regs, ARM_sp)), - "J" (offsetof(struct pt_regs, ARM_pc)), - "J" (offsetof(struct pt_regs, ARM_cpsr)), - "J" (offsetof(struct pt_regs, ARM_lr)) - : "memory", "cc"); -} - -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - long stack_addr = kcb->jprobe_saved_regs.ARM_sp; - long orig_sp = regs->ARM_sp; - struct jprobe *jp = container_of(p, struct jprobe, kp); - - if (regs->ARM_pc == JPROBE_MAGIC_ADDR) { - if (orig_sp != stack_addr) { - struct pt_regs *saved_regs = - (struct pt_regs *)kcb->jprobe_saved_regs.ARM_sp; - printk("current sp %lx does not match saved sp %lx\n", - orig_sp, stack_addr); - printk("Saved registers for jprobe %p\n", jp); - show_regs(saved_regs); - printk("Current registers\n"); - show_regs(regs); - BUG(); - } - *regs = kcb->jprobe_saved_regs; - memcpy((void *)stack_addr, kcb->jprobes_stack, - MIN_STACK_SIZE(stack_addr)); - preempt_enable_no_resched(); - return 1; - } - return 0; -} - int __kprobes arch_trampoline_kprobe(struct kprobe *p) { return 0; diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 14db14152909..cc237fa9b90f 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -1461,7 +1461,6 @@ fail: print_registers(&result_regs); if (mem) { - pr_err("current_stack=%p\n", current_stack); pr_err("expected_memory:\n"); print_memory(expected_memory, mem_size); pr_err("result_memory:\n"); diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile index a81404c09d5d..94516c40ebd3 100644 --- a/arch/arm/vfp/Makefile +++ b/arch/arm/vfp/Makefile @@ -8,8 +8,5 @@ # asflags-y := -DDEBUG KBUILD_AFLAGS :=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=softvfp+vfp -mfloat-abi=soft) -LDFLAGS +=--no-warn-mismatch -obj-y += vfp.o - -vfp-$(CONFIG_VFP) += vfpmodule.o entry.o vfphw.o vfpsingle.o vfpdouble.o +obj-y += vfpmodule.o entry.o vfphw.o vfpsingle.o vfpdouble.o diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 35d0f823e823..dc7e6b50ef67 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -596,13 +596,11 @@ int vfp_preserve_user_clear_hwstate(struct user_vfp __user *ufp, } /* Sanitise and restore the current VFP state from the provided structures. */ -int vfp_restore_user_hwstate(struct user_vfp __user *ufp, - struct user_vfp_exc __user *ufp_exc) +int vfp_restore_user_hwstate(struct user_vfp *ufp, struct user_vfp_exc *ufp_exc) { struct thread_info *thread = current_thread_info(); struct vfp_hard_struct *hwstate = &thread->vfpstate.hard; unsigned long fpexc; - int err = 0; /* Disable VFP to avoid corrupting the new thread state. */ vfp_flush_hwstate(thread); @@ -611,17 +609,16 @@ int vfp_restore_user_hwstate(struct user_vfp __user *ufp, * Copy the floating point registers. There can be unused * registers see asm/hwcap.h for details. */ - err |= __copy_from_user(&hwstate->fpregs, &ufp->fpregs, - sizeof(hwstate->fpregs)); + memcpy(&hwstate->fpregs, &ufp->fpregs, sizeof(hwstate->fpregs)); /* * Copy the status and control register. */ - __get_user_error(hwstate->fpscr, &ufp->fpscr, err); + hwstate->fpscr = ufp->fpscr; /* * Sanitise and restore the exception registers. */ - __get_user_error(fpexc, &ufp_exc->fpexc, err); + fpexc = ufp_exc->fpexc; /* Ensure the VFP is enabled. */ fpexc |= FPEXC_EN; @@ -630,10 +627,10 @@ int vfp_restore_user_hwstate(struct user_vfp __user *ufp, fpexc &= ~(FPEXC_EX | FPEXC_FP2V); hwstate->fpexc = fpexc; - __get_user_error(hwstate->fpinst, &ufp_exc->fpinst, err); - __get_user_error(hwstate->fpinst2, &ufp_exc->fpinst2, err); + hwstate->fpinst = ufp_exc->fpinst; + hwstate->fpinst2 = ufp_exc->fpinst2; - return err ? -EFAULT : 0; + return 0; } /* |