diff options
Diffstat (limited to 'arch/arm')
259 files changed, 7303 insertions, 1809 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8a50efb559f3..ba75e3661a41 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -7,7 +7,6 @@ config ARM select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEVMEM_IS_ALLOWED - select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE @@ -117,7 +116,6 @@ config ARM select OLD_SIGSUSPEND3 select PCI_SYSCALL if PCI select PERF_USE_VMALLOC - select REFCOUNT_FULL select RTC_LIB select SYS_SUPPORTS_APM_EMULATION # Above selects are sorted alphabetically; please add new ones @@ -1020,7 +1018,7 @@ config ARM_ERRATA_775420 depends on CPU_V7 help This option enables the workaround for the 775420 Cortex-A9 (r2p2, - r2p6,r2p8,r2p10,r3p0) erratum. In case a date cache maintenance + r2p6,r2p8,r2p10,r3p0) erratum. In case a data cache maintenance operation aborts with MMU exception, it might cause the processor to deadlock. This workaround puts DSB before executing ISB if an abort may occur on cache maintenance. @@ -1359,7 +1357,7 @@ config ARCH_NR_GPIO int default 2048 if ARCH_SOCFPGA default 1024 if ARCH_BRCMSTB || ARCH_RENESAS || ARCH_TEGRA || \ - ARCH_ZYNQ + ARCH_ZYNQ || ARCH_ASPEED default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5 || \ SOC_DRA7XX || ARCH_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210 default 416 if ARCH_SUNXI diff --git a/arch/arm/boot/bootp/init.S b/arch/arm/boot/bootp/init.S index 5c476bd2b4ce..b562da2f7040 100644 --- a/arch/arm/boot/bootp/init.S +++ b/arch/arm/boot/bootp/init.S @@ -13,7 +13,7 @@ * size immediately following the kernel, we could build this into * a binary blob, and concatenate the zImage using the cat command. */ - .section .start,#alloc,#execinstr + .section .start, "ax" .type _start, #function .globl _start diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 9219389bbe61..a1e883c5e5c4 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -121,7 +121,7 @@ ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj) asflags-y := -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. -KBSS_SZ = $(shell echo $$(($$($(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \ +KBSS_SZ = $(shell echo $$(($$($(NM) $(obj)/../../../../vmlinux | \ sed -n -e 's/^\([^ ]*\) [AB] __bss_start$$/-0x\1/p' \ -e 's/^\([^ ]*\) [AB] __bss_stop$$/+0x\1/p') )) ) LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ) @@ -165,7 +165,7 @@ $(obj)/bswapsdi2.S: $(srctree)/arch/$(SRCARCH)/lib/bswapsdi2.S # The .data section is already discarded by the linker script so no need # to bother about it here. check_for_bad_syms = \ -bad_syms=$$($(CROSS_COMPILE)nm $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \ +bad_syms=$$($(NM) $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \ [ -z "$$bad_syms" ] || \ ( echo "following symbols must have non local/private scope:" >&2; \ echo "$$bad_syms" >&2; false ) diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c index 330cd3c2eae5..64c49747f8a3 100644 --- a/arch/arm/boot/compressed/atags_to_fdt.c +++ b/arch/arm/boot/compressed/atags_to_fdt.c @@ -19,7 +19,7 @@ static int node_offset(void *fdt, const char *node_path) } static int setprop(void *fdt, const char *node_path, const char *property, - uint32_t *val_array, int size) + void *val_array, int size) { int offset = node_offset(fdt, node_path); if (offset < 0) @@ -60,7 +60,7 @@ static uint32_t get_cell_size(const void *fdt) { int len; uint32_t cell_size = 1; - const uint32_t *size_len = getprop(fdt, "/", "#size-cells", &len); + const __be32 *size_len = getprop(fdt, "/", "#size-cells", &len); if (size_len) cell_size = fdt32_to_cpu(*size_len); @@ -129,7 +129,7 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space) struct tag *atag = atag_list; /* In the case of 64 bits memory size, need to reserve 2 cells for * address and size for each bank */ - uint32_t mem_reg_property[2 * 2 * NR_BANKS]; + __be32 mem_reg_property[2 * 2 * NR_BANKS]; int memcount = 0; int ret, memsize; @@ -138,7 +138,7 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space) return 1; /* if we get a DTB here we're done already */ - if (*(u32 *)atag_list == fdt32_to_cpu(FDT_MAGIC)) + if (*(__be32 *)atag_list == cpu_to_fdt32(FDT_MAGIC)) return 0; /* validate the ATAG */ @@ -177,8 +177,8 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space) /* if memsize is 2, that means that * each data needs 2 cells of 32 bits, * so the data are 64 bits */ - uint64_t *mem_reg_prop64 = - (uint64_t *)mem_reg_property; + __be64 *mem_reg_prop64 = + (__be64 *)mem_reg_property; mem_reg_prop64[memcount++] = cpu_to_fdt64(atag->u.mem.start); mem_reg_prop64[memcount++] = diff --git a/arch/arm/boot/compressed/big-endian.S b/arch/arm/boot/compressed/big-endian.S index 88e2a88d324b..0e092c36da2f 100644 --- a/arch/arm/boot/compressed/big-endian.S +++ b/arch/arm/boot/compressed/big-endian.S @@ -6,7 +6,7 @@ * Author: Nicolas Pitre */ - .section ".start", #alloc, #execinstr + .section ".start", "ax" mrc p15, 0, r0, c1, c0, 0 @ read control reg orr r0, r0, #(1 << 7) @ enable big endian mode diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 93dffed0ac6e..ead21e5f2b80 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -140,7 +140,7 @@ #endif .endm - .section ".start", #alloc, #execinstr + .section ".start", "ax" /* * sort out different calling conventions */ @@ -1273,7 +1273,7 @@ iflush: __armv5tej_mmu_cache_flush: tst r4, #1 movne pc, lr -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate D cache bne 1b mcr p15, 0, r0, c7, c5, 0 @ flush I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h index b36c0289a308..6a0f1f524466 100644 --- a/arch/arm/boot/compressed/libfdt_env.h +++ b/arch/arm/boot/compressed/libfdt_env.h @@ -2,11 +2,13 @@ #ifndef _ARM_LIBFDT_ENV_H #define _ARM_LIBFDT_ENV_H +#include <linux/limits.h> #include <linux/types.h> #include <linux/string.h> #include <asm/byteorder.h> -#define INT_MAX ((int)(~0U>>1)) +#define INT32_MAX S32_MAX +#define UINT32_MAX U32_MAX typedef __be16 fdt16_t; typedef __be32 fdt32_t; diff --git a/arch/arm/boot/compressed/piggy.S b/arch/arm/boot/compressed/piggy.S index 0284f84dcf38..27577644ee72 100644 --- a/arch/arm/boot/compressed/piggy.S +++ b/arch/arm/boot/compressed/piggy.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ - .section .piggydata,#alloc + .section .piggydata, "a" .globl input_data input_data: .incbin "arch/arm/boot/compressed/piggy_data" diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi index baadf7e2f8f4..125379ecab2f 100644 --- a/arch/arm/boot/dts/am3517.dtsi +++ b/arch/arm/boot/dts/am3517.dtsi @@ -16,6 +16,37 @@ can = &hecc; }; + cpus { + cpu: cpu@0 { + /* Based on OMAP3630 variants OPP50 and OPP100 */ + operating-points-v2 = <&cpu0_opp_table>; + + clock-latency = <300000>; /* From legacy driver */ + }; + }; + + cpu0_opp_table: opp-table { + compatible = "operating-points-v2-ti-cpu"; + syscon = <&scm_conf>; + /* + * AM3517 TRM only lists 600MHz @ 1.2V, but omap36xx + * appear to operate at 300MHz as well. Since AM3517 only + * lists one operating voltage, it will remain fixed at 1.2V + */ + opp50-300000000 { + opp-hz = /bits/ 64 <300000000>; + opp-microvolt = <1200000>; + opp-supported-hw = <0xffffffff 0xffffffff>; + opp-suspend; + }; + + opp100-600000000 { + opp-hz = /bits/ 64 <600000000>; + opp-microvolt = <1200000>; + opp-supported-hw = <0xffffffff 0xffffffff>; + }; + }; + ocp@68000000 { am35x_otg_hs: am35x_otg_hs@5c040000 { compatible = "ti,omap3-musb"; diff --git a/arch/arm/boot/dts/am3517_mt_ventoux.dts b/arch/arm/boot/dts/am3517_mt_ventoux.dts index e507e4ae0d88..e7d7124a34ba 100644 --- a/arch/arm/boot/dts/am3517_mt_ventoux.dts +++ b/arch/arm/boot/dts/am3517_mt_ventoux.dts @@ -8,7 +8,7 @@ / { model = "TeeJet Mt.Ventoux"; - compatible = "teejet,mt_ventoux", "ti,omap3"; + compatible = "teejet,mt_ventoux", "ti,am3517", "ti,omap3"; memory@80000000 { device_type = "memory"; diff --git a/arch/arm/boot/dts/am3874-iceboard.dts b/arch/arm/boot/dts/am3874-iceboard.dts index 883fb85135d4..1b4b2b0500e4 100644 --- a/arch/arm/boot/dts/am3874-iceboard.dts +++ b/arch/arm/boot/dts/am3874-iceboard.dts @@ -111,13 +111,13 @@ reg = <0x70>; #address-cells = <1>; #size-cells = <0>; + i2c-mux-idle-disconnect; i2c@0 { /* FMC A */ #address-cells = <1>; #size-cells = <0>; reg = <0>; - i2c-mux-idle-disconnect; }; i2c@1 { @@ -125,7 +125,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <1>; - i2c-mux-idle-disconnect; }; i2c@2 { @@ -133,7 +132,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <2>; - i2c-mux-idle-disconnect; }; i2c@3 { @@ -141,7 +139,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <3>; - i2c-mux-idle-disconnect; }; i2c@4 { @@ -149,14 +146,12 @@ #address-cells = <1>; #size-cells = <0>; reg = <4>; - i2c-mux-idle-disconnect; }; i2c@5 { #address-cells = <1>; #size-cells = <0>; reg = <5>; - i2c-mux-idle-disconnect; ina230@40 { compatible = "ti,ina230"; reg = <0x40>; shunt-resistor = <5000>; }; ina230@41 { compatible = "ti,ina230"; reg = <0x41>; shunt-resistor = <5000>; }; @@ -182,14 +177,12 @@ #address-cells = <1>; #size-cells = <0>; reg = <6>; - i2c-mux-idle-disconnect; }; i2c@7 { #address-cells = <1>; #size-cells = <0>; reg = <7>; - i2c-mux-idle-disconnect; u41: pca9575@20 { compatible = "nxp,pca9575"; diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts index 0aaacea1d887..820ce3b60bb6 100644 --- a/arch/arm/boot/dts/am571x-idk.dts +++ b/arch/arm/boot/dts/am571x-idk.dts @@ -186,3 +186,30 @@ pinctrl-1 = <&mmc2_pins_hs>; pinctrl-2 = <&mmc2_pins_ddr_rev20 &mmc2_iodelay_ddr_conf>; }; + +&mac_sw { + pinctrl-names = "default", "sleep"; + status = "okay"; +}; + +&cpsw_port1 { + phy-handle = <ðphy0_sw>; + phy-mode = "rgmii"; + ti,dual-emac-pvid = <1>; +}; + +&cpsw_port2 { + phy-handle = <ðphy1_sw>; + phy-mode = "rgmii"; + ti,dual-emac-pvid = <2>; +}; + +&davinci_mdio_sw { + ethphy0_sw: ethernet-phy@0 { + reg = <0>; + }; + + ethphy1_sw: ethernet-phy@1 { + reg = <1>; + }; +}; diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts index ea1c119feaa5..c3d966904d64 100644 --- a/arch/arm/boot/dts/am572x-idk.dts +++ b/arch/arm/boot/dts/am572x-idk.dts @@ -27,3 +27,8 @@ pinctrl-1 = <&mmc2_pins_hs>; pinctrl-2 = <&mmc2_pins_ddr_rev20>; }; + +&mac { + status = "okay"; + dual_emac; +}; diff --git a/arch/arm/boot/dts/am574x-idk.dts b/arch/arm/boot/dts/am574x-idk.dts index 7935d70874ce..fa0088025b2c 100644 --- a/arch/arm/boot/dts/am574x-idk.dts +++ b/arch/arm/boot/dts/am574x-idk.dts @@ -35,3 +35,8 @@ pinctrl-1 = <&mmc2_pins_default>; pinctrl-2 = <&mmc2_pins_default>; }; + +&mac { + status = "okay"; + dual_emac; +}; diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi index 423855a2a2d6..398721c7201c 100644 --- a/arch/arm/boot/dts/am57xx-idk-common.dtsi +++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi @@ -363,11 +363,6 @@ ext-clk-src; }; -&mac { - status = "okay"; - dual_emac; -}; - &cpsw_emac0 { phy-handle = <ðphy0>; phy-mode = "rgmii"; diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi index a8ce59a3c88d..a259c63fff06 100644 --- a/arch/arm/boot/dts/aspeed-g5.dtsi +++ b/arch/arm/boot/dts/aspeed-g5.dtsi @@ -401,6 +401,7 @@ interrupts = <8>; clocks = <&syscon ASPEED_CLK_APB>; no-loopback-test; + aspeed,sirq-polarity-sense = <&syscon 0x70 25>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index 09a088f98566..b75af21069f9 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -113,6 +113,7 @@ #address-cells = <1>; #size-cells = <0>; pinctrl-0 = <&emmc_gpio34 &gpclk2_gpio43>; + bus-width = <4>; mmc-pwrseq = <&wifi_pwrseq>; non-removable; status = "okay"; diff --git a/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi b/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi index 7c3cb7ece6cb..925cb37c22f0 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi +++ b/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi @@ -9,6 +9,14 @@ reg = <0 0x40000000>; }; + leds { + /* + * Since there is no upstream GPIO driver yet, + * remove the incomplete node. + */ + /delete-node/ act; + }; + reg_3v3: fixed-regulator { compatible = "regulator-fixed"; regulator-name = "3V3"; diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 53962fdfea21..bbf1c6db50e0 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -3076,6 +3076,58 @@ phys = <&phy_gmii_sel 2>; }; }; + + mac_sw: switch@0 { + compatible = "ti,dra7-cpsw-switch","ti,cpsw-switch"; + reg = <0x0 0x4000>; + ranges = <0 0 0x4000>; + clocks = <&gmac_main_clk>; + clock-names = "fck"; + #address-cells = <1>; + #size-cells = <1>; + syscon = <&scm_conf>; + status = "disabled"; + + interrupts = <GIC_SPI 334 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "rx_thresh", "rx", "tx", "misc"; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + cpsw_port1: port@1 { + reg = <1>; + label = "port1"; + mac-address = [ 00 00 00 00 00 00 ]; + phys = <&phy_gmii_sel 1>; + }; + + cpsw_port2: port@2 { + reg = <2>; + label = "port2"; + mac-address = [ 00 00 00 00 00 00 ]; + phys = <&phy_gmii_sel 2>; + }; + }; + + davinci_mdio_sw: mdio@1000 { + compatible = "ti,cpsw-mdio","ti,davinci_mdio"; + clocks = <&gmac_main_clk>; + clock-names = "fck"; + #address-cells = <1>; + #size-cells = <0>; + bus_freq = <1000000>; + reg = <0x1000 0x100>; + }; + + cpts { + clocks = <&gmac_clkctrl DRA7_GMAC_GMAC_CLKCTRL 25>; + clock-names = "cpts"; + }; + }; }; }; }; diff --git a/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi b/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi index 2a6ce87071f9..9e027b9a5f91 100644 --- a/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi +++ b/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi @@ -328,6 +328,10 @@ pinctrl-0 = <&pinctrl_pwm3>; }; +&snvs_pwrkey { + status = "okay"; +}; + &ssi2 { status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi index f3404dd10537..cf628465cd0a 100644 --- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi @@ -230,6 +230,8 @@ accelerometer@1c { compatible = "fsl,mma8451"; reg = <0x1c>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_mma8451_int>; interrupt-parent = <&gpio6>; interrupts = <31 IRQ_TYPE_LEVEL_LOW>; }; @@ -628,6 +630,12 @@ >; }; + pinctrl_mma8451_int: mma8451intgrp { + fsl,pins = < + MX6QDL_PAD_EIM_BCLK__GPIO6_IO31 0xb0b1 + >; + }; + pinctrl_pwm3: pwm1grp { fsl,pins = < MX6QDL_PAD_SD4_DAT1__PWM3_OUT 0x1b0b1 diff --git a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts index f7a841a28865..2a0a98fe67f0 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts @@ -9,5 +9,5 @@ / { model = "LogicPD Zoom OMAP35xx SOM-LV Development Kit"; - compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3"; + compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3430", "ti,omap3"; }; diff --git a/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts index 7675bc3fa868..57bae2aa910e 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts @@ -9,5 +9,5 @@ / { model = "LogicPD Zoom OMAP35xx Torpedo Development Kit"; - compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap3"; + compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap3430", "ti,omap3"; }; diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index 4c11deb0bc38..3a5228562b0d 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -197,3 +197,7 @@ &twl_gpio { ti,use-leds; }; + +&twl_keypad { + status = "disabled"; +}; diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi index d1eae47b83f6..08bae935605c 100644 --- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi +++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi @@ -43,11 +43,13 @@ compatible = "motorola,mapphone-cpcap-charger"; interrupts-extended = < &cpcap 13 0 &cpcap 12 0 &cpcap 29 0 &cpcap 28 0 - &cpcap 22 0 &cpcap 20 0 &cpcap 19 0 &cpcap 54 0 + &cpcap 22 0 &cpcap 21 0 &cpcap 20 0 &cpcap 19 0 + &cpcap 54 0 >; interrupt-names = "chrg_det", "rvrs_chrg", "chrg_se1b", "se0conn", - "rvrs_mode", "chrgcurr1", "vbusvld", "battdetb"; + "rvrs_mode", "chrgcurr2", "chrgcurr1", "vbusvld", + "battdetb"; mode-gpios = <&gpio3 29 GPIO_ACTIVE_LOW &gpio3 23 GPIO_ACTIVE_LOW>; io-channels = <&cpcap_adc 0 &cpcap_adc 1 diff --git a/arch/arm/boot/dts/mt7629-rfb.dts b/arch/arm/boot/dts/mt7629-rfb.dts index 3621b7d2b22a..9980c10c6e29 100644 --- a/arch/arm/boot/dts/mt7629-rfb.dts +++ b/arch/arm/boot/dts/mt7629-rfb.dts @@ -66,9 +66,21 @@ pinctrl-1 = <&ephy_leds_pins>; status = "okay"; + gmac0: mac@0 { + compatible = "mediatek,eth-mac"; + reg = <0>; + phy-mode = "2500base-x"; + fixed-link { + speed = <2500>; + full-duplex; + pause; + }; + }; + gmac1: mac@1 { compatible = "mediatek,eth-mac"; reg = <1>; + phy-mode = "gmii"; phy-handle = <&phy0>; }; @@ -78,7 +90,6 @@ phy0: ethernet-phy@0 { reg = <0>; - phy-mode = "gmii"; }; }; }; diff --git a/arch/arm/boot/dts/mt7629.dtsi b/arch/arm/boot/dts/mt7629.dtsi index 9608bc2ccb3f..867b88103b9d 100644 --- a/arch/arm/boot/dts/mt7629.dtsi +++ b/arch/arm/boot/dts/mt7629.dtsi @@ -468,14 +468,12 @@ compatible = "mediatek,mt7629-sgmiisys", "syscon"; reg = <0x1b128000 0x3000>; #clock-cells = <1>; - mediatek,physpeed = "2500"; }; sgmiisys1: syscon@1b130000 { compatible = "mediatek,mt7629-sgmiisys", "syscon"; reg = <0x1b130000 0x3000>; #clock-cells = <1>; - mediatek,physpeed = "2500"; }; }; }; diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts index 1aa99fc1487a..125ed933ca75 100644 --- a/arch/arm/boot/dts/omap3-beagle-xm.dts +++ b/arch/arm/boot/dts/omap3-beagle-xm.dts @@ -8,7 +8,7 @@ / { model = "TI OMAP3 BeagleBoard xM"; - compatible = "ti,omap3-beagle-xm", "ti,omap36xx", "ti,omap3"; + compatible = "ti,omap3-beagle-xm", "ti,omap3630", "ti,omap36xx", "ti,omap3"; cpus { cpu@0 { diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts index e3df3c166902..4ed3f93f5841 100644 --- a/arch/arm/boot/dts/omap3-beagle.dts +++ b/arch/arm/boot/dts/omap3-beagle.dts @@ -8,7 +8,7 @@ / { model = "TI OMAP3 BeagleBoard"; - compatible = "ti,omap3-beagle", "ti,omap3"; + compatible = "ti,omap3-beagle", "ti,omap3430", "ti,omap3"; cpus { cpu@0 { diff --git a/arch/arm/boot/dts/omap3-cm-t3530.dts b/arch/arm/boot/dts/omap3-cm-t3530.dts index 76e52c78cbb4..32dbaeaed147 100644 --- a/arch/arm/boot/dts/omap3-cm-t3530.dts +++ b/arch/arm/boot/dts/omap3-cm-t3530.dts @@ -9,7 +9,7 @@ / { model = "CompuLab CM-T3530"; - compatible = "compulab,omap3-cm-t3530", "ti,omap34xx", "ti,omap3"; + compatible = "compulab,omap3-cm-t3530", "ti,omap3430", "ti,omap34xx", "ti,omap3"; /* Regulator to trigger the reset signal of the Wifi module */ mmc2_sdio_reset: regulator-mmc2-sdio-reset { diff --git a/arch/arm/boot/dts/omap3-cm-t3730.dts b/arch/arm/boot/dts/omap3-cm-t3730.dts index 6e944dfa0f3d..683819bf0915 100644 --- a/arch/arm/boot/dts/omap3-cm-t3730.dts +++ b/arch/arm/boot/dts/omap3-cm-t3730.dts @@ -9,7 +9,7 @@ / { model = "CompuLab CM-T3730"; - compatible = "compulab,omap3-cm-t3730", "ti,omap36xx", "ti,omap3"; + compatible = "compulab,omap3-cm-t3730", "ti,omap3630", "ti,omap36xx", "ti,omap3"; wl12xx_vmmc2: wl12xx_vmmc2 { compatible = "regulator-fixed"; diff --git a/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts b/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts index a80fc60bc773..afed85078ad8 100644 --- a/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts +++ b/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts @@ -11,7 +11,7 @@ #include "omap3-devkit8000-lcd-common.dtsi" / { model = "TimLL OMAP3 Devkit8000 with 4.3'' LCD panel"; - compatible = "timll,omap3-devkit8000", "ti,omap3"; + compatible = "timll,omap3-devkit8000", "ti,omap3430", "ti,omap3"; lcd0: display { panel-timing { diff --git a/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts b/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts index 0753776071f8..07c51a105c0d 100644 --- a/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts +++ b/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts @@ -11,7 +11,7 @@ #include "omap3-devkit8000-lcd-common.dtsi" / { model = "TimLL OMAP3 Devkit8000 with 7.0'' LCD panel"; - compatible = "timll,omap3-devkit8000", "ti,omap3"; + compatible = "timll,omap3-devkit8000", "ti,omap3430", "ti,omap3"; lcd0: display { panel-timing { diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts index faafc48d8f61..162d0726b008 100644 --- a/arch/arm/boot/dts/omap3-devkit8000.dts +++ b/arch/arm/boot/dts/omap3-devkit8000.dts @@ -7,7 +7,7 @@ #include "omap3-devkit8000-common.dtsi" / { model = "TimLL OMAP3 Devkit8000"; - compatible = "timll,omap3-devkit8000", "ti,omap3"; + compatible = "timll,omap3-devkit8000", "ti,omap3430", "ti,omap3"; aliases { display1 = &dvi0; diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index b6ef1a7ac8a4..409a758c99f1 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -11,7 +11,7 @@ / { model = "OMAP3 GTA04"; - compatible = "ti,omap3-gta04", "ti,omap36xx", "ti,omap3"; + compatible = "ti,omap3-gta04", "ti,omap3630", "ti,omap36xx", "ti,omap3"; cpus { cpu@0 { diff --git a/arch/arm/boot/dts/omap3-ha-lcd.dts b/arch/arm/boot/dts/omap3-ha-lcd.dts index badb9b3c8897..c9ecbc45c8e2 100644 --- a/arch/arm/boot/dts/omap3-ha-lcd.dts +++ b/arch/arm/boot/dts/omap3-ha-lcd.dts @@ -8,7 +8,7 @@ / { model = "TI OMAP3 HEAD acoustics LCD-baseboard with TAO3530 SOM"; - compatible = "headacoustics,omap3-ha-lcd", "technexion,omap3-tao3530", "ti,omap34xx", "ti,omap3"; + compatible = "headacoustics,omap3-ha-lcd", "technexion,omap3-tao3530", "ti,omap3430", "ti,omap34xx", "ti,omap3"; }; &omap3_pmx_core { diff --git a/arch/arm/boot/dts/omap3-ha.dts b/arch/arm/boot/dts/omap3-ha.dts index a5365252bfbe..35c4e15abeb7 100644 --- a/arch/arm/boot/dts/omap3-ha.dts +++ b/arch/arm/boot/dts/omap3-ha.dts @@ -8,7 +8,7 @@ / { model = "TI OMAP3 HEAD acoustics baseboard with TAO3530 SOM"; - compatible = "headacoustics,omap3-ha", "technexion,omap3-tao3530", "ti,omap34xx", "ti,omap3"; + compatible = "headacoustics,omap3-ha", "technexion,omap3-tao3530", "ti,omap3430", "ti,omap34xx", "ti,omap3"; }; &omap3_pmx_core { diff --git a/arch/arm/boot/dts/omap3-igep0020-rev-f.dts b/arch/arm/boot/dts/omap3-igep0020-rev-f.dts index 001decc20b3d..567232584f08 100644 --- a/arch/arm/boot/dts/omap3-igep0020-rev-f.dts +++ b/arch/arm/boot/dts/omap3-igep0020-rev-f.dts @@ -10,7 +10,7 @@ / { model = "IGEPv2 Rev. F (TI OMAP AM/DM37x)"; - compatible = "isee,omap3-igep0020-rev-f", "ti,omap36xx", "ti,omap3"; + compatible = "isee,omap3-igep0020-rev-f", "ti,omap3630", "ti,omap36xx", "ti,omap3"; /* Regulator to trigger the WL_EN signal of the Wifi module */ lbep5clwmc_wlen: regulator-lbep5clwmc-wlen { diff --git a/arch/arm/boot/dts/omap3-igep0020.dts b/arch/arm/boot/dts/omap3-igep0020.dts index 6d0519e3dfd0..e341535a7162 100644 --- a/arch/arm/boot/dts/omap3-igep0020.dts +++ b/arch/arm/boot/dts/omap3-igep0020.dts @@ -10,7 +10,7 @@ / { model = "IGEPv2 Rev. C (TI OMAP AM/DM37x)"; - compatible = "isee,omap3-igep0020", "ti,omap36xx", "ti,omap3"; + compatible = "isee,omap3-igep0020", "ti,omap3630", "ti,omap36xx", "ti,omap3"; vmmcsdio_fixed: fixedregulator-mmcsdio { compatible = "regulator-fixed"; diff --git a/arch/arm/boot/dts/omap3-igep0030-rev-g.dts b/arch/arm/boot/dts/omap3-igep0030-rev-g.dts index 9a8975799e16..df6ba1219830 100644 --- a/arch/arm/boot/dts/omap3-igep0030-rev-g.dts +++ b/arch/arm/boot/dts/omap3-igep0030-rev-g.dts @@ -10,7 +10,7 @@ / { model = "IGEP COM MODULE Rev. G (TI OMAP AM/DM37x)"; - compatible = "isee,omap3-igep0030-rev-g", "ti,omap36xx", "ti,omap3"; + compatible = "isee,omap3-igep0030-rev-g", "ti,omap3630", "ti,omap36xx", "ti,omap3"; /* Regulator to trigger the WL_EN signal of the Wifi module */ lbep5clwmc_wlen: regulator-lbep5clwmc-wlen { diff --git a/arch/arm/boot/dts/omap3-igep0030.dts b/arch/arm/boot/dts/omap3-igep0030.dts index 25170bd3c573..32f31035daa2 100644 --- a/arch/arm/boot/dts/omap3-igep0030.dts +++ b/arch/arm/boot/dts/omap3-igep0030.dts @@ -10,7 +10,7 @@ / { model = "IGEP COM MODULE Rev. E (TI OMAP AM/DM37x)"; - compatible = "isee,omap3-igep0030", "ti,omap36xx", "ti,omap3"; + compatible = "isee,omap3-igep0030", "ti,omap3630", "ti,omap36xx", "ti,omap3"; vmmcsdio_fixed: fixedregulator-mmcsdio { compatible = "regulator-fixed"; diff --git a/arch/arm/boot/dts/omap3-ldp.dts b/arch/arm/boot/dts/omap3-ldp.dts index 9a5fde2d9bce..ec9ba04ef43b 100644 --- a/arch/arm/boot/dts/omap3-ldp.dts +++ b/arch/arm/boot/dts/omap3-ldp.dts @@ -10,7 +10,7 @@ / { model = "TI OMAP3430 LDP (Zoom1 Labrador)"; - compatible = "ti,omap3-ldp", "ti,omap3"; + compatible = "ti,omap3-ldp", "ti,omap3430", "ti,omap3"; memory@80000000 { device_type = "memory"; diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi index c22833d4e568..73d477898ec2 100644 --- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi +++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi @@ -7,7 +7,7 @@ / { model = "INCOstartec LILLY-A83X module (DM3730)"; - compatible = "incostartec,omap3-lilly-a83x", "ti,omap36xx", "ti,omap3"; + compatible = "incostartec,omap3-lilly-a83x", "ti,omap3630", "ti,omap36xx", "ti,omap3"; chosen { bootargs = "console=ttyO0,115200n8 vt.global_cursor_default=0 consoleblank=0"; diff --git a/arch/arm/boot/dts/omap3-lilly-dbb056.dts b/arch/arm/boot/dts/omap3-lilly-dbb056.dts index fec335400074..ecb4ef738e07 100644 --- a/arch/arm/boot/dts/omap3-lilly-dbb056.dts +++ b/arch/arm/boot/dts/omap3-lilly-dbb056.dts @@ -8,7 +8,7 @@ / { model = "INCOstartec LILLY-DBB056 (DM3730)"; - compatible = "incostartec,omap3-lilly-dbb056", "incostartec,omap3-lilly-a83x", "ti,omap36xx", "ti,omap3"; + compatible = "incostartec,omap3-lilly-dbb056", "incostartec,omap3-lilly-a83x", "ti,omap3630", "ti,omap36xx", "ti,omap3"; }; &twl { diff --git a/arch/arm/boot/dts/omap3-n9.dts b/arch/arm/boot/dts/omap3-n9.dts index 74c0ff2350d3..2495a696cec6 100644 --- a/arch/arm/boot/dts/omap3-n9.dts +++ b/arch/arm/boot/dts/omap3-n9.dts @@ -12,7 +12,7 @@ / { model = "Nokia N9"; - compatible = "nokia,omap3-n9", "ti,omap36xx", "ti,omap3"; + compatible = "nokia,omap3-n9", "ti,omap3630", "ti,omap36xx", "ti,omap3"; }; &i2c2 { diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index e1286510fdf8..a638e059135b 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -155,6 +155,12 @@ pwms = <&pwm9 0 26316 0>; /* 38000 Hz */ }; + rom_rng: rng { + compatible = "nokia,n900-rom-rng"; + clocks = <&rng_ick>; + clock-names = "ick"; + }; + /* controlled (enabled/disabled) directly by bcm2048 and wl1251 */ vctcxo: vctcxo { compatible = "fixed-clock"; diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index 6681d4519e97..a075b63f3087 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -11,13 +11,6 @@ cpus { cpu@0 { cpu0-supply = <&vcc>; - operating-points = < - /* kHz uV */ - 300000 1012500 - 600000 1200000 - 800000 1325000 - 1000000 1375000 - >; }; }; diff --git a/arch/arm/boot/dts/omap3-n950.dts b/arch/arm/boot/dts/omap3-n950.dts index 9886bf8b90ab..31d47a1fad84 100644 --- a/arch/arm/boot/dts/omap3-n950.dts +++ b/arch/arm/boot/dts/omap3-n950.dts @@ -12,7 +12,7 @@ / { model = "Nokia N950"; - compatible = "nokia,omap3-n950", "ti,omap36xx", "ti,omap3"; + compatible = "nokia,omap3-n950", "ti,omap3630", "ti,omap36xx", "ti,omap3"; keys { compatible = "gpio-keys"; diff --git a/arch/arm/boot/dts/omap3-overo-storm-alto35.dts b/arch/arm/boot/dts/omap3-overo-storm-alto35.dts index 18338576c41d..7f04dfad8203 100644 --- a/arch/arm/boot/dts/omap3-overo-storm-alto35.dts +++ b/arch/arm/boot/dts/omap3-overo-storm-alto35.dts @@ -14,5 +14,5 @@ / { model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Alto35"; - compatible = "gumstix,omap3-overo-alto35", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3"; + compatible = "gumstix,omap3-overo-alto35", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3"; }; diff --git a/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts b/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts index f204c8af8281..bc5a04e03336 100644 --- a/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts +++ b/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts @@ -14,7 +14,7 @@ / { model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Chestnut43"; - compatible = "gumstix,omap3-overo-chestnut43", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3"; + compatible = "gumstix,omap3-overo-chestnut43", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3"; }; &omap3_pmx_core2 { diff --git a/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts b/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts index c633f7cee68e..065c31cbf0e2 100644 --- a/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts +++ b/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts @@ -14,7 +14,7 @@ / { model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Gallop43"; - compatible = "gumstix,omap3-overo-gallop43", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3"; + compatible = "gumstix,omap3-overo-gallop43", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3"; }; &omap3_pmx_core2 { diff --git a/arch/arm/boot/dts/omap3-overo-storm-palo35.dts b/arch/arm/boot/dts/omap3-overo-storm-palo35.dts index fb88ebc9858c..e38c1c51392c 100644 --- a/arch/arm/boot/dts/omap3-overo-storm-palo35.dts +++ b/arch/arm/boot/dts/omap3-overo-storm-palo35.dts @@ -14,7 +14,7 @@ / { model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Palo35"; - compatible = "gumstix,omap3-overo-palo35", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3"; + compatible = "gumstix,omap3-overo-palo35", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3"; }; &omap3_pmx_core2 { diff --git a/arch/arm/boot/dts/omap3-overo-storm-palo43.dts b/arch/arm/boot/dts/omap3-overo-storm-palo43.dts index 76cca00d97b6..e6dc23159c4d 100644 --- a/arch/arm/boot/dts/omap3-overo-storm-palo43.dts +++ b/arch/arm/boot/dts/omap3-overo-storm-palo43.dts @@ -14,7 +14,7 @@ / { model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Palo43"; - compatible = "gumstix,omap3-overo-palo43", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3"; + compatible = "gumstix,omap3-overo-palo43", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3"; }; &omap3_pmx_core2 { diff --git a/arch/arm/boot/dts/omap3-overo-storm-summit.dts b/arch/arm/boot/dts/omap3-overo-storm-summit.dts index cc081a9e4c1e..587c08ce282d 100644 --- a/arch/arm/boot/dts/omap3-overo-storm-summit.dts +++ b/arch/arm/boot/dts/omap3-overo-storm-summit.dts @@ -14,7 +14,7 @@ / { model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Summit"; - compatible = "gumstix,omap3-overo-summit", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3"; + compatible = "gumstix,omap3-overo-summit", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3"; }; &omap3_pmx_core2 { diff --git a/arch/arm/boot/dts/omap3-overo-storm-tobi.dts b/arch/arm/boot/dts/omap3-overo-storm-tobi.dts index 1de41c0826e0..f57de6010994 100644 --- a/arch/arm/boot/dts/omap3-overo-storm-tobi.dts +++ b/arch/arm/boot/dts/omap3-overo-storm-tobi.dts @@ -14,6 +14,6 @@ / { model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Tobi"; - compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3"; + compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3"; }; diff --git a/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts b/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts index 9ed13118ed8e..281af6c113be 100644 --- a/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts +++ b/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts @@ -14,5 +14,5 @@ / { model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on TobiDuo"; - compatible = "gumstix,omap3-overo-tobiduo", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3"; + compatible = "gumstix,omap3-overo-tobiduo", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3"; }; diff --git a/arch/arm/boot/dts/omap3-pandora-1ghz.dts b/arch/arm/boot/dts/omap3-pandora-1ghz.dts index 81b957f33c9f..ea509956d7ac 100644 --- a/arch/arm/boot/dts/omap3-pandora-1ghz.dts +++ b/arch/arm/boot/dts/omap3-pandora-1ghz.dts @@ -16,7 +16,7 @@ / { model = "Pandora Handheld Console 1GHz"; - compatible = "openpandora,omap3-pandora-1ghz", "ti,omap36xx", "ti,omap3"; + compatible = "openpandora,omap3-pandora-1ghz", "ti,omap3630", "ti,omap36xx", "ti,omap3"; }; &omap3_pmx_core2 { diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi index ec5891718ae6..150d5be42d27 100644 --- a/arch/arm/boot/dts/omap3-pandora-common.dtsi +++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi @@ -226,6 +226,17 @@ gpio = <&gpio6 4 GPIO_ACTIVE_HIGH>; /* GPIO_164 */ }; + /* wl1251 wifi+bt module */ + wlan_en: fixed-regulator-wg7210_en { + compatible = "regulator-fixed"; + regulator-name = "vwlan"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + startup-delay-us = <50000>; + enable-active-high; + gpio = <&gpio1 23 GPIO_ACTIVE_HIGH>; + }; + /* wg7210 (wifi+bt module) 32k clock buffer */ wg7210_32k: fixed-regulator-wg7210_32k { compatible = "regulator-fixed"; @@ -522,9 +533,30 @@ /*wp-gpios = <&gpio4 31 GPIO_ACTIVE_HIGH>;*/ /* GPIO_127 */ }; -/* mmc3 is probed using pdata-quirks to pass wl1251 card data */ &mmc3 { - status = "disabled"; + vmmc-supply = <&wlan_en>; + + bus-width = <4>; + non-removable; + ti,non-removable; + cap-power-off-card; + + pinctrl-names = "default"; + pinctrl-0 = <&mmc3_pins>; + + #address-cells = <1>; + #size-cells = <0>; + + wlan: wifi@1 { + compatible = "ti,wl1251"; + + reg = <1>; + + interrupt-parent = <&gpio1>; + interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; /* GPIO_21 */ + + ti,wl1251-has-eeprom; + }; }; /* bluetooth*/ diff --git a/arch/arm/boot/dts/omap3-sbc-t3530.dts b/arch/arm/boot/dts/omap3-sbc-t3530.dts index ae96002abb3b..24bf3fd86641 100644 --- a/arch/arm/boot/dts/omap3-sbc-t3530.dts +++ b/arch/arm/boot/dts/omap3-sbc-t3530.dts @@ -8,7 +8,7 @@ / { model = "CompuLab SBC-T3530 with CM-T3530"; - compatible = "compulab,omap3-sbc-t3530", "compulab,omap3-cm-t3530", "ti,omap34xx", "ti,omap3"; + compatible = "compulab,omap3-sbc-t3530", "compulab,omap3-cm-t3530", "ti,omap3430", "ti,omap34xx", "ti,omap3"; aliases { display0 = &dvi0; diff --git a/arch/arm/boot/dts/omap3-sbc-t3730.dts b/arch/arm/boot/dts/omap3-sbc-t3730.dts index 7de6df16fc17..eb3893b9535e 100644 --- a/arch/arm/boot/dts/omap3-sbc-t3730.dts +++ b/arch/arm/boot/dts/omap3-sbc-t3730.dts @@ -8,7 +8,7 @@ / { model = "CompuLab SBC-T3730 with CM-T3730"; - compatible = "compulab,omap3-sbc-t3730", "compulab,omap3-cm-t3730", "ti,omap36xx", "ti,omap3"; + compatible = "compulab,omap3-sbc-t3730", "compulab,omap3-cm-t3730", "ti,omap3630", "ti,omap36xx", "ti,omap3"; aliases { display0 = &dvi0; diff --git a/arch/arm/boot/dts/omap3-sniper.dts b/arch/arm/boot/dts/omap3-sniper.dts index 40a87330e8c3..b6879cdc5c13 100644 --- a/arch/arm/boot/dts/omap3-sniper.dts +++ b/arch/arm/boot/dts/omap3-sniper.dts @@ -9,7 +9,7 @@ / { model = "LG Optimus Black"; - compatible = "lg,omap3-sniper", "ti,omap36xx", "ti,omap3"; + compatible = "lg,omap3-sniper", "ti,omap3630", "ti,omap36xx", "ti,omap3"; cpus { cpu@0 { diff --git a/arch/arm/boot/dts/omap3-thunder.dts b/arch/arm/boot/dts/omap3-thunder.dts index 6276e7079b36..64221e3b3477 100644 --- a/arch/arm/boot/dts/omap3-thunder.dts +++ b/arch/arm/boot/dts/omap3-thunder.dts @@ -8,7 +8,7 @@ / { model = "TI OMAP3 Thunder baseboard with TAO3530 SOM"; - compatible = "technexion,omap3-thunder", "technexion,omap3-tao3530", "ti,omap34xx", "ti,omap3"; + compatible = "technexion,omap3-thunder", "technexion,omap3-tao3530", "ti,omap3430", "ti,omap34xx", "ti,omap3"; }; &omap3_pmx_core { diff --git a/arch/arm/boot/dts/omap3-zoom3.dts b/arch/arm/boot/dts/omap3-zoom3.dts index db3a2fe84e99..d240e39f2151 100644 --- a/arch/arm/boot/dts/omap3-zoom3.dts +++ b/arch/arm/boot/dts/omap3-zoom3.dts @@ -9,7 +9,7 @@ / { model = "TI Zoom3"; - compatible = "ti,omap3-zoom3", "ti,omap36xx", "ti,omap3"; + compatible = "ti,omap3-zoom3", "ti,omap3630", "ti,omap36xx", "ti,omap3"; cpus { cpu@0 { diff --git a/arch/arm/boot/dts/omap3430-sdp.dts b/arch/arm/boot/dts/omap3430-sdp.dts index 0abd61108a53..7bfde8aac7ae 100644 --- a/arch/arm/boot/dts/omap3430-sdp.dts +++ b/arch/arm/boot/dts/omap3430-sdp.dts @@ -8,7 +8,7 @@ / { model = "TI OMAP3430 SDP"; - compatible = "ti,omap3430-sdp", "ti,omap3"; + compatible = "ti,omap3430-sdp", "ti,omap3430", "ti,omap3"; memory@80000000 { device_type = "memory"; diff --git a/arch/arm/boot/dts/omap34xx.dtsi b/arch/arm/boot/dts/omap34xx.dtsi index 7b09cbee8bb8..c4dd9801840d 100644 --- a/arch/arm/boot/dts/omap34xx.dtsi +++ b/arch/arm/boot/dts/omap34xx.dtsi @@ -16,19 +16,67 @@ / { cpus { cpu: cpu@0 { - /* OMAP343x/OMAP35xx variants OPP1-5 */ - operating-points = < - /* kHz uV */ - 125000 975000 - 250000 1075000 - 500000 1200000 - 550000 1270000 - 600000 1350000 - >; + /* OMAP343x/OMAP35xx variants OPP1-6 */ + operating-points-v2 = <&cpu0_opp_table>; + clock-latency = <300000>; /* From legacy driver */ }; }; + /* see Documentation/devicetree/bindings/opp/opp.txt */ + cpu0_opp_table: opp-table { + compatible = "operating-points-v2-ti-cpu"; + syscon = <&scm_conf>; + + opp1-125000000 { + opp-hz = /bits/ 64 <125000000>; + /* + * we currently only select the max voltage from table + * Table 3-3 of the omap3530 Data sheet (SPRS507F). + * Format is: <target min max> + */ + opp-microvolt = <975000 975000 975000>; + /* + * first value is silicon revision bit mask + * second one 720MHz Device Identification bit mask + */ + opp-supported-hw = <0xffffffff 3>; + }; + + opp2-250000000 { + opp-hz = /bits/ 64 <250000000>; + opp-microvolt = <1075000 1075000 1075000>; + opp-supported-hw = <0xffffffff 3>; + opp-suspend; + }; + + opp3-500000000 { + opp-hz = /bits/ 64 <500000000>; + opp-microvolt = <1200000 1200000 1200000>; + opp-supported-hw = <0xffffffff 3>; + }; + + opp4-550000000 { + opp-hz = /bits/ 64 <550000000>; + opp-microvolt = <1275000 1275000 1275000>; + opp-supported-hw = <0xffffffff 3>; + }; + + opp5-600000000 { + opp-hz = /bits/ 64 <600000000>; + opp-microvolt = <1350000 1350000 1350000>; + opp-supported-hw = <0xffffffff 3>; + }; + + opp6-720000000 { + opp-hz = /bits/ 64 <720000000>; + opp-microvolt = <1350000 1350000 1350000>; + /* only high-speed grade omap3530 devices */ + opp-supported-hw = <0xffffffff 2>; + turbo-mode; + }; + }; + ocp@68000000 { omap3_pmx_core2: pinmux@480025d8 { compatible = "ti,omap3-padconf", "pinctrl-single"; diff --git a/arch/arm/boot/dts/omap36xx-clocks.dtsi b/arch/arm/boot/dts/omap36xx-clocks.dtsi index e66fc57ec35d..4e9cc9003594 100644 --- a/arch/arm/boot/dts/omap36xx-clocks.dtsi +++ b/arch/arm/boot/dts/omap36xx-clocks.dtsi @@ -105,3 +105,7 @@ <&mcbsp4_ick>, <&uart4_fck>; }; }; + +&dpll4_m4_ck { + ti,max-div = <31>; +}; diff --git a/arch/arm/boot/dts/omap36xx.dtsi b/arch/arm/boot/dts/omap36xx.dtsi index 1e552f08f120..c618cb257d00 100644 --- a/arch/arm/boot/dts/omap36xx.dtsi +++ b/arch/arm/boot/dts/omap36xx.dtsi @@ -19,16 +19,65 @@ }; cpus { - /* OMAP3630/OMAP37xx 'standard device' variants OPP50 to OPP130 */ + /* OMAP3630/OMAP37xx variants OPP50 to OPP130 and OPP1G */ cpu: cpu@0 { - operating-points = < - /* kHz uV */ - 300000 1012500 - 600000 1200000 - 800000 1325000 - >; - clock-latency = <300000>; /* From legacy driver */ + operating-points-v2 = <&cpu0_opp_table>; + + vbb-supply = <&abb_mpu_iva>; + clock-latency = <300000>; /* From omap-cpufreq driver */ + }; + }; + + /* see Documentation/devicetree/bindings/opp/opp.txt */ + cpu0_opp_table: opp-table { + compatible = "operating-points-v2-ti-cpu"; + syscon = <&scm_conf>; + + opp50-300000000 { + opp-hz = /bits/ 64 <300000000>; + /* + * we currently only select the max voltage from table + * Table 4-19 of the DM3730 Data sheet (SPRS685B) + * Format is: cpu0-supply: <target min max> + * vbb-supply: <target min max> + */ + opp-microvolt = <1012500 1012500 1012500>, + <1012500 1012500 1012500>; + /* + * first value is silicon revision bit mask + * second one is "speed binned" bit mask + */ + opp-supported-hw = <0xffffffff 3>; + opp-suspend; + }; + + opp100-600000000 { + opp-hz = /bits/ 64 <600000000>; + opp-microvolt = <1200000 1200000 1200000>, + <1200000 1200000 1200000>; + opp-supported-hw = <0xffffffff 3>; + }; + + opp130-800000000 { + opp-hz = /bits/ 64 <800000000>; + opp-microvolt = <1325000 1325000 1325000>, + <1325000 1325000 1325000>; + opp-supported-hw = <0xffffffff 3>; }; + + opp1g-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + opp-microvolt = <1375000 1375000 1375000>, + <1375000 1375000 1375000>; + /* only on am/dm37x with speed-binned bit set */ + opp-supported-hw = <0xffffffff 2>; + turbo-mode; + }; + }; + + opp_supply_mpu_iva: opp_supply { + compatible = "ti,omap-opp-supply"; + ti,absolute-max-voltage-uv = <1375000>; }; ocp@68000000 { diff --git a/arch/arm/boot/dts/omap3xxx-clocks.dtsi b/arch/arm/boot/dts/omap3xxx-clocks.dtsi index 685c82a9d03e..0656c32439d2 100644 --- a/arch/arm/boot/dts/omap3xxx-clocks.dtsi +++ b/arch/arm/boot/dts/omap3xxx-clocks.dtsi @@ -416,7 +416,7 @@ #clock-cells = <0>; compatible = "ti,divider-clock"; clocks = <&dpll4_ck>; - ti,max-div = <32>; + ti,max-div = <16>; reg = <0x0e40>; ti,index-starts-at-one; }; diff --git a/arch/arm/boot/dts/omap54xx-clocks.dtsi b/arch/arm/boot/dts/omap54xx-clocks.dtsi index fac2e57dcca9..4791834dacb2 100644 --- a/arch/arm/boot/dts/omap54xx-clocks.dtsi +++ b/arch/arm/boot/dts/omap54xx-clocks.dtsi @@ -1146,7 +1146,7 @@ }; }; - gpu_cm: clock-controller@1500 { + gpu_cm: gpu_cm@1500 { compatible = "ti,omap4-cm"; reg = <0x1500 0x100>; #address-cells = <1>; diff --git a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi index 1e45b75e24bf..3d1ecb408b03 100644 --- a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi +++ b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi @@ -637,13 +637,13 @@ <STM32_PINMUX('F', 6, AF9)>; /* QSPI_BK1_IO3 */ bias-disable; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; pins2 { pinmux = <STM32_PINMUX('B', 6, AF10)>; /* QSPI_BK1_NCS */ bias-pull-up; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; }; @@ -665,13 +665,13 @@ <STM32_PINMUX('G', 7, AF11)>; /* QSPI_BK2_IO3 */ bias-disable; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; pins2 { pinmux = <STM32_PINMUX('C', 0, AF10)>; /* QSPI_BK2_NCS */ bias-pull-up; drive-push-pull; - slew-rate = <3>; + slew-rate = <1>; }; }; diff --git a/arch/arm/boot/dts/stm32mp157c-ev1.dts b/arch/arm/boot/dts/stm32mp157c-ev1.dts index 2baae5f25e2c..3789312c8539 100644 --- a/arch/arm/boot/dts/stm32mp157c-ev1.dts +++ b/arch/arm/boot/dts/stm32mp157c-ev1.dts @@ -182,14 +182,12 @@ ov5640: camera@3c { compatible = "ovti,ov5640"; - pinctrl-names = "default"; - pinctrl-0 = <&ov5640_pins>; reg = <0x3c>; clocks = <&clk_ext_camera>; clock-names = "xclk"; DOVDD-supply = <&v2v8>; - powerdown-gpios = <&stmfx_pinctrl 18 GPIO_ACTIVE_HIGH>; - reset-gpios = <&stmfx_pinctrl 19 GPIO_ACTIVE_LOW>; + powerdown-gpios = <&stmfx_pinctrl 18 (GPIO_ACTIVE_HIGH | GPIO_PUSH_PULL)>; + reset-gpios = <&stmfx_pinctrl 19 (GPIO_ACTIVE_LOW | GPIO_PUSH_PULL)>; rotation = <180>; status = "okay"; @@ -222,15 +220,8 @@ joystick_pins: joystick { pins = "gpio0", "gpio1", "gpio2", "gpio3", "gpio4"; - drive-push-pull; bias-pull-down; }; - - ov5640_pins: camera { - pins = "agpio2", "agpio3"; /* stmfx pins 18 & 19 */ - drive-push-pull; - output-low; - }; }; }; }; diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index e0f3d4c62b4f..ed8b258256d7 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -932,7 +932,7 @@ interrupt-names = "int0", "int1"; clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>; clock-names = "hclk", "cclk"; - bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>; + bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>; status = "disabled"; }; @@ -945,7 +945,7 @@ interrupt-names = "int0", "int1"; clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>; clock-names = "hclk", "cclk"; - bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>; + bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index ce823c44e98a..4c268b70b735 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -520,6 +520,7 @@ interrupts = <39>; clocks = <&ccu CLK_AHB_EHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -529,6 +530,7 @@ interrupts = <64>; clocks = <&ccu CLK_USB_OHCI0>, <&ccu CLK_AHB_OHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -608,6 +610,7 @@ interrupts = <40>; clocks = <&ccu CLK_AHB_EHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -617,6 +620,7 @@ interrupts = <65>; clocks = <&ccu CLK_USB_OHCI1>, <&ccu CLK_AHB_OHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun5i.dtsi b/arch/arm/boot/dts/sun5i.dtsi index cfb1efc8828c..6befa236ba99 100644 --- a/arch/arm/boot/dts/sun5i.dtsi +++ b/arch/arm/boot/dts/sun5i.dtsi @@ -391,6 +391,7 @@ interrupts = <39>; clocks = <&ccu CLK_AHB_EHCI>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -400,6 +401,7 @@ interrupts = <40>; clocks = <&ccu CLK_USB_OHCI>, <&ccu CLK_AHB_OHCI>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index 6a5033785a8b..2cf34ae1c17b 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -544,6 +544,7 @@ clocks = <&ccu CLK_AHB1_EHCI0>; resets = <&ccu RST_AHB1_EHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -554,6 +555,7 @@ clocks = <&ccu CLK_AHB1_OHCI0>, <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_AHB1_OHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -564,6 +566,7 @@ clocks = <&ccu CLK_AHB1_EHCI1>; resets = <&ccu RST_AHB1_EHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -574,6 +577,7 @@ clocks = <&ccu CLK_AHB1_OHCI1>, <&ccu CLK_USB_OHCI1>; resets = <&ccu RST_AHB1_OHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 31631a3504c0..8aebefd6accf 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -622,6 +622,7 @@ interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_AHB_EHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -631,6 +632,7 @@ interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_USB_OHCI0>, <&ccu CLK_AHB_OHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -713,6 +715,7 @@ interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_AHB_EHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -722,6 +725,7 @@ interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>; clocks = <&ccu CLK_USB_OHCI1>, <&ccu CLK_AHB_OHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi index 52eed0ae3607..f292f96ab39b 100644 --- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi +++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi @@ -307,6 +307,7 @@ clocks = <&ccu CLK_BUS_EHCI>; resets = <&ccu RST_BUS_EHCI>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -317,6 +318,7 @@ clocks = <&ccu CLK_BUS_OHCI>, <&ccu CLK_USB_OHCI>; resets = <&ccu RST_BUS_OHCI>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi b/arch/arm/boot/dts/sun8i-a83t.dtsi index 52e467c6808c..53c38deb8a08 100644 --- a/arch/arm/boot/dts/sun8i-a83t.dtsi +++ b/arch/arm/boot/dts/sun8i-a83t.dtsi @@ -641,6 +641,7 @@ clocks = <&ccu CLK_BUS_EHCI0>; resets = <&ccu RST_BUS_EHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -652,6 +653,7 @@ clocks = <&ccu CLK_BUS_OHCI0>, <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_BUS_OHCI0>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -663,6 +665,7 @@ clocks = <&ccu CLK_BUS_EHCI1>; resets = <&ccu RST_BUS_EHCI1>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi index 98f288244af9..421dfbbfd7ee 100644 --- a/arch/arm/boot/dts/sun8i-r40.dtsi +++ b/arch/arm/boot/dts/sun8i-r40.dtsi @@ -282,6 +282,7 @@ clocks = <&ccu CLK_BUS_EHCI1>; resets = <&ccu RST_BUS_EHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -293,6 +294,7 @@ <&ccu CLK_USB_OHCI1>; resets = <&ccu RST_BUS_OHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -303,6 +305,7 @@ clocks = <&ccu CLK_BUS_EHCI2>; resets = <&ccu RST_BUS_EHCI2>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -314,6 +317,7 @@ <&ccu CLK_USB_OHCI2>; resets = <&ccu RST_BUS_OHCI2>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi index 0a0906072e76..1d900f591d5f 100644 --- a/arch/arm/boot/dts/sun9i-a80.dtsi +++ b/arch/arm/boot/dts/sun9i-a80.dtsi @@ -346,6 +346,7 @@ clocks = <&usb_clocks CLK_BUS_HCI0>; resets = <&usb_clocks RST_USB0_HCI>; phys = <&usbphy1>; + phy-names = "usb"; status = "disabled"; }; @@ -357,6 +358,7 @@ <&usb_clocks CLK_USB_OHCI0>; resets = <&usb_clocks RST_USB0_HCI>; phys = <&usbphy1>; + phy-names = "usb"; status = "disabled"; }; @@ -378,6 +380,7 @@ clocks = <&usb_clocks CLK_BUS_HCI1>; resets = <&usb_clocks RST_USB1_HCI>; phys = <&usbphy2>; + phy-names = "usb"; status = "disabled"; }; @@ -407,6 +410,7 @@ clocks = <&usb_clocks CLK_BUS_HCI2>; resets = <&usb_clocks RST_USB2_HCI>; phys = <&usbphy3>; + phy-names = "usb"; status = "disabled"; }; @@ -418,6 +422,7 @@ <&usb_clocks CLK_USB_OHCI2>; resets = <&usb_clocks RST_USB2_HCI>; phys = <&usbphy3>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi index 510f83fb234b..0afea59486c2 100644 --- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi +++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi @@ -305,6 +305,7 @@ clocks = <&ccu CLK_BUS_EHCI1>, <&ccu CLK_BUS_OHCI1>; resets = <&ccu RST_BUS_EHCI1>, <&ccu RST_BUS_OHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -316,6 +317,7 @@ <&ccu CLK_USB_OHCI1>; resets = <&ccu RST_BUS_EHCI1>, <&ccu RST_BUS_OHCI1>; phys = <&usbphy 1>; + phy-names = "usb"; status = "disabled"; }; @@ -326,6 +328,7 @@ clocks = <&ccu CLK_BUS_EHCI2>, <&ccu CLK_BUS_OHCI2>; resets = <&ccu RST_BUS_EHCI2>, <&ccu RST_BUS_OHCI2>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -337,6 +340,7 @@ <&ccu CLK_USB_OHCI2>; resets = <&ccu RST_BUS_EHCI2>, <&ccu RST_BUS_OHCI2>; phys = <&usbphy 2>; + phy-names = "usb"; status = "disabled"; }; @@ -347,6 +351,7 @@ clocks = <&ccu CLK_BUS_EHCI3>, <&ccu CLK_BUS_OHCI3>; resets = <&ccu RST_BUS_EHCI3>, <&ccu RST_BUS_OHCI3>; phys = <&usbphy 3>; + phy-names = "usb"; status = "disabled"; }; @@ -358,6 +363,7 @@ <&ccu CLK_USB_OHCI3>; resets = <&ccu RST_BUS_EHCI3>, <&ccu RST_BUS_OHCI3>; phys = <&usbphy 3>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm/configs/axm55xx_defconfig b/arch/arm/configs/axm55xx_defconfig index 31bfe1647d28..f53634af014b 100644 --- a/arch/arm/configs/axm55xx_defconfig +++ b/arch/arm/configs/axm55xx_defconfig @@ -20,7 +20,6 @@ CONFIG_NAMESPACES=y CONFIG_SCHED_AUTOGROUP=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y -CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y diff --git a/arch/arm/configs/badge4_defconfig b/arch/arm/configs/badge4_defconfig index 5ae5b5228467..ef484c4cfd1a 100644 --- a/arch/arm/configs/badge4_defconfig +++ b/arch/arm/configs/badge4_defconfig @@ -91,7 +91,6 @@ CONFIG_USB_SERIAL_PL2303=m CONFIG_USB_SERIAL_CYBERJACK=m CONFIG_USB_SERIAL_XIRCOM=m CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_RIO500=m CONFIG_EXT2_FS=m CONFIG_EXT3_FS=m CONFIG_MSDOS_FS=y diff --git a/arch/arm/configs/corgi_defconfig b/arch/arm/configs/corgi_defconfig index e4f6442588e7..4fec2ec379ad 100644 --- a/arch/arm/configs/corgi_defconfig +++ b/arch/arm/configs/corgi_defconfig @@ -195,7 +195,6 @@ CONFIG_USB_SERIAL_XIRCOM=m CONFIG_USB_SERIAL_OMNINET=m CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m -CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m CONFIG_USB_CYTHERM=m diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index 01e3c0f4be92..231f8973bbb2 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -167,6 +167,7 @@ CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_DA8XX=y CONFIG_BACKLIGHT_PWM=m +CONFIG_BACKLIGHT_GPIO=m CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig index 6a9f47d7f072..11e2211f9007 100644 --- a/arch/arm/configs/keystone_defconfig +++ b/arch/arm/configs/keystone_defconfig @@ -11,7 +11,6 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_SCHED=y CONFIG_BLK_CGROUP=y CONFIG_BLK_DEV_INITRD=y -CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS_ALL=y # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set diff --git a/arch/arm/configs/lpc32xx_defconfig b/arch/arm/configs/lpc32xx_defconfig index 09deb57db942..989bcc84e7fb 100644 --- a/arch/arm/configs/lpc32xx_defconfig +++ b/arch/arm/configs/lpc32xx_defconfig @@ -9,7 +9,6 @@ CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_SLAB=y # CONFIG_ARCH_MULTI_V7 is not set diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig index 9b98761e51c9..45d27190c9c9 100644 --- a/arch/arm/configs/moxart_defconfig +++ b/arch/arm/configs/moxart_defconfig @@ -4,7 +4,6 @@ CONFIG_SYSVIPC=y CONFIG_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y -CONFIG_SYSCTL_SYSCALL=y # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set # CONFIG_SIGNALFD is not set diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index d3f50971e451..89cce8d4bc6b 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -356,15 +356,15 @@ CONFIG_DRM_OMAP_CONNECTOR_HDMI=m CONFIG_DRM_OMAP_CONNECTOR_ANALOG_TV=m CONFIG_DRM_OMAP_PANEL_DPI=m CONFIG_DRM_OMAP_PANEL_DSI_CM=m -CONFIG_DRM_OMAP_PANEL_SONY_ACX565AKM=m -CONFIG_DRM_OMAP_PANEL_LGPHILIPS_LB035Q02=m -CONFIG_DRM_OMAP_PANEL_SHARP_LS037V7DW01=m -CONFIG_DRM_OMAP_PANEL_TPO_TD028TTEC1=m -CONFIG_DRM_OMAP_PANEL_TPO_TD043MTEA1=m -CONFIG_DRM_OMAP_PANEL_NEC_NL8048HL11=m CONFIG_DRM_TILCDC=m CONFIG_DRM_PANEL_SIMPLE=m CONFIG_DRM_TI_TFP410=m +CONFIG_DRM_PANEL_LG_LB035Q02=m +CONFIG_DRM_PANEL_NEC_NL8048HL11=m +CONFIG_DRM_PANEL_SHARP_LS037V7DW01=m +CONFIG_DRM_PANEL_SONY_ACX565AKM=m +CONFIG_DRM_PANEL_TPO_TD028TTEC1=m +CONFIG_DRM_PANEL_TPO_TD043MTEA1=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_MODE_HELPERS=y @@ -554,3 +554,4 @@ CONFIG_DEBUG_INFO_DWARF4=y CONFIG_MAGIC_SYSRQ=y CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_BUGVERBOSE is not set +CONFIG_TI_CPSW_SWITCHDEV=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 787c3f9be414..b817c57f05f1 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -581,7 +581,6 @@ CONFIG_USB_SERIAL_XIRCOM=m CONFIG_USB_SERIAL_OMNINET=m CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m -CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m CONFIG_USB_CYTHERM=m diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig index 02f1e7b7c8f6..67c306fff376 100644 --- a/arch/arm/configs/qcom_defconfig +++ b/arch/arm/configs/qcom_defconfig @@ -5,7 +5,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y CONFIG_BLK_DEV_INITRD=y -CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y # CONFIG_SLUB_DEBUG is not set diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig index 95b5a4ffddea..73ed73a8785a 100644 --- a/arch/arm/configs/s3c2410_defconfig +++ b/arch/arm/configs/s3c2410_defconfig @@ -327,7 +327,6 @@ CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m CONFIG_USB_ADUTUX=m CONFIG_USB_SEVSEG=m -CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m CONFIG_USB_CYPRESS_CY7C63=m diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index 4fb51d665abb..a1cdbfa064c5 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -189,7 +189,6 @@ CONFIG_USB_SERIAL_XIRCOM=m CONFIG_USB_SERIAL_OMNINET=m CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m -CONFIG_USB_RIO500=m CONFIG_USB_LEGOTOWER=m CONFIG_USB_LCD=m CONFIG_USB_CYTHERM=m diff --git a/arch/arm/configs/zx_defconfig b/arch/arm/configs/zx_defconfig index c4070c19ea6c..4d2ef785ed34 100644 --- a/arch/arm/configs/zx_defconfig +++ b/arch/arm/configs/zx_defconfig @@ -11,7 +11,6 @@ CONFIG_RT_GROUP_SCHED=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_BLK_DEV_INITRD=y -CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index b24df84a1d7a..2674de6ada1f 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -30,7 +30,7 @@ config CRYPTO_SHA1_ARM_NEON config CRYPTO_SHA1_ARM_CE tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_SHA1_ARM select CRYPTO_HASH help @@ -39,7 +39,7 @@ config CRYPTO_SHA1_ARM_CE config CRYPTO_SHA2_ARM_CE tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_SHA256_ARM select CRYPTO_HASH help @@ -81,7 +81,7 @@ config CRYPTO_AES_ARM config CRYPTO_AES_ARM_BS tristate "Bit sliced AES using NEON instructions" depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_LIB_AES select CRYPTO_SIMD help @@ -96,8 +96,9 @@ config CRYPTO_AES_ARM_BS config CRYPTO_AES_ARM_CE tristate "Accelerated AES using ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + select CRYPTO_SKCIPHER + select CRYPTO_LIB_AES select CRYPTO_SIMD help Use an implementation of AES in CBC, CTR and XTS modes that uses @@ -105,7 +106,7 @@ config CRYPTO_AES_ARM_CE config CRYPTO_GHASH_ARM_CE tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_HASH select CRYPTO_CRYPTD select CRYPTO_GF128MUL @@ -117,23 +118,35 @@ config CRYPTO_GHASH_ARM_CE config CRYPTO_CRCT10DIF_ARM_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" - depends on KERNEL_MODE_NEON && CRC_T10DIF + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on CRC_T10DIF select CRYPTO_HASH config CRYPTO_CRC32_ARM_CE tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions" - depends on KERNEL_MODE_NEON && CRC32 + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on CRC32 select CRYPTO_HASH config CRYPTO_CHACHA20_NEON - tristate "NEON accelerated ChaCha stream cipher algorithms" - depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER - select CRYPTO_CHACHA20 + tristate "NEON and scalar accelerated ChaCha stream cipher algorithms" + select CRYPTO_SKCIPHER + select CRYPTO_ARCH_HAVE_LIB_CHACHA + +config CRYPTO_POLY1305_ARM + tristate "Accelerated scalar and SIMD Poly1305 hash implementations" + select CRYPTO_HASH + select CRYPTO_ARCH_HAVE_LIB_POLY1305 config CRYPTO_NHPOLY1305_NEON tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" depends on KERNEL_MODE_NEON select CRYPTO_NHPOLY1305 +config CRYPTO_CURVE25519_NEON + tristate "NEON accelerated Curve25519 scalar multiplication library" + depends on KERNEL_MODE_NEON + select CRYPTO_LIB_CURVE25519_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 4180f3a13512..b745c17d356f 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -10,34 +10,16 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o +obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o +obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o -ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o -crc-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o - -ifneq ($(crc-obj-y)$(crc-obj-m),) -ifeq ($(call as-instr,.arch armv8-a\n.arch_extension crc,y,n),y) -ce-obj-y += $(crc-obj-y) -ce-obj-m += $(crc-obj-m) -else -$(warning These CRC Extensions modules need binutils 2.23 or higher) -$(warning $(crc-obj-y) $(crc-obj-m)) -endif -endif - -ifneq ($(ce-obj-y)$(ce-obj-m),) -ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y) -obj-y += $(ce-obj-y) -obj-m += $(ce-obj-m) -else -$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher) -$(warning $(ce-obj-y) $(ce-obj-m)) -endif -endif +obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o +obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o +obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o aes-arm-y := aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o @@ -53,13 +35,19 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o -chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o +chacha-neon-y := chacha-scalar-core.o chacha-glue.o +chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o +poly1305-arm-y := poly1305-core.o poly1305-glue.o nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o +curve25519-neon-y := curve25519-core.o curve25519-glue.o ifdef REGENERATE_ARM_CRYPTO quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) +$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl + $(call cmd,perl) + $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl $(call cmd,perl) @@ -67,4 +55,9 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl $(call cmd,perl) endif -clean-files += sha256-core.S sha512-core.S +clean-files += poly1305-core.S sha256-core.S sha512-core.S + +# massage the perlasm code a bit so we only get the NEON routine if we need it +poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5 +poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7 +AFLAGS_poly1305-core.o += $(poly1305-aflags-y) diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index b978cdf133af..4d1707388d94 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -9,6 +9,7 @@ #include <asm/assembler.h> .text + .arch armv8-a .fpu crypto-neon-fp-armv8 .align 3 diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c new file mode 100644 index 000000000000..6ebbb2b241d2 --- /dev/null +++ b/arch/arm/crypto/chacha-glue.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM NEON accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) + * + * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2015 Martin Willi + */ + +#include <crypto/algapi.h> +#include <crypto/internal/chacha.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/cputype.h> +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> + +asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, + int nrounds); +asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); +asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); + +asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, + const u32 *state, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); + +static inline bool neon_usable(void) +{ + return static_branch_likely(&use_neon) && crypto_simd_usable(); +} + +static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + u8 buf[CHACHA_BLOCK_SIZE]; + + while (bytes >= CHACHA_BLOCK_SIZE * 4) { + chacha_4block_xor_neon(state, dst, src, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; + state[12] += 4; + } + while (bytes >= CHACHA_BLOCK_SIZE) { + chacha_block_xor_neon(state, dst, src, nrounds); + bytes -= CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; + state[12]++; + } + if (bytes) { + memcpy(buf, src, bytes); + chacha_block_xor_neon(state, buf, buf, nrounds); + memcpy(dst, buf, bytes); + } +} + +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { + hchacha_block_arm(state, stream, nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, stream, nrounds); + kernel_neon_end(); + } +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, + int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || + bytes <= CHACHA_BLOCK_SIZE) { + chacha_doarm(dst, src, bytes, state, nrounds); + state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); + return; + } + + kernel_neon_begin(); + chacha_doneon(state, dst, src, bytes, nrounds); + kernel_neon_end(); +} +EXPORT_SYMBOL(chacha_crypt_arch); + +static int chacha_stream_xor(struct skcipher_request *req, + const struct chacha_ctx *ctx, const u8 *iv, + bool neon) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + chacha_init_generic(state, ctx->key, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + if (!neon) { + chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, + nbytes, state, ctx->nrounds); + state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); + } else { + kernel_neon_begin(); + chacha_doneon(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, ctx->nrounds); + kernel_neon_end(); + } + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + +static int do_chacha(struct skcipher_request *req, bool neon) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + return chacha_stream_xor(req, ctx, req->iv, neon); +} + +static int chacha_arm(struct skcipher_request *req) +{ + return do_chacha(req, false); +} + +static int chacha_neon(struct skcipher_request *req) +{ + return do_chacha(req, neon_usable()); +} + +static int do_xchacha(struct skcipher_request *req, bool neon) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + chacha_init_generic(state, ctx->key, req->iv); + + if (!neon) { + hchacha_block_arm(state, subctx.key, ctx->nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, subctx.key, ctx->nrounds); + kernel_neon_end(); + } + subctx.nrounds = ctx->nrounds; + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + return chacha_stream_xor(req, &subctx, real_iv, neon); +} + +static int xchacha_arm(struct skcipher_request *req) +{ + return do_xchacha(req, false); +} + +static int xchacha_neon(struct skcipher_request *req) +{ + return do_xchacha(req, neon_usable()); +} + +static struct skcipher_alg arm_algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-arm", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = chacha_arm, + .decrypt = chacha_arm, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-arm", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = xchacha_arm, + .decrypt = xchacha_arm, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-arm", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + .encrypt = xchacha_arm, + .decrypt = xchacha_arm, + }, +}; + +static struct skcipher_alg neon_algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = chacha_neon, + .decrypt = chacha_neon, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-neon", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .walksize = 4 * CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + .encrypt = xchacha_neon, + .decrypt = xchacha_neon, + } +}; + +static int __init chacha_simd_mod_init(void) +{ + int err = 0; + + if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { + err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); + if (err) + return err; + } + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { + int i; + + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A7: + case ARM_CPU_PART_CORTEX_A5: + /* + * The Cortex-A7 and Cortex-A5 do not perform well with + * the NEON implementation but do incredibly with the + * scalar one and use less power. + */ + for (i = 0; i < ARRAY_SIZE(neon_algs); i++) + neon_algs[i].base.cra_priority = 0; + break; + default: + static_branch_enable(&use_neon); + } + + if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { + err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); + if (err) + crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); + } + } + return err; +} + +static void __exit chacha_simd_mod_fini(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { + crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) + crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); + } +} + +module_init(chacha_simd_mod_init); +module_exit(chacha_simd_mod_fini); + +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-arm"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-arm"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-arm"); +#ifdef CONFIG_KERNEL_MODE_NEON +MODULE_ALIAS_CRYPTO("chacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha20-neon"); +MODULE_ALIAS_CRYPTO("xchacha12-neon"); +#endif diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c deleted file mode 100644 index a8e9b534c8da..000000000000 --- a/arch/arm/crypto/chacha-neon-glue.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * ARM NEON accelerated ChaCha and XChaCha stream ciphers, - * including ChaCha20 (RFC7539) - * - * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <crypto/algapi.h> -#include <crypto/chacha.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> -#include <linux/kernel.h> -#include <linux/module.h> - -#include <asm/hwcap.h> -#include <asm/neon.h> -#include <asm/simd.h> - -asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, - int nrounds); -asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, - int nrounds); -asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); - -static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - u8 buf[CHACHA_BLOCK_SIZE]; - - while (bytes >= CHACHA_BLOCK_SIZE * 4) { - chacha_4block_xor_neon(state, dst, src, nrounds); - bytes -= CHACHA_BLOCK_SIZE * 4; - src += CHACHA_BLOCK_SIZE * 4; - dst += CHACHA_BLOCK_SIZE * 4; - state[12] += 4; - } - while (bytes >= CHACHA_BLOCK_SIZE) { - chacha_block_xor_neon(state, dst, src, nrounds); - bytes -= CHACHA_BLOCK_SIZE; - src += CHACHA_BLOCK_SIZE; - dst += CHACHA_BLOCK_SIZE; - state[12]++; - } - if (bytes) { - memcpy(buf, src, bytes); - chacha_block_xor_neon(state, buf, buf, nrounds); - memcpy(dst, buf, bytes); - } -} - -static int chacha_neon_stream_xor(struct skcipher_request *req, - const struct chacha_ctx *ctx, const u8 *iv) -{ - struct skcipher_walk walk; - u32 state[16]; - int err; - - err = skcipher_walk_virt(&walk, req, false); - - crypto_chacha_init(state, ctx, iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - kernel_neon_begin(); - chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, - nbytes, ctx->nrounds); - kernel_neon_end(); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -} - -static int chacha_neon(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_chacha_crypt(req); - - return chacha_neon_stream_xor(req, ctx, req->iv); -} - -static int xchacha_neon(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - struct chacha_ctx subctx; - u32 state[16]; - u8 real_iv[16]; - - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) - return crypto_xchacha_crypt(req); - - crypto_chacha_init(state, ctx, req->iv); - - kernel_neon_begin(); - hchacha_block_neon(state, subctx.key, ctx->nrounds); - kernel_neon_end(); - subctx.nrounds = ctx->nrounds; - - memcpy(&real_iv[0], req->iv + 24, 8); - memcpy(&real_iv[8], req->iv + 16, 8); - return chacha_neon_stream_xor(req, &subctx, real_iv); -} - -static struct skcipher_alg algs[] = { - { - .base.cra_name = "chacha20", - .base.cra_driver_name = "chacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = CHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = chacha_neon, - .decrypt = chacha_neon, - }, { - .base.cra_name = "xchacha20", - .base.cra_driver_name = "xchacha20-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha20_setkey, - .encrypt = xchacha_neon, - .decrypt = xchacha_neon, - }, { - .base.cra_name = "xchacha12", - .base.cra_driver_name = "xchacha12-neon", - .base.cra_priority = 300, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct chacha_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = CHACHA_KEY_SIZE, - .max_keysize = CHACHA_KEY_SIZE, - .ivsize = XCHACHA_IV_SIZE, - .chunksize = CHACHA_BLOCK_SIZE, - .walksize = 4 * CHACHA_BLOCK_SIZE, - .setkey = crypto_chacha12_setkey, - .encrypt = xchacha_neon, - .decrypt = xchacha_neon, - } -}; - -static int __init chacha_simd_mod_init(void) -{ - if (!(elf_hwcap & HWCAP_NEON)) - return -ENODEV; - - return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); -} - -static void __exit chacha_simd_mod_fini(void) -{ - crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); -} - -module_init(chacha_simd_mod_init); -module_exit(chacha_simd_mod_fini); - -MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); -MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("chacha20"); -MODULE_ALIAS_CRYPTO("chacha20-neon"); -MODULE_ALIAS_CRYPTO("xchacha20"); -MODULE_ALIAS_CRYPTO("xchacha20-neon"); -MODULE_ALIAS_CRYPTO("xchacha12"); -MODULE_ALIAS_CRYPTO("xchacha12-neon"); diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S new file mode 100644 index 000000000000..2985b80a45b5 --- /dev/null +++ b/arch/arm/crypto/chacha-scalar-core.S @@ -0,0 +1,460 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Google, Inc. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Design notes: + * + * 16 registers would be needed to hold the state matrix, but only 14 are + * available because 'sp' and 'pc' cannot be used. So we spill the elements + * (x8, x9) to the stack and swap them out with (x10, x11). This adds one + * 'ldrd' and one 'strd' instruction per round. + * + * All rotates are performed using the implicit rotate operand accepted by the + * 'add' and 'eor' instructions. This is faster than using explicit rotate + * instructions. To make this work, we allow the values in the second and last + * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the + * wrong rotation amount. The rotation amount is then fixed up just in time + * when the values are used. 'brot' is the number of bits the values in row 'b' + * need to be rotated right to arrive at the correct values, and 'drot' + * similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such + * that they end up as (25, 24) after every round. + */ + + // ChaCha state registers + X0 .req r0 + X1 .req r1 + X2 .req r2 + X3 .req r3 + X4 .req r4 + X5 .req r5 + X6 .req r6 + X7 .req r7 + X8_X10 .req r8 // shared by x8 and x10 + X9_X11 .req r9 // shared by x9 and x11 + X12 .req r10 + X13 .req r11 + X14 .req r12 + X15 .req r14 + +.macro __rev out, in, t0, t1, t2 +.if __LINUX_ARM_ARCH__ >= 6 + rev \out, \in +.else + lsl \t0, \in, #24 + and \t1, \in, #0xff00 + and \t2, \in, #0xff0000 + orr \out, \t0, \in, lsr #24 + orr \out, \out, \t1, lsl #8 + orr \out, \out, \t2, lsr #8 +.endif +.endm + +.macro _le32_bswap x, t0, t1, t2 +#ifdef __ARMEB__ + __rev \x, \x, \t0, \t1, \t2 +#endif +.endm + +.macro _le32_bswap_4x a, b, c, d, t0, t1, t2 + _le32_bswap \a, \t0, \t1, \t2 + _le32_bswap \b, \t0, \t1, \t2 + _le32_bswap \c, \t0, \t1, \t2 + _le32_bswap \d, \t0, \t1, \t2 +.endm + +.macro __ldrd a, b, src, offset +#if __LINUX_ARM_ARCH__ >= 6 + ldrd \a, \b, [\src, #\offset] +#else + ldr \a, [\src, #\offset] + ldr \b, [\src, #\offset + 4] +#endif +.endm + +.macro __strd a, b, dst, offset +#if __LINUX_ARM_ARCH__ >= 6 + strd \a, \b, [\dst, #\offset] +#else + str \a, [\dst, #\offset] + str \b, [\dst, #\offset + 4] +#endif +.endm + +.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2 + + // a += b; d ^= a; d = rol(d, 16); + add \a1, \a1, \b1, ror #brot + add \a2, \a2, \b2, ror #brot + eor \d1, \a1, \d1, ror #drot + eor \d2, \a2, \d2, ror #drot + // drot == 32 - 16 == 16 + + // c += d; b ^= c; b = rol(b, 12); + add \c1, \c1, \d1, ror #16 + add \c2, \c2, \d2, ror #16 + eor \b1, \c1, \b1, ror #brot + eor \b2, \c2, \b2, ror #brot + // brot == 32 - 12 == 20 + + // a += b; d ^= a; d = rol(d, 8); + add \a1, \a1, \b1, ror #20 + add \a2, \a2, \b2, ror #20 + eor \d1, \a1, \d1, ror #16 + eor \d2, \a2, \d2, ror #16 + // drot == 32 - 8 == 24 + + // c += d; b ^= c; b = rol(b, 7); + add \c1, \c1, \d1, ror #24 + add \c2, \c2, \d2, ror #24 + eor \b1, \c1, \b1, ror #20 + eor \b2, \c2, \b2, ror #20 + // brot == 32 - 7 == 25 +.endm + +.macro _doubleround + + // column round + + // quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13) + _halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13 + + // save (x8, x9); restore (x10, x11) + __strd X8_X10, X9_X11, sp, 0 + __ldrd X8_X10, X9_X11, sp, 8 + + // quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15) + _halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15 + + .set brot, 25 + .set drot, 24 + + // diagonal round + + // quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12) + _halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12 + + // save (x10, x11); restore (x8, x9) + __strd X8_X10, X9_X11, sp, 8 + __ldrd X8_X10, X9_X11, sp, 0 + + // quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14) + _halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14 +.endm + +.macro _chacha_permute nrounds + .set brot, 0 + .set drot, 0 + .rept \nrounds / 2 + _doubleround + .endr +.endm + +.macro _chacha nrounds + +.Lnext_block\@: + // Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN + // Registers contain x0-x9,x12-x15. + + // Do the core ChaCha permutation to update x0-x15. + _chacha_permute \nrounds + + add sp, #8 + // Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN + // Registers contain x0-x9,x12-x15. + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. + + // Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15). + push {X8_X10, X9_X11, X12, X13, X14, X15} + + // Load (OUT, IN, LEN). + ldr r14, [sp, #96] + ldr r12, [sp, #100] + ldr r11, [sp, #104] + + orr r10, r14, r12 + + // Use slow path if fewer than 64 bytes remain. + cmp r11, #64 + blt .Lxor_slowpath\@ + + // Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on + // ARMv6+, since ldmia and stmia (used below) still require alignment. + tst r10, #3 + bne .Lxor_slowpath\@ + + // Fast path: XOR 64 bytes of aligned data. + + // Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN + // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT. + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. + + // x0-x3 + __ldrd r8, r9, sp, 32 + __ldrd r10, r11, sp, 40 + add X0, X0, r8 + add X1, X1, r9 + add X2, X2, r10 + add X3, X3, r11 + _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 + ldmia r12!, {r8-r11} + eor X0, X0, r8 + eor X1, X1, r9 + eor X2, X2, r10 + eor X3, X3, r11 + stmia r14!, {X0-X3} + + // x4-x7 + __ldrd r8, r9, sp, 48 + __ldrd r10, r11, sp, 56 + add X4, r8, X4, ror #brot + add X5, r9, X5, ror #brot + ldmia r12!, {X0-X3} + add X6, r10, X6, ror #brot + add X7, r11, X7, ror #brot + _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 + eor X4, X4, X0 + eor X5, X5, X1 + eor X6, X6, X2 + eor X7, X7, X3 + stmia r14!, {X4-X7} + + // x8-x15 + pop {r0-r7} // (x8-x9,x12-x15,x10-x11) + __ldrd r8, r9, sp, 32 + __ldrd r10, r11, sp, 40 + add r0, r0, r8 // x8 + add r1, r1, r9 // x9 + add r6, r6, r10 // x10 + add r7, r7, r11 // x11 + _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 + ldmia r12!, {r8-r11} + eor r0, r0, r8 // x8 + eor r1, r1, r9 // x9 + eor r6, r6, r10 // x10 + eor r7, r7, r11 // x11 + stmia r14!, {r0,r1,r6,r7} + ldmia r12!, {r0,r1,r6,r7} + __ldrd r8, r9, sp, 48 + __ldrd r10, r11, sp, 56 + add r2, r8, r2, ror #drot // x12 + add r3, r9, r3, ror #drot // x13 + add r4, r10, r4, ror #drot // x14 + add r5, r11, r5, ror #drot // x15 + _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 + ldr r9, [sp, #72] // load LEN + eor r2, r2, r0 // x12 + eor r3, r3, r1 // x13 + eor r4, r4, r6 // x14 + eor r5, r5, r7 // x15 + subs r9, #64 // decrement and check LEN + stmia r14!, {r2-r5} + + beq .Ldone\@ + +.Lprepare_for_next_block\@: + + // Stack: x0-x15 OUT IN LEN + + // Increment block counter (x12) + add r8, #1 + + // Store updated (OUT, IN, LEN) + str r14, [sp, #64] + str r12, [sp, #68] + str r9, [sp, #72] + + mov r14, sp + + // Store updated block counter (x12) + str r8, [sp, #48] + + sub sp, #16 + + // Reload state and do next block + ldmia r14!, {r0-r11} // load x0-x11 + __strd r10, r11, sp, 8 // store x10-x11 before state + ldmia r14, {r10-r12,r14} // load x12-x15 + b .Lnext_block\@ + +.Lxor_slowpath\@: + // Slow path: < 64 bytes remaining, or unaligned input or output buffer. + // We handle it by storing the 64 bytes of keystream to the stack, then + // XOR-ing the needed portion with the data. + + // Allocate keystream buffer + sub sp, #64 + mov r14, sp + + // Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN + // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0. + // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'. + + // Save keystream for x0-x3 + __ldrd r8, r9, sp, 96 + __ldrd r10, r11, sp, 104 + add X0, X0, r8 + add X1, X1, r9 + add X2, X2, r10 + add X3, X3, r11 + _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10 + stmia r14!, {X0-X3} + + // Save keystream for x4-x7 + __ldrd r8, r9, sp, 112 + __ldrd r10, r11, sp, 120 + add X4, r8, X4, ror #brot + add X5, r9, X5, ror #brot + add X6, r10, X6, ror #brot + add X7, r11, X7, ror #brot + _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10 + add r8, sp, #64 + stmia r14!, {X4-X7} + + // Save keystream for x8-x15 + ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11) + __ldrd r8, r9, sp, 128 + __ldrd r10, r11, sp, 136 + add r0, r0, r8 // x8 + add r1, r1, r9 // x9 + add r6, r6, r10 // x10 + add r7, r7, r11 // x11 + _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10 + stmia r14!, {r0,r1,r6,r7} + __ldrd r8, r9, sp, 144 + __ldrd r10, r11, sp, 152 + add r2, r8, r2, ror #drot // x12 + add r3, r9, r3, ror #drot // x13 + add r4, r10, r4, ror #drot // x14 + add r5, r11, r5, ror #drot // x15 + _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11 + stmia r14, {r2-r5} + + // Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN + // Registers: r8 is block counter, r12 is IN. + + ldr r9, [sp, #168] // LEN + ldr r14, [sp, #160] // OUT + cmp r9, #64 + mov r0, sp + movle r1, r9 + movgt r1, #64 + // r1 is number of bytes to XOR, in range [1, 64] + +.if __LINUX_ARM_ARCH__ < 6 + orr r2, r12, r14 + tst r2, #3 // IN or OUT misaligned? + bne .Lxor_next_byte\@ +.endif + + // XOR a word at a time +.rept 16 + subs r1, #4 + blt .Lxor_words_done\@ + ldr r2, [r12], #4 + ldr r3, [r0], #4 + eor r2, r2, r3 + str r2, [r14], #4 +.endr + b .Lxor_slowpath_done\@ +.Lxor_words_done\@: + ands r1, r1, #3 + beq .Lxor_slowpath_done\@ + + // XOR a byte at a time +.Lxor_next_byte\@: + ldrb r2, [r12], #1 + ldrb r3, [r0], #1 + eor r2, r2, r3 + strb r2, [r14], #1 + subs r1, #1 + bne .Lxor_next_byte\@ + +.Lxor_slowpath_done\@: + subs r9, #64 + add sp, #96 + bgt .Lprepare_for_next_block\@ + +.Ldone\@: +.endm // _chacha + +/* + * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, + * const u32 *state, int nrounds); + */ +ENTRY(chacha_doarm) + cmp r2, #0 // len == 0? + reteq lr + + ldr ip, [sp] + cmp ip, #12 + + push {r0-r2,r4-r11,lr} + + // Push state x0-x15 onto stack. + // Also store an extra copy of x10-x11 just before the state. + + add X12, r3, #48 + ldm X12, {X12,X13,X14,X15} + push {X12,X13,X14,X15} + sub sp, sp, #64 + + __ldrd X8_X10, X9_X11, r3, 40 + __strd X8_X10, X9_X11, sp, 8 + __strd X8_X10, X9_X11, sp, 56 + ldm r3, {X0-X9_X11} + __strd X0, X1, sp, 16 + __strd X2, X3, sp, 24 + __strd X4, X5, sp, 32 + __strd X6, X7, sp, 40 + __strd X8_X10, X9_X11, sp, 48 + + beq 1f + _chacha 20 + +0: add sp, #76 + pop {r4-r11, pc} + +1: _chacha 12 + b 0b +ENDPROC(chacha_doarm) + +/* + * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds); + */ +ENTRY(hchacha_block_arm) + push {r1,r4-r11,lr} + + cmp r2, #12 // ChaCha12 ? + + mov r14, r0 + ldmia r14!, {r0-r11} // load x0-x11 + push {r10-r11} // store x10-x11 to stack + ldm r14, {r10-r12,r14} // load x12-x15 + sub sp, #8 + + beq 1f + _chacha_permute 20 + + // Skip over (unused0-unused1, x10-x11) +0: add sp, #16 + + // Fix up rotations of x12-x15 + ror X12, X12, #drot + ror X13, X13, #drot + pop {r4} // load 'out' + ror X14, X14, #drot + ror X15, X15, #drot + + // Store (x0-x3,x12-x15) to 'out' + stm r4, {X0,X1,X2,X3,X12,X13,X14,X15} + + pop {r4-r11,pc} + +1: _chacha_permute 12 + b 0b +ENDPROC(hchacha_block_arm) diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S index 86be258a803f..46c02c518a30 100644 --- a/arch/arm/crypto/crct10dif-ce-core.S +++ b/arch/arm/crypto/crct10dif-ce-core.S @@ -72,7 +72,7 @@ #endif .text - .arch armv7-a + .arch armv8-a .fpu crypto-neon-fp-armv8 init_crc .req r0 diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S new file mode 100644 index 000000000000..be18af52e7dc --- /dev/null +++ b/arch/arm/crypto/curve25519-core.S @@ -0,0 +1,2062 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been + * manually reworked for use in kernel space. + */ + +#include <linux/linkage.h> + +.text +.fpu neon +.arch armv7-a +.align 4 + +ENTRY(curve25519_neon) + push {r4-r11, lr} + mov ip, sp + sub r3, sp, #704 + and r3, r3, #0xfffffff0 + mov sp, r3 + movw r4, #0 + movw r5, #254 + vmov.i32 q0, #1 + vshr.u64 q1, q0, #7 + vshr.u64 q0, q0, #8 + vmov.i32 d4, #19 + vmov.i32 d5, #38 + add r6, sp, #480 + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d4-d5}, [r6, : 128] + add r6, r3, #0 + vmov.i32 q2, #0 + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 d4, [r6, : 64] + add r6, r3, #0 + movw r7, #960 + sub r7, r7, #2 + neg r7, r7 + sub r7, r7, r7, LSL #7 + str r7, [r6] + add r6, sp, #672 + vld1.8 {d4-d5}, [r1]! + vld1.8 {d6-d7}, [r1] + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 {d6-d7}, [r6, : 128] + sub r1, r6, #16 + ldrb r6, [r1] + and r6, r6, #248 + strb r6, [r1] + ldrb r6, [r1, #31] + and r6, r6, #127 + orr r6, r6, #64 + strb r6, [r1, #31] + vmov.i64 q2, #0xffffffff + vshr.u64 q3, q2, #7 + vshr.u64 q2, q2, #6 + vld1.8 {d8}, [r2] + vld1.8 {d10}, [r2] + add r2, r2, #6 + vld1.8 {d12}, [r2] + vld1.8 {d14}, [r2] + add r2, r2, #6 + vld1.8 {d16}, [r2] + add r2, r2, #4 + vld1.8 {d18}, [r2] + vld1.8 {d20}, [r2] + add r2, r2, #6 + vld1.8 {d22}, [r2] + add r2, r2, #2 + vld1.8 {d24}, [r2] + vld1.8 {d26}, [r2] + vshr.u64 q5, q5, #26 + vshr.u64 q6, q6, #3 + vshr.u64 q7, q7, #29 + vshr.u64 q8, q8, #6 + vshr.u64 q10, q10, #25 + vshr.u64 q11, q11, #3 + vshr.u64 q12, q12, #12 + vshr.u64 q13, q13, #38 + vand q4, q4, q2 + vand q6, q6, q2 + vand q8, q8, q2 + vand q10, q10, q2 + vand q2, q12, q2 + vand q5, q5, q3 + vand q7, q7, q3 + vand q9, q9, q3 + vand q11, q11, q3 + vand q3, q13, q3 + add r2, r3, #48 + vadd.i64 q12, q4, q1 + vadd.i64 q13, q10, q1 + vshr.s64 q12, q12, #26 + vshr.s64 q13, q13, #26 + vadd.i64 q5, q5, q12 + vshl.i64 q12, q12, #26 + vadd.i64 q14, q5, q0 + vadd.i64 q11, q11, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q11, q0 + vsub.i64 q4, q4, q12 + vshr.s64 q12, q14, #25 + vsub.i64 q10, q10, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q12 + vshl.i64 q12, q12, #25 + vadd.i64 q14, q6, q1 + vadd.i64 q2, q2, q13 + vsub.i64 q5, q5, q12 + vshr.s64 q12, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q1 + vadd.i64 q7, q7, q12 + vshl.i64 q12, q12, #26 + vadd.i64 q15, q7, q0 + vsub.i64 q11, q11, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q12 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q3, q0 + vadd.i64 q8, q8, q12 + vshl.i64 q12, q12, #25 + vadd.i64 q15, q8, q1 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q7, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q9, q9, q12 + vtrn.32 d12, d14 + vshl.i64 q12, q12, #26 + vtrn.32 d13, d15 + vadd.i64 q0, q9, q0 + vadd.i64 q4, q4, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q6, q13, #4 + vsub.i64 q7, q8, q12 + vshr.s64 q0, q0, #25 + vadd.i64 q4, q4, q6 + vadd.i64 q6, q10, q0 + vshl.i64 q0, q0, #25 + vadd.i64 q8, q6, q1 + vadd.i64 q4, q4, q13 + vshl.i64 q10, q13, #25 + vadd.i64 q1, q4, q1 + vsub.i64 q0, q9, q0 + vshr.s64 q8, q8, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d14, d0 + vshr.s64 q1, q1, #26 + vtrn.32 d15, d1 + vadd.i64 q0, q11, q8 + vst1.8 d14, [r2, : 64] + vshl.i64 q7, q8, #26 + vadd.i64 q5, q5, q1 + vtrn.32 d4, d6 + vshl.i64 q1, q1, #26 + vtrn.32 d5, d7 + vsub.i64 q3, q6, q7 + add r2, r2, #16 + vsub.i64 q1, q4, q1 + vst1.8 d4, [r2, : 64] + vtrn.32 d6, d0 + vtrn.32 d7, d1 + sub r2, r2, #8 + vtrn.32 d2, d10 + vtrn.32 d3, d11 + vst1.8 d6, [r2, : 64] + sub r2, r2, #24 + vst1.8 d2, [r2, : 64] + add r2, r3, #96 + vmov.i32 q0, #0 + vmov.i64 d2, #0xff + vmov.i64 d3, #0 + vshr.u32 q1, q1, #7 + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #144 + vmov.i32 q0, #0 + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #240 + vmov.i32 q0, #0 + vmov.i64 d2, #0xff + vmov.i64 d3, #0 + vshr.u32 q1, q1, #7 + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #48 + add r6, r3, #192 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 d4, [r6, : 64] +.Lmainloop: + mov r2, r5, LSR #3 + and r6, r5, #7 + ldrb r2, [r1, r2] + mov r2, r2, LSR r6 + and r2, r2, #1 + str r5, [sp, #456] + eor r4, r4, r2 + str r2, [sp, #460] + neg r2, r4 + add r4, r3, #96 + add r5, r3, #192 + add r6, r3, #144 + vld1.8 {d8-d9}, [r4, : 128]! + add r7, r3, #240 + vld1.8 {d10-d11}, [r5, : 128]! + veor q6, q4, q5 + vld1.8 {d14-d15}, [r6, : 128]! + vdup.i32 q8, r2 + vld1.8 {d18-d19}, [r7, : 128]! + veor q10, q7, q9 + vld1.8 {d22-d23}, [r4, : 128]! + vand q6, q6, q8 + vld1.8 {d24-d25}, [r5, : 128]! + vand q10, q10, q8 + vld1.8 {d26-d27}, [r6, : 128]! + veor q4, q4, q6 + vld1.8 {d28-d29}, [r7, : 128]! + veor q5, q5, q6 + vld1.8 {d0}, [r4, : 64] + veor q6, q7, q10 + vld1.8 {d2}, [r5, : 64] + veor q7, q9, q10 + vld1.8 {d4}, [r6, : 64] + veor q9, q11, q12 + vld1.8 {d6}, [r7, : 64] + veor q10, q0, q1 + sub r2, r4, #32 + vand q9, q9, q8 + sub r4, r5, #32 + vand q10, q10, q8 + sub r5, r6, #32 + veor q11, q11, q9 + sub r6, r7, #32 + veor q0, q0, q10 + veor q9, q12, q9 + veor q1, q1, q10 + veor q10, q13, q14 + veor q12, q2, q3 + vand q10, q10, q8 + vand q8, q12, q8 + veor q12, q13, q10 + veor q2, q2, q8 + veor q10, q14, q10 + veor q3, q3, q8 + vadd.i32 q8, q4, q6 + vsub.i32 q4, q4, q6 + vst1.8 {d16-d17}, [r2, : 128]! + vadd.i32 q6, q11, q12 + vst1.8 {d8-d9}, [r5, : 128]! + vsub.i32 q4, q11, q12 + vst1.8 {d12-d13}, [r2, : 128]! + vadd.i32 q6, q0, q2 + vst1.8 {d8-d9}, [r5, : 128]! + vsub.i32 q0, q0, q2 + vst1.8 d12, [r2, : 64] + vadd.i32 q2, q5, q7 + vst1.8 d0, [r5, : 64] + vsub.i32 q0, q5, q7 + vst1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q2, q9, q10 + vst1.8 {d0-d1}, [r6, : 128]! + vsub.i32 q0, q9, q10 + vst1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q2, q1, q3 + vst1.8 {d0-d1}, [r6, : 128]! + vsub.i32 q0, q1, q3 + vst1.8 d4, [r4, : 64] + vst1.8 d0, [r6, : 64] + add r2, sp, #512 + add r4, r3, #96 + add r5, r3, #144 + vld1.8 {d0-d1}, [r2, : 128] + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4-d5}, [r5, : 128]! + vzip.i32 q1, q2 + vld1.8 {d6-d7}, [r4, : 128]! + vld1.8 {d8-d9}, [r5, : 128]! + vshl.i32 q5, q1, #1 + vzip.i32 q3, q4 + vshl.i32 q6, q2, #1 + vld1.8 {d14}, [r4, : 64] + vshl.i32 q8, q3, #1 + vld1.8 {d15}, [r5, : 64] + vshl.i32 q9, q4, #1 + vmul.i32 d21, d7, d1 + vtrn.32 d14, d15 + vmul.i32 q11, q4, q0 + vmul.i32 q0, q7, q0 + vmull.s32 q12, d2, d2 + vmlal.s32 q12, d11, d1 + vmlal.s32 q12, d12, d0 + vmlal.s32 q12, d13, d23 + vmlal.s32 q12, d16, d22 + vmlal.s32 q12, d7, d21 + vmull.s32 q10, d2, d11 + vmlal.s32 q10, d4, d1 + vmlal.s32 q10, d13, d0 + vmlal.s32 q10, d6, d23 + vmlal.s32 q10, d17, d22 + vmull.s32 q13, d10, d4 + vmlal.s32 q13, d11, d3 + vmlal.s32 q13, d13, d1 + vmlal.s32 q13, d16, d0 + vmlal.s32 q13, d17, d23 + vmlal.s32 q13, d8, d22 + vmull.s32 q1, d10, d5 + vmlal.s32 q1, d11, d4 + vmlal.s32 q1, d6, d1 + vmlal.s32 q1, d17, d0 + vmlal.s32 q1, d8, d23 + vmull.s32 q14, d10, d6 + vmlal.s32 q14, d11, d13 + vmlal.s32 q14, d4, d4 + vmlal.s32 q14, d17, d1 + vmlal.s32 q14, d18, d0 + vmlal.s32 q14, d9, d23 + vmull.s32 q11, d10, d7 + vmlal.s32 q11, d11, d6 + vmlal.s32 q11, d12, d5 + vmlal.s32 q11, d8, d1 + vmlal.s32 q11, d19, d0 + vmull.s32 q15, d10, d8 + vmlal.s32 q15, d11, d17 + vmlal.s32 q15, d12, d6 + vmlal.s32 q15, d13, d5 + vmlal.s32 q15, d19, d1 + vmlal.s32 q15, d14, d0 + vmull.s32 q2, d10, d9 + vmlal.s32 q2, d11, d8 + vmlal.s32 q2, d12, d7 + vmlal.s32 q2, d13, d6 + vmlal.s32 q2, d14, d1 + vmull.s32 q0, d15, d1 + vmlal.s32 q0, d10, d14 + vmlal.s32 q0, d11, d19 + vmlal.s32 q0, d12, d8 + vmlal.s32 q0, d13, d17 + vmlal.s32 q0, d6, d6 + add r2, sp, #480 + vld1.8 {d18-d19}, [r2, : 128]! + vmull.s32 q3, d16, d7 + vmlal.s32 q3, d10, d15 + vmlal.s32 q3, d11, d14 + vmlal.s32 q3, d12, d9 + vmlal.s32 q3, d13, d8 + vld1.8 {d8-d9}, [r2, : 128] + vadd.i64 q5, q12, q9 + vadd.i64 q6, q15, q9 + vshr.s64 q5, q5, #26 + vshr.s64 q6, q6, #26 + vadd.i64 q7, q10, q5 + vshl.i64 q5, q5, #26 + vadd.i64 q8, q7, q4 + vadd.i64 q2, q2, q6 + vshl.i64 q6, q6, #26 + vadd.i64 q10, q2, q4 + vsub.i64 q5, q12, q5 + vshr.s64 q8, q8, #25 + vsub.i64 q6, q15, q6 + vshr.s64 q10, q10, #25 + vadd.i64 q12, q13, q8 + vshl.i64 q8, q8, #25 + vadd.i64 q13, q12, q9 + vadd.i64 q0, q0, q10 + vsub.i64 q7, q7, q8 + vshr.s64 q8, q13, #26 + vshl.i64 q10, q10, #25 + vadd.i64 q13, q0, q9 + vadd.i64 q1, q1, q8 + vshl.i64 q8, q8, #26 + vadd.i64 q15, q1, q4 + vsub.i64 q2, q2, q10 + vshr.s64 q10, q13, #26 + vsub.i64 q8, q12, q8 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q10 + vshl.i64 q10, q10, #26 + vadd.i64 q13, q3, q4 + vadd.i64 q14, q14, q12 + add r2, r3, #288 + vshl.i64 q12, q12, #25 + add r4, r3, #336 + vadd.i64 q15, q14, q9 + add r2, r2, #8 + vsub.i64 q0, q0, q10 + add r4, r4, #8 + vshr.s64 q10, q13, #25 + vsub.i64 q1, q1, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q13, q10, q10 + vadd.i64 q11, q11, q12 + vtrn.32 d16, d2 + vshl.i64 q12, q12, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q11, q4 + vadd.i64 q4, q5, q13 + vst1.8 d16, [r2, : 64]! + vshl.i64 q5, q10, #4 + vst1.8 d17, [r4, : 64]! + vsub.i64 q8, q14, q12 + vshr.s64 q1, q1, #25 + vadd.i64 q4, q4, q5 + vadd.i64 q5, q6, q1 + vshl.i64 q1, q1, #25 + vadd.i64 q6, q5, q9 + vadd.i64 q4, q4, q10 + vshl.i64 q10, q10, #25 + vadd.i64 q9, q4, q9 + vsub.i64 q1, q11, q1 + vshr.s64 q6, q6, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d16, d2 + vshr.s64 q9, q9, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q2, q6 + vst1.8 d16, [r2, : 64] + vshl.i64 q2, q6, #26 + vst1.8 d17, [r4, : 64] + vadd.i64 q6, q7, q9 + vtrn.32 d0, d6 + vshl.i64 q7, q9, #26 + vtrn.32 d1, d7 + vsub.i64 q2, q5, q2 + add r2, r2, #16 + vsub.i64 q3, q4, q7 + vst1.8 d0, [r2, : 64] + add r4, r4, #16 + vst1.8 d1, [r4, : 64] + vtrn.32 d4, d2 + vtrn.32 d5, d3 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d6, d12 + vtrn.32 d7, d13 + vst1.8 d4, [r2, : 64] + vst1.8 d5, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d6, [r2, : 64] + vst1.8 d7, [r4, : 64] + add r2, r3, #240 + add r4, r3, #96 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #144 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #192 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128] + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #144 + vshl.i64 q7, q7, #25 + add r4, r3, #96 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + add r2, r3, #288 + add r4, r3, #336 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vsub.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4-d5}, [r4, : 128]! + vsub.i32 q1, q1, q2 + add r5, r3, #240 + vld1.8 {d4}, [r2, : 64] + vld1.8 {d6}, [r4, : 64] + vsub.i32 q2, q2, q3 + vst1.8 {d0-d1}, [r5, : 128]! + vst1.8 {d2-d3}, [r5, : 128]! + vst1.8 d4, [r5, : 64] + add r2, r3, #144 + add r4, r3, #96 + add r5, r3, #144 + add r6, r3, #192 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vsub.i32 q2, q0, q1 + vadd.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d6-d7}, [r4, : 128]! + vsub.i32 q4, q1, q3 + vadd.i32 q1, q1, q3 + vld1.8 {d6}, [r2, : 64] + vld1.8 {d10}, [r4, : 64] + vsub.i32 q6, q3, q5 + vadd.i32 q3, q3, q5 + vst1.8 {d4-d5}, [r5, : 128]! + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d8-d9}, [r5, : 128]! + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 d12, [r5, : 64] + vst1.8 d6, [r6, : 64] + add r2, r3, #0 + add r4, r3, #240 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #336 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #288 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #288 + vshl.i64 q7, q7, #25 + add r4, r3, #96 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + add r2, sp, #512 + add r4, r3, #144 + add r5, r3, #192 + vld1.8 {d0-d1}, [r2, : 128] + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4-d5}, [r5, : 128]! + vzip.i32 q1, q2 + vld1.8 {d6-d7}, [r4, : 128]! + vld1.8 {d8-d9}, [r5, : 128]! + vshl.i32 q5, q1, #1 + vzip.i32 q3, q4 + vshl.i32 q6, q2, #1 + vld1.8 {d14}, [r4, : 64] + vshl.i32 q8, q3, #1 + vld1.8 {d15}, [r5, : 64] + vshl.i32 q9, q4, #1 + vmul.i32 d21, d7, d1 + vtrn.32 d14, d15 + vmul.i32 q11, q4, q0 + vmul.i32 q0, q7, q0 + vmull.s32 q12, d2, d2 + vmlal.s32 q12, d11, d1 + vmlal.s32 q12, d12, d0 + vmlal.s32 q12, d13, d23 + vmlal.s32 q12, d16, d22 + vmlal.s32 q12, d7, d21 + vmull.s32 q10, d2, d11 + vmlal.s32 q10, d4, d1 + vmlal.s32 q10, d13, d0 + vmlal.s32 q10, d6, d23 + vmlal.s32 q10, d17, d22 + vmull.s32 q13, d10, d4 + vmlal.s32 q13, d11, d3 + vmlal.s32 q13, d13, d1 + vmlal.s32 q13, d16, d0 + vmlal.s32 q13, d17, d23 + vmlal.s32 q13, d8, d22 + vmull.s32 q1, d10, d5 + vmlal.s32 q1, d11, d4 + vmlal.s32 q1, d6, d1 + vmlal.s32 q1, d17, d0 + vmlal.s32 q1, d8, d23 + vmull.s32 q14, d10, d6 + vmlal.s32 q14, d11, d13 + vmlal.s32 q14, d4, d4 + vmlal.s32 q14, d17, d1 + vmlal.s32 q14, d18, d0 + vmlal.s32 q14, d9, d23 + vmull.s32 q11, d10, d7 + vmlal.s32 q11, d11, d6 + vmlal.s32 q11, d12, d5 + vmlal.s32 q11, d8, d1 + vmlal.s32 q11, d19, d0 + vmull.s32 q15, d10, d8 + vmlal.s32 q15, d11, d17 + vmlal.s32 q15, d12, d6 + vmlal.s32 q15, d13, d5 + vmlal.s32 q15, d19, d1 + vmlal.s32 q15, d14, d0 + vmull.s32 q2, d10, d9 + vmlal.s32 q2, d11, d8 + vmlal.s32 q2, d12, d7 + vmlal.s32 q2, d13, d6 + vmlal.s32 q2, d14, d1 + vmull.s32 q0, d15, d1 + vmlal.s32 q0, d10, d14 + vmlal.s32 q0, d11, d19 + vmlal.s32 q0, d12, d8 + vmlal.s32 q0, d13, d17 + vmlal.s32 q0, d6, d6 + add r2, sp, #480 + vld1.8 {d18-d19}, [r2, : 128]! + vmull.s32 q3, d16, d7 + vmlal.s32 q3, d10, d15 + vmlal.s32 q3, d11, d14 + vmlal.s32 q3, d12, d9 + vmlal.s32 q3, d13, d8 + vld1.8 {d8-d9}, [r2, : 128] + vadd.i64 q5, q12, q9 + vadd.i64 q6, q15, q9 + vshr.s64 q5, q5, #26 + vshr.s64 q6, q6, #26 + vadd.i64 q7, q10, q5 + vshl.i64 q5, q5, #26 + vadd.i64 q8, q7, q4 + vadd.i64 q2, q2, q6 + vshl.i64 q6, q6, #26 + vadd.i64 q10, q2, q4 + vsub.i64 q5, q12, q5 + vshr.s64 q8, q8, #25 + vsub.i64 q6, q15, q6 + vshr.s64 q10, q10, #25 + vadd.i64 q12, q13, q8 + vshl.i64 q8, q8, #25 + vadd.i64 q13, q12, q9 + vadd.i64 q0, q0, q10 + vsub.i64 q7, q7, q8 + vshr.s64 q8, q13, #26 + vshl.i64 q10, q10, #25 + vadd.i64 q13, q0, q9 + vadd.i64 q1, q1, q8 + vshl.i64 q8, q8, #26 + vadd.i64 q15, q1, q4 + vsub.i64 q2, q2, q10 + vshr.s64 q10, q13, #26 + vsub.i64 q8, q12, q8 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q10 + vshl.i64 q10, q10, #26 + vadd.i64 q13, q3, q4 + vadd.i64 q14, q14, q12 + add r2, r3, #144 + vshl.i64 q12, q12, #25 + add r4, r3, #192 + vadd.i64 q15, q14, q9 + add r2, r2, #8 + vsub.i64 q0, q0, q10 + add r4, r4, #8 + vshr.s64 q10, q13, #25 + vsub.i64 q1, q1, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q13, q10, q10 + vadd.i64 q11, q11, q12 + vtrn.32 d16, d2 + vshl.i64 q12, q12, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q11, q4 + vadd.i64 q4, q5, q13 + vst1.8 d16, [r2, : 64]! + vshl.i64 q5, q10, #4 + vst1.8 d17, [r4, : 64]! + vsub.i64 q8, q14, q12 + vshr.s64 q1, q1, #25 + vadd.i64 q4, q4, q5 + vadd.i64 q5, q6, q1 + vshl.i64 q1, q1, #25 + vadd.i64 q6, q5, q9 + vadd.i64 q4, q4, q10 + vshl.i64 q10, q10, #25 + vadd.i64 q9, q4, q9 + vsub.i64 q1, q11, q1 + vshr.s64 q6, q6, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d16, d2 + vshr.s64 q9, q9, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q2, q6 + vst1.8 d16, [r2, : 64] + vshl.i64 q2, q6, #26 + vst1.8 d17, [r4, : 64] + vadd.i64 q6, q7, q9 + vtrn.32 d0, d6 + vshl.i64 q7, q9, #26 + vtrn.32 d1, d7 + vsub.i64 q2, q5, q2 + add r2, r2, #16 + vsub.i64 q3, q4, q7 + vst1.8 d0, [r2, : 64] + add r4, r4, #16 + vst1.8 d1, [r4, : 64] + vtrn.32 d4, d2 + vtrn.32 d5, d3 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d6, d12 + vtrn.32 d7, d13 + vst1.8 d4, [r2, : 64] + vst1.8 d5, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d6, [r2, : 64] + vst1.8 d7, [r4, : 64] + add r2, r3, #336 + add r4, r3, #288 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vadd.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q1, q1, q2 + add r5, r3, #288 + vld1.8 {d4}, [r2, : 64] + vld1.8 {d6}, [r4, : 64] + vadd.i32 q2, q2, q3 + vst1.8 {d0-d1}, [r5, : 128]! + vst1.8 {d2-d3}, [r5, : 128]! + vst1.8 d4, [r5, : 64] + add r2, r3, #48 + add r4, r3, #144 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #288 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #240 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #240 + vshl.i64 q7, q7, #25 + add r4, r3, #144 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + ldr r2, [sp, #456] + ldr r4, [sp, #460] + subs r5, r2, #1 + bge .Lmainloop + add r1, r3, #144 + add r2, r3, #336 + vld1.8 {d0-d1}, [r1, : 128]! + vld1.8 {d2-d3}, [r1, : 128]! + vld1.8 {d4}, [r1, : 64] + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 d4, [r2, : 64] + movw r1, #0 +.Linvertloop: + add r2, r3, #144 + movw r4, #0 + movw r5, #2 + cmp r1, #1 + moveq r5, #1 + addeq r2, r3, #336 + addeq r4, r3, #48 + cmp r1, #2 + moveq r5, #1 + addeq r2, r3, #48 + cmp r1, #3 + moveq r5, #5 + addeq r4, r3, #336 + cmp r1, #4 + moveq r5, #10 + cmp r1, #5 + moveq r5, #20 + cmp r1, #6 + moveq r5, #10 + addeq r2, r3, #336 + addeq r4, r3, #336 + cmp r1, #7 + moveq r5, #50 + cmp r1, #8 + moveq r5, #100 + cmp r1, #9 + moveq r5, #50 + addeq r2, r3, #336 + cmp r1, #10 + moveq r5, #5 + addeq r2, r3, #48 + cmp r1, #11 + moveq r5, #0 + addeq r2, r3, #96 + add r6, r3, #144 + add r7, r3, #288 + vld1.8 {d0-d1}, [r6, : 128]! + vld1.8 {d2-d3}, [r6, : 128]! + vld1.8 {d4}, [r6, : 64] + vst1.8 {d0-d1}, [r7, : 128]! + vst1.8 {d2-d3}, [r7, : 128]! + vst1.8 d4, [r7, : 64] + cmp r5, #0 + beq .Lskipsquaringloop +.Lsquaringloop: + add r6, r3, #288 + add r7, r3, #288 + add r8, r3, #288 + vmov.i32 q0, #19 + vmov.i32 q1, #0 + vmov.i32 q2, #1 + vzip.i32 q1, q2 + vld1.8 {d4-d5}, [r7, : 128]! + vld1.8 {d6-d7}, [r7, : 128]! + vld1.8 {d9}, [r7, : 64] + vld1.8 {d10-d11}, [r6, : 128]! + add r7, sp, #384 + vld1.8 {d12-d13}, [r6, : 128]! + vmul.i32 q7, q2, q0 + vld1.8 {d8}, [r6, : 64] + vext.32 d17, d11, d10, #1 + vmul.i32 q9, q3, q0 + vext.32 d16, d10, d8, #1 + vshl.u32 q10, q5, q1 + vext.32 d22, d14, d4, #1 + vext.32 d24, d18, d6, #1 + vshl.u32 q13, q6, q1 + vshl.u32 d28, d8, d2 + vrev64.i32 d22, d22 + vmul.i32 d1, d9, d1 + vrev64.i32 d24, d24 + vext.32 d29, d8, d13, #1 + vext.32 d0, d1, d9, #1 + vrev64.i32 d0, d0 + vext.32 d2, d9, d1, #1 + vext.32 d23, d15, d5, #1 + vmull.s32 q4, d20, d4 + vrev64.i32 d23, d23 + vmlal.s32 q4, d21, d1 + vrev64.i32 d2, d2 + vmlal.s32 q4, d26, d19 + vext.32 d3, d5, d15, #1 + vmlal.s32 q4, d27, d18 + vrev64.i32 d3, d3 + vmlal.s32 q4, d28, d15 + vext.32 d14, d12, d11, #1 + vmull.s32 q5, d16, d23 + vext.32 d15, d13, d12, #1 + vmlal.s32 q5, d17, d4 + vst1.8 d8, [r7, : 64]! + vmlal.s32 q5, d14, d1 + vext.32 d12, d9, d8, #0 + vmlal.s32 q5, d15, d19 + vmov.i64 d13, #0 + vmlal.s32 q5, d29, d18 + vext.32 d25, d19, d7, #1 + vmlal.s32 q6, d20, d5 + vrev64.i32 d25, d25 + vmlal.s32 q6, d21, d4 + vst1.8 d11, [r7, : 64]! + vmlal.s32 q6, d26, d1 + vext.32 d9, d10, d10, #0 + vmlal.s32 q6, d27, d19 + vmov.i64 d8, #0 + vmlal.s32 q6, d28, d18 + vmlal.s32 q4, d16, d24 + vmlal.s32 q4, d17, d5 + vmlal.s32 q4, d14, d4 + vst1.8 d12, [r7, : 64]! + vmlal.s32 q4, d15, d1 + vext.32 d10, d13, d12, #0 + vmlal.s32 q4, d29, d19 + vmov.i64 d11, #0 + vmlal.s32 q5, d20, d6 + vmlal.s32 q5, d21, d5 + vmlal.s32 q5, d26, d4 + vext.32 d13, d8, d8, #0 + vmlal.s32 q5, d27, d1 + vmov.i64 d12, #0 + vmlal.s32 q5, d28, d19 + vst1.8 d9, [r7, : 64]! + vmlal.s32 q6, d16, d25 + vmlal.s32 q6, d17, d6 + vst1.8 d10, [r7, : 64] + vmlal.s32 q6, d14, d5 + vext.32 d8, d11, d10, #0 + vmlal.s32 q6, d15, d4 + vmov.i64 d9, #0 + vmlal.s32 q6, d29, d1 + vmlal.s32 q4, d20, d7 + vmlal.s32 q4, d21, d6 + vmlal.s32 q4, d26, d5 + vext.32 d11, d12, d12, #0 + vmlal.s32 q4, d27, d4 + vmov.i64 d10, #0 + vmlal.s32 q4, d28, d1 + vmlal.s32 q5, d16, d0 + sub r6, r7, #32 + vmlal.s32 q5, d17, d7 + vmlal.s32 q5, d14, d6 + vext.32 d30, d9, d8, #0 + vmlal.s32 q5, d15, d5 + vld1.8 {d31}, [r6, : 64]! + vmlal.s32 q5, d29, d4 + vmlal.s32 q15, d20, d0 + vext.32 d0, d6, d18, #1 + vmlal.s32 q15, d21, d25 + vrev64.i32 d0, d0 + vmlal.s32 q15, d26, d24 + vext.32 d1, d7, d19, #1 + vext.32 d7, d10, d10, #0 + vmlal.s32 q15, d27, d23 + vrev64.i32 d1, d1 + vld1.8 {d6}, [r6, : 64] + vmlal.s32 q15, d28, d22 + vmlal.s32 q3, d16, d4 + add r6, r6, #24 + vmlal.s32 q3, d17, d2 + vext.32 d4, d31, d30, #0 + vmov d17, d11 + vmlal.s32 q3, d14, d1 + vext.32 d11, d13, d13, #0 + vext.32 d13, d30, d30, #0 + vmlal.s32 q3, d15, d0 + vext.32 d1, d8, d8, #0 + vmlal.s32 q3, d29, d3 + vld1.8 {d5}, [r6, : 64] + sub r6, r6, #16 + vext.32 d10, d6, d6, #0 + vmov.i32 q1, #0xffffffff + vshl.i64 q4, q1, #25 + add r7, sp, #480 + vld1.8 {d14-d15}, [r7, : 128] + vadd.i64 q9, q2, q7 + vshl.i64 q1, q1, #26 + vshr.s64 q10, q9, #26 + vld1.8 {d0}, [r6, : 64]! + vadd.i64 q5, q5, q10 + vand q9, q9, q1 + vld1.8 {d16}, [r6, : 64]! + add r6, sp, #496 + vld1.8 {d20-d21}, [r6, : 128] + vadd.i64 q11, q5, q10 + vsub.i64 q2, q2, q9 + vshr.s64 q9, q11, #25 + vext.32 d12, d5, d4, #0 + vand q11, q11, q4 + vadd.i64 q0, q0, q9 + vmov d19, d7 + vadd.i64 q3, q0, q7 + vsub.i64 q5, q5, q11 + vshr.s64 q11, q3, #26 + vext.32 d18, d11, d10, #0 + vand q3, q3, q1 + vadd.i64 q8, q8, q11 + vadd.i64 q11, q8, q10 + vsub.i64 q0, q0, q3 + vshr.s64 q3, q11, #25 + vand q11, q11, q4 + vadd.i64 q3, q6, q3 + vadd.i64 q6, q3, q7 + vsub.i64 q8, q8, q11 + vshr.s64 q11, q6, #26 + vand q6, q6, q1 + vadd.i64 q9, q9, q11 + vadd.i64 d25, d19, d21 + vsub.i64 q3, q3, q6 + vshr.s64 d23, d25, #25 + vand q4, q12, q4 + vadd.i64 d21, d23, d23 + vshl.i64 d25, d23, #4 + vadd.i64 d21, d21, d23 + vadd.i64 d25, d25, d21 + vadd.i64 d4, d4, d25 + vzip.i32 q0, q8 + vadd.i64 d12, d4, d14 + add r6, r8, #8 + vst1.8 d0, [r6, : 64] + vsub.i64 d19, d19, d9 + add r6, r6, #16 + vst1.8 d16, [r6, : 64] + vshr.s64 d22, d12, #26 + vand q0, q6, q1 + vadd.i64 d10, d10, d22 + vzip.i32 q3, q9 + vsub.i64 d4, d4, d0 + sub r6, r6, #8 + vst1.8 d6, [r6, : 64] + add r6, r6, #16 + vst1.8 d18, [r6, : 64] + vzip.i32 q2, q5 + sub r6, r6, #32 + vst1.8 d4, [r6, : 64] + subs r5, r5, #1 + bhi .Lsquaringloop +.Lskipsquaringloop: + mov r2, r2 + add r5, r3, #288 + add r6, r3, #144 + vmov.i32 q0, #19 + vmov.i32 q1, #0 + vmov.i32 q2, #1 + vzip.i32 q1, q2 + vld1.8 {d4-d5}, [r5, : 128]! + vld1.8 {d6-d7}, [r5, : 128]! + vld1.8 {d9}, [r5, : 64] + vld1.8 {d10-d11}, [r2, : 128]! + add r5, sp, #384 + vld1.8 {d12-d13}, [r2, : 128]! + vmul.i32 q7, q2, q0 + vld1.8 {d8}, [r2, : 64] + vext.32 d17, d11, d10, #1 + vmul.i32 q9, q3, q0 + vext.32 d16, d10, d8, #1 + vshl.u32 q10, q5, q1 + vext.32 d22, d14, d4, #1 + vext.32 d24, d18, d6, #1 + vshl.u32 q13, q6, q1 + vshl.u32 d28, d8, d2 + vrev64.i32 d22, d22 + vmul.i32 d1, d9, d1 + vrev64.i32 d24, d24 + vext.32 d29, d8, d13, #1 + vext.32 d0, d1, d9, #1 + vrev64.i32 d0, d0 + vext.32 d2, d9, d1, #1 + vext.32 d23, d15, d5, #1 + vmull.s32 q4, d20, d4 + vrev64.i32 d23, d23 + vmlal.s32 q4, d21, d1 + vrev64.i32 d2, d2 + vmlal.s32 q4, d26, d19 + vext.32 d3, d5, d15, #1 + vmlal.s32 q4, d27, d18 + vrev64.i32 d3, d3 + vmlal.s32 q4, d28, d15 + vext.32 d14, d12, d11, #1 + vmull.s32 q5, d16, d23 + vext.32 d15, d13, d12, #1 + vmlal.s32 q5, d17, d4 + vst1.8 d8, [r5, : 64]! + vmlal.s32 q5, d14, d1 + vext.32 d12, d9, d8, #0 + vmlal.s32 q5, d15, d19 + vmov.i64 d13, #0 + vmlal.s32 q5, d29, d18 + vext.32 d25, d19, d7, #1 + vmlal.s32 q6, d20, d5 + vrev64.i32 d25, d25 + vmlal.s32 q6, d21, d4 + vst1.8 d11, [r5, : 64]! + vmlal.s32 q6, d26, d1 + vext.32 d9, d10, d10, #0 + vmlal.s32 q6, d27, d19 + vmov.i64 d8, #0 + vmlal.s32 q6, d28, d18 + vmlal.s32 q4, d16, d24 + vmlal.s32 q4, d17, d5 + vmlal.s32 q4, d14, d4 + vst1.8 d12, [r5, : 64]! + vmlal.s32 q4, d15, d1 + vext.32 d10, d13, d12, #0 + vmlal.s32 q4, d29, d19 + vmov.i64 d11, #0 + vmlal.s32 q5, d20, d6 + vmlal.s32 q5, d21, d5 + vmlal.s32 q5, d26, d4 + vext.32 d13, d8, d8, #0 + vmlal.s32 q5, d27, d1 + vmov.i64 d12, #0 + vmlal.s32 q5, d28, d19 + vst1.8 d9, [r5, : 64]! + vmlal.s32 q6, d16, d25 + vmlal.s32 q6, d17, d6 + vst1.8 d10, [r5, : 64] + vmlal.s32 q6, d14, d5 + vext.32 d8, d11, d10, #0 + vmlal.s32 q6, d15, d4 + vmov.i64 d9, #0 + vmlal.s32 q6, d29, d1 + vmlal.s32 q4, d20, d7 + vmlal.s32 q4, d21, d6 + vmlal.s32 q4, d26, d5 + vext.32 d11, d12, d12, #0 + vmlal.s32 q4, d27, d4 + vmov.i64 d10, #0 + vmlal.s32 q4, d28, d1 + vmlal.s32 q5, d16, d0 + sub r2, r5, #32 + vmlal.s32 q5, d17, d7 + vmlal.s32 q5, d14, d6 + vext.32 d30, d9, d8, #0 + vmlal.s32 q5, d15, d5 + vld1.8 {d31}, [r2, : 64]! + vmlal.s32 q5, d29, d4 + vmlal.s32 q15, d20, d0 + vext.32 d0, d6, d18, #1 + vmlal.s32 q15, d21, d25 + vrev64.i32 d0, d0 + vmlal.s32 q15, d26, d24 + vext.32 d1, d7, d19, #1 + vext.32 d7, d10, d10, #0 + vmlal.s32 q15, d27, d23 + vrev64.i32 d1, d1 + vld1.8 {d6}, [r2, : 64] + vmlal.s32 q15, d28, d22 + vmlal.s32 q3, d16, d4 + add r2, r2, #24 + vmlal.s32 q3, d17, d2 + vext.32 d4, d31, d30, #0 + vmov d17, d11 + vmlal.s32 q3, d14, d1 + vext.32 d11, d13, d13, #0 + vext.32 d13, d30, d30, #0 + vmlal.s32 q3, d15, d0 + vext.32 d1, d8, d8, #0 + vmlal.s32 q3, d29, d3 + vld1.8 {d5}, [r2, : 64] + sub r2, r2, #16 + vext.32 d10, d6, d6, #0 + vmov.i32 q1, #0xffffffff + vshl.i64 q4, q1, #25 + add r5, sp, #480 + vld1.8 {d14-d15}, [r5, : 128] + vadd.i64 q9, q2, q7 + vshl.i64 q1, q1, #26 + vshr.s64 q10, q9, #26 + vld1.8 {d0}, [r2, : 64]! + vadd.i64 q5, q5, q10 + vand q9, q9, q1 + vld1.8 {d16}, [r2, : 64]! + add r2, sp, #496 + vld1.8 {d20-d21}, [r2, : 128] + vadd.i64 q11, q5, q10 + vsub.i64 q2, q2, q9 + vshr.s64 q9, q11, #25 + vext.32 d12, d5, d4, #0 + vand q11, q11, q4 + vadd.i64 q0, q0, q9 + vmov d19, d7 + vadd.i64 q3, q0, q7 + vsub.i64 q5, q5, q11 + vshr.s64 q11, q3, #26 + vext.32 d18, d11, d10, #0 + vand q3, q3, q1 + vadd.i64 q8, q8, q11 + vadd.i64 q11, q8, q10 + vsub.i64 q0, q0, q3 + vshr.s64 q3, q11, #25 + vand q11, q11, q4 + vadd.i64 q3, q6, q3 + vadd.i64 q6, q3, q7 + vsub.i64 q8, q8, q11 + vshr.s64 q11, q6, #26 + vand q6, q6, q1 + vadd.i64 q9, q9, q11 + vadd.i64 d25, d19, d21 + vsub.i64 q3, q3, q6 + vshr.s64 d23, d25, #25 + vand q4, q12, q4 + vadd.i64 d21, d23, d23 + vshl.i64 d25, d23, #4 + vadd.i64 d21, d21, d23 + vadd.i64 d25, d25, d21 + vadd.i64 d4, d4, d25 + vzip.i32 q0, q8 + vadd.i64 d12, d4, d14 + add r2, r6, #8 + vst1.8 d0, [r2, : 64] + vsub.i64 d19, d19, d9 + add r2, r2, #16 + vst1.8 d16, [r2, : 64] + vshr.s64 d22, d12, #26 + vand q0, q6, q1 + vadd.i64 d10, d10, d22 + vzip.i32 q3, q9 + vsub.i64 d4, d4, d0 + sub r2, r2, #8 + vst1.8 d6, [r2, : 64] + add r2, r2, #16 + vst1.8 d18, [r2, : 64] + vzip.i32 q2, q5 + sub r2, r2, #32 + vst1.8 d4, [r2, : 64] + cmp r4, #0 + beq .Lskippostcopy + add r2, r3, #144 + mov r4, r4 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r4, : 128]! + vst1.8 {d2-d3}, [r4, : 128]! + vst1.8 d4, [r4, : 64] +.Lskippostcopy: + cmp r1, #1 + bne .Lskipfinalcopy + add r2, r3, #288 + add r4, r3, #144 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r4, : 128]! + vst1.8 {d2-d3}, [r4, : 128]! + vst1.8 d4, [r4, : 64] +.Lskipfinalcopy: + add r1, r1, #1 + cmp r1, #12 + blo .Linvertloop + add r1, r3, #144 + ldr r2, [r1], #4 + ldr r3, [r1], #4 + ldr r4, [r1], #4 + ldr r5, [r1], #4 + ldr r6, [r1], #4 + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + ldr r1, [r1] + add r11, r1, r1, LSL #4 + add r11, r11, r1, LSL #1 + add r11, r11, #16777216 + mov r11, r11, ASR #25 + add r11, r11, r2 + mov r11, r11, ASR #26 + add r11, r11, r3 + mov r11, r11, ASR #25 + add r11, r11, r4 + mov r11, r11, ASR #26 + add r11, r11, r5 + mov r11, r11, ASR #25 + add r11, r11, r6 + mov r11, r11, ASR #26 + add r11, r11, r7 + mov r11, r11, ASR #25 + add r11, r11, r8 + mov r11, r11, ASR #26 + add r11, r11, r9 + mov r11, r11, ASR #25 + add r11, r11, r10 + mov r11, r11, ASR #26 + add r11, r11, r1 + mov r11, r11, ASR #25 + add r2, r2, r11 + add r2, r2, r11, LSL #1 + add r2, r2, r11, LSL #4 + mov r11, r2, ASR #26 + add r3, r3, r11 + sub r2, r2, r11, LSL #26 + mov r11, r3, ASR #25 + add r4, r4, r11 + sub r3, r3, r11, LSL #25 + mov r11, r4, ASR #26 + add r5, r5, r11 + sub r4, r4, r11, LSL #26 + mov r11, r5, ASR #25 + add r6, r6, r11 + sub r5, r5, r11, LSL #25 + mov r11, r6, ASR #26 + add r7, r7, r11 + sub r6, r6, r11, LSL #26 + mov r11, r7, ASR #25 + add r8, r8, r11 + sub r7, r7, r11, LSL #25 + mov r11, r8, ASR #26 + add r9, r9, r11 + sub r8, r8, r11, LSL #26 + mov r11, r9, ASR #25 + add r10, r10, r11 + sub r9, r9, r11, LSL #25 + mov r11, r10, ASR #26 + add r1, r1, r11 + sub r10, r10, r11, LSL #26 + mov r11, r1, ASR #25 + sub r1, r1, r11, LSL #25 + add r2, r2, r3, LSL #26 + mov r3, r3, LSR #6 + add r3, r3, r4, LSL #19 + mov r4, r4, LSR #13 + add r4, r4, r5, LSL #13 + mov r5, r5, LSR #19 + add r5, r5, r6, LSL #6 + add r6, r7, r8, LSL #25 + mov r7, r8, LSR #7 + add r7, r7, r9, LSL #19 + mov r8, r9, LSR #13 + add r8, r8, r10, LSL #12 + mov r9, r10, LSR #20 + add r1, r9, r1, LSL #6 + str r2, [r0] + str r3, [r0, #4] + str r4, [r0, #8] + str r5, [r0, #12] + str r6, [r0, #16] + str r7, [r0, #20] + str r8, [r0, #24] + str r1, [r0, #28] + movw r0, #0 + mov sp, ip + pop {r4-r11, pc} +ENDPROC(curve25519_neon) diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c new file mode 100644 index 000000000000..f3f42cf3b893 --- /dev/null +++ b/arch/arm/crypto/curve25519-glue.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been + * manually reworked for use in kernel space. + */ + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> +#include <crypto/internal/kpp.h> +#include <crypto/internal/simd.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/jump_label.h> +#include <crypto/curve25519.h> + +asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], + const u8 scalar[CURVE25519_KEY_SIZE], + const u8 point[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&have_neon) && crypto_simd_usable()) { + kernel_neon_begin(); + curve25519_neon(out, scalar, point); + kernel_neon_end(); + } else { + curve25519_generic(out, scalar, point); + } +} +EXPORT_SYMBOL(curve25519_arch); + +static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf, + unsigned int len) +{ + u8 *secret = kpp_tfm_ctx(tfm); + + if (!len) + curve25519_generate_secret(secret); + else if (len == CURVE25519_KEY_SIZE && + crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) + memcpy(secret, buf, CURVE25519_KEY_SIZE); + else + return -EINVAL; + return 0; +} + +static int curve25519_compute_value(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + const u8 *secret = kpp_tfm_ctx(tfm); + u8 public_key[CURVE25519_KEY_SIZE]; + u8 buf[CURVE25519_KEY_SIZE]; + int copied, nbytes; + u8 const *bp; + + if (req->src) { + copied = sg_copy_to_buffer(req->src, + sg_nents_for_len(req->src, + CURVE25519_KEY_SIZE), + public_key, CURVE25519_KEY_SIZE); + if (copied != CURVE25519_KEY_SIZE) + return -EINVAL; + bp = public_key; + } else { + bp = curve25519_base_point; + } + + curve25519_arch(buf, secret, bp); + + /* might want less than we've got */ + nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len); + copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst, + nbytes), + buf, nbytes); + if (copied != nbytes) + return -EINVAL; + return 0; +} + +static unsigned int curve25519_max_size(struct crypto_kpp *tfm) +{ + return CURVE25519_KEY_SIZE; +} + +static struct kpp_alg curve25519_alg = { + .base.cra_name = "curve25519", + .base.cra_driver_name = "curve25519-neon", + .base.cra_priority = 200, + .base.cra_module = THIS_MODULE, + .base.cra_ctxsize = CURVE25519_KEY_SIZE, + + .set_secret = curve25519_set_secret, + .generate_public_key = curve25519_compute_value, + .compute_shared_secret = curve25519_compute_value, + .max_size = curve25519_max_size, +}; + +static int __init mod_init(void) +{ + if (elf_hwcap & HWCAP_NEON) { + static_branch_enable(&have_neon); + return IS_REACHABLE(CONFIG_CRYPTO_KPP) ? + crypto_register_kpp(&curve25519_alg) : 0; + } + return 0; +} + +static void __exit mod_exit(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON) + crypto_unregister_kpp(&curve25519_alg); +} + +module_init(mod_init); +module_exit(mod_exit); + +MODULE_ALIAS_CRYPTO("curve25519"); +MODULE_ALIAS_CRYPTO("curve25519-neon"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index c47fe81abcb0..534c9647726d 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -88,6 +88,7 @@ T3_H .req d17 .text + .arch armv8-a .fpu crypto-neon-fp-armv8 .macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4 diff --git a/arch/arm/crypto/poly1305-armv4.pl b/arch/arm/crypto/poly1305-armv4.pl new file mode 100644 index 000000000000..6d79498d3115 --- /dev/null +++ b/arch/arm/crypto/poly1305-armv4.pl @@ -0,0 +1,1236 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause +# +# ==================================================================== +# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL +# project. +# ==================================================================== +# +# IALU(*)/gcc-4.4 NEON +# +# ARM11xx(ARMv6) 7.78/+100% - +# Cortex-A5 6.35/+130% 3.00 +# Cortex-A8 6.25/+115% 2.36 +# Cortex-A9 5.10/+95% 2.55 +# Cortex-A15 3.85/+85% 1.25(**) +# Snapdragon S4 5.70/+100% 1.48(**) +# +# (*) this is for -march=armv6, i.e. with bunch of ldrb loading data; +# (**) these are trade-off results, they can be improved by ~8% but at +# the cost of 15/12% regression on Cortex-A5/A7, it's even possible +# to improve Cortex-A9 result, but then A5/A7 loose more than 20%; + +$flavour = shift; +if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } +else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open STDOUT,"| \"$^X\" $xlate $flavour $output"; +} else { + open STDOUT,">$output"; +} + +($ctx,$inp,$len,$padbit)=map("r$_",(0..3)); + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ +# define poly1305_init poly1305_init_arm +# define poly1305_blocks poly1305_blocks_arm +# define poly1305_emit poly1305_emit_arm +.globl poly1305_blocks_neon +#endif + +#if defined(__thumb2__) +.syntax unified +.thumb +#else +.code 32 +#endif + +.text + +.globl poly1305_emit +.globl poly1305_blocks +.globl poly1305_init +.type poly1305_init,%function +.align 5 +poly1305_init: +.Lpoly1305_init: + stmdb sp!,{r4-r11} + + eor r3,r3,r3 + cmp $inp,#0 + str r3,[$ctx,#0] @ zero hash value + str r3,[$ctx,#4] + str r3,[$ctx,#8] + str r3,[$ctx,#12] + str r3,[$ctx,#16] + str r3,[$ctx,#36] @ clear is_base2_26 + add $ctx,$ctx,#20 + +#ifdef __thumb2__ + it eq +#endif + moveq r0,#0 + beq .Lno_key + +#if __ARM_MAX_ARCH__>=7 + mov r3,#-1 + str r3,[$ctx,#28] @ impossible key power value +# ifndef __KERNEL__ + adr r11,.Lpoly1305_init + ldr r12,.LOPENSSL_armcap +# endif +#endif + ldrb r4,[$inp,#0] + mov r10,#0x0fffffff + ldrb r5,[$inp,#1] + and r3,r10,#-4 @ 0x0ffffffc + ldrb r6,[$inp,#2] + ldrb r7,[$inp,#3] + orr r4,r4,r5,lsl#8 + ldrb r5,[$inp,#4] + orr r4,r4,r6,lsl#16 + ldrb r6,[$inp,#5] + orr r4,r4,r7,lsl#24 + ldrb r7,[$inp,#6] + and r4,r4,r10 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +# if !defined(_WIN32) + ldr r12,[r11,r12] @ OPENSSL_armcap_P +# endif +# if defined(__APPLE__) || defined(_WIN32) + ldr r12,[r12] +# endif +#endif + ldrb r8,[$inp,#7] + orr r5,r5,r6,lsl#8 + ldrb r6,[$inp,#8] + orr r5,r5,r7,lsl#16 + ldrb r7,[$inp,#9] + orr r5,r5,r8,lsl#24 + ldrb r8,[$inp,#10] + and r5,r5,r3 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + tst r12,#ARMV7_NEON @ check for NEON +# ifdef __thumb2__ + adr r9,.Lpoly1305_blocks_neon + adr r11,.Lpoly1305_blocks + it ne + movne r11,r9 + adr r12,.Lpoly1305_emit + orr r11,r11,#1 @ thumb-ify addresses + orr r12,r12,#1 +# else + add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init) + ite eq + addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init) + addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init) +# endif +#endif + ldrb r9,[$inp,#11] + orr r6,r6,r7,lsl#8 + ldrb r7,[$inp,#12] + orr r6,r6,r8,lsl#16 + ldrb r8,[$inp,#13] + orr r6,r6,r9,lsl#24 + ldrb r9,[$inp,#14] + and r6,r6,r3 + + ldrb r10,[$inp,#15] + orr r7,r7,r8,lsl#8 + str r4,[$ctx,#0] + orr r7,r7,r9,lsl#16 + str r5,[$ctx,#4] + orr r7,r7,r10,lsl#24 + str r6,[$ctx,#8] + and r7,r7,r3 + str r7,[$ctx,#12] +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + stmia r2,{r11,r12} @ fill functions table + mov r0,#1 +#else + mov r0,#0 +#endif +.Lno_key: + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + ret @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size poly1305_init,.-poly1305_init +___ +{ +my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12)); +my ($s1,$s2,$s3)=($r1,$r2,$r3); + +$code.=<<___; +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: +.Lpoly1305_blocks: + stmdb sp!,{r3-r11,lr} + + ands $len,$len,#-16 + beq .Lno_data + + add $len,$len,$inp @ end pointer + sub sp,sp,#32 + +#if __ARM_ARCH__<7 + ldmia $ctx,{$h0-$r3} @ load context + add $ctx,$ctx,#20 + str $len,[sp,#16] @ offload stuff + str $ctx,[sp,#12] +#else + ldr lr,[$ctx,#36] @ is_base2_26 + ldmia $ctx!,{$h0-$h4} @ load hash value + str $len,[sp,#16] @ offload stuff + str $ctx,[sp,#12] + + adds $r0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32 + mov $r1,$h1,lsr#6 + adcs $r1,$r1,$h2,lsl#20 + mov $r2,$h2,lsr#12 + adcs $r2,$r2,$h3,lsl#14 + mov $r3,$h3,lsr#18 + adcs $r3,$r3,$h4,lsl#8 + mov $len,#0 + teq lr,#0 + str $len,[$ctx,#16] @ clear is_base2_26 + adc $len,$len,$h4,lsr#24 + + itttt ne + movne $h0,$r0 @ choose between radixes + movne $h1,$r1 + movne $h2,$r2 + movne $h3,$r3 + ldmia $ctx,{$r0-$r3} @ load key + it ne + movne $h4,$len +#endif + + mov lr,$inp + cmp $padbit,#0 + str $r1,[sp,#20] + str $r2,[sp,#24] + str $r3,[sp,#28] + b .Loop + +.align 4 +.Loop: +#if __ARM_ARCH__<7 + ldrb r0,[lr],#16 @ load input +# ifdef __thumb2__ + it hi +# endif + addhi $h4,$h4,#1 @ 1<<128 + ldrb r1,[lr,#-15] + ldrb r2,[lr,#-14] + ldrb r3,[lr,#-13] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-12] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-11] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-10] + adds $h0,$h0,r3 @ accumulate input + + ldrb r3,[lr,#-9] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-8] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-7] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-6] + adcs $h1,$h1,r3 + + ldrb r3,[lr,#-5] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-4] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-3] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-2] + adcs $h2,$h2,r3 + + ldrb r3,[lr,#-1] + orr r1,r0,r1,lsl#8 + str lr,[sp,#8] @ offload input pointer + orr r2,r1,r2,lsl#16 + add $s1,$r1,$r1,lsr#2 + orr r3,r2,r3,lsl#24 +#else + ldr r0,[lr],#16 @ load input + it hi + addhi $h4,$h4,#1 @ padbit + ldr r1,[lr,#-12] + ldr r2,[lr,#-8] + ldr r3,[lr,#-4] +# ifdef __ARMEB__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +# endif + adds $h0,$h0,r0 @ accumulate input + str lr,[sp,#8] @ offload input pointer + adcs $h1,$h1,r1 + add $s1,$r1,$r1,lsr#2 + adcs $h2,$h2,r2 +#endif + add $s2,$r2,$r2,lsr#2 + adcs $h3,$h3,r3 + add $s3,$r3,$r3,lsr#2 + + umull r2,r3,$h1,$r0 + adc $h4,$h4,#0 + umull r0,r1,$h0,$r0 + umlal r2,r3,$h4,$s1 + umlal r0,r1,$h3,$s1 + ldr $r1,[sp,#20] @ reload $r1 + umlal r2,r3,$h2,$s3 + umlal r0,r1,$h1,$s3 + umlal r2,r3,$h3,$s2 + umlal r0,r1,$h2,$s2 + umlal r2,r3,$h0,$r1 + str r0,[sp,#0] @ future $h0 + mul r0,$s2,$h4 + ldr $r2,[sp,#24] @ reload $r2 + adds r2,r2,r1 @ d1+=d0>>32 + eor r1,r1,r1 + adc lr,r3,#0 @ future $h2 + str r2,[sp,#4] @ future $h1 + + mul r2,$s3,$h4 + eor r3,r3,r3 + umlal r0,r1,$h3,$s3 + ldr $r3,[sp,#28] @ reload $r3 + umlal r2,r3,$h3,$r0 + umlal r0,r1,$h2,$r0 + umlal r2,r3,$h2,$r1 + umlal r0,r1,$h1,$r1 + umlal r2,r3,$h1,$r2 + umlal r0,r1,$h0,$r2 + umlal r2,r3,$h0,$r3 + ldr $h0,[sp,#0] + mul $h4,$r0,$h4 + ldr $h1,[sp,#4] + + adds $h2,lr,r0 @ d2+=d1>>32 + ldr lr,[sp,#8] @ reload input pointer + adc r1,r1,#0 + adds $h3,r2,r1 @ d3+=d2>>32 + ldr r0,[sp,#16] @ reload end pointer + adc r3,r3,#0 + add $h4,$h4,r3 @ h4+=d3>>32 + + and r1,$h4,#-4 + and $h4,$h4,#3 + add r1,r1,r1,lsr#2 @ *=5 + adds $h0,$h0,r1 + adcs $h1,$h1,#0 + adcs $h2,$h2,#0 + adcs $h3,$h3,#0 + adc $h4,$h4,#0 + + cmp r0,lr @ done yet? + bhi .Loop + + ldr $ctx,[sp,#12] + add sp,sp,#32 + stmdb $ctx,{$h0-$h4} @ store the result + +.Lno_data: +#if __ARM_ARCH__>=5 + ldmia sp!,{r3-r11,pc} +#else + ldmia sp!,{r3-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size poly1305_blocks,.-poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce)=map("r$_",(0..2)); +my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11)); +my $g4=$ctx; + +$code.=<<___; +.type poly1305_emit,%function +.align 5 +poly1305_emit: +.Lpoly1305_emit: + stmdb sp!,{r4-r11} + + ldmia $ctx,{$h0-$h4} + +#if __ARM_ARCH__>=7 + ldr ip,[$ctx,#36] @ is_base2_26 + + adds $g0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32 + mov $g1,$h1,lsr#6 + adcs $g1,$g1,$h2,lsl#20 + mov $g2,$h2,lsr#12 + adcs $g2,$g2,$h3,lsl#14 + mov $g3,$h3,lsr#18 + adcs $g3,$g3,$h4,lsl#8 + mov $g4,#0 + adc $g4,$g4,$h4,lsr#24 + + tst ip,ip + itttt ne + movne $h0,$g0 + movne $h1,$g1 + movne $h2,$g2 + movne $h3,$g3 + it ne + movne $h4,$g4 +#endif + + adds $g0,$h0,#5 @ compare to modulus + adcs $g1,$h1,#0 + adcs $g2,$h2,#0 + adcs $g3,$h3,#0 + adc $g4,$h4,#0 + tst $g4,#4 @ did it carry/borrow? + +#ifdef __thumb2__ + it ne +#endif + movne $h0,$g0 + ldr $g0,[$nonce,#0] +#ifdef __thumb2__ + it ne +#endif + movne $h1,$g1 + ldr $g1,[$nonce,#4] +#ifdef __thumb2__ + it ne +#endif + movne $h2,$g2 + ldr $g2,[$nonce,#8] +#ifdef __thumb2__ + it ne +#endif + movne $h3,$g3 + ldr $g3,[$nonce,#12] + + adds $h0,$h0,$g0 + adcs $h1,$h1,$g1 + adcs $h2,$h2,$g2 + adc $h3,$h3,$g3 + +#if __ARM_ARCH__>=7 +# ifdef __ARMEB__ + rev $h0,$h0 + rev $h1,$h1 + rev $h2,$h2 + rev $h3,$h3 +# endif + str $h0,[$mac,#0] + str $h1,[$mac,#4] + str $h2,[$mac,#8] + str $h3,[$mac,#12] +#else + strb $h0,[$mac,#0] + mov $h0,$h0,lsr#8 + strb $h1,[$mac,#4] + mov $h1,$h1,lsr#8 + strb $h2,[$mac,#8] + mov $h2,$h2,lsr#8 + strb $h3,[$mac,#12] + mov $h3,$h3,lsr#8 + + strb $h0,[$mac,#1] + mov $h0,$h0,lsr#8 + strb $h1,[$mac,#5] + mov $h1,$h1,lsr#8 + strb $h2,[$mac,#9] + mov $h2,$h2,lsr#8 + strb $h3,[$mac,#13] + mov $h3,$h3,lsr#8 + + strb $h0,[$mac,#2] + mov $h0,$h0,lsr#8 + strb $h1,[$mac,#6] + mov $h1,$h1,lsr#8 + strb $h2,[$mac,#10] + mov $h2,$h2,lsr#8 + strb $h3,[$mac,#14] + mov $h3,$h3,lsr#8 + + strb $h0,[$mac,#3] + strb $h1,[$mac,#7] + strb $h2,[$mac,#11] + strb $h3,[$mac,#15] +#endif + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + ret @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size poly1305_emit,.-poly1305_emit +___ +{ +my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9)); +my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14)); +my ($T0,$T1,$MASK) = map("q$_",(15,4,0)); + +my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7)); + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.fpu neon + +.type poly1305_init_neon,%function +.align 5 +poly1305_init_neon: +.Lpoly1305_init_neon: + ldr r3,[$ctx,#48] @ first table element + cmp r3,#-1 @ is value impossible? + bne .Lno_init_neon + + ldr r4,[$ctx,#20] @ load key base 2^32 + ldr r5,[$ctx,#24] + ldr r6,[$ctx,#28] + ldr r7,[$ctx,#32] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + and r3,r3,#0x03ffffff + and r4,r4,#0x03ffffff + and r5,r5,#0x03ffffff + + vdup.32 $R0,r2 @ r^1 in both lanes + add r2,r3,r3,lsl#2 @ *5 + vdup.32 $R1,r3 + add r3,r4,r4,lsl#2 + vdup.32 $S1,r2 + vdup.32 $R2,r4 + add r4,r5,r5,lsl#2 + vdup.32 $S2,r3 + vdup.32 $R3,r5 + add r5,r6,r6,lsl#2 + vdup.32 $S3,r4 + vdup.32 $R4,r6 + vdup.32 $S4,r5 + + mov $zeros,#2 @ counter + +.Lsquare_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + + vmull.u32 $D0,$R0,${R0}[1] + vmull.u32 $D1,$R1,${R0}[1] + vmull.u32 $D2,$R2,${R0}[1] + vmull.u32 $D3,$R3,${R0}[1] + vmull.u32 $D4,$R4,${R0}[1] + + vmlal.u32 $D0,$R4,${S1}[1] + vmlal.u32 $D1,$R0,${R1}[1] + vmlal.u32 $D2,$R1,${R1}[1] + vmlal.u32 $D3,$R2,${R1}[1] + vmlal.u32 $D4,$R3,${R1}[1] + + vmlal.u32 $D0,$R3,${S2}[1] + vmlal.u32 $D1,$R4,${S2}[1] + vmlal.u32 $D3,$R1,${R2}[1] + vmlal.u32 $D2,$R0,${R2}[1] + vmlal.u32 $D4,$R2,${R2}[1] + + vmlal.u32 $D0,$R2,${S3}[1] + vmlal.u32 $D3,$R0,${R3}[1] + vmlal.u32 $D1,$R3,${S3}[1] + vmlal.u32 $D2,$R4,${S3}[1] + vmlal.u32 $D4,$R1,${R3}[1] + + vmlal.u32 $D3,$R4,${S4}[1] + vmlal.u32 $D0,$R1,${S4}[1] + vmlal.u32 $D1,$R2,${S4}[1] + vmlal.u32 $D2,$R3,${S4}[1] + vmlal.u32 $D4,$R0,${R4}[1] + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + @ and P. Schwabe + @ + @ H0>>+H1>>+H2>>+H3>>+H4 + @ H3>>+H4>>*5+H0>>+H1 + @ + @ Trivia. + @ + @ Result of multiplication of n-bit number by m-bit number is + @ n+m bits wide. However! Even though 2^n is a n+1-bit number, + @ m-bit number multiplied by 2^n is still n+m bits wide. + @ + @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2, + @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit + @ one is n+1 bits wide. + @ + @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that + @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4 + @ can be 27. However! In cases when their width exceeds 26 bits + @ they are limited by 2^26+2^6. This in turn means that *sum* + @ of the products with these values can still be viewed as sum + @ of 52-bit numbers as long as the amount of addends is not a + @ power of 2. For example, + @ + @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4, + @ + @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or + @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than + @ 8 * (2^52) or 2^55. However, the value is then multiplied by + @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12), + @ which is less than 32 * (2^52) or 2^57. And when processing + @ data we are looking at triple as many addends... + @ + @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and + @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the + @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while + @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32 + @ instruction accepts 2x32-bit input and writes 2x64-bit result. + @ This means that result of reduction have to be compressed upon + @ loop wrap-around. This can be done in the process of reduction + @ to minimize amount of instructions [as well as amount of + @ 128-bit instructions, which benefits low-end processors], but + @ one has to watch for H2 (which is narrower than H0) and 5*H4 + @ not being wider than 58 bits, so that result of right shift + @ by 26 bits fits in 32 bits. This is also useful on x86, + @ because it allows to use paddd in place for paddq, which + @ benefits Atom, where paddq is ridiculously slow. + + vshr.u64 $T0,$D3,#26 + vmovn.i64 $D3#lo,$D3 + vshr.u64 $T1,$D0,#26 + vmovn.i64 $D0#lo,$D0 + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 + vbic.i32 $D3#lo,#0xfc000000 @ &=0x03ffffff + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 + vbic.i32 $D0#lo,#0xfc000000 + + vshrn.u64 $T0#lo,$D4,#26 + vmovn.i64 $D4#lo,$D4 + vshr.u64 $T1,$D1,#26 + vmovn.i64 $D1#lo,$D1 + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 + vbic.i32 $D4#lo,#0xfc000000 + vbic.i32 $D1#lo,#0xfc000000 + + vadd.i32 $D0#lo,$D0#lo,$T0#lo + vshl.u32 $T0#lo,$T0#lo,#2 + vshrn.u64 $T1#lo,$D2,#26 + vmovn.i64 $D2#lo,$D2 + vadd.i32 $D0#lo,$D0#lo,$T0#lo @ h4 -> h0 + vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3 + vbic.i32 $D2#lo,#0xfc000000 + + vshr.u32 $T0#lo,$D0#lo,#26 + vbic.i32 $D0#lo,#0xfc000000 + vshr.u32 $T1#lo,$D3#lo,#26 + vbic.i32 $D3#lo,#0xfc000000 + vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1 + vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4 + + subs $zeros,$zeros,#1 + beq .Lsquare_break_neon + + add $tbl0,$ctx,#(48+0*9*4) + add $tbl1,$ctx,#(48+1*9*4) + + vtrn.32 $R0,$D0#lo @ r^2:r^1 + vtrn.32 $R2,$D2#lo + vtrn.32 $R3,$D3#lo + vtrn.32 $R1,$D1#lo + vtrn.32 $R4,$D4#lo + + vshl.u32 $S2,$R2,#2 @ *5 + vshl.u32 $S3,$R3,#2 + vshl.u32 $S1,$R1,#2 + vshl.u32 $S4,$R4,#2 + vadd.i32 $S2,$S2,$R2 + vadd.i32 $S1,$S1,$R1 + vadd.i32 $S3,$S3,$R3 + vadd.i32 $S4,$S4,$R4 + + vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! + vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! + vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vst1.32 {${S4}[0]},[$tbl0,:32] + vst1.32 {${S4}[1]},[$tbl1,:32] + + b .Lsquare_neon + +.align 4 +.Lsquare_break_neon: + add $tbl0,$ctx,#(48+2*4*9) + add $tbl1,$ctx,#(48+3*4*9) + + vmov $R0,$D0#lo @ r^4:r^3 + vshl.u32 $S1,$D1#lo,#2 @ *5 + vmov $R1,$D1#lo + vshl.u32 $S2,$D2#lo,#2 + vmov $R2,$D2#lo + vshl.u32 $S3,$D3#lo,#2 + vmov $R3,$D3#lo + vshl.u32 $S4,$D4#lo,#2 + vmov $R4,$D4#lo + vadd.i32 $S1,$S1,$D1#lo + vadd.i32 $S2,$S2,$D2#lo + vadd.i32 $S3,$S3,$D3#lo + vadd.i32 $S4,$S4,$D4#lo + + vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! + vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! + vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vst1.32 {${S4}[0]},[$tbl0] + vst1.32 {${S4}[1]},[$tbl1] + +.Lno_init_neon: + ret @ bx lr +.size poly1305_init_neon,.-poly1305_init_neon + +.type poly1305_blocks_neon,%function +.align 5 +poly1305_blocks_neon: +.Lpoly1305_blocks_neon: + ldr ip,[$ctx,#36] @ is_base2_26 + + cmp $len,#64 + blo .Lpoly1305_blocks + + stmdb sp!,{r4-r7} + vstmdb sp!,{d8-d15} @ ABI specification says so + + tst ip,ip @ is_base2_26? + bne .Lbase2_26_neon + + stmdb sp!,{r1-r3,lr} + bl .Lpoly1305_init_neon + + ldr r4,[$ctx,#0] @ load hash value base 2^32 + ldr r5,[$ctx,#4] + ldr r6,[$ctx,#8] + ldr r7,[$ctx,#12] + ldr ip,[$ctx,#16] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + veor $D0#lo,$D0#lo,$D0#lo + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + veor $D1#lo,$D1#lo,$D1#lo + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + veor $D2#lo,$D2#lo,$D2#lo + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + veor $D3#lo,$D3#lo,$D3#lo + and r3,r3,#0x03ffffff + orr r6,r6,ip,lsl#24 + veor $D4#lo,$D4#lo,$D4#lo + and r4,r4,#0x03ffffff + mov r1,#1 + and r5,r5,#0x03ffffff + str r1,[$ctx,#36] @ set is_base2_26 + + vmov.32 $D0#lo[0],r2 + vmov.32 $D1#lo[0],r3 + vmov.32 $D2#lo[0],r4 + vmov.32 $D3#lo[0],r5 + vmov.32 $D4#lo[0],r6 + adr $zeros,.Lzeros + + ldmia sp!,{r1-r3,lr} + b .Lhash_loaded + +.align 4 +.Lbase2_26_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ load hash value + + veor $D0#lo,$D0#lo,$D0#lo + veor $D1#lo,$D1#lo,$D1#lo + veor $D2#lo,$D2#lo,$D2#lo + veor $D3#lo,$D3#lo,$D3#lo + veor $D4#lo,$D4#lo,$D4#lo + vld4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]! + adr $zeros,.Lzeros + vld1.32 {$D4#lo[0]},[$ctx] + sub $ctx,$ctx,#16 @ rewind + +.Lhash_loaded: + add $in2,$inp,#32 + mov $padbit,$padbit,lsl#24 + tst $len,#31 + beq .Leven + + vld4.32 {$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]! + vmov.32 $H4#lo[0],$padbit + sub $len,$len,#16 + add $in2,$inp,#32 + +# ifdef __ARMEB__ + vrev32.8 $H0,$H0 + vrev32.8 $H3,$H3 + vrev32.8 $H1,$H1 + vrev32.8 $H2,$H2 +# endif + vsri.u32 $H4#lo,$H3#lo,#8 @ base 2^32 -> base 2^26 + vshl.u32 $H3#lo,$H3#lo,#18 + + vsri.u32 $H3#lo,$H2#lo,#14 + vshl.u32 $H2#lo,$H2#lo,#12 + vadd.i32 $H4#hi,$H4#lo,$D4#lo @ add hash value and move to #hi + + vbic.i32 $H3#lo,#0xfc000000 + vsri.u32 $H2#lo,$H1#lo,#20 + vshl.u32 $H1#lo,$H1#lo,#6 + + vbic.i32 $H2#lo,#0xfc000000 + vsri.u32 $H1#lo,$H0#lo,#26 + vadd.i32 $H3#hi,$H3#lo,$D3#lo + + vbic.i32 $H0#lo,#0xfc000000 + vbic.i32 $H1#lo,#0xfc000000 + vadd.i32 $H2#hi,$H2#lo,$D2#lo + + vadd.i32 $H0#hi,$H0#lo,$D0#lo + vadd.i32 $H1#hi,$H1#lo,$D1#lo + + mov $tbl1,$zeros + add $tbl0,$ctx,#48 + + cmp $len,$len + b .Long_tail + +.align 4 +.Leven: + subs $len,$len,#64 + it lo + movlo $in2,$zeros + + vmov.i32 $H4,#1<<24 @ padbit, yes, always + vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1] + add $inp,$inp,#64 + vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0) + add $in2,$in2,#64 + itt hi + addhi $tbl1,$ctx,#(48+1*9*4) + addhi $tbl0,$ctx,#(48+3*9*4) + +# ifdef __ARMEB__ + vrev32.8 $H0,$H0 + vrev32.8 $H3,$H3 + vrev32.8 $H1,$H1 + vrev32.8 $H2,$H2 +# endif + vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26 + vshl.u32 $H3,$H3,#18 + + vsri.u32 $H3,$H2,#14 + vshl.u32 $H2,$H2,#12 + + vbic.i32 $H3,#0xfc000000 + vsri.u32 $H2,$H1,#20 + vshl.u32 $H1,$H1,#6 + + vbic.i32 $H2,#0xfc000000 + vsri.u32 $H1,$H0,#26 + + vbic.i32 $H0,#0xfc000000 + vbic.i32 $H1,#0xfc000000 + + bls .Lskip_loop + + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^2 + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4 + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + b .Loop_neon + +.align 5 +.Loop_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + @ \___________________/ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + @ \___________________/ \____________________/ + @ + @ Note that we start with inp[2:3]*r^2. This is because it + @ doesn't depend on reduction in previous iteration. + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ inp[2:3]*r^2 + + vadd.i32 $H2#lo,$H2#lo,$D2#lo @ accumulate inp[0:1] + vmull.u32 $D2,$H2#hi,${R0}[1] + vadd.i32 $H0#lo,$H0#lo,$D0#lo + vmull.u32 $D0,$H0#hi,${R0}[1] + vadd.i32 $H3#lo,$H3#lo,$D3#lo + vmull.u32 $D3,$H3#hi,${R0}[1] + vmlal.u32 $D2,$H1#hi,${R1}[1] + vadd.i32 $H1#lo,$H1#lo,$D1#lo + vmull.u32 $D1,$H1#hi,${R0}[1] + + vadd.i32 $H4#lo,$H4#lo,$D4#lo + vmull.u32 $D4,$H4#hi,${R0}[1] + subs $len,$len,#64 + vmlal.u32 $D0,$H4#hi,${S1}[1] + it lo + movlo $in2,$zeros + vmlal.u32 $D3,$H2#hi,${R1}[1] + vld1.32 ${S4}[1],[$tbl1,:32] + vmlal.u32 $D1,$H0#hi,${R1}[1] + vmlal.u32 $D4,$H3#hi,${R1}[1] + + vmlal.u32 $D0,$H3#hi,${S2}[1] + vmlal.u32 $D3,$H1#hi,${R2}[1] + vmlal.u32 $D4,$H2#hi,${R2}[1] + vmlal.u32 $D1,$H4#hi,${S2}[1] + vmlal.u32 $D2,$H0#hi,${R2}[1] + + vmlal.u32 $D3,$H0#hi,${R3}[1] + vmlal.u32 $D0,$H2#hi,${S3}[1] + vmlal.u32 $D4,$H1#hi,${R3}[1] + vmlal.u32 $D1,$H3#hi,${S3}[1] + vmlal.u32 $D2,$H4#hi,${S3}[1] + + vmlal.u32 $D3,$H4#hi,${S4}[1] + vmlal.u32 $D0,$H1#hi,${S4}[1] + vmlal.u32 $D4,$H0#hi,${R4}[1] + vmlal.u32 $D1,$H2#hi,${S4}[1] + vmlal.u32 $D2,$H3#hi,${S4}[1] + + vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0) + add $in2,$in2,#64 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4 and accumulate + + vmlal.u32 $D3,$H3#lo,${R0}[0] + vmlal.u32 $D0,$H0#lo,${R0}[0] + vmlal.u32 $D4,$H4#lo,${R0}[0] + vmlal.u32 $D1,$H1#lo,${R0}[0] + vmlal.u32 $D2,$H2#lo,${R0}[0] + vld1.32 ${S4}[0],[$tbl0,:32] + + vmlal.u32 $D3,$H2#lo,${R1}[0] + vmlal.u32 $D0,$H4#lo,${S1}[0] + vmlal.u32 $D4,$H3#lo,${R1}[0] + vmlal.u32 $D1,$H0#lo,${R1}[0] + vmlal.u32 $D2,$H1#lo,${R1}[0] + + vmlal.u32 $D3,$H1#lo,${R2}[0] + vmlal.u32 $D0,$H3#lo,${S2}[0] + vmlal.u32 $D4,$H2#lo,${R2}[0] + vmlal.u32 $D1,$H4#lo,${S2}[0] + vmlal.u32 $D2,$H0#lo,${R2}[0] + + vmlal.u32 $D3,$H0#lo,${R3}[0] + vmlal.u32 $D0,$H2#lo,${S3}[0] + vmlal.u32 $D4,$H1#lo,${R3}[0] + vmlal.u32 $D1,$H3#lo,${S3}[0] + vmlal.u32 $D3,$H4#lo,${S4}[0] + + vmlal.u32 $D2,$H4#lo,${S3}[0] + vmlal.u32 $D0,$H1#lo,${S4}[0] + vmlal.u32 $D4,$H0#lo,${R4}[0] + vmov.i32 $H4,#1<<24 @ padbit, yes, always + vmlal.u32 $D1,$H2#lo,${S4}[0] + vmlal.u32 $D2,$H3#lo,${S4}[0] + + vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1] + add $inp,$inp,#64 +# ifdef __ARMEB__ + vrev32.8 $H0,$H0 + vrev32.8 $H1,$H1 + vrev32.8 $H2,$H2 + vrev32.8 $H3,$H3 +# endif + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction interleaved with base 2^32 -> base 2^26 of + @ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4. + + vshr.u64 $T0,$D3,#26 + vmovn.i64 $D3#lo,$D3 + vshr.u64 $T1,$D0,#26 + vmovn.i64 $D0#lo,$D0 + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 + vbic.i32 $D3#lo,#0xfc000000 + vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26 + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 + vshl.u32 $H3,$H3,#18 + vbic.i32 $D0#lo,#0xfc000000 + + vshrn.u64 $T0#lo,$D4,#26 + vmovn.i64 $D4#lo,$D4 + vshr.u64 $T1,$D1,#26 + vmovn.i64 $D1#lo,$D1 + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 + vsri.u32 $H3,$H2,#14 + vbic.i32 $D4#lo,#0xfc000000 + vshl.u32 $H2,$H2,#12 + vbic.i32 $D1#lo,#0xfc000000 + + vadd.i32 $D0#lo,$D0#lo,$T0#lo + vshl.u32 $T0#lo,$T0#lo,#2 + vbic.i32 $H3,#0xfc000000 + vshrn.u64 $T1#lo,$D2,#26 + vmovn.i64 $D2#lo,$D2 + vaddl.u32 $D0,$D0#lo,$T0#lo @ h4 -> h0 [widen for a sec] + vsri.u32 $H2,$H1,#20 + vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3 + vshl.u32 $H1,$H1,#6 + vbic.i32 $D2#lo,#0xfc000000 + vbic.i32 $H2,#0xfc000000 + + vshrn.u64 $T0#lo,$D0,#26 @ re-narrow + vmovn.i64 $D0#lo,$D0 + vsri.u32 $H1,$H0,#26 + vbic.i32 $H0,#0xfc000000 + vshr.u32 $T1#lo,$D3#lo,#26 + vbic.i32 $D3#lo,#0xfc000000 + vbic.i32 $D0#lo,#0xfc000000 + vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1 + vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4 + vbic.i32 $H1,#0xfc000000 + + bhi .Loop_neon + +.Lskip_loop: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + add $tbl1,$ctx,#(48+0*9*4) + add $tbl0,$ctx,#(48+1*9*4) + adds $len,$len,#32 + it ne + movne $len,#0 + bne .Long_tail + + vadd.i32 $H2#hi,$H2#lo,$D2#lo @ add hash value and move to #hi + vadd.i32 $H0#hi,$H0#lo,$D0#lo + vadd.i32 $H3#hi,$H3#lo,$D3#lo + vadd.i32 $H1#hi,$H1#lo,$D1#lo + vadd.i32 $H4#hi,$H4#lo,$D4#lo + +.Long_tail: + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^1 + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^2 + + vadd.i32 $H2#lo,$H2#lo,$D2#lo @ can be redundant + vmull.u32 $D2,$H2#hi,$R0 + vadd.i32 $H0#lo,$H0#lo,$D0#lo + vmull.u32 $D0,$H0#hi,$R0 + vadd.i32 $H3#lo,$H3#lo,$D3#lo + vmull.u32 $D3,$H3#hi,$R0 + vadd.i32 $H1#lo,$H1#lo,$D1#lo + vmull.u32 $D1,$H1#hi,$R0 + vadd.i32 $H4#lo,$H4#lo,$D4#lo + vmull.u32 $D4,$H4#hi,$R0 + + vmlal.u32 $D0,$H4#hi,$S1 + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vmlal.u32 $D3,$H2#hi,$R1 + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vmlal.u32 $D1,$H0#hi,$R1 + vmlal.u32 $D4,$H3#hi,$R1 + vmlal.u32 $D2,$H1#hi,$R1 + + vmlal.u32 $D3,$H1#hi,$R2 + vld1.32 ${S4}[1],[$tbl1,:32] + vmlal.u32 $D0,$H3#hi,$S2 + vld1.32 ${S4}[0],[$tbl0,:32] + vmlal.u32 $D4,$H2#hi,$R2 + vmlal.u32 $D1,$H4#hi,$S2 + vmlal.u32 $D2,$H0#hi,$R2 + + vmlal.u32 $D3,$H0#hi,$R3 + it ne + addne $tbl1,$ctx,#(48+2*9*4) + vmlal.u32 $D0,$H2#hi,$S3 + it ne + addne $tbl0,$ctx,#(48+3*9*4) + vmlal.u32 $D4,$H1#hi,$R3 + vmlal.u32 $D1,$H3#hi,$S3 + vmlal.u32 $D2,$H4#hi,$S3 + + vmlal.u32 $D3,$H4#hi,$S4 + vorn $MASK,$MASK,$MASK @ all-ones, can be redundant + vmlal.u32 $D0,$H1#hi,$S4 + vshr.u64 $MASK,$MASK,#38 + vmlal.u32 $D4,$H0#hi,$R4 + vmlal.u32 $D1,$H2#hi,$S4 + vmlal.u32 $D2,$H3#hi,$S4 + + beq .Lshort_tail + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4:r^3 and accumulate + + vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^3 + vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4 + + vmlal.u32 $D2,$H2#lo,$R0 + vmlal.u32 $D0,$H0#lo,$R0 + vmlal.u32 $D3,$H3#lo,$R0 + vmlal.u32 $D1,$H1#lo,$R0 + vmlal.u32 $D4,$H4#lo,$R0 + + vmlal.u32 $D0,$H4#lo,$S1 + vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]! + vmlal.u32 $D3,$H2#lo,$R1 + vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]! + vmlal.u32 $D1,$H0#lo,$R1 + vmlal.u32 $D4,$H3#lo,$R1 + vmlal.u32 $D2,$H1#lo,$R1 + + vmlal.u32 $D3,$H1#lo,$R2 + vld1.32 ${S4}[1],[$tbl1,:32] + vmlal.u32 $D0,$H3#lo,$S2 + vld1.32 ${S4}[0],[$tbl0,:32] + vmlal.u32 $D4,$H2#lo,$R2 + vmlal.u32 $D1,$H4#lo,$S2 + vmlal.u32 $D2,$H0#lo,$R2 + + vmlal.u32 $D3,$H0#lo,$R3 + vmlal.u32 $D0,$H2#lo,$S3 + vmlal.u32 $D4,$H1#lo,$R3 + vmlal.u32 $D1,$H3#lo,$S3 + vmlal.u32 $D2,$H4#lo,$S3 + + vmlal.u32 $D3,$H4#lo,$S4 + vorn $MASK,$MASK,$MASK @ all-ones + vmlal.u32 $D0,$H1#lo,$S4 + vshr.u64 $MASK,$MASK,#38 + vmlal.u32 $D4,$H0#lo,$R4 + vmlal.u32 $D1,$H2#lo,$S4 + vmlal.u32 $D2,$H3#lo,$S4 + +.Lshort_tail: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ horizontal addition + + vadd.i64 $D3#lo,$D3#lo,$D3#hi + vadd.i64 $D0#lo,$D0#lo,$D0#hi + vadd.i64 $D4#lo,$D4#lo,$D4#hi + vadd.i64 $D1#lo,$D1#lo,$D1#hi + vadd.i64 $D2#lo,$D2#lo,$D2#hi + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction, but without narrowing + + vshr.u64 $T0,$D3,#26 + vand.i64 $D3,$D3,$MASK + vshr.u64 $T1,$D0,#26 + vand.i64 $D0,$D0,$MASK + vadd.i64 $D4,$D4,$T0 @ h3 -> h4 + vadd.i64 $D1,$D1,$T1 @ h0 -> h1 + + vshr.u64 $T0,$D4,#26 + vand.i64 $D4,$D4,$MASK + vshr.u64 $T1,$D1,#26 + vand.i64 $D1,$D1,$MASK + vadd.i64 $D2,$D2,$T1 @ h1 -> h2 + + vadd.i64 $D0,$D0,$T0 + vshl.u64 $T0,$T0,#2 + vshr.u64 $T1,$D2,#26 + vand.i64 $D2,$D2,$MASK + vadd.i64 $D0,$D0,$T0 @ h4 -> h0 + vadd.i64 $D3,$D3,$T1 @ h2 -> h3 + + vshr.u64 $T0,$D0,#26 + vand.i64 $D0,$D0,$MASK + vshr.u64 $T1,$D3,#26 + vand.i64 $D3,$D3,$MASK + vadd.i64 $D1,$D1,$T0 @ h0 -> h1 + vadd.i64 $D4,$D4,$T1 @ h3 -> h4 + + cmp $len,#0 + bne .Leven + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ store hash value + + vst4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]! + vst1.32 {$D4#lo[0]},[$ctx] + + vldmia sp!,{d8-d15} @ epilogue + ldmia sp!,{r4-r7} + ret @ bx lr +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +#ifndef __KERNEL__ +.LOPENSSL_armcap: +# ifdef _WIN32 +.word OPENSSL_armcap_P +# else +.word OPENSSL_armcap_P-.Lpoly1305_init +# endif +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif +#endif +___ +} } +$code.=<<___; +.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by \@dot-asm" +.align 2 +___ + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} +close STDOUT; # enforce flush diff --git a/arch/arm/crypto/poly1305-core.S_shipped b/arch/arm/crypto/poly1305-core.S_shipped new file mode 100644 index 000000000000..37b71d990293 --- /dev/null +++ b/arch/arm/crypto/poly1305-core.S_shipped @@ -0,0 +1,1158 @@ +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ +# define poly1305_init poly1305_init_arm +# define poly1305_blocks poly1305_blocks_arm +# define poly1305_emit poly1305_emit_arm +.globl poly1305_blocks_neon +#endif + +#if defined(__thumb2__) +.syntax unified +.thumb +#else +.code 32 +#endif + +.text + +.globl poly1305_emit +.globl poly1305_blocks +.globl poly1305_init +.type poly1305_init,%function +.align 5 +poly1305_init: +.Lpoly1305_init: + stmdb sp!,{r4-r11} + + eor r3,r3,r3 + cmp r1,#0 + str r3,[r0,#0] @ zero hash value + str r3,[r0,#4] + str r3,[r0,#8] + str r3,[r0,#12] + str r3,[r0,#16] + str r3,[r0,#36] @ clear is_base2_26 + add r0,r0,#20 + +#ifdef __thumb2__ + it eq +#endif + moveq r0,#0 + beq .Lno_key + +#if __ARM_MAX_ARCH__>=7 + mov r3,#-1 + str r3,[r0,#28] @ impossible key power value +# ifndef __KERNEL__ + adr r11,.Lpoly1305_init + ldr r12,.LOPENSSL_armcap +# endif +#endif + ldrb r4,[r1,#0] + mov r10,#0x0fffffff + ldrb r5,[r1,#1] + and r3,r10,#-4 @ 0x0ffffffc + ldrb r6,[r1,#2] + ldrb r7,[r1,#3] + orr r4,r4,r5,lsl#8 + ldrb r5,[r1,#4] + orr r4,r4,r6,lsl#16 + ldrb r6,[r1,#5] + orr r4,r4,r7,lsl#24 + ldrb r7,[r1,#6] + and r4,r4,r10 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +# if !defined(_WIN32) + ldr r12,[r11,r12] @ OPENSSL_armcap_P +# endif +# if defined(__APPLE__) || defined(_WIN32) + ldr r12,[r12] +# endif +#endif + ldrb r8,[r1,#7] + orr r5,r5,r6,lsl#8 + ldrb r6,[r1,#8] + orr r5,r5,r7,lsl#16 + ldrb r7,[r1,#9] + orr r5,r5,r8,lsl#24 + ldrb r8,[r1,#10] + and r5,r5,r3 + +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + tst r12,#ARMV7_NEON @ check for NEON +# ifdef __thumb2__ + adr r9,.Lpoly1305_blocks_neon + adr r11,.Lpoly1305_blocks + it ne + movne r11,r9 + adr r12,.Lpoly1305_emit + orr r11,r11,#1 @ thumb-ify addresses + orr r12,r12,#1 +# else + add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init) + ite eq + addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init) + addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init) +# endif +#endif + ldrb r9,[r1,#11] + orr r6,r6,r7,lsl#8 + ldrb r7,[r1,#12] + orr r6,r6,r8,lsl#16 + ldrb r8,[r1,#13] + orr r6,r6,r9,lsl#24 + ldrb r9,[r1,#14] + and r6,r6,r3 + + ldrb r10,[r1,#15] + orr r7,r7,r8,lsl#8 + str r4,[r0,#0] + orr r7,r7,r9,lsl#16 + str r5,[r0,#4] + orr r7,r7,r10,lsl#24 + str r6,[r0,#8] + and r7,r7,r3 + str r7,[r0,#12] +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + stmia r2,{r11,r12} @ fill functions table + mov r0,#1 +#else + mov r0,#0 +#endif +.Lno_key: + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + bx lr @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size poly1305_init,.-poly1305_init +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: +.Lpoly1305_blocks: + stmdb sp!,{r3-r11,lr} + + ands r2,r2,#-16 + beq .Lno_data + + add r2,r2,r1 @ end pointer + sub sp,sp,#32 + +#if __ARM_ARCH__<7 + ldmia r0,{r4-r12} @ load context + add r0,r0,#20 + str r2,[sp,#16] @ offload stuff + str r0,[sp,#12] +#else + ldr lr,[r0,#36] @ is_base2_26 + ldmia r0!,{r4-r8} @ load hash value + str r2,[sp,#16] @ offload stuff + str r0,[sp,#12] + + adds r9,r4,r5,lsl#26 @ base 2^26 -> base 2^32 + mov r10,r5,lsr#6 + adcs r10,r10,r6,lsl#20 + mov r11,r6,lsr#12 + adcs r11,r11,r7,lsl#14 + mov r12,r7,lsr#18 + adcs r12,r12,r8,lsl#8 + mov r2,#0 + teq lr,#0 + str r2,[r0,#16] @ clear is_base2_26 + adc r2,r2,r8,lsr#24 + + itttt ne + movne r4,r9 @ choose between radixes + movne r5,r10 + movne r6,r11 + movne r7,r12 + ldmia r0,{r9-r12} @ load key + it ne + movne r8,r2 +#endif + + mov lr,r1 + cmp r3,#0 + str r10,[sp,#20] + str r11,[sp,#24] + str r12,[sp,#28] + b .Loop + +.align 4 +.Loop: +#if __ARM_ARCH__<7 + ldrb r0,[lr],#16 @ load input +# ifdef __thumb2__ + it hi +# endif + addhi r8,r8,#1 @ 1<<128 + ldrb r1,[lr,#-15] + ldrb r2,[lr,#-14] + ldrb r3,[lr,#-13] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-12] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-11] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-10] + adds r4,r4,r3 @ accumulate input + + ldrb r3,[lr,#-9] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-8] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-7] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-6] + adcs r5,r5,r3 + + ldrb r3,[lr,#-5] + orr r1,r0,r1,lsl#8 + ldrb r0,[lr,#-4] + orr r2,r1,r2,lsl#16 + ldrb r1,[lr,#-3] + orr r3,r2,r3,lsl#24 + ldrb r2,[lr,#-2] + adcs r6,r6,r3 + + ldrb r3,[lr,#-1] + orr r1,r0,r1,lsl#8 + str lr,[sp,#8] @ offload input pointer + orr r2,r1,r2,lsl#16 + add r10,r10,r10,lsr#2 + orr r3,r2,r3,lsl#24 +#else + ldr r0,[lr],#16 @ load input + it hi + addhi r8,r8,#1 @ padbit + ldr r1,[lr,#-12] + ldr r2,[lr,#-8] + ldr r3,[lr,#-4] +# ifdef __ARMEB__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +# endif + adds r4,r4,r0 @ accumulate input + str lr,[sp,#8] @ offload input pointer + adcs r5,r5,r1 + add r10,r10,r10,lsr#2 + adcs r6,r6,r2 +#endif + add r11,r11,r11,lsr#2 + adcs r7,r7,r3 + add r12,r12,r12,lsr#2 + + umull r2,r3,r5,r9 + adc r8,r8,#0 + umull r0,r1,r4,r9 + umlal r2,r3,r8,r10 + umlal r0,r1,r7,r10 + ldr r10,[sp,#20] @ reload r10 + umlal r2,r3,r6,r12 + umlal r0,r1,r5,r12 + umlal r2,r3,r7,r11 + umlal r0,r1,r6,r11 + umlal r2,r3,r4,r10 + str r0,[sp,#0] @ future r4 + mul r0,r11,r8 + ldr r11,[sp,#24] @ reload r11 + adds r2,r2,r1 @ d1+=d0>>32 + eor r1,r1,r1 + adc lr,r3,#0 @ future r6 + str r2,[sp,#4] @ future r5 + + mul r2,r12,r8 + eor r3,r3,r3 + umlal r0,r1,r7,r12 + ldr r12,[sp,#28] @ reload r12 + umlal r2,r3,r7,r9 + umlal r0,r1,r6,r9 + umlal r2,r3,r6,r10 + umlal r0,r1,r5,r10 + umlal r2,r3,r5,r11 + umlal r0,r1,r4,r11 + umlal r2,r3,r4,r12 + ldr r4,[sp,#0] + mul r8,r9,r8 + ldr r5,[sp,#4] + + adds r6,lr,r0 @ d2+=d1>>32 + ldr lr,[sp,#8] @ reload input pointer + adc r1,r1,#0 + adds r7,r2,r1 @ d3+=d2>>32 + ldr r0,[sp,#16] @ reload end pointer + adc r3,r3,#0 + add r8,r8,r3 @ h4+=d3>>32 + + and r1,r8,#-4 + and r8,r8,#3 + add r1,r1,r1,lsr#2 @ *=5 + adds r4,r4,r1 + adcs r5,r5,#0 + adcs r6,r6,#0 + adcs r7,r7,#0 + adc r8,r8,#0 + + cmp r0,lr @ done yet? + bhi .Loop + + ldr r0,[sp,#12] + add sp,sp,#32 + stmdb r0,{r4-r8} @ store the result + +.Lno_data: +#if __ARM_ARCH__>=5 + ldmia sp!,{r3-r11,pc} +#else + ldmia sp!,{r3-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size poly1305_blocks,.-poly1305_blocks +.type poly1305_emit,%function +.align 5 +poly1305_emit: +.Lpoly1305_emit: + stmdb sp!,{r4-r11} + + ldmia r0,{r3-r7} + +#if __ARM_ARCH__>=7 + ldr ip,[r0,#36] @ is_base2_26 + + adds r8,r3,r4,lsl#26 @ base 2^26 -> base 2^32 + mov r9,r4,lsr#6 + adcs r9,r9,r5,lsl#20 + mov r10,r5,lsr#12 + adcs r10,r10,r6,lsl#14 + mov r11,r6,lsr#18 + adcs r11,r11,r7,lsl#8 + mov r0,#0 + adc r0,r0,r7,lsr#24 + + tst ip,ip + itttt ne + movne r3,r8 + movne r4,r9 + movne r5,r10 + movne r6,r11 + it ne + movne r7,r0 +#endif + + adds r8,r3,#5 @ compare to modulus + adcs r9,r4,#0 + adcs r10,r5,#0 + adcs r11,r6,#0 + adc r0,r7,#0 + tst r0,#4 @ did it carry/borrow? + +#ifdef __thumb2__ + it ne +#endif + movne r3,r8 + ldr r8,[r2,#0] +#ifdef __thumb2__ + it ne +#endif + movne r4,r9 + ldr r9,[r2,#4] +#ifdef __thumb2__ + it ne +#endif + movne r5,r10 + ldr r10,[r2,#8] +#ifdef __thumb2__ + it ne +#endif + movne r6,r11 + ldr r11,[r2,#12] + + adds r3,r3,r8 + adcs r4,r4,r9 + adcs r5,r5,r10 + adc r6,r6,r11 + +#if __ARM_ARCH__>=7 +# ifdef __ARMEB__ + rev r3,r3 + rev r4,r4 + rev r5,r5 + rev r6,r6 +# endif + str r3,[r1,#0] + str r4,[r1,#4] + str r5,[r1,#8] + str r6,[r1,#12] +#else + strb r3,[r1,#0] + mov r3,r3,lsr#8 + strb r4,[r1,#4] + mov r4,r4,lsr#8 + strb r5,[r1,#8] + mov r5,r5,lsr#8 + strb r6,[r1,#12] + mov r6,r6,lsr#8 + + strb r3,[r1,#1] + mov r3,r3,lsr#8 + strb r4,[r1,#5] + mov r4,r4,lsr#8 + strb r5,[r1,#9] + mov r5,r5,lsr#8 + strb r6,[r1,#13] + mov r6,r6,lsr#8 + + strb r3,[r1,#2] + mov r3,r3,lsr#8 + strb r4,[r1,#6] + mov r4,r4,lsr#8 + strb r5,[r1,#10] + mov r5,r5,lsr#8 + strb r6,[r1,#14] + mov r6,r6,lsr#8 + + strb r3,[r1,#3] + strb r4,[r1,#7] + strb r5,[r1,#11] + strb r6,[r1,#15] +#endif + ldmia sp!,{r4-r11} +#if __ARM_ARCH__>=5 + bx lr @ bx lr +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size poly1305_emit,.-poly1305_emit +#if __ARM_MAX_ARCH__>=7 +.fpu neon + +.type poly1305_init_neon,%function +.align 5 +poly1305_init_neon: +.Lpoly1305_init_neon: + ldr r3,[r0,#48] @ first table element + cmp r3,#-1 @ is value impossible? + bne .Lno_init_neon + + ldr r4,[r0,#20] @ load key base 2^32 + ldr r5,[r0,#24] + ldr r6,[r0,#28] + ldr r7,[r0,#32] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + and r3,r3,#0x03ffffff + and r4,r4,#0x03ffffff + and r5,r5,#0x03ffffff + + vdup.32 d0,r2 @ r^1 in both lanes + add r2,r3,r3,lsl#2 @ *5 + vdup.32 d1,r3 + add r3,r4,r4,lsl#2 + vdup.32 d2,r2 + vdup.32 d3,r4 + add r4,r5,r5,lsl#2 + vdup.32 d4,r3 + vdup.32 d5,r5 + add r5,r6,r6,lsl#2 + vdup.32 d6,r4 + vdup.32 d7,r6 + vdup.32 d8,r5 + + mov r5,#2 @ counter + +.Lsquare_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + + vmull.u32 q5,d0,d0[1] + vmull.u32 q6,d1,d0[1] + vmull.u32 q7,d3,d0[1] + vmull.u32 q8,d5,d0[1] + vmull.u32 q9,d7,d0[1] + + vmlal.u32 q5,d7,d2[1] + vmlal.u32 q6,d0,d1[1] + vmlal.u32 q7,d1,d1[1] + vmlal.u32 q8,d3,d1[1] + vmlal.u32 q9,d5,d1[1] + + vmlal.u32 q5,d5,d4[1] + vmlal.u32 q6,d7,d4[1] + vmlal.u32 q8,d1,d3[1] + vmlal.u32 q7,d0,d3[1] + vmlal.u32 q9,d3,d3[1] + + vmlal.u32 q5,d3,d6[1] + vmlal.u32 q8,d0,d5[1] + vmlal.u32 q6,d5,d6[1] + vmlal.u32 q7,d7,d6[1] + vmlal.u32 q9,d1,d5[1] + + vmlal.u32 q8,d7,d8[1] + vmlal.u32 q5,d1,d8[1] + vmlal.u32 q6,d3,d8[1] + vmlal.u32 q7,d5,d8[1] + vmlal.u32 q9,d0,d7[1] + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + @ and P. Schwabe + @ + @ H0>>+H1>>+H2>>+H3>>+H4 + @ H3>>+H4>>*5+H0>>+H1 + @ + @ Trivia. + @ + @ Result of multiplication of n-bit number by m-bit number is + @ n+m bits wide. However! Even though 2^n is a n+1-bit number, + @ m-bit number multiplied by 2^n is still n+m bits wide. + @ + @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2, + @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit + @ one is n+1 bits wide. + @ + @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that + @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4 + @ can be 27. However! In cases when their width exceeds 26 bits + @ they are limited by 2^26+2^6. This in turn means that *sum* + @ of the products with these values can still be viewed as sum + @ of 52-bit numbers as long as the amount of addends is not a + @ power of 2. For example, + @ + @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4, + @ + @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or + @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than + @ 8 * (2^52) or 2^55. However, the value is then multiplied by + @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12), + @ which is less than 32 * (2^52) or 2^57. And when processing + @ data we are looking at triple as many addends... + @ + @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and + @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the + @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while + @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32 + @ instruction accepts 2x32-bit input and writes 2x64-bit result. + @ This means that result of reduction have to be compressed upon + @ loop wrap-around. This can be done in the process of reduction + @ to minimize amount of instructions [as well as amount of + @ 128-bit instructions, which benefits low-end processors], but + @ one has to watch for H2 (which is narrower than H0) and 5*H4 + @ not being wider than 58 bits, so that result of right shift + @ by 26 bits fits in 32 bits. This is also useful on x86, + @ because it allows to use paddd in place for paddq, which + @ benefits Atom, where paddq is ridiculously slow. + + vshr.u64 q15,q8,#26 + vmovn.i64 d16,q8 + vshr.u64 q4,q5,#26 + vmovn.i64 d10,q5 + vadd.i64 q9,q9,q15 @ h3 -> h4 + vbic.i32 d16,#0xfc000000 @ &=0x03ffffff + vadd.i64 q6,q6,q4 @ h0 -> h1 + vbic.i32 d10,#0xfc000000 + + vshrn.u64 d30,q9,#26 + vmovn.i64 d18,q9 + vshr.u64 q4,q6,#26 + vmovn.i64 d12,q6 + vadd.i64 q7,q7,q4 @ h1 -> h2 + vbic.i32 d18,#0xfc000000 + vbic.i32 d12,#0xfc000000 + + vadd.i32 d10,d10,d30 + vshl.u32 d30,d30,#2 + vshrn.u64 d8,q7,#26 + vmovn.i64 d14,q7 + vadd.i32 d10,d10,d30 @ h4 -> h0 + vadd.i32 d16,d16,d8 @ h2 -> h3 + vbic.i32 d14,#0xfc000000 + + vshr.u32 d30,d10,#26 + vbic.i32 d10,#0xfc000000 + vshr.u32 d8,d16,#26 + vbic.i32 d16,#0xfc000000 + vadd.i32 d12,d12,d30 @ h0 -> h1 + vadd.i32 d18,d18,d8 @ h3 -> h4 + + subs r5,r5,#1 + beq .Lsquare_break_neon + + add r6,r0,#(48+0*9*4) + add r7,r0,#(48+1*9*4) + + vtrn.32 d0,d10 @ r^2:r^1 + vtrn.32 d3,d14 + vtrn.32 d5,d16 + vtrn.32 d1,d12 + vtrn.32 d7,d18 + + vshl.u32 d4,d3,#2 @ *5 + vshl.u32 d6,d5,#2 + vshl.u32 d2,d1,#2 + vshl.u32 d8,d7,#2 + vadd.i32 d4,d4,d3 + vadd.i32 d2,d2,d1 + vadd.i32 d6,d6,d5 + vadd.i32 d8,d8,d7 + + vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! + vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! + vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vst1.32 {d8[0]},[r6,:32] + vst1.32 {d8[1]},[r7,:32] + + b .Lsquare_neon + +.align 4 +.Lsquare_break_neon: + add r6,r0,#(48+2*4*9) + add r7,r0,#(48+3*4*9) + + vmov d0,d10 @ r^4:r^3 + vshl.u32 d2,d12,#2 @ *5 + vmov d1,d12 + vshl.u32 d4,d14,#2 + vmov d3,d14 + vshl.u32 d6,d16,#2 + vmov d5,d16 + vshl.u32 d8,d18,#2 + vmov d7,d18 + vadd.i32 d2,d2,d12 + vadd.i32 d4,d4,d14 + vadd.i32 d6,d6,d16 + vadd.i32 d8,d8,d18 + + vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! + vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! + vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vst1.32 {d8[0]},[r6] + vst1.32 {d8[1]},[r7] + +.Lno_init_neon: + bx lr @ bx lr +.size poly1305_init_neon,.-poly1305_init_neon + +.type poly1305_blocks_neon,%function +.align 5 +poly1305_blocks_neon: +.Lpoly1305_blocks_neon: + ldr ip,[r0,#36] @ is_base2_26 + + cmp r2,#64 + blo .Lpoly1305_blocks + + stmdb sp!,{r4-r7} + vstmdb sp!,{d8-d15} @ ABI specification says so + + tst ip,ip @ is_base2_26? + bne .Lbase2_26_neon + + stmdb sp!,{r1-r3,lr} + bl .Lpoly1305_init_neon + + ldr r4,[r0,#0] @ load hash value base 2^32 + ldr r5,[r0,#4] + ldr r6,[r0,#8] + ldr r7,[r0,#12] + ldr ip,[r0,#16] + + and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26 + mov r3,r4,lsr#26 + veor d10,d10,d10 + mov r4,r5,lsr#20 + orr r3,r3,r5,lsl#6 + veor d12,d12,d12 + mov r5,r6,lsr#14 + orr r4,r4,r6,lsl#12 + veor d14,d14,d14 + mov r6,r7,lsr#8 + orr r5,r5,r7,lsl#18 + veor d16,d16,d16 + and r3,r3,#0x03ffffff + orr r6,r6,ip,lsl#24 + veor d18,d18,d18 + and r4,r4,#0x03ffffff + mov r1,#1 + and r5,r5,#0x03ffffff + str r1,[r0,#36] @ set is_base2_26 + + vmov.32 d10[0],r2 + vmov.32 d12[0],r3 + vmov.32 d14[0],r4 + vmov.32 d16[0],r5 + vmov.32 d18[0],r6 + adr r5,.Lzeros + + ldmia sp!,{r1-r3,lr} + b .Lhash_loaded + +.align 4 +.Lbase2_26_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ load hash value + + veor d10,d10,d10 + veor d12,d12,d12 + veor d14,d14,d14 + veor d16,d16,d16 + veor d18,d18,d18 + vld4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]! + adr r5,.Lzeros + vld1.32 {d18[0]},[r0] + sub r0,r0,#16 @ rewind + +.Lhash_loaded: + add r4,r1,#32 + mov r3,r3,lsl#24 + tst r2,#31 + beq .Leven + + vld4.32 {d20[0],d22[0],d24[0],d26[0]},[r1]! + vmov.32 d28[0],r3 + sub r2,r2,#16 + add r4,r1,#32 + +# ifdef __ARMEB__ + vrev32.8 q10,q10 + vrev32.8 q13,q13 + vrev32.8 q11,q11 + vrev32.8 q12,q12 +# endif + vsri.u32 d28,d26,#8 @ base 2^32 -> base 2^26 + vshl.u32 d26,d26,#18 + + vsri.u32 d26,d24,#14 + vshl.u32 d24,d24,#12 + vadd.i32 d29,d28,d18 @ add hash value and move to #hi + + vbic.i32 d26,#0xfc000000 + vsri.u32 d24,d22,#20 + vshl.u32 d22,d22,#6 + + vbic.i32 d24,#0xfc000000 + vsri.u32 d22,d20,#26 + vadd.i32 d27,d26,d16 + + vbic.i32 d20,#0xfc000000 + vbic.i32 d22,#0xfc000000 + vadd.i32 d25,d24,d14 + + vadd.i32 d21,d20,d10 + vadd.i32 d23,d22,d12 + + mov r7,r5 + add r6,r0,#48 + + cmp r2,r2 + b .Long_tail + +.align 4 +.Leven: + subs r2,r2,#64 + it lo + movlo r4,r5 + + vmov.i32 q14,#1<<24 @ padbit, yes, always + vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1] + add r1,r1,#64 + vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0) + add r4,r4,#64 + itt hi + addhi r7,r0,#(48+1*9*4) + addhi r6,r0,#(48+3*9*4) + +# ifdef __ARMEB__ + vrev32.8 q10,q10 + vrev32.8 q13,q13 + vrev32.8 q11,q11 + vrev32.8 q12,q12 +# endif + vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26 + vshl.u32 q13,q13,#18 + + vsri.u32 q13,q12,#14 + vshl.u32 q12,q12,#12 + + vbic.i32 q13,#0xfc000000 + vsri.u32 q12,q11,#20 + vshl.u32 q11,q11,#6 + + vbic.i32 q12,#0xfc000000 + vsri.u32 q11,q10,#26 + + vbic.i32 q10,#0xfc000000 + vbic.i32 q11,#0xfc000000 + + bls .Lskip_loop + + vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^2 + vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4 + vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + b .Loop_neon + +.align 5 +.Loop_neon: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + @ ___________________/ + @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + @ ___________________/ ____________________/ + @ + @ Note that we start with inp[2:3]*r^2. This is because it + @ doesn't depend on reduction in previous iteration. + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4 + @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4 + @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4 + @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4 + @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ inp[2:3]*r^2 + + vadd.i32 d24,d24,d14 @ accumulate inp[0:1] + vmull.u32 q7,d25,d0[1] + vadd.i32 d20,d20,d10 + vmull.u32 q5,d21,d0[1] + vadd.i32 d26,d26,d16 + vmull.u32 q8,d27,d0[1] + vmlal.u32 q7,d23,d1[1] + vadd.i32 d22,d22,d12 + vmull.u32 q6,d23,d0[1] + + vadd.i32 d28,d28,d18 + vmull.u32 q9,d29,d0[1] + subs r2,r2,#64 + vmlal.u32 q5,d29,d2[1] + it lo + movlo r4,r5 + vmlal.u32 q8,d25,d1[1] + vld1.32 d8[1],[r7,:32] + vmlal.u32 q6,d21,d1[1] + vmlal.u32 q9,d27,d1[1] + + vmlal.u32 q5,d27,d4[1] + vmlal.u32 q8,d23,d3[1] + vmlal.u32 q9,d25,d3[1] + vmlal.u32 q6,d29,d4[1] + vmlal.u32 q7,d21,d3[1] + + vmlal.u32 q8,d21,d5[1] + vmlal.u32 q5,d25,d6[1] + vmlal.u32 q9,d23,d5[1] + vmlal.u32 q6,d27,d6[1] + vmlal.u32 q7,d29,d6[1] + + vmlal.u32 q8,d29,d8[1] + vmlal.u32 q5,d23,d8[1] + vmlal.u32 q9,d21,d7[1] + vmlal.u32 q6,d25,d8[1] + vmlal.u32 q7,d27,d8[1] + + vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0) + add r4,r4,#64 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4 and accumulate + + vmlal.u32 q8,d26,d0[0] + vmlal.u32 q5,d20,d0[0] + vmlal.u32 q9,d28,d0[0] + vmlal.u32 q6,d22,d0[0] + vmlal.u32 q7,d24,d0[0] + vld1.32 d8[0],[r6,:32] + + vmlal.u32 q8,d24,d1[0] + vmlal.u32 q5,d28,d2[0] + vmlal.u32 q9,d26,d1[0] + vmlal.u32 q6,d20,d1[0] + vmlal.u32 q7,d22,d1[0] + + vmlal.u32 q8,d22,d3[0] + vmlal.u32 q5,d26,d4[0] + vmlal.u32 q9,d24,d3[0] + vmlal.u32 q6,d28,d4[0] + vmlal.u32 q7,d20,d3[0] + + vmlal.u32 q8,d20,d5[0] + vmlal.u32 q5,d24,d6[0] + vmlal.u32 q9,d22,d5[0] + vmlal.u32 q6,d26,d6[0] + vmlal.u32 q8,d28,d8[0] + + vmlal.u32 q7,d28,d6[0] + vmlal.u32 q5,d22,d8[0] + vmlal.u32 q9,d20,d7[0] + vmov.i32 q14,#1<<24 @ padbit, yes, always + vmlal.u32 q6,d24,d8[0] + vmlal.u32 q7,d26,d8[0] + + vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1] + add r1,r1,#64 +# ifdef __ARMEB__ + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vrev32.8 q12,q12 + vrev32.8 q13,q13 +# endif + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction interleaved with base 2^32 -> base 2^26 of + @ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14. + + vshr.u64 q15,q8,#26 + vmovn.i64 d16,q8 + vshr.u64 q4,q5,#26 + vmovn.i64 d10,q5 + vadd.i64 q9,q9,q15 @ h3 -> h4 + vbic.i32 d16,#0xfc000000 + vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26 + vadd.i64 q6,q6,q4 @ h0 -> h1 + vshl.u32 q13,q13,#18 + vbic.i32 d10,#0xfc000000 + + vshrn.u64 d30,q9,#26 + vmovn.i64 d18,q9 + vshr.u64 q4,q6,#26 + vmovn.i64 d12,q6 + vadd.i64 q7,q7,q4 @ h1 -> h2 + vsri.u32 q13,q12,#14 + vbic.i32 d18,#0xfc000000 + vshl.u32 q12,q12,#12 + vbic.i32 d12,#0xfc000000 + + vadd.i32 d10,d10,d30 + vshl.u32 d30,d30,#2 + vbic.i32 q13,#0xfc000000 + vshrn.u64 d8,q7,#26 + vmovn.i64 d14,q7 + vaddl.u32 q5,d10,d30 @ h4 -> h0 [widen for a sec] + vsri.u32 q12,q11,#20 + vadd.i32 d16,d16,d8 @ h2 -> h3 + vshl.u32 q11,q11,#6 + vbic.i32 d14,#0xfc000000 + vbic.i32 q12,#0xfc000000 + + vshrn.u64 d30,q5,#26 @ re-narrow + vmovn.i64 d10,q5 + vsri.u32 q11,q10,#26 + vbic.i32 q10,#0xfc000000 + vshr.u32 d8,d16,#26 + vbic.i32 d16,#0xfc000000 + vbic.i32 d10,#0xfc000000 + vadd.i32 d12,d12,d30 @ h0 -> h1 + vadd.i32 d18,d18,d8 @ h3 -> h4 + vbic.i32 q11,#0xfc000000 + + bhi .Loop_neon + +.Lskip_loop: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + add r7,r0,#(48+0*9*4) + add r6,r0,#(48+1*9*4) + adds r2,r2,#32 + it ne + movne r2,#0 + bne .Long_tail + + vadd.i32 d25,d24,d14 @ add hash value and move to #hi + vadd.i32 d21,d20,d10 + vadd.i32 d27,d26,d16 + vadd.i32 d23,d22,d12 + vadd.i32 d29,d28,d18 + +.Long_tail: + vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^1 + vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^2 + + vadd.i32 d24,d24,d14 @ can be redundant + vmull.u32 q7,d25,d0 + vadd.i32 d20,d20,d10 + vmull.u32 q5,d21,d0 + vadd.i32 d26,d26,d16 + vmull.u32 q8,d27,d0 + vadd.i32 d22,d22,d12 + vmull.u32 q6,d23,d0 + vadd.i32 d28,d28,d18 + vmull.u32 q9,d29,d0 + + vmlal.u32 q5,d29,d2 + vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vmlal.u32 q8,d25,d1 + vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + vmlal.u32 q6,d21,d1 + vmlal.u32 q9,d27,d1 + vmlal.u32 q7,d23,d1 + + vmlal.u32 q8,d23,d3 + vld1.32 d8[1],[r7,:32] + vmlal.u32 q5,d27,d4 + vld1.32 d8[0],[r6,:32] + vmlal.u32 q9,d25,d3 + vmlal.u32 q6,d29,d4 + vmlal.u32 q7,d21,d3 + + vmlal.u32 q8,d21,d5 + it ne + addne r7,r0,#(48+2*9*4) + vmlal.u32 q5,d25,d6 + it ne + addne r6,r0,#(48+3*9*4) + vmlal.u32 q9,d23,d5 + vmlal.u32 q6,d27,d6 + vmlal.u32 q7,d29,d6 + + vmlal.u32 q8,d29,d8 + vorn q0,q0,q0 @ all-ones, can be redundant + vmlal.u32 q5,d23,d8 + vshr.u64 q0,q0,#38 + vmlal.u32 q9,d21,d7 + vmlal.u32 q6,d25,d8 + vmlal.u32 q7,d27,d8 + + beq .Lshort_tail + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ (hash+inp[0:1])*r^4:r^3 and accumulate + + vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^3 + vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4 + + vmlal.u32 q7,d24,d0 + vmlal.u32 q5,d20,d0 + vmlal.u32 q8,d26,d0 + vmlal.u32 q6,d22,d0 + vmlal.u32 q9,d28,d0 + + vmlal.u32 q5,d28,d2 + vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]! + vmlal.u32 q8,d24,d1 + vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]! + vmlal.u32 q6,d20,d1 + vmlal.u32 q9,d26,d1 + vmlal.u32 q7,d22,d1 + + vmlal.u32 q8,d22,d3 + vld1.32 d8[1],[r7,:32] + vmlal.u32 q5,d26,d4 + vld1.32 d8[0],[r6,:32] + vmlal.u32 q9,d24,d3 + vmlal.u32 q6,d28,d4 + vmlal.u32 q7,d20,d3 + + vmlal.u32 q8,d20,d5 + vmlal.u32 q5,d24,d6 + vmlal.u32 q9,d22,d5 + vmlal.u32 q6,d26,d6 + vmlal.u32 q7,d28,d6 + + vmlal.u32 q8,d28,d8 + vorn q0,q0,q0 @ all-ones + vmlal.u32 q5,d22,d8 + vshr.u64 q0,q0,#38 + vmlal.u32 q9,d20,d7 + vmlal.u32 q6,d24,d8 + vmlal.u32 q7,d26,d8 + +.Lshort_tail: + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ horizontal addition + + vadd.i64 d16,d16,d17 + vadd.i64 d10,d10,d11 + vadd.i64 d18,d18,d19 + vadd.i64 d12,d12,d13 + vadd.i64 d14,d14,d15 + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ lazy reduction, but without narrowing + + vshr.u64 q15,q8,#26 + vand.i64 q8,q8,q0 + vshr.u64 q4,q5,#26 + vand.i64 q5,q5,q0 + vadd.i64 q9,q9,q15 @ h3 -> h4 + vadd.i64 q6,q6,q4 @ h0 -> h1 + + vshr.u64 q15,q9,#26 + vand.i64 q9,q9,q0 + vshr.u64 q4,q6,#26 + vand.i64 q6,q6,q0 + vadd.i64 q7,q7,q4 @ h1 -> h2 + + vadd.i64 q5,q5,q15 + vshl.u64 q15,q15,#2 + vshr.u64 q4,q7,#26 + vand.i64 q7,q7,q0 + vadd.i64 q5,q5,q15 @ h4 -> h0 + vadd.i64 q8,q8,q4 @ h2 -> h3 + + vshr.u64 q15,q5,#26 + vand.i64 q5,q5,q0 + vshr.u64 q4,q8,#26 + vand.i64 q8,q8,q0 + vadd.i64 q6,q6,q15 @ h0 -> h1 + vadd.i64 q9,q9,q4 @ h3 -> h4 + + cmp r2,#0 + bne .Leven + + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + @ store hash value + + vst4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]! + vst1.32 {d18[0]},[r0] + + vldmia sp!,{d8-d15} @ epilogue + ldmia sp!,{r4-r7} + bx lr @ bx lr +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +#ifndef __KERNEL__ +.LOPENSSL_armcap: +# ifdef _WIN32 +.word OPENSSL_armcap_P +# else +.word OPENSSL_armcap_P-.Lpoly1305_init +# endif +.comm OPENSSL_armcap_P,4,4 +.hidden OPENSSL_armcap_P +#endif +#endif +.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm" +.align 2 diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c new file mode 100644 index 000000000000..abe3f2d587dc --- /dev/null +++ b/arch/arm/crypto/poly1305-glue.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM + * + * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> + */ + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> +#include <asm/unaligned.h> +#include <crypto/algapi.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/poly1305.h> +#include <crypto/internal/simd.h> +#include <linux/cpufeature.h> +#include <linux/crypto.h> +#include <linux/jump_label.h> +#include <linux/module.h> + +void poly1305_init_arm(void *state, const u8 *key); +void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); +void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce); + +void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) +{ +} + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) +{ + poly1305_init_arm(&dctx->h, key); + dctx->s[0] = get_unaligned_le32(key + 16); + dctx->s[1] = get_unaligned_le32(key + 20); + dctx->s[2] = get_unaligned_le32(key + 24); + dctx->s[3] = get_unaligned_le32(key + 28); + dctx->buflen = 0; +} +EXPORT_SYMBOL(poly1305_init_arch); + +static int arm_poly1305_init(struct shash_desc *desc) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + dctx->buflen = 0; + dctx->rset = 0; + dctx->sset = false; + + return 0; +} + +static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, + u32 len, u32 hibit, bool do_neon) +{ + if (unlikely(!dctx->sset)) { + if (!dctx->rset) { + poly1305_init_arm(&dctx->h, src); + src += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + dctx->rset = 1; + } + if (len >= POLY1305_BLOCK_SIZE) { + dctx->s[0] = get_unaligned_le32(src + 0); + dctx->s[1] = get_unaligned_le32(src + 4); + dctx->s[2] = get_unaligned_le32(src + 8); + dctx->s[3] = get_unaligned_le32(src + 12); + src += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + dctx->sset = true; + } + if (len < POLY1305_BLOCK_SIZE) + return; + } + + len &= ~(POLY1305_BLOCK_SIZE - 1); + + if (static_branch_likely(&have_neon) && likely(do_neon)) + poly1305_blocks_neon(&dctx->h, src, len, hibit); + else + poly1305_blocks_arm(&dctx->h, src, len, hibit); +} + +static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx, + const u8 *src, u32 len, bool do_neon) +{ + if (unlikely(dctx->buflen)) { + u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); + + memcpy(dctx->buf + dctx->buflen, src, bytes); + src += bytes; + len -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + arm_poly1305_blocks(dctx, dctx->buf, + POLY1305_BLOCK_SIZE, 1, false); + dctx->buflen = 0; + } + } + + if (likely(len >= POLY1305_BLOCK_SIZE)) { + arm_poly1305_blocks(dctx, src, len, 1, do_neon); + src += round_down(len, POLY1305_BLOCK_SIZE); + len %= POLY1305_BLOCK_SIZE; + } + + if (unlikely(len)) { + dctx->buflen = len; + memcpy(dctx->buf, src, len); + } +} + +static int arm_poly1305_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + arm_poly1305_do_update(dctx, src, srclen, false); + return 0; +} + +static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc, + const u8 *src, + unsigned int srclen) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + bool do_neon = crypto_simd_usable() && srclen > 128; + + if (static_branch_likely(&have_neon) && do_neon) + kernel_neon_begin(); + arm_poly1305_do_update(dctx, src, srclen, do_neon); + if (static_branch_likely(&have_neon) && do_neon) + kernel_neon_end(); + return 0; +} + +void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, + unsigned int nbytes) +{ + bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + crypto_simd_usable(); + + if (unlikely(dctx->buflen)) { + u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); + + memcpy(dctx->buf + dctx->buflen, src, bytes); + src += bytes; + nbytes -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + poly1305_blocks_arm(&dctx->h, dctx->buf, + POLY1305_BLOCK_SIZE, 1); + dctx->buflen = 0; + } + } + + if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { + unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); + + if (static_branch_likely(&have_neon) && do_neon) { + kernel_neon_begin(); + poly1305_blocks_neon(&dctx->h, src, len, 1); + kernel_neon_end(); + } else { + poly1305_blocks_arm(&dctx->h, src, len, 1); + } + src += len; + nbytes %= POLY1305_BLOCK_SIZE; + } + + if (unlikely(nbytes)) { + dctx->buflen = nbytes; + memcpy(dctx->buf, src, nbytes); + } +} +EXPORT_SYMBOL(poly1305_update_arch); + +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) +{ + __le32 digest[4]; + u64 f = 0; + + if (unlikely(dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - dctx->buflen); + poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + poly1305_emit_arm(&dctx->h, digest, dctx->s); + + /* mac = (h + s) % (2^128) */ + f = (f >> 32) + le32_to_cpu(digest[0]); + put_unaligned_le32(f, dst); + f = (f >> 32) + le32_to_cpu(digest[1]); + put_unaligned_le32(f, dst + 4); + f = (f >> 32) + le32_to_cpu(digest[2]); + put_unaligned_le32(f, dst + 8); + f = (f >> 32) + le32_to_cpu(digest[3]); + put_unaligned_le32(f, dst + 12); + + *dctx = (struct poly1305_desc_ctx){}; +} +EXPORT_SYMBOL(poly1305_final_arch); + +static int arm_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + poly1305_final_arch(dctx, dst); + return 0; +} + +static struct shash_alg arm_poly1305_algs[] = {{ + .init = arm_poly1305_init, + .update = arm_poly1305_update, + .final = arm_poly1305_final, + .digestsize = POLY1305_DIGEST_SIZE, + .descsize = sizeof(struct poly1305_desc_ctx), + + .base.cra_name = "poly1305", + .base.cra_driver_name = "poly1305-arm", + .base.cra_priority = 150, + .base.cra_blocksize = POLY1305_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +#ifdef CONFIG_KERNEL_MODE_NEON +}, { + .init = arm_poly1305_init, + .update = arm_poly1305_update_neon, + .final = arm_poly1305_final, + .digestsize = POLY1305_DIGEST_SIZE, + .descsize = sizeof(struct poly1305_desc_ctx), + + .base.cra_name = "poly1305", + .base.cra_driver_name = "poly1305-neon", + .base.cra_priority = 200, + .base.cra_blocksize = POLY1305_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +#endif +}}; + +static int __init arm_poly1305_mod_init(void) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + (elf_hwcap & HWCAP_NEON)) + static_branch_enable(&have_neon); + else if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) + /* register only the first entry */ + return crypto_register_shash(&arm_poly1305_algs[0]); + + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? + crypto_register_shashes(arm_poly1305_algs, + ARRAY_SIZE(arm_poly1305_algs)) : 0; +} + +static void __exit arm_poly1305_mod_exit(void) +{ + if (!IS_REACHABLE(CONFIG_CRYPTO_HASH)) + return; + if (!static_branch_likely(&have_neon)) { + crypto_unregister_shash(&arm_poly1305_algs[0]); + return; + } + crypto_unregister_shashes(arm_poly1305_algs, + ARRAY_SIZE(arm_poly1305_algs)); +} + +module_init(arm_poly1305_mod_init); +module_exit(arm_poly1305_mod_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("poly1305"); +MODULE_ALIAS_CRYPTO("poly1305-arm"); +MODULE_ALIAS_CRYPTO("poly1305-neon"); diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S index 49a74a441aec..8a702e051738 100644 --- a/arch/arm/crypto/sha1-ce-core.S +++ b/arch/arm/crypto/sha1-ce-core.S @@ -10,6 +10,7 @@ #include <asm/assembler.h> .text + .arch armv8-a .fpu crypto-neon-fp-armv8 k0 .req q0 diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S index 4ad517577e23..b6369d2440a1 100644 --- a/arch/arm/crypto/sha2-ce-core.S +++ b/arch/arm/crypto/sha2-ce-core.S @@ -10,6 +10,7 @@ #include <asm/assembler.h> .text + .arch armv8-a .fpu crypto-neon-fp-armv8 k0 .req q7 diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 68ca86f85eb7..fa579b23b4df 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -12,7 +12,6 @@ generic-y += local.h generic-y += local64.h generic-y += mm-arch-hooks.h generic-y += mmiowb.h -generic-y += msi.h generic-y += parport.h generic-y += preempt.h generic-y += seccomp.h diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index 0555f14cc8be..fa50bb04f580 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -333,7 +333,7 @@ static inline u64 __gic_readq_nonatomic(const volatile void __iomem *addr) * GITS_VPENDBASER - the Valid bit must be cleared before changing * anything else. */ -static inline void gits_write_vpendbaser(u64 val, void * __iomem addr) +static inline void gits_write_vpendbaser(u64 val, void __iomem *addr) { u32 tmp; diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h index b67e5fc1fe43..7c3001a6a775 100644 --- a/arch/arm/include/asm/dma-direct.h +++ b/arch/arm/include/asm/dma-direct.h @@ -14,23 +14,4 @@ static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset; } -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - u64 limit, mask; - - if (!dev->dma_mask) - return 0; - - mask = *dev->dma_mask; - - limit = (mask + 1) & ~mask; - if (limit && size > limit) - return 0; - - if ((addr | (addr + size - 1)) & ~mask) - return 0; - - return 1; -} - #endif /* ASM_ARM_DMA_DIRECT_H */ diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 567dbede4785..f1d0a7807cd0 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -82,7 +82,7 @@ #ifndef __ASSEMBLY__ #ifdef CONFIG_CPU_CP15_MMU -static inline unsigned int get_domain(void) +static __always_inline unsigned int get_domain(void) { unsigned int domain; @@ -94,7 +94,7 @@ static inline unsigned int get_domain(void) return domain; } -static inline void set_domain(unsigned val) +static __always_inline void set_domain(unsigned int val) { asm volatile( "mcr p15, 0, %0, c3, c0 @ set domain" @@ -102,12 +102,12 @@ static inline void set_domain(unsigned val) isb(); } #else -static inline unsigned int get_domain(void) +static __always_inline unsigned int get_domain(void) { return 0; } -static inline void set_domain(unsigned val) +static __always_inline void set_domain(unsigned int val) { } #endif diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 18b0197f2384..48ec1d0337da 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -11,7 +11,6 @@ #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ #ifndef __ASSEMBLY__ -extern void mcount(void); extern void __gnu_mcount_nc(void); #ifdef CONFIG_DYNAMIC_FTRACE @@ -23,9 +22,6 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) /* With Thumb-2, the recorded addresses have the lsb set */ return addr & ~1; } - -extern void ftrace_caller_old(void); -extern void ftrace_call_old(void); #endif #endif diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index 32edfadb1593..a6d4ee86ba54 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -118,6 +118,8 @@ #define L310_AUX_CTRL_STORE_LIMITATION BIT(11) /* R2P0+ */ #define L310_AUX_CTRL_EXCLUSIVE_CACHE BIT(12) #define L310_AUX_CTRL_ASSOCIATIVITY_16 BIT(16) +#define L310_AUX_CTRL_FWA_SHIFT 23 +#define L310_AUX_CTRL_FWA_MASK (3 << 23) #define L310_AUX_CTRL_CACHE_REPLACE_RR BIT(25) /* R2P0+ */ #define L310_AUX_CTRL_NS_LOCKDOWN BIT(26) #define L310_AUX_CTRL_NS_INT_CTRL BIT(27) diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index ac54c06764e6..62358d3ca0a8 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -53,6 +53,9 @@ static inline void decode_ctrl_reg(u32 reg, #define ARM_DEBUG_ARCH_V7_MM 4 #define ARM_DEBUG_ARCH_V7_1 5 #define ARM_DEBUG_ARCH_V8 6 +#define ARM_DEBUG_ARCH_V8_1 7 +#define ARM_DEBUG_ARCH_V8_2 8 +#define ARM_DEBUG_ARCH_V8_4 9 /* Breakpoint */ #define ARM_BREAKPOINT_EXECUTE 0 diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 7a0596fcb2e7..aefdabdbeb84 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -392,7 +392,6 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, */ void __iomem *ioremap(resource_size_t res_cookie, size_t size); #define ioremap ioremap -#define ioremap_nocache ioremap /* * Do not use ioremap_cache for mapping memory. Use memremap instead. @@ -400,12 +399,6 @@ void __iomem *ioremap(resource_size_t res_cookie, size_t size); void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size); #define ioremap_cache ioremap_cache -/* - * Do not use ioremap_cached in new code. Provided for the benefit of - * the pxa2xx-flash MTD driver only. - */ -void __iomem *ioremap_cached(resource_size_t res_cookie, size_t size); - void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size); #define ioremap_wc ioremap_wc #define ioremap_wt ioremap_wc diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 0125aa059d5b..9c04bd810d07 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -162,6 +162,7 @@ #define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT) #define HSR_SRT_SHIFT (16) #define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT) +#define HSR_CM (1 << 8) #define HSR_FSC (0x3f) #define HSR_FSC_TYPE (0x3c) #define HSR_SSE (1 << 21) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 40002416efec..9b118516d2db 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -95,12 +95,12 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr; } -static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu) +static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu) { vcpu->arch.hcr &= ~HCR_TWE; } -static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) +static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) { vcpu->arch.hcr |= HCR_TWE; } @@ -167,6 +167,11 @@ static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_ISV; } +static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & (HSR_CM | HSR_WNR | HSR_FSC); +} + static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_WNR; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8a37c8e89777..556cd818eccf 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -7,6 +7,7 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include <linux/arm-smccc.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/kvm_types.h> @@ -38,6 +39,7 @@ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) +#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -76,6 +78,14 @@ struct kvm_arch { /* Mandated version of PSCI */ u32 psci_version; + + /* + * If we encounter a data abort without valid instruction syndrome + * information, report this to user space. User space can (and + * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is + * supported. + */ + bool return_nisv_io_abort_to_user; }; #define KVM_NR_MEM_OBJS 40 @@ -323,6 +333,29 @@ static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) int kvm_perf_init(void); int kvm_perf_teardown(void); +static inline long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) +{ + return SMCCC_RET_NOT_SUPPORTED; +} + +static inline gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) +{ + return GPA_INVALID; +} + +static inline void kvm_update_stolen_time(struct kvm_vcpu *vcpu) +{ +} + +static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ +} + +static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return false; +} + void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h index 0abd389cf0ec..68e6f25784a4 100644 --- a/arch/arm/include/asm/pci.h +++ b/arch/arm/include/asm/pci.h @@ -27,5 +27,7 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) return channel ? 15 : 14; } +extern void pcibios_report_status(unsigned int status_mask, int warn); + #endif /* __KERNEL__ */ #endif diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 3ae120cd1715..eabcb48a7840 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -12,7 +12,7 @@ #ifndef CONFIG_MMU -#include <asm-generic/4level-fixup.h> +#include <asm-generic/pgtable-nopud.h> #include <asm/pgtable-nommu.h> #else diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 303248e5b990..98c6b91be4a8 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -22,7 +22,7 @@ * perform such accesses (eg, via list poison values) which could then * be exploited for priviledge escalation. */ -static inline unsigned int uaccess_save_and_enable(void) +static __always_inline unsigned int uaccess_save_and_enable(void) { #ifdef CONFIG_CPU_SW_DOMAIN_PAN unsigned int old_domain = get_domain(); @@ -37,7 +37,7 @@ static inline unsigned int uaccess_save_and_enable(void) #endif } -static inline void uaccess_restore(unsigned int flags) +static __always_inline void uaccess_restore(unsigned int flags) { #ifdef CONFIG_CPU_SW_DOMAIN_PAN /* Restore the user access mask */ diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..5b879ae7afc1 --- /dev/null +++ b/arch/arm/include/asm/vdso/gettimeofday.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 ARM Limited + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include <asm/barrier.h> +#include <asm/cp15.h> +#include <asm/unistd.h> +#include <uapi/linux/time.h> + +#define VDSO_HAS_CLOCK_GETRES 1 + +extern struct vdso_data *__get_datapage(void); + +static __always_inline int gettimeofday_fallback( + struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + register struct timezone *tz asm("r1") = _tz; + register struct __kernel_old_timeval *tv asm("r0") = _tv; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_gettimeofday; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (tv), "r" (tz), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline long clock_gettime_fallback( + clockid_t _clkid, + struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_clock_gettime64; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline int clock_getres_fallback( + clockid_t _clkid, + struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_clock_getres_time64; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline u64 __arch_get_hw_counter(int clock_mode) +{ +#ifdef CONFIG_ARM_ARCH_TIMER + u64 cycle_now; + + isb(); + cycle_now = read_sysreg(CNTVCT); + + return cycle_now; +#else + return -EINVAL; /* use fallback */ +#endif +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return __get_datapage(); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm/include/asm/vdso/vsyscall.h b/arch/arm/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..c4166f317071 --- /dev/null +++ b/arch/arm/include/asm/vdso/vsyscall.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include <linux/timekeeper_internal.h> +#include <vdso/datapage.h> +#include <asm/cacheflush.h> + +extern struct vdso_data *vdso_data; +extern bool cntvct_ok; + +static __always_inline +bool tk_is_cntvct(const struct timekeeper *tk) +{ + if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) + return false; + + if (!tk->tkr_mono.clock->archdata.vdso_direct) + return false; + + return true; +} + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline +struct vdso_data *__arm_get_k_vdso_data(void) +{ + return vdso_data; +} +#define __arch_get_k_vdso_data __arm_get_k_vdso_data + +static __always_inline +int __arm_update_vdso_data(void) +{ + return !cntvct_ok; +} +#define __arch_update_vdso_data __arm_update_vdso_data + +static __always_inline +int __arm_get_clock_mode(struct timekeeper *tk) +{ + u32 __tk_is_cntvct = tk_is_cntvct(tk); + + return __tk_is_cntvct; +} +#define __arch_get_clock_mode __arm_get_clock_mode + +static __always_inline +int __arm_use_vsyscall(struct vdso_data *vdata) +{ + return vdata[CS_HRES_COARSE].clock_mode; +} +#define __arch_use_vsyscall __arm_use_vsyscall + +static __always_inline +void __arm_sync_vdso_data(struct vdso_data *vdata) +{ + flush_dcache_page(virt_to_page(vdata)); +} +#define __arch_sync_vdso_data __arm_sync_vdso_data + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/arm/include/asm/vdso_datapage.h b/arch/arm/include/asm/vdso_datapage.h index 7910abf89b1c..bef68f59928d 100644 --- a/arch/arm/include/asm/vdso_datapage.h +++ b/arch/arm/include/asm/vdso_datapage.h @@ -11,35 +11,12 @@ #ifndef __ASSEMBLY__ +#include <vdso/datapage.h> #include <asm/page.h> -/* Try to be cache-friendly on systems that don't implement the - * generic timer: fit the unconditionally updated fields in the first - * 32 bytes. - */ -struct vdso_data { - u32 seq_count; /* sequence count - odd during updates */ - u16 tk_is_cntvct; /* fall back to syscall if false */ - u16 cs_shift; /* clocksource shift */ - u32 xtime_coarse_sec; /* coarse time */ - u32 xtime_coarse_nsec; - - u32 wtm_clock_sec; /* wall to monotonic offset */ - u32 wtm_clock_nsec; - u32 xtime_clock_sec; /* CLOCK_REALTIME - seconds */ - u32 cs_mult; /* clocksource multiplier */ - - u64 cs_cycle_last; /* last cycle value */ - u64 cs_mask; /* clocksource mask */ - - u64 xtime_clock_snsec; /* CLOCK_REALTIME sub-ns base */ - u32 tz_minuteswest; /* timezone info for gettimeofday(2) */ - u32 tz_dsttime; -}; - union vdso_data_store { - struct vdso_data data; - u8 page[PAGE_SIZE]; + struct vdso_data data[CS_BASES]; + u8 page[PAGE_SIZE]; }; #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 2769360f195c..03cd7c19a683 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -131,8 +131,9 @@ struct kvm_vcpu_events { struct { __u8 serror_pending; __u8 serror_has_esr; + __u8 ext_dabt_pending; /* Align it to 8 bytes */ - __u8 pad[6]; + __u8 pad[5]; __u64 serror_esr; } exception; __u32 reserved[12]; diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 8cad59465af3..8b679e2ca3c3 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -17,10 +17,14 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o reboot.o return_address.o \ + process.o ptrace.o reboot.o \ setup.o signal.o sigreturn_codes.o \ stacktrace.o sys_arm.o time.o traps.o +ifneq ($(CONFIG_ARM_UNWIND),y) +obj-$(CONFIG_FRAME_POINTER) += return_address.o +endif + obj-$(CONFIG_ATAGS) += atags_parse.o obj-$(CONFIG_ATAGS_PROC) += atags_proc.o obj-$(CONFIG_DEPRECATED_PARAM_STRUCT) += atags_compat.o diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index c125582de2e7..b5e217907686 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -10,6 +10,7 @@ #include <linux/errno.h> #include <asm/delay.h> +#include <asm/arch_timer.h> #include <clocksource/arm_arch_timer.h> diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index a7810be07da1..4a3982812a40 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -68,7 +68,7 @@ ENDPROC(__vet_atags) * The following fragment of code is executed with the MMU on in MMU mode, * and uses absolute addresses; this is not position independent. * - * r0 = cp#15 control register + * r0 = cp#15 control register (exc_ret for M-class) * r1 = machine ID * r2 = atags/dtb pointer * r9 = processor ID @@ -137,7 +137,8 @@ __mmap_switched_data: #ifdef CONFIG_CPU_CP15 .long cr_alignment @ r3 #else - .long 0 @ r3 +M_CLASS(.long exc_ret) @ r3 +AR_CLASS(.long 0) @ r3 #endif .size __mmap_switched_data, . - __mmap_switched_data diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index afa350f44dea..0fc814bbc34b 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -201,6 +201,8 @@ M_CLASS(streq r3, [r12, #PMSAv8_MAIR1]) bic r0, r0, #V7M_SCB_CCR_IC #endif str r0, [r12, V7M_SCB_CCR] + /* Pass exc_ret to __mmap_switched */ + mov r0, r10 #endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */ ret lr ENDPROC(__after_proc_init) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index b0c195e3a06d..02ca7adf5375 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -246,6 +246,9 @@ static int enable_monitor_mode(void) case ARM_DEBUG_ARCH_V7_ECP14: case ARM_DEBUG_ARCH_V7_1: case ARM_DEBUG_ARCH_V8: + case ARM_DEBUG_ARCH_V8_1: + case ARM_DEBUG_ARCH_V8_2: + case ARM_DEBUG_ARCH_V8_4: ARM_DBG_WRITE(c0, c2, 2, (dscr | ARM_DSCR_MDBGEN)); isb(); break; diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index b647741c0ab0..6e626abaefc5 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -7,6 +7,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/sort.h> +#include <linux/moduleloader.h> #include <asm/cache.h> #include <asm/opcodes.h> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 9485acc520a4..cea1c27c29cb 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -36,6 +36,8 @@ #include <asm/tls.h> #include <asm/vdso.h> +#include "signal.h" + #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include <linux/stackprotector.h> unsigned long __stack_chk_guard __read_mostly; diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index aba6b2ab7a58..d4392e177484 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -51,7 +51,7 @@ static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle) } #ifdef CONFIG_HOTPLUG_CPU -int psci_cpu_disable(unsigned int cpu) +static int psci_cpu_disable(unsigned int cpu) { /* Fail early if we don't have CPU_OFF support */ if (!psci_ops.cpu_off) @@ -64,7 +64,7 @@ int psci_cpu_disable(unsigned int cpu) return 0; } -void psci_cpu_die(unsigned int cpu) +static void psci_cpu_die(unsigned int cpu) { u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT; @@ -76,7 +76,7 @@ void psci_cpu_die(unsigned int cpu) panic("psci: cpu %d failed to shutdown\n", cpu); } -int psci_cpu_kill(unsigned int cpu) +static int psci_cpu_kill(unsigned int cpu) { int err, i; diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 324352787aea..b606cded90cd 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -923,7 +923,7 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) /* Do seccomp after ptrace; syscall may have changed. */ #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER - if (secure_computing(NULL) == -1) + if (secure_computing() == -1) return -1; #else /* XXX: remove this once OABI gets fixed */ diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index b0d2f1fe891d..7b42ac010fdf 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -7,8 +7,6 @@ */ #include <linux/export.h> #include <linux/ftrace.h> - -#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) #include <linux/sched.h> #include <asm/stacktrace.h> @@ -53,6 +51,4 @@ void *return_address(unsigned int level) return NULL; } -#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ - EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h index b7b838b05229..cb076d30ab38 100644 --- a/arch/arm/kernel/signal.h +++ b/arch/arm/kernel/signal.h @@ -9,3 +9,5 @@ struct rt_sigframe { struct siginfo info; struct sigframe sig; }; + +extern struct page *get_signal_page(void); diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c index 9d9b1db73932..d3a85f01b328 100644 --- a/arch/arm/kernel/tcm.c +++ b/arch/arm/kernel/tcm.c @@ -18,6 +18,7 @@ #include <asm/memory.h> #include <asm/system_info.h> #include <asm/traps.h> +#include <asm/tcm.h> #define TCMTR_FORMAT_MASK 0xe0000000U @@ -30,8 +31,8 @@ extern char __itcm_start, __sitcm_text, __eitcm_text; extern char __dtcm_start, __sdtcm_data, __edtcm_data; /* These will be increased as we run */ -u32 dtcm_end = DTCM_OFFSET; -u32 itcm_end = ITCM_OFFSET; +static u32 dtcm_end = DTCM_OFFSET; +static u32 itcm_end = ITCM_OFFSET; /* * TCM memory resources diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index b996b2cf0703..dddc7ebf4db4 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -9,6 +9,7 @@ * reading the RTC at bootup, etc... */ #include <linux/clk-provider.h> +#include <linux/clockchips.h> #include <linux/clocksource.h> #include <linux/errno.h> #include <linux/export.h> @@ -107,5 +108,6 @@ void __init time_init(void) of_clk_init(NULL); #endif timer_probe(); + tick_setup_hrtimer_broadcast(); } } diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 5b9faba03afb..3a4dde081c13 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -95,7 +95,7 @@ static void __init parse_dt_topology(void) GFP_NOWAIT); for_each_possible_cpu(cpu) { - const u32 *rate; + const __be32 *rate; int len; /* too early to use cpu->of_node */ diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index 9bf16c93ee6a..c89ac1b9d28b 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -23,6 +23,8 @@ #include <asm/vdso.h> #include <asm/vdso_datapage.h> #include <clocksource/arm_arch_timer.h> +#include <vdso/helpers.h> +#include <vdso/vsyscall.h> #define MAX_SYMNAME 64 @@ -37,7 +39,7 @@ unsigned int vdso_total_pages __ro_after_init; * The VDSO data page. */ static union vdso_data_store vdso_data_store __page_aligned_data; -static struct vdso_data *vdso_data = &vdso_data_store.data; +struct vdso_data *vdso_data = vdso_data_store.data; static struct page *vdso_data_page __ro_after_init; static const struct vm_special_mapping vdso_data_mapping = { @@ -77,7 +79,7 @@ struct elfinfo { /* Cached result of boot-time check for whether the arch timer exists, * and if so, whether the virtual counter is useable. */ -static bool cntvct_ok __ro_after_init; +bool cntvct_ok __ro_after_init; static bool __init cntvct_functional(void) { @@ -262,84 +264,3 @@ void arm_install_vdso(struct mm_struct *mm, unsigned long addr) mm->context.vdso = addr; } -static void vdso_write_begin(struct vdso_data *vdata) -{ - ++vdso_data->seq_count; - smp_wmb(); /* Pairs with smp_rmb in vdso_read_retry */ -} - -static void vdso_write_end(struct vdso_data *vdata) -{ - smp_wmb(); /* Pairs with smp_rmb in vdso_read_begin */ - ++vdso_data->seq_count; -} - -static bool tk_is_cntvct(const struct timekeeper *tk) -{ - if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) - return false; - - if (!tk->tkr_mono.clock->archdata.vdso_direct) - return false; - - return true; -} - -/** - * update_vsyscall - update the vdso data page - * - * Increment the sequence counter, making it odd, indicating to - * userspace that an update is in progress. Update the fields used - * for coarse clocks and, if the architected system timer is in use, - * the fields used for high precision clocks. Increment the sequence - * counter again, making it even, indicating to userspace that the - * update is finished. - * - * Userspace is expected to sample seq_count before reading any other - * fields from the data page. If seq_count is odd, userspace is - * expected to wait until it becomes even. After copying data from - * the page, userspace must sample seq_count again; if it has changed - * from its previous value, userspace must retry the whole sequence. - * - * Calls to update_vsyscall are serialized by the timekeeping core. - */ -void update_vsyscall(struct timekeeper *tk) -{ - struct timespec64 *wtm = &tk->wall_to_monotonic; - - if (!cntvct_ok) { - /* The entry points have been zeroed, so there is no - * point in updating the data page. - */ - return; - } - - vdso_write_begin(vdso_data); - - vdso_data->tk_is_cntvct = tk_is_cntvct(tk); - vdso_data->xtime_coarse_sec = tk->xtime_sec; - vdso_data->xtime_coarse_nsec = (u32)(tk->tkr_mono.xtime_nsec >> - tk->tkr_mono.shift); - vdso_data->wtm_clock_sec = wtm->tv_sec; - vdso_data->wtm_clock_nsec = wtm->tv_nsec; - - if (vdso_data->tk_is_cntvct) { - vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; - vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_snsec = tk->tkr_mono.xtime_nsec; - vdso_data->cs_mult = tk->tkr_mono.mult; - vdso_data->cs_shift = tk->tkr_mono.shift; - vdso_data->cs_mask = tk->tkr_mono.mask; - } - - vdso_write_end(vdso_data); - - flush_dcache_page(virt_to_page(vdso_data)); -} - -void update_vsyscall_tz(void) -{ - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; - flush_dcache_page(virt_to_page(vdso_data)); -} diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 8c74037ade22..21b8b271c80d 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -70,8 +70,6 @@ SECTIONS ARM_UNWIND_SECTIONS #endif - NOTES - _etext = .; /* End of text and rodata section */ ARM_VECTORS @@ -114,7 +112,7 @@ SECTIONS . = ALIGN(THREAD_SIZE); _sdata = .; - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) .data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) { *(.data..ro_after_init) } diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 23150c0f0f4d..319ccb10846a 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -81,8 +81,6 @@ SECTIONS ARM_UNWIND_SECTIONS #endif - NOTES - #ifdef CONFIG_STRICT_KERNEL_RWX . = ALIGN(1<<SECTION_SHIFT); #else @@ -143,7 +141,7 @@ SECTIONS __init_end = .; _sdata = .; - RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) _edata = .; BSS_SECTION(0, 0, 0) diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index b76b75bd9e00..e442d82821df 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -24,7 +24,7 @@ obj-y += kvm-arm.o init.o interrupts.o obj-y += handle_exit.o guest.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o -obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o +obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o $(KVM)/arm/hypercalls.o obj-y += $(KVM)/arm/aarch32.o obj-y += $(KVM)/arm/vgic/vgic.o diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 684cf64b4033..0e6f23504c26 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -21,6 +21,10 @@ #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } struct kvm_stats_debugfs_item debugfs_entries[] = { + VCPU_STAT(halt_successful_poll), + VCPU_STAT(halt_attempted_poll), + VCPU_STAT(halt_poll_invalid), + VCPU_STAT(halt_wakeup), VCPU_STAT(hvc_exit_stat), VCPU_STAT(wfe_exit_stat), VCPU_STAT(wfi_exit_stat), @@ -255,6 +259,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, { events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA); + /* + * We never return a pending ext_dabt here because we deliver it to + * the virtual CPU directly when setting the event and it's no longer + * 'pending' at this point. + */ + return 0; } @@ -263,12 +273,16 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, { bool serror_pending = events->exception.serror_pending; bool has_esr = events->exception.serror_has_esr; + bool ext_dabt_pending = events->exception.ext_dabt_pending; if (serror_pending && has_esr) return -EINVAL; else if (serror_pending) kvm_inject_vabt(vcpu); + if (ext_dabt_pending) + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 0; } diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 2a6a1394d26e..e58a89d2f13f 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -9,7 +9,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> #include <asm/kvm_mmu.h> -#include <kvm/arm_psci.h> +#include <kvm/arm_hypercalls.h> #include <trace/events/kvm.h> #include "trace.h" diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index 5e5f1fabc3d4..e4e25f287ad7 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -161,6 +161,8 @@ config ARCH_BCM2835 select GPIOLIB select ARM_AMBA select ARM_ERRATA_411920 if ARCH_MULTI_V6 + select ARM_GIC if ARCH_MULTI_V7 + select ZONE_DMA if ARCH_MULTI_V7 select ARM_TIMER_SP804 select HAVE_ARM_ARCH_TIMER if ARCH_MULTI_V7 select TIMER_OF @@ -169,7 +171,7 @@ config ARCH_BCM2835 select PINCTRL_BCM2835 select MFD_CORE help - This enables support for the Broadcom BCM2835 and BCM2836 SoCs. + This enables support for the Broadcom BCM2711 and BCM283x SoCs. This SoC is used in the Raspberry Pi and Roku 2 devices. config ARCH_BCM_53573 diff --git a/arch/arm/mach-bcm/Makefile b/arch/arm/mach-bcm/Makefile index b59c813b1af4..7baa8c9427d5 100644 --- a/arch/arm/mach-bcm/Makefile +++ b/arch/arm/mach-bcm/Makefile @@ -42,8 +42,9 @@ obj-$(CONFIG_ARCH_BCM_MOBILE_L2_CACHE) += kona_l2_cache.o obj-$(CONFIG_ARCH_BCM_MOBILE_SMC) += bcm_kona_smc.o # BCM2835 -obj-$(CONFIG_ARCH_BCM2835) += board_bcm2835.o ifeq ($(CONFIG_ARCH_BCM2835),y) +obj-y += board_bcm2835.o +obj-y += bcm2711.o ifeq ($(CONFIG_ARM),y) obj-$(CONFIG_SMP) += platsmp.o endif diff --git a/arch/arm/mach-bcm/bcm2711.c b/arch/arm/mach-bcm/bcm2711.c new file mode 100644 index 000000000000..dbe296798647 --- /dev/null +++ b/arch/arm/mach-bcm/bcm2711.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2019 Stefan Wahren + */ + +#include <linux/of_address.h> + +#include <asm/mach/arch.h> + +#include "platsmp.h" + +static const char * const bcm2711_compat[] = { +#ifdef CONFIG_ARCH_MULTI_V7 + "brcm,bcm2711", +#endif +}; + +DT_MACHINE_START(BCM2711, "BCM2711") +#ifdef CONFIG_ZONE_DMA + .dma_zone_size = SZ_1G, +#endif + .dt_compat = bcm2711_compat, + .smp = smp_ops(bcm2836_smp_ops), +MACHINE_END diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c index 541e850a736c..43a16f922b53 100644 --- a/arch/arm/mach-bcm/bcm_kona_smc.c +++ b/arch/arm/mach-bcm/bcm_kona_smc.c @@ -140,7 +140,7 @@ static int bcm_kona_do_smc(u32 service_id, u32 buffer_phys) static void __bcm_kona_smc(void *info) { struct bcm_kona_smc_data *data = info; - u32 *args = bcm_smc_buffer; + u32 __iomem *args = bcm_smc_buffer; BUG_ON(smp_processor_id() != 0); BUG_ON(!args); diff --git a/arch/arm/mach-bcm/platsmp.c b/arch/arm/mach-bcm/platsmp.c index 47f8053d0240..21400b3fa5fe 100644 --- a/arch/arm/mach-bcm/platsmp.c +++ b/arch/arm/mach-bcm/platsmp.c @@ -22,6 +22,8 @@ #include <asm/smp_plat.h> #include <asm/smp_scu.h> +#include "platsmp.h" + /* Size of mapped Cortex A9 SCU address space */ #define CORTEX_A9_SCU_SIZE 0x58 diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 8062412be70f..9fc5c73cc0be 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -462,8 +462,8 @@ static s8 dm365_queue_priority_mapping[][2] = { }; static const struct dma_slave_map dm365_edma_map[] = { - { "davinci-mcbsp.0", "tx", EDMA_FILTER_PARAM(0, 2) }, - { "davinci-mcbsp.0", "rx", EDMA_FILTER_PARAM(0, 3) }, + { "davinci-mcbsp", "tx", EDMA_FILTER_PARAM(0, 2) }, + { "davinci-mcbsp", "rx", EDMA_FILTER_PARAM(0, 3) }, { "davinci_voicecodec", "tx", EDMA_FILTER_PARAM(0, 2) }, { "davinci_voicecodec", "rx", EDMA_FILTER_PARAM(0, 3) }, { "spi_davinci.2", "tx", EDMA_FILTER_PARAM(0, 10) }, diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 9dab1f50a02f..4ef56571145b 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -13,6 +13,7 @@ menuconfig ARCH_EXYNOS select ARM_AMBA select ARM_GIC select COMMON_CLK_SAMSUNG + select EXYNOS_ASV select EXYNOS_CHIPID select EXYNOS_THERMAL select EXYNOS_PMU diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c index 8b81a17f675d..416462e3f5d6 100644 --- a/arch/arm/mach-footbridge/dc21285.c +++ b/arch/arm/mach-footbridge/dc21285.c @@ -31,7 +31,6 @@ PCI_STATUS_PARITY) << 16) extern int setup_arm_irq(int, struct irqaction *); -extern void pcibios_report_status(u_int status_mask, int warn); static unsigned long dc21285_base_address(struct pci_bus *bus, unsigned int devfn) diff --git a/arch/arm/mach-hisi/Kconfig b/arch/arm/mach-hisi/Kconfig index 98338a489921..3b010fe7c0e9 100644 --- a/arch/arm/mach-hisi/Kconfig +++ b/arch/arm/mach-hisi/Kconfig @@ -15,7 +15,6 @@ menu "Hisilicon platform type" config ARCH_HI3xxx bool "Hisilicon Hi36xx family" - depends on ARCH_MULTI_V7 select CACHE_L2X0 select HAVE_ARM_SCU if SMP select HAVE_ARM_TWD if SMP @@ -25,17 +24,15 @@ config ARCH_HI3xxx Support for Hisilicon Hi36xx SoC family config ARCH_HIP01 - bool "Hisilicon HIP01 family" - depends on ARCH_MULTI_V7 - select HAVE_ARM_SCU if SMP - select HAVE_ARM_TWD if SMP - select ARM_GLOBAL_TIMER - help - Support for Hisilicon HIP01 SoC family + bool "Hisilicon HIP01 family" + select HAVE_ARM_SCU if SMP + select HAVE_ARM_TWD if SMP + select ARM_GLOBAL_TIMER + help + Support for Hisilicon HIP01 SoC family config ARCH_HIP04 bool "Hisilicon HiP04 Cortex A15 family" - depends on ARCH_MULTI_V7 select ARM_ERRATA_798181 if SMP select HAVE_ARM_ARCH_TIMER select MCPM if SMP @@ -46,7 +43,6 @@ config ARCH_HIP04 config ARCH_HIX5HD2 bool "Hisilicon X5HD2 family" - depends on ARCH_MULTI_V7 select CACHE_L2X0 select HAVE_ARM_SCU if SMP select HAVE_ARM_TWD if SMP diff --git a/arch/arm/mach-imx/anatop.c b/arch/arm/mach-imx/anatop.c index 777d8c255501..8fb68c0ec34c 100644 --- a/arch/arm/mach-imx/anatop.c +++ b/arch/arm/mach-imx/anatop.c @@ -19,8 +19,6 @@ #define ANADIG_REG_2P5 0x130 #define ANADIG_REG_CORE 0x140 #define ANADIG_ANA_MISC0 0x150 -#define ANADIG_USB1_CHRG_DETECT 0x1b0 -#define ANADIG_USB2_CHRG_DETECT 0x210 #define ANADIG_DIGPROG 0x260 #define ANADIG_DIGPROG_IMX6SL 0x280 #define ANADIG_DIGPROG_IMX7D 0x800 @@ -33,8 +31,6 @@ #define BM_ANADIG_ANA_MISC0_STOP_MODE_CONFIG 0x1000 /* Below MISC0_DISCON_HIGH_SNVS is only for i.MX6SL */ #define BM_ANADIG_ANA_MISC0_DISCON_HIGH_SNVS 0x2000 -#define BM_ANADIG_USB_CHRG_DETECT_CHK_CHRG_B 0x80000 -#define BM_ANADIG_USB_CHRG_DETECT_EN_B 0x100000 static struct regmap *anatop; @@ -96,16 +92,6 @@ void imx_anatop_post_resume(void) } -static void imx_anatop_usb_chrg_detect_disable(void) -{ - regmap_write(anatop, ANADIG_USB1_CHRG_DETECT, - BM_ANADIG_USB_CHRG_DETECT_EN_B - | BM_ANADIG_USB_CHRG_DETECT_CHK_CHRG_B); - regmap_write(anatop, ANADIG_USB2_CHRG_DETECT, - BM_ANADIG_USB_CHRG_DETECT_EN_B | - BM_ANADIG_USB_CHRG_DETECT_CHK_CHRG_B); -} - void __init imx_init_revision_from_anatop(void) { struct device_node *np; @@ -171,10 +157,6 @@ void __init imx_init_revision_from_anatop(void) void __init imx_anatop_init(void) { anatop = syscon_regmap_lookup_by_compatible("fsl,imx6q-anatop"); - if (IS_ERR(anatop)) { + if (IS_ERR(anatop)) pr_err("%s: failed to find imx6q-anatop regmap!\n", __func__); - return; - } - - imx_anatop_usb_chrg_detect_disable(); } diff --git a/arch/arm/mach-imx/cpu.c b/arch/arm/mach-imx/cpu.c index 0b137eeffb61..d8118031c51f 100644 --- a/arch/arm/mach-imx/cpu.c +++ b/arch/arm/mach-imx/cpu.c @@ -1,15 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/err.h> +#include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/io.h> #include <linux/of.h> #include <linux/of_address.h> +#include <linux/regmap.h> #include <linux/slab.h> #include <linux/sys_soc.h> #include "hardware.h" #include "common.h" +#define OCOTP_UID_H 0x420 +#define OCOTP_UID_L 0x410 + unsigned int __mxc_cpu_type; static unsigned int imx_soc_revision; @@ -76,9 +81,13 @@ void __init imx_aips_allow_unprivileged_access( struct device * __init imx_soc_device_init(void) { struct soc_device_attribute *soc_dev_attr; + const char *ocotp_compat = NULL; struct soc_device *soc_dev; struct device_node *root; + struct regmap *ocotp; const char *soc_id; + u64 soc_uid = 0; + u32 val; int ret; soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL); @@ -119,30 +128,39 @@ struct device * __init imx_soc_device_init(void) soc_id = "i.MX53"; break; case MXC_CPU_IMX6SL: + ocotp_compat = "fsl,imx6sl-ocotp"; soc_id = "i.MX6SL"; break; case MXC_CPU_IMX6DL: + ocotp_compat = "fsl,imx6q-ocotp"; soc_id = "i.MX6DL"; break; case MXC_CPU_IMX6SX: + ocotp_compat = "fsl,imx6sx-ocotp"; soc_id = "i.MX6SX"; break; case MXC_CPU_IMX6Q: + ocotp_compat = "fsl,imx6q-ocotp"; soc_id = "i.MX6Q"; break; case MXC_CPU_IMX6UL: + ocotp_compat = "fsl,imx6ul-ocotp"; soc_id = "i.MX6UL"; break; case MXC_CPU_IMX6ULL: + ocotp_compat = "fsl,imx6ul-ocotp"; soc_id = "i.MX6ULL"; break; case MXC_CPU_IMX6ULZ: + ocotp_compat = "fsl,imx6ul-ocotp"; soc_id = "i.MX6ULZ"; break; case MXC_CPU_IMX6SLL: + ocotp_compat = "fsl,imx6sll-ocotp"; soc_id = "i.MX6SLL"; break; case MXC_CPU_IMX7D: + ocotp_compat = "fsl,imx7d-ocotp"; soc_id = "i.MX7D"; break; case MXC_CPU_IMX7ULP: @@ -153,18 +171,36 @@ struct device * __init imx_soc_device_init(void) } soc_dev_attr->soc_id = soc_id; + if (ocotp_compat) { + ocotp = syscon_regmap_lookup_by_compatible(ocotp_compat); + if (IS_ERR(ocotp)) + pr_err("%s: failed to find %s regmap!\n", __func__, ocotp_compat); + + regmap_read(ocotp, OCOTP_UID_H, &val); + soc_uid = val; + regmap_read(ocotp, OCOTP_UID_L, &val); + soc_uid <<= 32; + soc_uid |= val; + } + soc_dev_attr->revision = kasprintf(GFP_KERNEL, "%d.%d", (imx_soc_revision >> 4) & 0xf, imx_soc_revision & 0xf); if (!soc_dev_attr->revision) goto free_soc; + soc_dev_attr->serial_number = kasprintf(GFP_KERNEL, "%016llX", soc_uid); + if (!soc_dev_attr->serial_number) + goto free_rev; + soc_dev = soc_device_register(soc_dev_attr); if (IS_ERR(soc_dev)) - goto free_rev; + goto free_serial_number; return soc_device_to_device(soc_dev); +free_serial_number: + kfree(soc_dev_attr->serial_number); free_rev: kfree(soc_dev_attr->revision); free_soc: diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c index 39a7d9393641..24dd5bbe60e4 100644 --- a/arch/arm/mach-imx/cpuidle-imx6q.c +++ b/arch/arm/mach-imx/cpuidle-imx6q.c @@ -62,13 +62,13 @@ static struct cpuidle_driver imx6q_cpuidle_driver = { */ void imx6q_cpuidle_fec_irqs_used(void) { - imx6q_cpuidle_driver.states[1].disabled = true; + cpuidle_driver_state_disabled(&imx6q_cpuidle_driver, 1, true); } EXPORT_SYMBOL_GPL(imx6q_cpuidle_fec_irqs_used); void imx6q_cpuidle_fec_irqs_unused(void) { - imx6q_cpuidle_driver.states[1].disabled = false; + cpuidle_driver_state_disabled(&imx6q_cpuidle_driver, 1, false); } EXPORT_SYMBOL_GPL(imx6q_cpuidle_fec_irqs_unused); diff --git a/arch/arm/mach-imx/hotplug.c b/arch/arm/mach-imx/hotplug.c index 089d11ffaa3e..82e22398d43d 100644 --- a/arch/arm/mach-imx/hotplug.c +++ b/arch/arm/mach-imx/hotplug.c @@ -6,32 +6,12 @@ #include <linux/errno.h> #include <linux/jiffies.h> +#include <asm/cacheflush.h> #include <asm/cp15.h> #include <asm/proc-fns.h> #include "common.h" -static inline void cpu_enter_lowpower(void) -{ - unsigned int v; - - asm volatile( - "mcr p15, 0, %1, c7, c5, 0\n" - " mcr p15, 0, %1, c7, c10, 4\n" - /* - * Turn off coherency - */ - " mrc p15, 0, %0, c1, c0, 1\n" - " bic %0, %0, %3\n" - " mcr p15, 0, %0, c1, c0, 1\n" - " mrc p15, 0, %0, c1, c0, 0\n" - " bic %0, %0, %2\n" - " mcr p15, 0, %0, c1, c0, 0\n" - : "=&r" (v) - : "r" (0), "Ir" (CR_C), "Ir" (0x40) - : "cc"); -} - /* * platform-specific code to shutdown a CPU * @@ -39,7 +19,7 @@ static inline void cpu_enter_lowpower(void) */ void imx_cpu_die(unsigned int cpu) { - cpu_enter_lowpower(); + v7_exit_coherency_flush(louis); /* * We use the cpu jumping argument register to sync with * imx_cpu_kill() which is running on cpu0 and waiting for diff --git a/arch/arm/mach-mmp/Kconfig b/arch/arm/mach-mmp/Kconfig index 0440109e973b..b58a03b18bde 100644 --- a/arch/arm/mach-mmp/Kconfig +++ b/arch/arm/mach-mmp/Kconfig @@ -1,13 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig ARCH_MMP - bool "Marvell PXA168/910/MMP2" + bool "Marvell PXA168/910/MMP2/MMP3" depends on ARCH_MULTI_V5 || ARCH_MULTI_V7 select GPIO_PXA select GPIOLIB select PINCTRL select PLAT_PXA help - Support for Marvell's PXA168/PXA910(MMP) and MMP2 processor line. + Support for Marvell's PXA168/PXA910(MMP), MMP2, and MMP3 processor lines. if ARCH_MMP @@ -129,6 +129,24 @@ config MACH_MMP2_DT Include support for Marvell MMP2 based platforms using the device tree. +config MACH_MMP3_DT + bool "Support MMP3 (ARMv7) platforms" + depends on ARCH_MULTI_V7 + select ARM_GIC + select HAVE_ARM_SCU if SMP + select HAVE_ARM_TWD if SMP + select CACHE_L2X0 + select PINCTRL + select PINCTRL_SINGLE + select ARCH_HAS_RESET_CONTROLLER + select CPU_PJ4B + select PM_GENERIC_DOMAINS if PM + select PM_GENERIC_DOMAINS_OF if PM && OF + help + Say 'Y' here if you want to include support for platforms + with Marvell MMP3 processor, also known as PXA2128 or + Armada 620. + endmenu config CPU_PXA168 diff --git a/arch/arm/mach-mmp/Makefile b/arch/arm/mach-mmp/Makefile index 8f267c7bc6e8..7b3a7f979eec 100644 --- a/arch/arm/mach-mmp/Makefile +++ b/arch/arm/mach-mmp/Makefile @@ -22,6 +22,9 @@ ifeq ($(CONFIG_PM),y) obj-$(CONFIG_CPU_PXA910) += pm-pxa910.o obj-$(CONFIG_CPU_MMP2) += pm-mmp2.o endif +ifeq ($(CONFIG_SMP),y) +obj-$(CONFIG_MACH_MMP3_DT) += platsmp.o +endif # board support obj-$(CONFIG_MACH_ASPENITE) += aspenite.o @@ -34,5 +37,6 @@ obj-$(CONFIG_MACH_FLINT) += flint.o obj-$(CONFIG_MACH_MARVELL_JASPER) += jasper.o obj-$(CONFIG_MACH_MMP_DT) += mmp-dt.o obj-$(CONFIG_MACH_MMP2_DT) += mmp2-dt.o +obj-$(CONFIG_MACH_MMP3_DT) += mmp3.o obj-$(CONFIG_MACH_TETON_BGA) += teton_bga.o obj-$(CONFIG_MACH_GPLUGD) += gplugd.o diff --git a/arch/arm/mach-mmp/addr-map.h b/arch/arm/mach-mmp/addr-map.h index 25edf6a92276..3dc2f0b0ecba 100644 --- a/arch/arm/mach-mmp/addr-map.h +++ b/arch/arm/mach-mmp/addr-map.h @@ -20,6 +20,10 @@ #define AXI_VIRT_BASE IOMEM(0xfe200000) #define AXI_PHYS_SIZE 0x00200000 +#define PGU_PHYS_BASE 0xe0000000 +#define PGU_VIRT_BASE IOMEM(0xfe400000) +#define PGU_PHYS_SIZE 0x00100000 + /* Static Memory Controller - Chip Select 0 and 1 */ #define SMC_CS0_PHYS_BASE 0x80000000 #define SMC_CS0_PHYS_SIZE 0x10000000 @@ -38,4 +42,7 @@ #define CIU_VIRT_BASE (AXI_VIRT_BASE + 0x82c00) #define CIU_REG(x) (CIU_VIRT_BASE + (x)) +#define SCU_VIRT_BASE (PGU_VIRT_BASE) +#define SCU_REG(x) (SCU_VIRT_BASE + (x)) + #endif /* __ASM_MACH_ADDR_MAP_H */ diff --git a/arch/arm/mach-mmp/common.c b/arch/arm/mach-mmp/common.c index 6684abc7708b..e94349d4726c 100644 --- a/arch/arm/mach-mmp/common.c +++ b/arch/arm/mach-mmp/common.c @@ -13,11 +13,11 @@ #include <asm/mach/map.h> #include <asm/system_misc.h> #include "addr-map.h" -#include "cputype.h" +#include <linux/soc/mmp/cputype.h> #include "common.h" -#define MMP_CHIPID (AXI_VIRT_BASE + 0x82c00) +#define MMP_CHIPID CIU_REG(0x00) unsigned int mmp_chip_id; EXPORT_SYMBOL(mmp_chip_id); @@ -36,6 +36,15 @@ static struct map_desc standard_io_desc[] __initdata = { }, }; +static struct map_desc mmp2_io_desc[] __initdata = { + { + .pfn = __phys_to_pfn(PGU_PHYS_BASE), + .virtual = (unsigned long)PGU_VIRT_BASE, + .length = PGU_PHYS_SIZE, + .type = MT_DEVICE, + }, +}; + void __init mmp_map_io(void) { iotable_init(standard_io_desc, ARRAY_SIZE(standard_io_desc)); @@ -44,6 +53,12 @@ void __init mmp_map_io(void) mmp_chip_id = __raw_readl(MMP_CHIPID); } +void __init mmp2_map_io(void) +{ + mmp_map_io(); + iotable_init(mmp2_io_desc, ARRAY_SIZE(mmp2_io_desc)); +} + void mmp_restart(enum reboot_mode mode, const char *cmd) { soft_restart(0); diff --git a/arch/arm/mach-mmp/common.h b/arch/arm/mach-mmp/common.h index 483b8b6d3005..ed56b3f15b45 100644 --- a/arch/arm/mach-mmp/common.h +++ b/arch/arm/mach-mmp/common.h @@ -5,4 +5,5 @@ extern void mmp_timer_init(int irq, unsigned long rate); extern void __init mmp_map_io(void); +extern void __init mmp2_map_io(void); extern void mmp_restart(enum reboot_mode, const char *); diff --git a/arch/arm/mach-mmp/cputype.h b/arch/arm/mach-mmp/cputype.h deleted file mode 100644 index a96abcf521b4..000000000000 --- a/arch/arm/mach-mmp/cputype.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_CPUTYPE_H -#define __ASM_MACH_CPUTYPE_H - -#include <asm/cputype.h> - -/* - * CPU Stepping CPU_ID CHIP_ID - * - * PXA168 S0 0x56158400 0x0000C910 - * PXA168 A0 0x56158400 0x00A0A168 - * PXA910 Y1 0x56158400 0x00F2C920 - * PXA910 A0 0x56158400 0x00F2C910 - * PXA910 A1 0x56158400 0x00A0C910 - * PXA920 Y0 0x56158400 0x00F2C920 - * PXA920 A0 0x56158400 0x00A0C920 - * PXA920 A1 0x56158400 0x00A1C920 - * MMP2 Z0 0x560f5811 0x00F00410 - * MMP2 Z1 0x560f5811 0x00E00410 - * MMP2 A0 0x560f5811 0x00A0A610 - */ - -extern unsigned int mmp_chip_id; - -#ifdef CONFIG_CPU_PXA168 -static inline int cpu_is_pxa168(void) -{ - return (((read_cpuid_id() >> 8) & 0xff) == 0x84) && - ((mmp_chip_id & 0xfff) == 0x168); -} -#else -#define cpu_is_pxa168() (0) -#endif - -/* cpu_is_pxa910() is shared on both pxa910 and pxa920 */ -#ifdef CONFIG_CPU_PXA910 -static inline int cpu_is_pxa910(void) -{ - return (((read_cpuid_id() >> 8) & 0xff) == 0x84) && - (((mmp_chip_id & 0xfff) == 0x910) || - ((mmp_chip_id & 0xfff) == 0x920)); -} -#else -#define cpu_is_pxa910() (0) -#endif - -#if defined(CONFIG_CPU_MMP2) || defined(CONFIG_MACH_MMP2_DT) -static inline int cpu_is_mmp2(void) -{ - return (((read_cpuid_id() >> 8) & 0xff) == 0x58) && - (((mmp_chip_id & 0xfff) == 0x410) || - ((mmp_chip_id & 0xfff) == 0x610)); -} -#else -#define cpu_is_mmp2() (0) -#endif - -#endif /* __ASM_MACH_CPUTYPE_H */ diff --git a/arch/arm/mach-mmp/devices.c b/arch/arm/mach-mmp/devices.c index 130c1a603ba2..18bee66a671f 100644 --- a/arch/arm/mach-mmp/devices.c +++ b/arch/arm/mach-mmp/devices.c @@ -11,7 +11,7 @@ #include <asm/irq.h> #include "irqs.h" #include "devices.h" -#include "cputype.h" +#include <linux/soc/mmp/cputype.h> #include "regs-usb.h" int __init pxa_register_device(struct pxa_device_desc *desc, diff --git a/arch/arm/mach-mmp/mmp-dt.c b/arch/arm/mach-mmp/mmp-dt.c index 35559792d5cc..91214996acec 100644 --- a/arch/arm/mach-mmp/mmp-dt.c +++ b/arch/arm/mach-mmp/mmp-dt.c @@ -9,14 +9,13 @@ #include <linux/irqchip.h> #include <linux/of_platform.h> #include <linux/clk-provider.h> +#include <linux/clocksource.h> #include <asm/mach/arch.h> #include <asm/mach/time.h> #include <asm/hardware/cache-tauros2.h> #include "common.h" -extern void __init mmp_dt_init_timer(void); - static const char *const pxa168_dt_board_compat[] __initconst = { "mrvl,pxa168-aspenite", NULL, @@ -32,8 +31,8 @@ static void __init mmp_init_time(void) #ifdef CONFIG_CACHE_TAUROS2 tauros2_init(0); #endif - mmp_dt_init_timer(); of_clk_init(NULL); + timer_probe(); } DT_MACHINE_START(PXA168_DT, "Marvell PXA168 (Device Tree Support)") diff --git a/arch/arm/mach-mmp/mmp2-dt.c b/arch/arm/mach-mmp/mmp2-dt.c index 305a9daba6d6..510c762ddc48 100644 --- a/arch/arm/mach-mmp/mmp2-dt.c +++ b/arch/arm/mach-mmp/mmp2-dt.c @@ -10,21 +10,20 @@ #include <linux/irqchip.h> #include <linux/of_platform.h> #include <linux/clk-provider.h> +#include <linux/clocksource.h> #include <asm/mach/arch.h> #include <asm/mach/time.h> #include <asm/hardware/cache-tauros2.h> #include "common.h" -extern void __init mmp_dt_init_timer(void); - static void __init mmp_init_time(void) { #ifdef CONFIG_CACHE_TAUROS2 tauros2_init(0); #endif of_clk_init(NULL); - mmp_dt_init_timer(); + timer_probe(); } static const char *const mmp2_dt_board_compat[] __initconst = { @@ -33,7 +32,7 @@ static const char *const mmp2_dt_board_compat[] __initconst = { }; DT_MACHINE_START(MMP2_DT, "Marvell MMP2 (Device Tree Support)") - .map_io = mmp_map_io, + .map_io = mmp2_map_io, .init_time = mmp_init_time, .dt_compat = mmp2_dt_board_compat, MACHINE_END diff --git a/arch/arm/mach-mmp/mmp2.c b/arch/arm/mach-mmp/mmp2.c index 18ea3e1a26e6..bbc4c2274de3 100644 --- a/arch/arm/mach-mmp/mmp2.c +++ b/arch/arm/mach-mmp/mmp2.c @@ -20,7 +20,7 @@ #include <asm/mach/time.h> #include "addr-map.h" #include "regs-apbc.h" -#include "cputype.h" +#include <linux/soc/mmp/cputype.h> #include "irqs.h" #include "mfp.h" #include "devices.h" diff --git a/arch/arm/mach-mmp/mmp3.c b/arch/arm/mach-mmp/mmp3.c new file mode 100644 index 000000000000..b0e86964f302 --- /dev/null +++ b/arch/arm/mach-mmp/mmp3.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Marvell MMP3 aka PXA2128 aka 88AP2128 support + * + * Copyright (C) 2019 Lubomir Rintel <lkundrak@v3.sk> + */ + +#include <linux/io.h> +#include <linux/irqchip.h> +#include <linux/of_platform.h> +#include <linux/clk-provider.h> +#include <asm/mach/arch.h> +#include <asm/hardware/cache-l2x0.h> + +#include "common.h" + +static const char *const mmp3_dt_board_compat[] __initconst = { + "marvell,mmp3", + NULL, +}; + +DT_MACHINE_START(MMP2_DT, "Marvell MMP3") + .map_io = mmp2_map_io, + .dt_compat = mmp3_dt_board_compat, + .l2c_aux_val = 1 << L310_AUX_CTRL_FWA_SHIFT | + L310_AUX_CTRL_DATA_PREFETCH | + L310_AUX_CTRL_INSTR_PREFETCH, + .l2c_aux_mask = 0xc20fffff, +MACHINE_END diff --git a/arch/arm/mach-mmp/platsmp.c b/arch/arm/mach-mmp/platsmp.c new file mode 100644 index 000000000000..c99405469bb4 --- /dev/null +++ b/arch/arm/mach-mmp/platsmp.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2019 Lubomir Rintel <lkundrak@v3.sk> + */ +#include <linux/io.h> +#include <asm/smp_scu.h> +#include <asm/smp.h> +#include "addr-map.h" + +#define SW_BRANCH_VIRT_ADDR CIU_REG(0x24) + +static int mmp3_boot_secondary(unsigned int cpu, struct task_struct *idle) +{ + /* + * Apparently, the boot ROM on the second core spins on this + * register becoming non-zero and then jumps to the address written + * there. No IPIs involved. + */ + __raw_writel(__pa_symbol(secondary_startup), SW_BRANCH_VIRT_ADDR); + return 0; +} + +static void mmp3_smp_prepare_cpus(unsigned int max_cpus) +{ + scu_enable(SCU_VIRT_BASE); +} + +static const struct smp_operations mmp3_smp_ops __initconst = { + .smp_prepare_cpus = mmp3_smp_prepare_cpus, + .smp_boot_secondary = mmp3_boot_secondary, +}; +CPU_METHOD_OF_DECLARE(mmp3_smp, "marvell,mmp3-smp", &mmp3_smp_ops); diff --git a/arch/arm/mach-mmp/pm-mmp2.c b/arch/arm/mach-mmp/pm-mmp2.c index 2923dd5732a6..2d86381e152d 100644 --- a/arch/arm/mach-mmp/pm-mmp2.c +++ b/arch/arm/mach-mmp/pm-mmp2.c @@ -17,7 +17,7 @@ #include <linux/interrupt.h> #include <asm/mach-types.h> -#include "cputype.h" +#include <linux/soc/mmp/cputype.h> #include "addr-map.h" #include "pm-mmp2.h" #include "regs-icu.h" diff --git a/arch/arm/mach-mmp/pm-pxa910.c b/arch/arm/mach-mmp/pm-pxa910.c index 58535ce206dc..69ebe18ff209 100644 --- a/arch/arm/mach-mmp/pm-pxa910.c +++ b/arch/arm/mach-mmp/pm-pxa910.c @@ -18,7 +18,7 @@ #include <asm/mach-types.h> #include <asm/outercache.h> -#include "cputype.h" +#include <linux/soc/mmp/cputype.h> #include "addr-map.h" #include "pm-pxa910.h" #include "regs-icu.h" diff --git a/arch/arm/mach-mmp/pxa168.c b/arch/arm/mach-mmp/pxa168.c index 6e0277488967..b642e900727a 100644 --- a/arch/arm/mach-mmp/pxa168.c +++ b/arch/arm/mach-mmp/pxa168.c @@ -21,7 +21,7 @@ #include "addr-map.h" #include "clock.h" #include "common.h" -#include "cputype.h" +#include <linux/soc/mmp/cputype.h> #include "devices.h" #include "irqs.h" #include "mfp.h" diff --git a/arch/arm/mach-mmp/pxa910.c b/arch/arm/mach-mmp/pxa910.c index cba31c758dea..b19a069d9fab 100644 --- a/arch/arm/mach-mmp/pxa910.c +++ b/arch/arm/mach-mmp/pxa910.c @@ -18,7 +18,7 @@ #include <asm/mach/time.h> #include "addr-map.h" #include "regs-apbc.h" -#include "cputype.h" +#include <linux/soc/mmp/cputype.h> #include "irqs.h" #include "mfp.h" #include "devices.h" diff --git a/arch/arm/mach-mmp/regs-usb.h b/arch/arm/mach-mmp/regs-usb.h index d9f08c160154..ed0d1aa0ad6c 100644 --- a/arch/arm/mach-mmp/regs-usb.h +++ b/arch/arm/mach-mmp/regs-usb.h @@ -121,100 +121,6 @@ #define UTMI_OTG_ADDON_OTG_ON (1 << 0) -/* For MMP3 USB Phy */ -#define USB2_PLL_REG0 0x4 -#define USB2_PLL_REG1 0x8 -#define USB2_TX_REG0 0x10 -#define USB2_TX_REG1 0x14 -#define USB2_TX_REG2 0x18 -#define USB2_RX_REG0 0x20 -#define USB2_RX_REG1 0x24 -#define USB2_RX_REG2 0x28 -#define USB2_ANA_REG0 0x30 -#define USB2_ANA_REG1 0x34 -#define USB2_ANA_REG2 0x38 -#define USB2_DIG_REG0 0x3C -#define USB2_DIG_REG1 0x40 -#define USB2_DIG_REG2 0x44 -#define USB2_DIG_REG3 0x48 -#define USB2_TEST_REG0 0x4C -#define USB2_TEST_REG1 0x50 -#define USB2_TEST_REG2 0x54 -#define USB2_CHARGER_REG0 0x58 -#define USB2_OTG_REG0 0x5C -#define USB2_PHY_MON0 0x60 -#define USB2_RESETVE_REG0 0x64 -#define USB2_ICID_REG0 0x78 -#define USB2_ICID_REG1 0x7C - -/* USB2_PLL_REG0 */ -/* This is for Ax stepping */ -#define USB2_PLL_FBDIV_SHIFT_MMP3 0 -#define USB2_PLL_FBDIV_MASK_MMP3 (0xFF << 0) - -#define USB2_PLL_REFDIV_SHIFT_MMP3 8 -#define USB2_PLL_REFDIV_MASK_MMP3 (0xF << 8) - -#define USB2_PLL_VDD12_SHIFT_MMP3 12 -#define USB2_PLL_VDD18_SHIFT_MMP3 14 - -/* This is for B0 stepping */ -#define USB2_PLL_FBDIV_SHIFT_MMP3_B0 0 -#define USB2_PLL_REFDIV_SHIFT_MMP3_B0 9 -#define USB2_PLL_VDD18_SHIFT_MMP3_B0 14 -#define USB2_PLL_FBDIV_MASK_MMP3_B0 0x01FF -#define USB2_PLL_REFDIV_MASK_MMP3_B0 0x3E00 - -#define USB2_PLL_CAL12_SHIFT_MMP3 0 -#define USB2_PLL_CALI12_MASK_MMP3 (0x3 << 0) - -#define USB2_PLL_VCOCAL_START_SHIFT_MMP3 2 - -#define USB2_PLL_KVCO_SHIFT_MMP3 4 -#define USB2_PLL_KVCO_MASK_MMP3 (0x7<<4) - -#define USB2_PLL_ICP_SHIFT_MMP3 8 -#define USB2_PLL_ICP_MASK_MMP3 (0x7<<8) - -#define USB2_PLL_LOCK_BYPASS_SHIFT_MMP3 12 - -#define USB2_PLL_PU_PLL_SHIFT_MMP3 13 -#define USB2_PLL_PU_PLL_MASK (0x1 << 13) - -#define USB2_PLL_READY_MASK_MMP3 (0x1 << 15) - -/* USB2_TX_REG0 */ -#define USB2_TX_IMPCAL_VTH_SHIFT_MMP3 8 -#define USB2_TX_IMPCAL_VTH_MASK_MMP3 (0x7 << 8) - -#define USB2_TX_RCAL_START_SHIFT_MMP3 13 - -/* USB2_TX_REG1 */ -#define USB2_TX_CK60_PHSEL_SHIFT_MMP3 0 -#define USB2_TX_CK60_PHSEL_MASK_MMP3 (0xf << 0) - -#define USB2_TX_AMP_SHIFT_MMP3 4 -#define USB2_TX_AMP_MASK_MMP3 (0x7 << 4) - -#define USB2_TX_VDD12_SHIFT_MMP3 8 -#define USB2_TX_VDD12_MASK_MMP3 (0x3 << 8) - -/* USB2_TX_REG2 */ -#define USB2_TX_DRV_SLEWRATE_SHIFT 10 - -/* USB2_RX_REG0 */ -#define USB2_RX_SQ_THRESH_SHIFT_MMP3 4 -#define USB2_RX_SQ_THRESH_MASK_MMP3 (0xf << 4) - -#define USB2_RX_SQ_LENGTH_SHIFT_MMP3 10 -#define USB2_RX_SQ_LENGTH_MASK_MMP3 (0x3 << 10) - -/* USB2_ANA_REG1*/ -#define USB2_ANA_PU_ANA_SHIFT_MMP3 14 - -/* USB2_OTG_REG0 */ -#define USB2_OTG_PU_OTG_SHIFT_MMP3 3 - /* fsic registers */ #define FSIC_MISC 0x4 #define FSIC_INT 0x28 diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c index 483df32583be..110dcb3314d1 100644 --- a/arch/arm/mach-mmp/time.c +++ b/arch/arm/mach-mmp/time.c @@ -33,7 +33,7 @@ #include "regs-timers.h" #include "regs-apbc.h" #include "irqs.h" -#include "cputype.h" +#include <linux/soc/mmp/cputype.h> #include "clock.h" #define TIMERS_VIRT_BASE TIMERS1_VIRT_BASE @@ -155,7 +155,8 @@ static void __init timer_config(void) __raw_writel(0x0, mmp_timer_base + TMR_CER); /* disable */ - ccr &= (cpu_is_mmp2()) ? (TMR_CCR_CS_0(0) | TMR_CCR_CS_1(0)) : + ccr &= (cpu_is_mmp2() || cpu_is_mmp3()) ? + (TMR_CCR_CS_0(0) | TMR_CCR_CS_1(0)) : (TMR_CCR_CS_0(3) | TMR_CCR_CS_1(3)); __raw_writel(ccr, mmp_timer_base + TMR_CCR); @@ -195,30 +196,17 @@ void __init mmp_timer_init(int irq, unsigned long rate) clockevents_config_and_register(&ckevt, rate, MIN_DELTA, MAX_DELTA); } -#ifdef CONFIG_OF -static const struct of_device_id mmp_timer_dt_ids[] = { - { .compatible = "mrvl,mmp-timer", }, - {} -}; - -void __init mmp_dt_init_timer(void) +static int __init mmp_dt_init_timer(struct device_node *np) { - struct device_node *np; struct clk *clk; int irq, ret; unsigned long rate; - np = of_find_matching_node(NULL, mmp_timer_dt_ids); - if (!np) { - ret = -ENODEV; - goto out; - } - clk = of_clk_get(np, 0); if (!IS_ERR(clk)) { ret = clk_prepare_enable(clk); if (ret) - goto out; + return ret; rate = clk_get_rate(clk) / 2; } else if (cpu_is_pj4()) { rate = 6500000; @@ -227,18 +215,15 @@ void __init mmp_dt_init_timer(void) } irq = irq_of_parse_and_map(np, 0); - if (!irq) { - ret = -EINVAL; - goto out; - } + if (!irq) + return -EINVAL; + mmp_timer_base = of_iomap(np, 0); - if (!mmp_timer_base) { - ret = -ENOMEM; - goto out; - } + if (!mmp_timer_base) + return -ENOMEM; + mmp_timer_init(irq, rate); - return; -out: - pr_err("Failed to get timer from device tree with error:%d\n", ret); + return 0; } -#endif + +TIMER_OF_DECLARE(mmp_timer, "mrvl,mmp-timer", mmp_dt_init_timer); diff --git a/arch/arm/mach-omap1/Kconfig b/arch/arm/mach-omap1/Kconfig index 2a17dc1d122c..948da556162e 100644 --- a/arch/arm/mach-omap1/Kconfig +++ b/arch/arm/mach-omap1/Kconfig @@ -4,30 +4,25 @@ if ARCH_OMAP1 menu "TI OMAP1 specific features" comment "OMAP Core Type" - depends on ARCH_OMAP1 config ARCH_OMAP730 - depends on ARCH_OMAP1 bool "OMAP730 Based System" select ARCH_OMAP_OTG select CPU_ARM926T select OMAP_MPU_TIMER config ARCH_OMAP850 - depends on ARCH_OMAP1 bool "OMAP850 Based System" select ARCH_OMAP_OTG select CPU_ARM926T config ARCH_OMAP15XX - depends on ARCH_OMAP1 default y bool "OMAP15xx Based System" select CPU_ARM925T select OMAP_MPU_TIMER config ARCH_OMAP16XX - depends on ARCH_OMAP1 bool "OMAP16xx Based System" select ARCH_OMAP_OTG select CPU_ARM926T @@ -35,7 +30,6 @@ config ARCH_OMAP16XX config OMAP_MUX bool "OMAP multiplexing support" - depends on ARCH_OMAP default y help Pin multiplexing support for OMAP boards. If your bootloader @@ -60,25 +54,24 @@ config OMAP_MUX_WARNINGS printed, it's safe to deselect OMAP_MUX for your product. comment "OMAP Board Type" - depends on ARCH_OMAP1 config MACH_OMAP_INNOVATOR bool "TI Innovator" - depends on ARCH_OMAP1 && (ARCH_OMAP15XX || ARCH_OMAP16XX) + depends on ARCH_OMAP15XX || ARCH_OMAP16XX help TI OMAP 1510 or 1610 Innovator board support. Say Y here if you have such a board. config MACH_OMAP_H2 bool "TI H2 Support" - depends on ARCH_OMAP1 && ARCH_OMAP16XX + depends on ARCH_OMAP16XX help TI OMAP 1610/1611B H2 board support. Say Y here if you have such a board. config MACH_OMAP_H3 bool "TI H3 Support" - depends on ARCH_OMAP1 && ARCH_OMAP16XX + depends on ARCH_OMAP16XX help TI OMAP 1710 H3 board support. Say Y here if you have such a board. @@ -91,7 +84,7 @@ config MACH_HERALD config MACH_OMAP_OSK bool "TI OSK Support" - depends on ARCH_OMAP1 && ARCH_OMAP16XX + depends on ARCH_OMAP16XX help TI OMAP 5912 OSK (OMAP Starter Kit) board support. Say Y here if you have such a board. @@ -106,21 +99,21 @@ config OMAP_OSK_MISTRAL config MACH_OMAP_PERSEUS2 bool "TI Perseus2" - depends on ARCH_OMAP1 && ARCH_OMAP730 + depends on ARCH_OMAP730 help Support for TI OMAP 730 Perseus2 board. Say Y here if you have such a board. config MACH_OMAP_FSAMPLE bool "TI F-Sample" - depends on ARCH_OMAP1 && ARCH_OMAP730 + depends on ARCH_OMAP730 help Support for TI OMAP 850 F-Sample board. Say Y here if you have such a board. config MACH_OMAP_PALMTE bool "Palm Tungsten E" - depends on ARCH_OMAP1 && ARCH_OMAP15XX + depends on ARCH_OMAP15XX help Support for the Palm Tungsten E PDA. To boot the kernel, you'll need a PalmOS compatible bootloader; check out @@ -129,7 +122,7 @@ config MACH_OMAP_PALMTE config MACH_OMAP_PALMZ71 bool "Palm Zire71" - depends on ARCH_OMAP1 && ARCH_OMAP15XX + depends on ARCH_OMAP15XX help Support for the Palm Zire71 PDA. To boot the kernel, you'll need a PalmOS compatible bootloader; check out @@ -138,7 +131,7 @@ config MACH_OMAP_PALMZ71 config MACH_OMAP_PALMTT bool "Palm Tungsten|T" - depends on ARCH_OMAP1 && ARCH_OMAP15XX + depends on ARCH_OMAP15XX help Support for the Palm Tungsten|T PDA. To boot the kernel, you'll need a PalmOS compatible bootloader (Garux); check out @@ -147,7 +140,7 @@ config MACH_OMAP_PALMTT config MACH_SX1 bool "Siemens SX1" - depends on ARCH_OMAP1 && ARCH_OMAP15XX + depends on ARCH_OMAP15XX select I2C help Support for the Siemens SX1 phone. To boot the kernel, @@ -159,14 +152,14 @@ config MACH_SX1 config MACH_NOKIA770 bool "Nokia 770" - depends on ARCH_OMAP1 && ARCH_OMAP16XX + depends on ARCH_OMAP16XX help Support for the Nokia 770 Internet Tablet. Say Y here if you have such a device. config MACH_AMS_DELTA bool "Amstrad E3 (Delta)" - depends on ARCH_OMAP1 && ARCH_OMAP15XX + depends on ARCH_OMAP15XX select FIQ select GPIO_GENERIC_PLATFORM select LEDS_GPIO_REGISTER @@ -178,7 +171,7 @@ config MACH_AMS_DELTA config MACH_OMAP_GENERIC bool "Generic OMAP board" - depends on ARCH_OMAP1 && (ARCH_OMAP15XX || ARCH_OMAP16XX) + depends on ARCH_OMAP15XX || ARCH_OMAP16XX help Support for generic OMAP-1510, 1610 or 1710 board with no FPGA. Can be used as template for porting Linux to diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c index 0254eb9cf8c6..4eea3e39e633 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq.c +++ b/arch/arm/mach-omap1/ams-delta-fiq.c @@ -110,7 +110,7 @@ void __init ams_delta_init_fiq(struct gpio_chip *chip, /* * FIQ handler takes full control over serio data and clk GPIO - * pins. Initiaize them and keep requested so nobody can + * pins. Initialize them and keep requested so nobody can * interfere. Fail if any of those two couldn't be requested. */ switch (i) { diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 8f208197988f..f07cfda85156 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -29,6 +29,11 @@ obj-y += mcbsp.o endif obj-$(CONFIG_TWL4030_CORE) += omap_twl.o + +ifneq ($(CONFIG_MFD_CPCAP),) +obj-y += pmic-cpcap.o +endif + obj-$(CONFIG_SOC_HAS_OMAP2_SDRC) += sdrc.o # SMP support ONLY available for OMAP4 @@ -216,9 +221,6 @@ obj-$(CONFIG_MACH_NOKIA_N8X0) += board-n8x0.o # Platform specific device init code -omap-hsmmc-$(CONFIG_MMC_OMAP_HS) := hsmmc.o -obj-y += $(omap-hsmmc-m) $(omap-hsmmc-y) - obj-y += omap_phy_internal.o obj-$(CONFIG_MACH_OMAP2_TUSB6010) += usb-tusb6010.o diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c index f98c8ecc9ca2..dedd47e30b98 100644 --- a/arch/arm/mach-omap2/clockdomain.c +++ b/arch/arm/mach-omap2/clockdomain.c @@ -1147,7 +1147,21 @@ void clkdm_del_autodeps(struct clockdomain *clkdm) /* Clockdomain-to-clock/hwmod framework interface code */ -static int _clkdm_clk_hwmod_enable(struct clockdomain *clkdm) +/** + * clkdm_clk_enable - add an enabled downstream clock to this clkdm + * @clkdm: struct clockdomain * + * @clk: struct clk * of the enabled downstream clock + * + * Increment the usecount of the clockdomain @clkdm and ensure that it + * is awake before @clk is enabled. Intended to be called by + * clk_enable() code. If the clockdomain is in software-supervised + * idle mode, force the clockdomain to wake. If the clockdomain is in + * hardware-supervised idle mode, add clkdm-pwrdm autodependencies, to + * ensure that devices in the clockdomain can be read from/written to + * by on-chip processors. Returns -EINVAL if passed null pointers; + * returns 0 upon success or if the clockdomain is in hwsup idle mode. + */ +int clkdm_clk_enable(struct clockdomain *clkdm, struct clk *unused) { if (!clkdm || !arch_clkdm || !arch_clkdm->clkdm_clk_enable) return -EINVAL; @@ -1175,33 +1189,6 @@ static int _clkdm_clk_hwmod_enable(struct clockdomain *clkdm) } /** - * clkdm_clk_enable - add an enabled downstream clock to this clkdm - * @clkdm: struct clockdomain * - * @clk: struct clk * of the enabled downstream clock - * - * Increment the usecount of the clockdomain @clkdm and ensure that it - * is awake before @clk is enabled. Intended to be called by - * clk_enable() code. If the clockdomain is in software-supervised - * idle mode, force the clockdomain to wake. If the clockdomain is in - * hardware-supervised idle mode, add clkdm-pwrdm autodependencies, to - * ensure that devices in the clockdomain can be read from/written to - * by on-chip processors. Returns -EINVAL if passed null pointers; - * returns 0 upon success or if the clockdomain is in hwsup idle mode. - */ -int clkdm_clk_enable(struct clockdomain *clkdm, struct clk *clk) -{ - /* - * XXX Rewrite this code to maintain a list of enabled - * downstream clocks for debugging purposes? - */ - - if (!clk) - return -EINVAL; - - return _clkdm_clk_hwmod_enable(clkdm); -} - -/** * clkdm_clk_disable - remove an enabled downstream clock from this clkdm * @clkdm: struct clockdomain * * @clk: struct clk * of the disabled downstream clock @@ -1216,13 +1203,13 @@ int clkdm_clk_enable(struct clockdomain *clkdm, struct clk *clk) */ int clkdm_clk_disable(struct clockdomain *clkdm, struct clk *clk) { - if (!clkdm || !clk || !arch_clkdm || !arch_clkdm->clkdm_clk_disable) + if (!clkdm || !arch_clkdm || !arch_clkdm->clkdm_clk_disable) return -EINVAL; pwrdm_lock(clkdm->pwrdm.ptr); /* corner case: disabling unused clocks */ - if ((__clk_get_enable_count(clk) == 0) && clkdm->usecount == 0) + if (clk && (__clk_get_enable_count(clk) == 0) && clkdm->usecount == 0) goto ccd_exit; if (clkdm->usecount == 0) { @@ -1277,7 +1264,7 @@ int clkdm_hwmod_enable(struct clockdomain *clkdm, struct omap_hwmod *oh) if (!oh) return -EINVAL; - return _clkdm_clk_hwmod_enable(clkdm); + return clkdm_clk_enable(clkdm, NULL); } /** @@ -1300,35 +1287,10 @@ int clkdm_hwmod_disable(struct clockdomain *clkdm, struct omap_hwmod *oh) if (cpu_is_omap24xx() || cpu_is_omap34xx()) return 0; - /* - * XXX Rewrite this code to maintain a list of enabled - * downstream hwmods for debugging purposes? - */ - - if (!clkdm || !oh || !arch_clkdm || !arch_clkdm->clkdm_clk_disable) + if (!oh) return -EINVAL; - pwrdm_lock(clkdm->pwrdm.ptr); - - if (clkdm->usecount == 0) { - pwrdm_unlock(clkdm->pwrdm.ptr); - WARN_ON(1); /* underflow */ - return -ERANGE; - } - - clkdm->usecount--; - if (clkdm->usecount > 0) { - pwrdm_unlock(clkdm->pwrdm.ptr); - return 0; - } - - arch_clkdm->clkdm_clk_disable(clkdm); - pwrdm_state_switch_nolock(clkdm->pwrdm.ptr); - pwrdm_unlock(clkdm->pwrdm.ptr); - - pr_debug("clockdomain: %s: disabled\n", clkdm->name); - - return 0; + return clkdm_clk_disable(clkdm, NULL); } /** diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index 6316da3623b3..223b37c48389 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -352,7 +352,6 @@ void omap_pcs_legacy_init(int irq, void (*rearm)(void)); struct omap_sdrc_params; extern void omap_sdrc_init(struct omap_sdrc_params *sdrc_cs0, struct omap_sdrc_params *sdrc_cs1); -struct omap2_hsmmc_info; extern void omap_reserve(void); struct omap_hwmod; diff --git a/arch/arm/mach-omap2/control.c b/arch/arm/mach-omap2/control.c index c84b5e260617..73338cf80d76 100644 --- a/arch/arm/mach-omap2/control.c +++ b/arch/arm/mach-omap2/control.c @@ -684,7 +684,7 @@ static u32 am33xx_control_vals[ARRAY_SIZE(am43xx_control_reg_offsets)]; * * Save the wkup domain registers */ -void am43xx_control_save_context(void) +static void am43xx_control_save_context(void) { int i; @@ -698,7 +698,7 @@ void am43xx_control_save_context(void) * * Restore the wkup domain registers */ -void am43xx_control_restore_context(void) +static void am43xx_control_restore_context(void) { int i; diff --git a/arch/arm/mach-omap2/control.h b/arch/arm/mach-omap2/control.h index 393b42110511..eceb4b09adb2 100644 --- a/arch/arm/mach-omap2/control.h +++ b/arch/arm/mach-omap2/control.h @@ -195,6 +195,7 @@ #define OMAP44XX_CONTROL_FUSE_MPU_OPP100 0x243 #define OMAP44XX_CONTROL_FUSE_MPU_OPPTURBO 0x246 #define OMAP44XX_CONTROL_FUSE_MPU_OPPNITRO 0x249 +#define OMAP44XX_CONTROL_FUSE_MPU_OPPNITROSB 0x24C #define OMAP44XX_CONTROL_FUSE_CORE_OPP50 0x254 #define OMAP44XX_CONTROL_FUSE_CORE_OPP100 0x257 #define OMAP44XX_CONTROL_FUSE_CORE_OPP100OV 0x25A diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 439e143cad7b..46012ca812f4 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -265,6 +265,7 @@ static int __init omapdss_init_of(void) r = of_platform_populate(node, NULL, NULL, &pdev->dev); if (r) { pr_err("Unable to populate DSS submodule devices\n"); + put_device(&pdev->dev); return r; } diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c deleted file mode 100644 index 63423ea6a240..000000000000 --- a/arch/arm/mach-omap2/hsmmc.c +++ /dev/null @@ -1,171 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * linux/arch/arm/mach-omap2/hsmmc.c - * - * Copyright (C) 2007-2008 Texas Instruments - * Copyright (C) 2008 Nokia Corporation - * Author: Texas Instruments - */ -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/delay.h> -#include <linux/mmc/host.h> -#include <linux/platform_data/hsmmc-omap.h> - -#include "soc.h" -#include "omap_device.h" - -#include "hsmmc.h" -#include "control.h" - -#if IS_ENABLED(CONFIG_MMC_OMAP_HS) - -static u16 control_pbias_offset; -static u16 control_devconf1_offset; - -#define HSMMC_NAME_LEN 9 - -static int __init omap_hsmmc_pdata_init(struct omap2_hsmmc_info *c, - struct omap_hsmmc_platform_data *mmc) -{ - char *hc_name; - - hc_name = kzalloc(HSMMC_NAME_LEN + 1, GFP_KERNEL); - if (!hc_name) - return -ENOMEM; - - snprintf(hc_name, (HSMMC_NAME_LEN + 1), "mmc%islot%i", c->mmc, 1); - mmc->name = hc_name; - mmc->caps = c->caps; - mmc->reg_offset = 0; - - return 0; -} - -static int omap_hsmmc_done; - -void omap_hsmmc_late_init(struct omap2_hsmmc_info *c) -{ - struct platform_device *pdev; - int res; - - if (omap_hsmmc_done) - return; - - omap_hsmmc_done = 1; - - for (; c->mmc; c++) { - pdev = c->pdev; - if (!pdev) - continue; - res = omap_device_register(pdev); - if (res) - pr_err("Could not late init MMC\n"); - } -} - -#define MAX_OMAP_MMC_HWMOD_NAME_LEN 16 - -static void __init omap_hsmmc_init_one(struct omap2_hsmmc_info *hsmmcinfo, - int ctrl_nr) -{ - struct omap_hwmod *oh; - struct omap_hwmod *ohs[1]; - struct omap_device *od; - struct platform_device *pdev; - char oh_name[MAX_OMAP_MMC_HWMOD_NAME_LEN]; - struct omap_hsmmc_platform_data *mmc_data; - struct omap_hsmmc_dev_attr *mmc_dev_attr; - char *name; - int res; - - mmc_data = kzalloc(sizeof(*mmc_data), GFP_KERNEL); - if (!mmc_data) - return; - - res = omap_hsmmc_pdata_init(hsmmcinfo, mmc_data); - if (res < 0) - goto free_mmc; - - name = "omap_hsmmc"; - res = snprintf(oh_name, MAX_OMAP_MMC_HWMOD_NAME_LEN, - "mmc%d", ctrl_nr); - WARN(res >= MAX_OMAP_MMC_HWMOD_NAME_LEN, - "String buffer overflow in MMC%d device setup\n", ctrl_nr); - - oh = omap_hwmod_lookup(oh_name); - if (!oh) { - pr_err("Could not look up %s\n", oh_name); - goto free_name; - } - ohs[0] = oh; - if (oh->dev_attr != NULL) { - mmc_dev_attr = oh->dev_attr; - mmc_data->controller_flags = mmc_dev_attr->flags; - } - - pdev = platform_device_alloc(name, ctrl_nr - 1); - if (!pdev) { - pr_err("Could not allocate pdev for %s\n", name); - goto free_name; - } - dev_set_name(&pdev->dev, "%s.%d", pdev->name, pdev->id); - - od = omap_device_alloc(pdev, ohs, 1); - if (IS_ERR(od)) { - pr_err("Could not allocate od for %s\n", name); - goto put_pdev; - } - - res = platform_device_add_data(pdev, mmc_data, - sizeof(struct omap_hsmmc_platform_data)); - if (res) { - pr_err("Could not add pdata for %s\n", name); - goto put_pdev; - } - - hsmmcinfo->pdev = pdev; - - res = omap_device_register(pdev); - if (res) { - pr_err("Could not register od for %s\n", name); - goto free_od; - } - - goto free_mmc; - -free_od: - omap_device_delete(od); - -put_pdev: - platform_device_put(pdev); - -free_name: - kfree(mmc_data->name); - -free_mmc: - kfree(mmc_data); -} - -void __init omap_hsmmc_init(struct omap2_hsmmc_info *controllers) -{ - if (omap_hsmmc_done) - return; - - omap_hsmmc_done = 1; - - if (cpu_is_omap2430()) { - control_pbias_offset = OMAP243X_CONTROL_PBIAS_LITE; - control_devconf1_offset = OMAP243X_CONTROL_DEVCONF1; - } else { - control_pbias_offset = OMAP343X_CONTROL_PBIAS_LITE; - control_devconf1_offset = OMAP343X_CONTROL_DEVCONF1; - } - - for (; controllers->mmc; controllers++) - omap_hsmmc_init_one(controllers, controllers->mmc); - -} - -#endif diff --git a/arch/arm/mach-omap2/hsmmc.h b/arch/arm/mach-omap2/hsmmc.h deleted file mode 100644 index 76c5ed2afa72..000000000000 --- a/arch/arm/mach-omap2/hsmmc.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * MMC definitions for OMAP2 - */ - -struct mmc_card; - -struct omap2_hsmmc_info { - u8 mmc; /* controller 1/2/3 */ - u32 caps; /* 4/8 wires and any additional host - * capabilities OR'd (ref. linux/mmc/host.h) */ - struct platform_device *pdev; /* mmc controller instance */ - /* init some special card */ - void (*init_card)(struct mmc_card *card); -}; - -#if IS_ENABLED(CONFIG_MMC_OMAP_HS) - -void omap_hsmmc_init(struct omap2_hsmmc_info *); -void omap_hsmmc_late_init(struct omap2_hsmmc_info *); - -#else - -static inline void omap_hsmmc_init(struct omap2_hsmmc_info *info) -{ -} - -static inline void omap_hsmmc_late_init(struct omap2_hsmmc_info *info) -{ -} - -#endif diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c index 2d8f90546591..67fa28532a3a 100644 --- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c +++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c @@ -227,7 +227,6 @@ int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state) { struct omap4_cpu_pm_info *pm_info = &per_cpu(omap4_pm_info, cpu); unsigned int save_state = 0, cpu_logic_state = PWRDM_POWER_RET; - unsigned int wakeup_cpu; if (omap_rev() == OMAP4430_REV_ES1_0) return -ENXIO; @@ -292,7 +291,6 @@ int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state) * secure devices, CPUx does WFI which can result in * domain transition */ - wakeup_cpu = smp_processor_id(); pwrdm_set_next_pwrst(pm_info->pwrdm, PWRDM_POWER_ON); pwrdm_post_transition(NULL); diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index 3acb4192918d..1d55602b3f8f 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -119,11 +119,7 @@ static void _add_hwmod_clocks_clkdev(struct omap_device *od, /** * omap_device_build_from_dt - build an omap_device with multiple hwmods - * @pdev_name: name of the platform_device driver to use - * @pdev_id: this platform_device's connection ID - * @oh: ptr to the single omap_hwmod that backs this omap_device - * @pdata: platform_data ptr to associate with the platform_device - * @pdata_len: amount of memory pointed to by @pdata + * @pdev: The platform device to update. * * Function for building an omap_device already registered from device-tree * @@ -292,7 +288,7 @@ static int _omap_device_idle_hwmods(struct omap_device *od) /** * omap_device_get_context_loss_count - get lost context count - * @od: struct omap_device * + * @pdev: The platform device to update. * * Using the primary hwmod, query the context loss count for this * device. @@ -321,9 +317,8 @@ int omap_device_get_context_loss_count(struct platform_device *pdev) /** * omap_device_alloc - allocate an omap_device * @pdev: platform_device that will be included in this omap_device - * @oh: ptr to the single omap_hwmod that backs this omap_device - * @pdata: platform_data ptr to associate with the platform_device - * @pdata_len: amount of memory pointed to by @pdata + * @ohs: ptr to the omap_hwmod for this omap_device + * @oh_cnt: the size of the ohs list * * Convenience function for allocating an omap_device structure and filling * hwmods, and resources. @@ -649,7 +644,7 @@ struct dev_pm_domain omap_device_pm_domain = { /** * omap_device_register - register an omap_device with one omap_hwmod - * @od: struct omap_device * to register + * @pdev: the platform device (omap_device) to register. * * Register the omap_device structure. This currently just calls * platform_device_register() on the underlying platform_device. @@ -668,7 +663,7 @@ int omap_device_register(struct platform_device *pdev) /** * omap_device_enable - fully activate an omap_device - * @od: struct omap_device * to activate + * @pdev: the platform device to activate * * Do whatever is necessary for the hwmods underlying omap_device @od * to be accessible and ready to operate. This generally involves @@ -702,7 +697,7 @@ int omap_device_enable(struct platform_device *pdev) /** * omap_device_idle - idle an omap_device - * @od: struct omap_device * to idle + * @pdev: The platform_device (omap_device) to idle * * Idle omap_device @od. Device drivers call this function indirectly * via pm_runtime_put*(). Returns -EINVAL if the omap_device is not diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 203664c40d3d..a136788db839 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -623,39 +623,6 @@ static int _enable_wakeup(struct omap_hwmod *oh, u32 *v) return 0; } -/** - * _disable_wakeup: clear OCP_SYSCONFIG.ENAWAKEUP bit in the hardware - * @oh: struct omap_hwmod * - * - * Prevent the hardware module @oh to send wakeups. Returns -EINVAL - * upon error or 0 upon success. - */ -static int _disable_wakeup(struct omap_hwmod *oh, u32 *v) -{ - if (!oh->class->sysc || - !((oh->class->sysc->sysc_flags & SYSC_HAS_ENAWAKEUP) || - (oh->class->sysc->idlemodes & SIDLE_SMART_WKUP) || - (oh->class->sysc->idlemodes & MSTANDBY_SMART_WKUP))) - return -EINVAL; - - if (!oh->class->sysc->sysc_fields) { - WARN(1, "omap_hwmod: %s: offset struct for sysconfig not provided in class\n", oh->name); - return -EINVAL; - } - - if (oh->class->sysc->sysc_flags & SYSC_HAS_ENAWAKEUP) - *v &= ~(0x1 << oh->class->sysc->sysc_fields->enwkup_shift); - - if (oh->class->sysc->idlemodes & SIDLE_SMART_WKUP) - _set_slave_idlemode(oh, HWMOD_IDLEMODE_SMART, v); - if (oh->class->sysc->idlemodes & MSTANDBY_SMART_WKUP) - _set_master_standbymode(oh, HWMOD_IDLEMODE_SMART, v); - - /* XXX test pwrdm_get_wken for this hwmod's subsystem */ - - return 0; -} - static struct clockdomain *_get_clkdm(struct omap_hwmod *oh) { struct clk_hw_omap *clk; @@ -3868,70 +3835,6 @@ void __iomem *omap_hwmod_get_mpu_rt_va(struct omap_hwmod *oh) */ /** - * omap_hwmod_enable_wakeup - allow device to wake up the system - * @oh: struct omap_hwmod * - * - * Sets the module OCP socket ENAWAKEUP bit to allow the module to - * send wakeups to the PRCM, and enable I/O ring wakeup events for - * this IP block if it has dynamic mux entries. Eventually this - * should set PRCM wakeup registers to cause the PRCM to receive - * wakeup events from the module. Does not set any wakeup routing - * registers beyond this point - if the module is to wake up any other - * module or subsystem, that must be set separately. Called by - * omap_device code. Returns -EINVAL on error or 0 upon success. - */ -int omap_hwmod_enable_wakeup(struct omap_hwmod *oh) -{ - unsigned long flags; - u32 v; - - spin_lock_irqsave(&oh->_lock, flags); - - if (oh->class->sysc && - (oh->class->sysc->sysc_flags & SYSC_HAS_ENAWAKEUP)) { - v = oh->_sysc_cache; - _enable_wakeup(oh, &v); - _write_sysconfig(v, oh); - } - - spin_unlock_irqrestore(&oh->_lock, flags); - - return 0; -} - -/** - * omap_hwmod_disable_wakeup - prevent device from waking the system - * @oh: struct omap_hwmod * - * - * Clears the module OCP socket ENAWAKEUP bit to prevent the module - * from sending wakeups to the PRCM, and disable I/O ring wakeup - * events for this IP block if it has dynamic mux entries. Eventually - * this should clear PRCM wakeup registers to cause the PRCM to ignore - * wakeup events from the module. Does not set any wakeup routing - * registers beyond this point - if the module is to wake up any other - * module or subsystem, that must be set separately. Called by - * omap_device code. Returns -EINVAL on error or 0 upon success. - */ -int omap_hwmod_disable_wakeup(struct omap_hwmod *oh) -{ - unsigned long flags; - u32 v; - - spin_lock_irqsave(&oh->_lock, flags); - - if (oh->class->sysc && - (oh->class->sysc->sysc_flags & SYSC_HAS_ENAWAKEUP)) { - v = oh->_sysc_cache; - _disable_wakeup(oh, &v); - _write_sysconfig(v, oh); - } - - spin_unlock_irqrestore(&oh->_lock, flags); - - return 0; -} - -/** * omap_hwmod_assert_hardreset - assert the HW reset line of submodules * contained in the hwmod module. * @oh: struct omap_hwmod * diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h index ef1bb08b1a2d..2d0fd99d4713 100644 --- a/arch/arm/mach-omap2/omap_hwmod.h +++ b/arch/arm/mach-omap2/omap_hwmod.h @@ -646,9 +646,6 @@ int omap_hwmod_get_resource_byname(struct omap_hwmod *oh, unsigned int type, struct powerdomain *omap_hwmod_get_pwrdm(struct omap_hwmod *oh); void __iomem *omap_hwmod_get_mpu_rt_va(struct omap_hwmod *oh); -int omap_hwmod_enable_wakeup(struct omap_hwmod *oh); -int omap_hwmod_disable_wakeup(struct omap_hwmod *oh); - int omap_hwmod_for_each_by_class(const char *classname, int (*fn)(struct omap_hwmod *oh, void *user), diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c index b4a7b49746fc..292f544bd62d 100644 --- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c @@ -790,7 +790,7 @@ static struct omap_hwmod_class omap44xx_sha0_hwmod_class = { .sysc = &omap44xx_sha0_sysc, }; -struct omap_hwmod omap44xx_sha0_hwmod = { +static struct omap_hwmod omap44xx_sha0_hwmod = { .name = "sham", .class = &omap44xx_sha0_hwmod_class, .clkdm_name = "l4_secure_clkdm", @@ -974,7 +974,7 @@ static struct omap_hwmod omap44xx_des_hwmod = { }, }; -struct omap_hwmod_ocp_if omap44xx_l3_main_2__des = { +static struct omap_hwmod_ocp_if omap44xx_l3_main_2__des = { .master = &omap44xx_l3_main_2_hwmod, .slave = &omap44xx_des_hwmod, .clk = "l3_div_ck", diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c index ce5a45c9e2c2..f8715bd96687 100644 --- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c @@ -682,7 +682,7 @@ static struct omap_hwmod_class dra7xx_sha0_hwmod_class = { .sysc = &dra7xx_sha0_sysc, }; -struct omap_hwmod dra7xx_sha0_hwmod = { +static struct omap_hwmod dra7xx_sha0_hwmod = { .name = "sham", .class = &dra7xx_sha0_hwmod_class, .clkdm_name = "l4sec_clkdm", diff --git a/arch/arm/mach-omap2/omap_twl.c b/arch/arm/mach-omap2/omap_twl.c index 6787f1e72c6b..a642d3b39e50 100644 --- a/arch/arm/mach-omap2/omap_twl.c +++ b/arch/arm/mach-omap2/omap_twl.c @@ -36,11 +36,6 @@ #define OMAP4_VDD_CORE_SR_VOLT_REG 0x61 #define OMAP4_VDD_CORE_SR_CMD_REG 0x62 -#define OMAP4_VP_CONFIG_ERROROFFSET 0x00 -#define OMAP4_VP_VSTEPMIN_VSTEPMIN 0x01 -#define OMAP4_VP_VSTEPMAX_VSTEPMAX 0x04 -#define OMAP4_VP_VLIMITTO_TIMEOUT_US 200 - static bool is_offset_valid; static u8 smps_offset; @@ -219,7 +214,8 @@ int __init omap4_twl_init(void) { struct voltagedomain *voltdm; - if (!cpu_is_omap44xx()) + if (!cpu_is_omap44xx() || + of_find_compatible_node(NULL, NULL, "motorola,cpcap")) return -ENODEV; voltdm = voltdm_lookup("mpu"); diff --git a/arch/arm/mach-omap2/opp4xxx_data.c b/arch/arm/mach-omap2/opp4xxx_data.c index adea43ea1c60..985aeab9bc2a 100644 --- a/arch/arm/mach-omap2/opp4xxx_data.c +++ b/arch/arm/mach-omap2/opp4xxx_data.c @@ -32,20 +32,22 @@ #define OMAP4430_VDD_MPU_OPP50_UV 1025000 #define OMAP4430_VDD_MPU_OPP100_UV 1200000 -#define OMAP4430_VDD_MPU_OPPTURBO_UV 1313000 -#define OMAP4430_VDD_MPU_OPPNITRO_UV 1375000 +#define OMAP4430_VDD_MPU_OPPTURBO_UV 1325000 +#define OMAP4430_VDD_MPU_OPPNITRO_UV 1388000 +#define OMAP4430_VDD_MPU_OPPNITROSB_UV 1398000 struct omap_volt_data omap443x_vdd_mpu_volt_data[] = { VOLT_DATA_DEFINE(OMAP4430_VDD_MPU_OPP50_UV, OMAP44XX_CONTROL_FUSE_MPU_OPP50, 0xf4, 0x0c), VOLT_DATA_DEFINE(OMAP4430_VDD_MPU_OPP100_UV, OMAP44XX_CONTROL_FUSE_MPU_OPP100, 0xf9, 0x16), VOLT_DATA_DEFINE(OMAP4430_VDD_MPU_OPPTURBO_UV, OMAP44XX_CONTROL_FUSE_MPU_OPPTURBO, 0xfa, 0x23), VOLT_DATA_DEFINE(OMAP4430_VDD_MPU_OPPNITRO_UV, OMAP44XX_CONTROL_FUSE_MPU_OPPNITRO, 0xfa, 0x27), + VOLT_DATA_DEFINE(OMAP4430_VDD_MPU_OPPNITROSB_UV, OMAP44XX_CONTROL_FUSE_MPU_OPPNITROSB, 0xfa, 0x27), VOLT_DATA_DEFINE(0, 0, 0, 0), }; -#define OMAP4430_VDD_IVA_OPP50_UV 1013000 -#define OMAP4430_VDD_IVA_OPP100_UV 1188000 -#define OMAP4430_VDD_IVA_OPPTURBO_UV 1300000 +#define OMAP4430_VDD_IVA_OPP50_UV 950000 +#define OMAP4430_VDD_IVA_OPP100_UV 1114000 +#define OMAP4430_VDD_IVA_OPPTURBO_UV 1291000 struct omap_volt_data omap443x_vdd_iva_volt_data[] = { VOLT_DATA_DEFINE(OMAP4430_VDD_IVA_OPP50_UV, OMAP44XX_CONTROL_FUSE_IVA_OPP50, 0xf4, 0x0c), @@ -54,8 +56,8 @@ struct omap_volt_data omap443x_vdd_iva_volt_data[] = { VOLT_DATA_DEFINE(0, 0, 0, 0), }; -#define OMAP4430_VDD_CORE_OPP50_UV 1025000 -#define OMAP4430_VDD_CORE_OPP100_UV 1200000 +#define OMAP4430_VDD_CORE_OPP50_UV 962000 +#define OMAP4430_VDD_CORE_OPP100_UV 1127000 struct omap_volt_data omap443x_vdd_core_volt_data[] = { VOLT_DATA_DEFINE(OMAP4430_VDD_CORE_OPP50_UV, OMAP44XX_CONTROL_FUSE_CORE_OPP50, 0xf4, 0x0c), diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index d942a3357090..ca52271de5a8 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -7,11 +7,9 @@ #include <linux/clk.h> #include <linux/davinci_emac.h> #include <linux/gpio.h> -#include <linux/gpio/machine.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/of_platform.h> -#include <linux/ti_wilink_st.h> #include <linux/wl12xx.h> #include <linux/mmc/card.h> #include <linux/mmc/host.h> @@ -33,7 +31,6 @@ #include "omap_device.h" #include "omap-secure.h" #include "soc.h" -#include "hsmmc.h" static struct omap_hsmmc_platform_data __maybe_unused mmc_pdata[2]; @@ -89,6 +86,13 @@ static struct iommu_platform_data omap3_iommu_pdata = { .reset_name = "mmu", .assert_reset = omap_device_assert_hardreset, .deassert_reset = omap_device_deassert_hardreset, + .device_enable = omap_device_enable, + .device_idle = omap_device_idle, +}; + +static struct iommu_platform_data omap3_iommu_isp_pdata = { + .device_enable = omap_device_enable, + .device_idle = omap_device_idle, }; static int omap3_sbc_t3730_twl_callback(struct device *dev, @@ -139,53 +143,6 @@ static void __init omap3_sbc_t3530_legacy_init(void) omap3_sbc_t3x_usb_hub_init(167, "sb-t35 usb hub"); } -static struct ti_st_plat_data wilink_pdata = { - .nshutdown_gpio = 137, - .dev_name = "/dev/ttyO1", - .flow_cntrl = 1, - .baud_rate = 300000, -}; - -static struct platform_device wl18xx_device = { - .name = "kim", - .id = -1, - .dev = { - .platform_data = &wilink_pdata, - } -}; - -static struct ti_st_plat_data wilink7_pdata = { - .nshutdown_gpio = 162, - .dev_name = "/dev/ttyO1", - .flow_cntrl = 1, - .baud_rate = 3000000, -}; - -static struct platform_device wl128x_device = { - .name = "kim", - .id = -1, - .dev = { - .platform_data = &wilink7_pdata, - } -}; - -static struct platform_device btwilink_device = { - .name = "btwilink", - .id = -1, -}; - -static void __init omap3_igep0020_rev_f_legacy_init(void) -{ - platform_device_register(&wl18xx_device); - platform_device_register(&btwilink_device); -} - -static void __init omap3_igep0030_rev_g_legacy_init(void) -{ - platform_device_register(&wl18xx_device); - platform_device_register(&btwilink_device); -} - static void __init omap3_evm_legacy_init(void) { hsmmc2_internal_input_clk(); @@ -262,21 +219,13 @@ static void __init am3517_evm_legacy_init(void) am35xx_emac_reset(); } -static struct platform_device omap3_rom_rng_device = { - .name = "omap3-rom-rng", - .id = -1, - .dev = { - .platform_data = rx51_secure_rng_call, - }, -}; - static void __init nokia_n900_legacy_init(void) { hsmmc2_internal_input_clk(); mmc_pdata[0].name = "external"; mmc_pdata[1].name = "internal"; - if (omap_type() == OMAP2_DEVICE_TYPE_SEC) { + if (omap_type() != OMAP2_DEVICE_TYPE_GP) { if (IS_ENABLED(CONFIG_ARM_ERRATA_430973)) { pr_info("RX-51: Enabling ARM errata 430973 workaround\n"); /* set IBE to 1 */ @@ -285,9 +234,6 @@ static void __init nokia_n900_legacy_init(void) pr_warn("RX-51: Not enabling ARM errata 430973 workaround\n"); pr_warn("Thumb binaries may crash randomly without this workaround\n"); } - - pr_info("RX-51: Registering OMAP3 HWRNG device\n"); - platform_device_register(&omap3_rom_rng_device); } } @@ -299,123 +245,18 @@ static void __init omap3_tao3530_legacy_init(void) static void __init omap3_logicpd_torpedo_init(void) { omap3_gpio126_127_129(); - platform_device_register(&wl128x_device); - platform_device_register(&btwilink_device); } /* omap3pandora legacy devices */ -#define PANDORA_WIFI_IRQ_GPIO 21 -#define PANDORA_WIFI_NRESET_GPIO 23 static struct platform_device pandora_backlight = { .name = "pandora-backlight", .id = -1, }; -static struct regulator_consumer_supply pandora_vmmc3_supply[] = { - REGULATOR_SUPPLY("vmmc", "omap_hsmmc.2"), -}; - -static struct regulator_init_data pandora_vmmc3 = { - .constraints = { - .valid_ops_mask = REGULATOR_CHANGE_STATUS, - }, - .num_consumer_supplies = ARRAY_SIZE(pandora_vmmc3_supply), - .consumer_supplies = pandora_vmmc3_supply, -}; - -static struct fixed_voltage_config pandora_vwlan = { - .supply_name = "vwlan", - .microvolts = 1800000, /* 1.8V */ - .startup_delay = 50000, /* 50ms */ - .init_data = &pandora_vmmc3, -}; - -static struct platform_device pandora_vwlan_device = { - .name = "reg-fixed-voltage", - .id = 1, - .dev = { - .platform_data = &pandora_vwlan, - }, -}; - -static struct gpiod_lookup_table pandora_vwlan_gpiod_table = { - .dev_id = "reg-fixed-voltage.1", - .table = { - /* - * As this is a low GPIO number it should be at the first - * GPIO bank. - */ - GPIO_LOOKUP("gpio-0-31", PANDORA_WIFI_NRESET_GPIO, - NULL, GPIO_ACTIVE_HIGH), - { }, - }, -}; - -static void pandora_wl1251_init_card(struct mmc_card *card) -{ - /* - * We have TI wl1251 attached to MMC3. Pass this information to - * SDIO core because it can't be probed by normal methods. - */ - if (card->type == MMC_TYPE_SDIO || card->type == MMC_TYPE_SD_COMBO) { - card->quirks |= MMC_QUIRK_NONSTD_SDIO; - card->cccr.wide_bus = 1; - card->cis.vendor = 0x104c; - card->cis.device = 0x9066; - card->cis.blksize = 512; - card->cis.max_dtr = 24000000; - card->ocr = 0x80; - } -} - -static struct omap2_hsmmc_info pandora_mmc3[] = { - { - .mmc = 3, - .caps = MMC_CAP_4_BIT_DATA | MMC_CAP_POWER_OFF_CARD, - .init_card = pandora_wl1251_init_card, - }, - {} /* Terminator */ -}; - -static void __init pandora_wl1251_init(void) -{ - struct wl1251_platform_data pandora_wl1251_pdata; - int ret; - - memset(&pandora_wl1251_pdata, 0, sizeof(pandora_wl1251_pdata)); - - pandora_wl1251_pdata.power_gpio = -1; - - ret = gpio_request_one(PANDORA_WIFI_IRQ_GPIO, GPIOF_IN, "wl1251 irq"); - if (ret < 0) - goto fail; - - pandora_wl1251_pdata.irq = gpio_to_irq(PANDORA_WIFI_IRQ_GPIO); - if (pandora_wl1251_pdata.irq < 0) - goto fail_irq; - - pandora_wl1251_pdata.use_eeprom = true; - ret = wl1251_set_platform_data(&pandora_wl1251_pdata); - if (ret < 0) - goto fail_irq; - - return; - -fail_irq: - gpio_free(PANDORA_WIFI_IRQ_GPIO); -fail: - pr_err("wl1251 board initialisation failed\n"); -} - static void __init omap3_pandora_legacy_init(void) { platform_device_register(&pandora_backlight); - gpiod_add_lookup_table(&pandora_vwlan_gpiod_table); - platform_device_register(&pandora_vwlan_device); - omap_hsmmc_init(pandora_mmc3); - omap_hsmmc_late_init(pandora_mmc3); - pandora_wl1251_init(); } #endif /* CONFIG_ARCH_OMAP3 */ @@ -424,6 +265,8 @@ static struct iommu_platform_data omap4_iommu_pdata = { .reset_name = "mmu_cache", .assert_reset = omap_device_assert_hardreset, .deassert_reset = omap_device_deassert_hardreset, + .device_enable = omap_device_enable, + .device_idle = omap_device_idle, }; #endif @@ -617,6 +460,8 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { #ifdef CONFIG_ARCH_OMAP3 OF_DEV_AUXDATA("ti,omap2-iommu", 0x5d000000, "5d000000.mmu", &omap3_iommu_pdata), + OF_DEV_AUXDATA("ti,omap2-iommu", 0x480bd400, "480bd400.mmu", + &omap3_iommu_isp_pdata), OF_DEV_AUXDATA("ti,omap3-smartreflex-core", 0x480cb000, "480cb000.smartreflex", &omap_sr_pdata[OMAP_SR_CORE]), OF_DEV_AUXDATA("ti,omap3-smartreflex-mpu-iva", 0x480c9000, @@ -627,6 +472,7 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { OF_DEV_AUXDATA("ti,davinci_mdio", 0x5c030000, "davinci_mdio.0", NULL), OF_DEV_AUXDATA("ti,am3517-emac", 0x5c000000, "davinci_emac.0", &am35xx_emac_pdata), + OF_DEV_AUXDATA("nokia,n900-rom-rng", 0, NULL, rx51_secure_rng_call), /* McBSP modules with sidetone core */ #if IS_ENABLED(CONFIG_SND_SOC_OMAP_MCBSP) OF_DEV_AUXDATA("ti,omap3-mcbsp", 0x49022000, "49022000.mcbsp", &mcbsp_pdata), @@ -679,8 +525,6 @@ static struct pdata_init pdata_quirks[] __initdata = { { "nokia,omap3-n900", nokia_n900_legacy_init, }, { "nokia,omap3-n9", hsmmc2_internal_input_clk, }, { "nokia,omap3-n950", hsmmc2_internal_input_clk, }, - { "isee,omap3-igep0020-rev-f", omap3_igep0020_rev_f_legacy_init, }, - { "isee,omap3-igep0030-rev-g", omap3_igep0030_rev_g_legacy_init, }, { "logicpd,dm3730-torpedo-devkit", omap3_logicpd_torpedo_init, }, { "ti,omap3-evm-37xx", omap3_evm_legacy_init, }, { "ti,am3517-evm", am3517_evm_legacy_init, }, diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index 7ac9af56762d..01ec1ba4878b 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -148,6 +148,7 @@ int __init omap2_common_pm_late_init(void) /* Init the voltage layer */ omap3_twl_init(); omap4_twl_init(); + omap4_cpcap_init(); omap_voltage_late_init(); /* Smartreflex device init */ diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h index 8a55b69bca63..2a883a0c1fcd 100644 --- a/arch/arm/mach-omap2/pm.h +++ b/arch/arm/mach-omap2/pm.h @@ -107,6 +107,11 @@ extern u16 pm44xx_errata; #define IS_PM44XX_ERRATUM(id) 0 #endif +#define OMAP4_VP_CONFIG_ERROROFFSET 0x00 +#define OMAP4_VP_VSTEPMIN_VSTEPMIN 0x01 +#define OMAP4_VP_VSTEPMAX_VSTEPMAX 0x04 +#define OMAP4_VP_VLIMITTO_TIMEOUT_US 200 + #ifdef CONFIG_POWER_AVS_OMAP extern int omap_devinit_smartreflex(void); extern void omap_enable_smartreflex_on_init(void); @@ -134,6 +139,15 @@ static inline int omap4_twl_init(void) } #endif +#if IS_ENABLED(CONFIG_MFD_CPCAP) +extern int omap4_cpcap_init(void); +#else +static inline int omap4_cpcap_init(void) +{ + return -EINVAL; +} +#endif + #ifdef CONFIG_PM extern void omap_pm_setup_oscillator(u32 tstart, u32 tshut); extern void omap_pm_get_oscillator(u32 *tstart, u32 *tshut); diff --git a/arch/arm/mach-omap2/pm44xx.c b/arch/arm/mach-omap2/pm44xx.c index 485550af2506..5a7a949ae965 100644 --- a/arch/arm/mach-omap2/pm44xx.c +++ b/arch/arm/mach-omap2/pm44xx.c @@ -128,18 +128,9 @@ static int __init pwrdms_setup(struct powerdomain *pwrdm, void *unused) return 0; } - /* - * Bootloader or kexec boot may have LOGICRETSTATE cleared - * for some domains. This is the case when kexec booting from - * Android kernels that support off mode for example. - * Make sure it's set at least for core and per, otherwise - * we currently will see lost GPIO interrupts for wlcore and - * smsc911x at least if per hits retention during idle. - */ if (!strncmp(pwrdm->name, "core", 4) || - !strncmp(pwrdm->name, "l4per", 5) || - !strncmp(pwrdm->name, "wkup", 4)) - pwrdm_set_logic_retst(pwrdm, PWRDM_POWER_RET); + !strncmp(pwrdm->name, "l4per", 5)) + pwrdm_set_logic_retst(pwrdm, PWRDM_POWER_OFF); pwrst = kmalloc(sizeof(struct power_state), GFP_ATOMIC); if (!pwrst) diff --git a/arch/arm/mach-omap2/pmic-cpcap.c b/arch/arm/mach-omap2/pmic-cpcap.c new file mode 100644 index 000000000000..eab281a5fc9f --- /dev/null +++ b/arch/arm/mach-omap2/pmic-cpcap.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * pmic-cpcap.c - CPCAP-specific functions for the OPP code + * + * Adapted from Motorola Mapphone Android Linux kernel + * Copyright (C) 2011 Motorola, Inc. + */ + +#include <linux/err.h> +#include <linux/io.h> +#include <linux/kernel.h> + +#include "soc.h" +#include "pm.h" +#include "voltage.h" + +#include <linux/init.h> +#include "vc.h" + +/** + * omap_cpcap_vsel_to_vdc - convert CPCAP VSEL value to microvolts DC + * @vsel: CPCAP VSEL value to convert + * + * Returns the microvolts DC that the CPCAP PMIC should generate when + * programmed with @vsel. + */ +static unsigned long omap_cpcap_vsel_to_uv(unsigned char vsel) +{ + if (vsel > 0x44) + vsel = 0x44; + return (((vsel * 125) + 6000)) * 100; +} + +/** + * omap_cpcap_uv_to_vsel - convert microvolts DC to CPCAP VSEL value + * @uv: microvolts DC to convert + * + * Returns the VSEL value necessary for the CPCAP PMIC to + * generate an output voltage equal to or greater than @uv microvolts DC. + */ +static unsigned char omap_cpcap_uv_to_vsel(unsigned long uv) +{ + if (uv < 600000) + uv = 600000; + else if (uv > 1450000) + uv = 1450000; + return DIV_ROUND_UP(uv - 600000, 12500); +} + +static struct omap_voltdm_pmic omap_cpcap_core = { + .slew_rate = 4000, + .step_size = 12500, + .vp_erroroffset = OMAP4_VP_CONFIG_ERROROFFSET, + .vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN, + .vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX, + .vddmin = 900000, + .vddmax = 1350000, + .vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US, + .i2c_slave_addr = 0x02, + .volt_reg_addr = 0x00, + .cmd_reg_addr = 0x01, + .i2c_high_speed = false, + .vsel_to_uv = omap_cpcap_vsel_to_uv, + .uv_to_vsel = omap_cpcap_uv_to_vsel, +}; + +static struct omap_voltdm_pmic omap_cpcap_iva = { + .slew_rate = 4000, + .step_size = 12500, + .vp_erroroffset = OMAP4_VP_CONFIG_ERROROFFSET, + .vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN, + .vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX, + .vddmin = 900000, + .vddmax = 1350000, + .vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US, + .i2c_slave_addr = 0x44, + .volt_reg_addr = 0x0, + .cmd_reg_addr = 0x01, + .i2c_high_speed = false, + .vsel_to_uv = omap_cpcap_vsel_to_uv, + .uv_to_vsel = omap_cpcap_uv_to_vsel, +}; + +/** + * omap_max8952_vsel_to_vdc - convert MAX8952 VSEL value to microvolts DC + * @vsel: MAX8952 VSEL value to convert + * + * Returns the microvolts DC that the MAX8952 Regulator should generate when + * programmed with @vsel. + */ +static unsigned long omap_max8952_vsel_to_uv(unsigned char vsel) +{ + if (vsel > 0x3F) + vsel = 0x3F; + return (((vsel * 100) + 7700)) * 100; +} + +/** + * omap_max8952_uv_to_vsel - convert microvolts DC to MAX8952 VSEL value + * @uv: microvolts DC to convert + * + * Returns the VSEL value necessary for the MAX8952 Regulator to + * generate an output voltage equal to or greater than @uv microvolts DC. + */ +static unsigned char omap_max8952_uv_to_vsel(unsigned long uv) +{ + if (uv < 770000) + uv = 770000; + else if (uv > 1400000) + uv = 1400000; + return DIV_ROUND_UP(uv - 770000, 10000); +} + +static struct omap_voltdm_pmic omap443x_max8952_mpu = { + .slew_rate = 16000, + .step_size = 10000, + .vp_erroroffset = OMAP4_VP_CONFIG_ERROROFFSET, + .vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN, + .vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX, + .vddmin = 900000, + .vddmax = 1400000, + .vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US, + .i2c_slave_addr = 0x60, + .volt_reg_addr = 0x03, + .cmd_reg_addr = 0x03, + .i2c_high_speed = false, + .vsel_to_uv = omap_max8952_vsel_to_uv, + .uv_to_vsel = omap_max8952_uv_to_vsel, +}; + +/** + * omap_fan5355_vsel_to_vdc - convert FAN535503 VSEL value to microvolts DC + * @vsel: FAN535503 VSEL value to convert + * + * Returns the microvolts DC that the FAN535503 Regulator should generate when + * programmed with @vsel. + */ +static unsigned long omap_fan535503_vsel_to_uv(unsigned char vsel) +{ + /* Extract bits[5:0] */ + vsel &= 0x3F; + + return (((vsel * 125) + 7500)) * 100; +} + +/** + * omap_fan535508_vsel_to_vdc - convert FAN535508 VSEL value to microvolts DC + * @vsel: FAN535508 VSEL value to convert + * + * Returns the microvolts DC that the FAN535508 Regulator should generate when + * programmed with @vsel. + */ +static unsigned long omap_fan535508_vsel_to_uv(unsigned char vsel) +{ + /* Extract bits[5:0] */ + vsel &= 0x3F; + + if (vsel > 0x37) + vsel = 0x37; + return (((vsel * 125) + 7500)) * 100; +} + + +/** + * omap_fan535503_uv_to_vsel - convert microvolts DC to FAN535503 VSEL value + * @uv: microvolts DC to convert + * + * Returns the VSEL value necessary for the MAX8952 Regulator to + * generate an output voltage equal to or greater than @uv microvolts DC. + */ +static unsigned char omap_fan535503_uv_to_vsel(unsigned long uv) +{ + unsigned char vsel; + if (uv < 750000) + uv = 750000; + else if (uv > 1537500) + uv = 1537500; + + vsel = DIV_ROUND_UP(uv - 750000, 12500); + return vsel | 0xC0; +} + +/** + * omap_fan535508_uv_to_vsel - convert microvolts DC to FAN535508 VSEL value + * @uv: microvolts DC to convert + * + * Returns the VSEL value necessary for the MAX8952 Regulator to + * generate an output voltage equal to or greater than @uv microvolts DC. + */ +static unsigned char omap_fan535508_uv_to_vsel(unsigned long uv) +{ + unsigned char vsel; + if (uv < 750000) + uv = 750000; + else if (uv > 1437500) + uv = 1437500; + + vsel = DIV_ROUND_UP(uv - 750000, 12500); + return vsel | 0xC0; +} + +/* fan5335-core */ +static struct omap_voltdm_pmic omap4_fan_core = { + .slew_rate = 4000, + .step_size = 12500, + .vp_erroroffset = OMAP4_VP_CONFIG_ERROROFFSET, + .vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN, + .vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX, + .vddmin = 850000, + .vddmax = 1375000, + .vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US, + .i2c_slave_addr = 0x4A, + .i2c_high_speed = false, + .volt_reg_addr = 0x01, + .cmd_reg_addr = 0x01, + .vsel_to_uv = omap_fan535508_vsel_to_uv, + .uv_to_vsel = omap_fan535508_uv_to_vsel, +}; + +/* fan5335 iva */ +static struct omap_voltdm_pmic omap4_fan_iva = { + .slew_rate = 4000, + .step_size = 12500, + .vp_erroroffset = OMAP4_VP_CONFIG_ERROROFFSET, + .vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN, + .vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX, + .vddmin = 850000, + .vddmax = 1375000, + .vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US, + .i2c_slave_addr = 0x48, + .volt_reg_addr = 0x01, + .cmd_reg_addr = 0x01, + .i2c_high_speed = false, + .vsel_to_uv = omap_fan535503_vsel_to_uv, + .uv_to_vsel = omap_fan535503_uv_to_vsel, +}; + +int __init omap4_cpcap_init(void) +{ + struct voltagedomain *voltdm; + + if (!of_find_compatible_node(NULL, NULL, "motorola,cpcap")) + return -ENODEV; + + voltdm = voltdm_lookup("mpu"); + omap_voltage_register_pmic(voltdm, &omap443x_max8952_mpu); + + if (of_machine_is_compatible("motorola,droid-bionic")) { + voltdm = voltdm_lookup("mpu"); + omap_voltage_register_pmic(voltdm, &omap_cpcap_core); + + voltdm = voltdm_lookup("mpu"); + omap_voltage_register_pmic(voltdm, &omap_cpcap_iva); + } else { + voltdm = voltdm_lookup("core"); + omap_voltage_register_pmic(voltdm, &omap4_fan_core); + + voltdm = voltdm_lookup("iva"); + omap_voltage_register_pmic(voltdm, &omap4_fan_iva); + } + + return 0; +} + +static int __init cpcap_late_init(void) +{ + omap4_vc_set_pmic_signaling(PWRDM_POWER_RET); + + return 0; +} +omap_late_initcall(cpcap_late_init); diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c index 1d9346f2a4ae..25093c1e5b9a 100644 --- a/arch/arm/mach-omap2/prm44xx.c +++ b/arch/arm/mach-omap2/prm44xx.c @@ -745,7 +745,7 @@ struct pwrdm_ops omap4_pwrdm_operations = { static int omap44xx_prm_late_init(void); -void prm_save_context(void) +static void prm_save_context(void) { omap_prm_context.irq_enable = omap4_prm_read_inst_reg(AM43XX_PRM_OCP_SOCKET_INST, @@ -756,7 +756,7 @@ void prm_save_context(void) omap4_prcm_irq_setup.pm_ctrl); } -void prm_restore_context(void) +static void prm_restore_context(void) { omap4_prm_write_inst_reg(omap_prm_context.irq_enable, OMAP4430_PRM_OCP_SOCKET_INST, diff --git a/arch/arm/mach-omap2/vc.c b/arch/arm/mach-omap2/vc.c index d76b1e5eb8ba..86f1ac4c2412 100644 --- a/arch/arm/mach-omap2/vc.c +++ b/arch/arm/mach-omap2/vc.c @@ -26,6 +26,31 @@ #include "scrm44xx.h" #include "control.h" +#define OMAP4430_VDD_IVA_I2C_DISABLE BIT(14) +#define OMAP4430_VDD_MPU_I2C_DISABLE BIT(13) +#define OMAP4430_VDD_CORE_I2C_DISABLE BIT(12) +#define OMAP4430_VDD_IVA_PRESENCE BIT(9) +#define OMAP4430_VDD_MPU_PRESENCE BIT(8) +#define OMAP4430_AUTO_CTRL_VDD_IVA(x) ((x) << 4) +#define OMAP4430_AUTO_CTRL_VDD_MPU(x) ((x) << 2) +#define OMAP4430_AUTO_CTRL_VDD_CORE(x) ((x) << 0) +#define OMAP4430_AUTO_CTRL_VDD_RET 2 + +#define OMAP4430_VDD_I2C_DISABLE_MASK \ + (OMAP4430_VDD_IVA_I2C_DISABLE | \ + OMAP4430_VDD_MPU_I2C_DISABLE | \ + OMAP4430_VDD_CORE_I2C_DISABLE) + +#define OMAP4_VDD_DEFAULT_VAL \ + (OMAP4430_VDD_I2C_DISABLE_MASK | \ + OMAP4430_VDD_IVA_PRESENCE | OMAP4430_VDD_MPU_PRESENCE | \ + OMAP4430_AUTO_CTRL_VDD_IVA(OMAP4430_AUTO_CTRL_VDD_RET) | \ + OMAP4430_AUTO_CTRL_VDD_MPU(OMAP4430_AUTO_CTRL_VDD_RET) | \ + OMAP4430_AUTO_CTRL_VDD_CORE(OMAP4430_AUTO_CTRL_VDD_RET)) + +#define OMAP4_VDD_RET_VAL \ + (OMAP4_VDD_DEFAULT_VAL & ~OMAP4430_VDD_I2C_DISABLE_MASK) + /** * struct omap_vc_channel_cfg - describe the cfg_channel bitfield * @sa: bit for slave address @@ -280,6 +305,26 @@ void omap3_vc_set_pmic_signaling(int core_next_state) } } +void omap4_vc_set_pmic_signaling(int core_next_state) +{ + struct voltagedomain *vd = vc.vd; + u32 val; + + if (!vd) + return; + + switch (core_next_state) { + case PWRDM_POWER_RET: + val = OMAP4_VDD_RET_VAL; + break; + default: + val = OMAP4_VDD_DEFAULT_VAL; + break; + } + + vd->write(val, OMAP4_PRM_VOLTCTRL_OFFSET); +} + /* * Configure signal polarity for sys_clkreq and sys_off_mode pins * as the default values are wrong and can cause the system to hang @@ -542,9 +587,19 @@ static void omap4_set_timings(struct voltagedomain *voltdm, bool off_mode) writel_relaxed(val, OMAP4_SCRM_CLKSETUPTIME); } +static void __init omap4_vc_init_pmic_signaling(struct voltagedomain *voltdm) +{ + if (vc.vd) + return; + + vc.vd = voltdm; + voltdm->write(OMAP4_VDD_DEFAULT_VAL, OMAP4_PRM_VOLTCTRL_OFFSET); +} + /* OMAP4 specific voltage init functions */ static void __init omap4_vc_init_channel(struct voltagedomain *voltdm) { + omap4_vc_init_pmic_signaling(voltdm); omap4_set_timings(voltdm, true); omap4_set_timings(voltdm, false); } @@ -615,7 +670,7 @@ static void __init omap4_vc_i2c_timing_init(struct voltagedomain *voltdm) const struct i2c_init_data *i2c_data; if (!voltdm->pmic->i2c_high_speed) { - pr_warn("%s: only high speed supported!\n", __func__); + pr_info("%s: using bootloader low-speed timings\n", __func__); return; } diff --git a/arch/arm/mach-omap2/vc.h b/arch/arm/mach-omap2/vc.h index 5bf088633b62..9e861dbc2c4c 100644 --- a/arch/arm/mach-omap2/vc.h +++ b/arch/arm/mach-omap2/vc.h @@ -117,7 +117,7 @@ extern struct omap_vc_param omap4_iva_vc_data; extern struct omap_vc_param omap4_core_vc_data; void omap3_vc_set_pmic_signaling(int core_next_state); - +void omap4_vc_set_pmic_signaling(int core_next_state); void omap_vc_init_channel(struct voltagedomain *voltdm); int omap_vc_pre_scale(struct voltagedomain *voltdm, diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c index 865b10344ea2..0474a4b1394d 100644 --- a/arch/arm/mach-pxa/icontrol.c +++ b/arch/arm/mach-pxa/icontrol.c @@ -12,6 +12,7 @@ #include <linux/irq.h> #include <linux/platform_device.h> +#include <linux/property.h> #include <linux/gpio.h> #include <asm/mach-types.h> @@ -22,7 +23,6 @@ #include <linux/spi/spi.h> #include <linux/spi/pxa2xx_spi.h> -#include <linux/can/platform/mcp251x.h> #include <linux/regulator/machine.h> #include "generic.h" @@ -69,8 +69,9 @@ static struct pxa2xx_spi_chip mcp251x_chip_info4 = { .gpio_cs = ICONTROL_MCP251x_nCS4 }; -static struct mcp251x_platform_data mcp251x_info = { - .oscillator_frequency = 16E6, +static const struct property_entry mcp251x_properties[] = { + PROPERTY_ENTRY_U32("clock-frequency", 16000000), + {} }; static struct spi_board_info mcp251x_board_info[] = { @@ -79,7 +80,7 @@ static struct spi_board_info mcp251x_board_info[] = { .max_speed_hz = 6500000, .bus_num = 3, .chip_select = 0, - .platform_data = &mcp251x_info, + .properties = mcp251x_properties, .controller_data = &mcp251x_chip_info1, .irq = PXA_GPIO_TO_IRQ(ICONTROL_MCP251x_nIRQ1) }, diff --git a/arch/arm/mach-pxa/include/mach/tosa.h b/arch/arm/mach-pxa/include/mach/tosa.h index a499ed17931e..8bfaca3a8b64 100644 --- a/arch/arm/mach-pxa/include/mach/tosa.h +++ b/arch/arm/mach-pxa/include/mach/tosa.h @@ -73,18 +73,6 @@ #define TOSA_GPIO_BAT1_TH_ON (TOSA_TC6393XB_GPIO_BASE + 15) /* - * Timing Generator - */ -#define TG_PNLCTL 0x00 -#define TG_TPOSCTL 0x01 -#define TG_DUTYCTL 0x02 -#define TG_GPOSR 0x03 -#define TG_GPODR1 0x04 -#define TG_GPODR2 0x05 -#define TG_PINICTL 0x06 -#define TG_HPOSCTL 0x07 - -/* * PXA GPIOs */ #define TOSA_GPIO_POWERON (0) @@ -192,7 +180,4 @@ #define TOSA_KEY_MAIL KEY_MAIL #endif -struct spi_device; -extern int tosa_bl_enable(struct spi_device *spi, int enable); - #endif /* _ASM_ARCH_TOSA_H_ */ diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index f537ff1c3ba7..4e13893edeb9 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -813,6 +813,26 @@ static struct pxa2xx_spi_controller pxa_ssp_master_info = { .num_chipselect = 1, }; +static struct gpiod_lookup_table tosa_lcd_gpio_table = { + .dev_id = "spi2.0", + .table = { + GPIO_LOOKUP("tc6393xb", + TOSA_GPIO_TG_ON - TOSA_TC6393XB_GPIO_BASE, + "tg #pwr", GPIO_ACTIVE_HIGH), + { }, + }, +}; + +static struct gpiod_lookup_table tosa_lcd_bl_gpio_table = { + .dev_id = "i2c-tosa-bl", + .table = { + GPIO_LOOKUP("tc6393xb", + TOSA_GPIO_BL_C20MA - TOSA_TC6393XB_GPIO_BASE, + "backlight", GPIO_ACTIVE_HIGH), + { }, + }, +}; + static struct spi_board_info spi_board_info[] __initdata = { { .modalias = "tosa-lcd", @@ -923,6 +943,8 @@ static void __init tosa_init(void) platform_scoop_config = &tosa_pcmcia_config; pxa2xx_set_spi_info(2, &pxa_ssp_master_info); + gpiod_add_lookup_table(&tosa_lcd_gpio_table); + gpiod_add_lookup_table(&tosa_lcd_bl_gpio_table); spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info)); clk_add_alias("CLK_CK3P6MI", tc6393xb_device.name, "GPIO11_CLK", NULL); diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index da113c8eefbf..b27fc7ac9cea 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -13,6 +13,7 @@ #include <linux/leds.h> #include <linux/irq.h> #include <linux/pm.h> +#include <linux/property.h> #include <linux/gpio.h> #include <linux/gpio/machine.h> #include <linux/serial_8250.h> @@ -27,7 +28,6 @@ #include <linux/platform_data/i2c-pxa.h> #include <linux/platform_data/pca953x.h> #include <linux/apm-emulation.h> -#include <linux/can/platform/mcp251x.h> #include <linux/regulator/fixed.h> #include <linux/regulator/machine.h> @@ -428,14 +428,15 @@ static struct gpiod_lookup_table can_regulator_gpiod_table = { }, }; -static struct mcp251x_platform_data zeus_mcp2515_pdata = { - .oscillator_frequency = 16*1000*1000, +static const struct property_entry mcp251x_properties[] = { + PROPERTY_ENTRY_U32("clock-frequency", 16000000), + {} }; static struct spi_board_info zeus_spi_board_info[] = { [0] = { .modalias = "mcp2515", - .platform_data = &zeus_mcp2515_pdata, + .properties = mcp251x_properties, .irq = PXA_GPIO_TO_IRQ(ZEUS_CAN_GPIO), .max_speed_hz = 1*1000*1000, .bus_num = 3, diff --git a/arch/arm/mach-s3c24xx/s3c2416.c b/arch/arm/mach-s3c24xx/s3c2416.c index 1cdb7bd3e713..9514196cad8c 100644 --- a/arch/arm/mach-s3c24xx/s3c2416.c +++ b/arch/arm/mach-s3c24xx/s3c2416.c @@ -113,7 +113,7 @@ void __init s3c2416_map_io(void) /* initialize device information early */ s3c2416_default_sdhci0(); s3c2416_default_sdhci1(); - s3c64xx_spi_setname("s3c2443-spi"); + s3c24xx_spi_setname("s3c2443-spi"); iotable_init(s3c2416_iodesc, ARRAY_SIZE(s3c2416_iodesc)); } diff --git a/arch/arm/mach-s3c24xx/s3c2443.c b/arch/arm/mach-s3c24xx/s3c2443.c index 313e369c3ddd..4cbeb74cf3d6 100644 --- a/arch/arm/mach-s3c24xx/s3c2443.c +++ b/arch/arm/mach-s3c24xx/s3c2443.c @@ -91,7 +91,7 @@ void __init s3c2443_map_io(void) s3c24xx_gpiocfg_default.get_pull = s3c2443_gpio_getpull; /* initialize device information early */ - s3c64xx_spi_setname("s3c2443-spi"); + s3c24xx_spi_setname("s3c2443-spi"); iotable_init(s3c2443_iodesc, ARRAY_SIZE(s3c2443_iodesc)); } diff --git a/arch/arm/mach-s3c24xx/spi-core.h b/arch/arm/mach-s3c24xx/spi-core.h index bb555ccbe057..1048fac629a2 100644 --- a/arch/arm/mach-s3c24xx/spi-core.h +++ b/arch/arm/mach-s3c24xx/spi-core.h @@ -11,7 +11,7 @@ */ /* re-define device name depending on support. */ -static inline void s3c64xx_spi_setname(char *name) +static inline void s3c24xx_spi_setname(char *name) { #ifdef CONFIG_S3C64XX_DEV_SPI0 s3c64xx_device_spi0.name = name; diff --git a/arch/arm/mach-s3c64xx/setup-usb-phy.c b/arch/arm/mach-s3c64xx/setup-usb-phy.c index 6aaaa1d8e8b9..d6b0e3b268af 100644 --- a/arch/arm/mach-s3c64xx/setup-usb-phy.c +++ b/arch/arm/mach-s3c64xx/setup-usb-phy.c @@ -73,7 +73,7 @@ static int s3c_usb_otgphy_exit(struct platform_device *pdev) return 0; } -int s5p_usb_phy_init(struct platform_device *pdev, int type) +int s3c_usb_phy_init(struct platform_device *pdev, int type) { if (type == USB_PHY_TYPE_DEVICE) return s3c_usb_otgphy_init(pdev); @@ -81,7 +81,7 @@ int s5p_usb_phy_init(struct platform_device *pdev, int type) return -EINVAL; } -int s5p_usb_phy_exit(struct platform_device *pdev, int type) +int s3c_usb_phy_exit(struct platform_device *pdev, int type) { if (type == USB_PHY_TYPE_DEVICE) return s3c_usb_otgphy_exit(pdev); diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c index 9e4bc1865f84..2fd3aa6f3212 100644 --- a/arch/arm/mach-shmobile/setup-rcar-gen2.c +++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c @@ -24,7 +24,6 @@ #include "rcar-gen2.h" static const struct of_device_id cpg_matches[] __initconst = { - { .compatible = "renesas,rcar-gen2-cpg-clocks", }, { .compatible = "renesas,r8a7743-cpg-mssr", .data = "extal" }, { .compatible = "renesas,r8a7744-cpg-mssr", .data = "extal" }, { .compatible = "renesas,r8a7790-cpg-mssr", .data = "extal" }, diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c index 2447427cb4a8..69f3fa270fbe 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra20.c +++ b/arch/arm/mach-tegra/cpuidle-tegra20.c @@ -203,7 +203,7 @@ void tegra20_cpuidle_pcie_irqs_in_use(void) { pr_info_once( "Disabling cpuidle LP2 state, since PCIe IRQs are in use\n"); - tegra_idle_driver.states[1].disabled = true; + cpuidle_driver_state_disabled(&tegra_idle_driver, 1, true); } int __init tegra20_cpuidle_init(void) diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S index 67b763fea005..e3f34815c9da 100644 --- a/arch/arm/mach-tegra/reset-handler.S +++ b/arch/arm/mach-tegra/reset-handler.S @@ -44,16 +44,16 @@ ENTRY(tegra_resume) cmp r6, #TEGRA20 beq 1f @ Yes /* Clear the flow controller flags for this CPU. */ - cpu_to_csr_reg r1, r0 + cpu_to_csr_reg r3, r0 mov32 r2, TEGRA_FLOW_CTRL_BASE - ldr r1, [r2, r1] + ldr r1, [r2, r3] /* Clear event & intr flag */ orr r1, r1, \ #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG movw r0, #0x3FFD @ enable, cluster_switch, immed, bitmaps @ & ext flags for CPU power mgnt bic r1, r1, r0 - str r1, [r2] + str r1, [r2, r3] 1: mov32 r9, 0xc09 diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S index b408fa56eb89..3341a12bbb9c 100644 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ b/arch/arm/mach-tegra/sleep-tegra30.S @@ -682,10 +682,12 @@ tegra30_enter_sleep: dsb ldr r0, [r6, r2] /* memory barrier */ + cmp r10, #TEGRA30 halted: isb dsb - wfi /* CPU should be power gated here */ + wfine /* CPU should be power gated here */ + wfeeq /* !!!FIXME!!! Implement halt failure handler */ b halted diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 3875027ef8fc..e929aaa744c0 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -84,6 +84,7 @@ static void __init ux500_init_irq(void) struct resource r; irqchip_init(); + prcmu_early_init(); np = of_find_compatible_node(NULL, NULL, "stericsson,db8500-prcmu"); of_address_to_resource(np, 0, &r); of_node_put(np); @@ -91,7 +92,6 @@ static void __init ux500_init_irq(void) pr_err("could not find PRCMU base resource\n"); return; } - prcmu_early_init(r.start, r.end-r.start); ux500_pm_init(r.start, r.end-r.start); /* Unlock before init */ diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 0ab3a86b1f52..65e4482e3849 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -896,7 +896,10 @@ config VDSO bool "Enable VDSO for acceleration of some system calls" depends on AEABI && MMU && CPU_V7 default y if ARM_ARCH_TIMER + select HAVE_GENERIC_VDSO select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_32 + select GENERIC_GETTIMEOFDAY help Place in the process address space an ELF shared object providing fast implementations of gettimeofday and @@ -1041,7 +1044,7 @@ endif config CACHE_TAUROS2 bool "Enable the Tauros2 L2 cache controller" - depends on (ARCH_DOVE || ARCH_MMP || CPU_PJ4) + depends on (CPU_MOHAWK || CPU_PJ4) default y select OUTER_CACHE help diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 04b36436cbc0..788c5cf46de5 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -324,7 +324,7 @@ union offset_union { __put32_unaligned_check("strbt", val, addr) static void -do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs *regs, union offset_union offset) +do_alignment_finish_ldst(unsigned long addr, u32 instr, struct pt_regs *regs, union offset_union offset) { if (!LDST_U_BIT(instr)) offset.un = -offset.un; @@ -337,7 +337,7 @@ do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs } static int -do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *regs) +do_alignment_ldrhstrh(unsigned long addr, u32 instr, struct pt_regs *regs) { unsigned int rd = RD_BITS(instr); @@ -386,8 +386,7 @@ do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *r } static int -do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, - struct pt_regs *regs) +do_alignment_ldrdstrd(unsigned long addr, u32 instr, struct pt_regs *regs) { unsigned int rd = RD_BITS(instr); unsigned int rd2; @@ -449,7 +448,7 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, } static int -do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *regs) +do_alignment_ldrstr(unsigned long addr, u32 instr, struct pt_regs *regs) { unsigned int rd = RD_BITS(instr); @@ -498,7 +497,7 @@ do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *reg * PU = 10 A B */ static int -do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *regs) +do_alignment_ldmstm(unsigned long addr, u32 instr, struct pt_regs *regs) { unsigned int rd, rn, correction, nr_regs, regbits; unsigned long eaddr, newaddr; @@ -539,7 +538,7 @@ do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *reg * processor for us. */ if (addr != eaddr) { - pr_err("LDMSTM: PC = %08lx, instr = %08lx, " + pr_err("LDMSTM: PC = %08lx, instr = %08x, " "addr = %08lx, eaddr = %08lx\n", instruction_pointer(regs), instr, addr, eaddr); show_regs(regs); @@ -716,10 +715,10 @@ thumb2arm(u16 tinstr) * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt) */ static void * -do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, +do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs, union offset_union *poffset) { - unsigned long instr = *pinstr; + u32 instr = *pinstr; u16 tinst1 = (instr >> 16) & 0xffff; u16 tinst2 = instr & 0xffff; @@ -767,17 +766,48 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, return NULL; } +static int alignment_get_arm(struct pt_regs *regs, u32 *ip, u32 *inst) +{ + u32 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = probe_kernel_address(ip, instr); + + *inst = __mem_to_opcode_arm(instr); + + return fault; +} + +static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst) +{ + u16 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = probe_kernel_address(ip, instr); + + *inst = __mem_to_opcode_thumb16(instr); + + return fault; +} + static int do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { union offset_union uninitialized_var(offset); - unsigned long instr = 0, instrptr; - int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs); + unsigned long instrptr; + int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs); unsigned int type; - unsigned int fault; + u32 instr = 0; u16 tinstr = 0; int isize = 4; int thumb2_32b = 0; + int fault; if (interrupts_enabled(regs)) local_irq_enable(); @@ -786,15 +816,14 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (thumb_mode(regs)) { u16 *ptr = (u16 *)(instrptr & ~1); - fault = probe_kernel_address(ptr, tinstr); - tinstr = __mem_to_opcode_thumb16(tinstr); + + fault = alignment_get_thumb(regs, ptr, &tinstr); if (!fault) { if (cpu_architecture() >= CPU_ARCH_ARMv7 && IS_T32(tinstr)) { /* Thumb-2 32-bit */ - u16 tinst2 = 0; - fault = probe_kernel_address(ptr + 1, tinst2); - tinst2 = __mem_to_opcode_thumb16(tinst2); + u16 tinst2; + fault = alignment_get_thumb(regs, ptr + 1, &tinst2); instr = __opcode_thumb32_compose(tinstr, tinst2); thumb2_32b = 1; } else { @@ -803,8 +832,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } } } else { - fault = probe_kernel_address((void *)instrptr, instr); - instr = __mem_to_opcode_arm(instr); + fault = alignment_get_arm(regs, (void *)instrptr, &instr); } if (fault) { @@ -926,7 +954,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * Oops, we didn't handle the instruction. */ pr_err("Alignment trap: not handling instruction " - "%0*lx at [<%08lx>]\n", + "%0*x at [<%08lx>]\n", isize << 1, isize == 2 ? tinstr : instr, instrptr); ai_skipped += 1; @@ -936,7 +964,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) ai_user += 1; if (ai_usermode & UM_WARN) - printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx " + printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*x " "Address=0x%08lx FSR 0x%03x\n", current->comm, task_pid_nr(current), instrptr, isize << 1, diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index db9247898300..287ef898a55e 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -35,7 +35,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, unsigned long attrs) { - void *ret = dma_alloc_from_global_coherent(size, dma_handle); + void *ret = dma_alloc_from_global_coherent(dev, size, dma_handle); /* * dma_alloc_from_global_coherent() may fail because: diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7d042d5c43e3..e822af0d9219 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -529,7 +529,7 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page) static bool __in_atomic_pool(void *start, size_t size) { - return addr_in_gen_pool(atomic_pool, (unsigned long)start, size); + return gen_pool_has_addr(atomic_pool, (unsigned long)start, size); } static int __free_from_pool(void *start, size_t size) @@ -1559,7 +1559,7 @@ static int arm_coherent_iommu_mmap_attrs(struct device *dev, * free a page as defined by the above mapping. * Must not be called with IRQs disabled. */ -void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, +static void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, unsigned long attrs, int coherent_flag) { struct page **pages; @@ -1583,13 +1583,14 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, __iommu_free_buffer(dev, pages, size, attrs); } -void arm_iommu_free_attrs(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t handle, unsigned long attrs) +static void arm_iommu_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle, + unsigned long attrs) { __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, NORMAL); } -void arm_coherent_iommu_free_attrs(struct device *dev, size_t size, +static void arm_coherent_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, unsigned long attrs) { __arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, COHERENT); @@ -1713,7 +1714,7 @@ bad_mapping: * possible) and tagged with the appropriate dma address and length. They are * obtained via sg_dma_{address,length}. */ -int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, +static int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { return __iommu_map_sg(dev, sg, nents, dir, attrs, true); @@ -1731,7 +1732,7 @@ int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, * tagged with the appropriate dma address and length. They are obtained via * sg_dma_{address,length}. */ -int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, +static int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { return __iommu_map_sg(dev, sg, nents, dir, attrs, false); @@ -1764,8 +1765,8 @@ static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, * Unmap a set of streaming mode DMA translations. Again, CPU access * rules concerning calls here are the same as for dma_unmap_single(). */ -void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, +static void arm_coherent_iommu_unmap_sg(struct device *dev, + struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { __iommu_unmap_sg(dev, sg, nents, dir, attrs, true); @@ -1781,9 +1782,10 @@ void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, * Unmap a set of streaming mode DMA translations. Again, CPU access * rules concerning calls here are the same as for dma_unmap_single(). */ -void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, - unsigned long attrs) +static void arm_iommu_unmap_sg(struct device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction dir, + unsigned long attrs) { __iommu_unmap_sg(dev, sg, nents, dir, attrs, false); } @@ -1795,7 +1797,8 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, * @nents: number of buffers to map (returned from dma_map_sg) * @dir: DMA transfer direction (same as was passed to dma_map_sg) */ -void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, +static void arm_iommu_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, int nents, enum dma_data_direction dir) { struct scatterlist *s; @@ -1813,7 +1816,8 @@ void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, * @nents: number of buffers to map (returned from dma_map_sg) * @dir: DMA transfer direction (same as was passed to dma_map_sg) */ -void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, +static void arm_iommu_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nents, enum dma_data_direction dir) { struct scatterlist *s; @@ -2015,7 +2019,7 @@ static void arm_iommu_sync_single_for_device(struct device *dev, __dma_page_cpu_to_dev(page, offset, size, dir); } -const struct dma_map_ops iommu_ops = { +static const struct dma_map_ops iommu_ops = { .alloc = arm_iommu_alloc_attrs, .free = arm_iommu_free_attrs, .mmap = arm_iommu_mmap_attrs, @@ -2037,7 +2041,7 @@ const struct dma_map_ops iommu_ops = { .dma_supported = arm_dma_supported, }; -const struct dma_map_ops iommu_coherent_ops = { +static const struct dma_map_ops iommu_coherent_ops = { .alloc = arm_coherent_iommu_alloc_attrs, .free = arm_coherent_iommu_free_attrs, .mmap = arm_coherent_iommu_mmap_attrs, @@ -2332,26 +2336,20 @@ void arch_teardown_dma_ops(struct device *dev) } #ifdef CONFIG_SWIOTLB -void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir) +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { __dma_page_cpu_to_dev(phys_to_page(paddr), paddr & (PAGE_SIZE - 1), size, dir); } -void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir) +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { __dma_page_dev_to_cpu(phys_to_page(paddr), paddr & (PAGE_SIZE - 1), size, dir); } -long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, - dma_addr_t dma_addr) -{ - return dma_to_pfn(dev, dma_addr); -} - void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index b4be3baa83d4..3ef204137e73 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -30,6 +30,7 @@ #include <asm/prom.h> #include <asm/sections.h> #include <asm/setup.h> +#include <asm/set_memory.h> #include <asm/system_info.h> #include <asm/tlb.h> #include <asm/fixmap.h> @@ -180,7 +181,7 @@ int pfn_valid(unsigned long pfn) if (__phys_to_pfn(addr) != pfn) return 0; - return memblock_is_map_memory(__pfn_to_phys(pfn)); + return memblock_is_map_memory(addr); } EXPORT_SYMBOL(pfn_valid); #endif @@ -593,8 +594,8 @@ static inline bool arch_has_strict_perms(void) return !!(get_cr() & CR_XP); } -void set_section_perms(struct section_perm *perms, int n, bool set, - struct mm_struct *mm) +static void set_section_perms(struct section_perm *perms, int n, bool set, + struct mm_struct *mm) { size_t i; unsigned long addr; diff --git a/arch/arm/mm/iomap.c b/arch/arm/mm/iomap.c index 091ddc56827e..415d0a454237 100644 --- a/arch/arm/mm/iomap.c +++ b/arch/arm/mm/iomap.c @@ -10,6 +10,8 @@ #include <linux/ioport.h> #include <linux/io.h> +#include <asm/vga.h> + unsigned long vga_base; EXPORT_SYMBOL(vga_base); diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index d42b93316183..72286f9a4d30 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -382,15 +382,11 @@ void __iomem *ioremap(resource_size_t res_cookie, size_t size) EXPORT_SYMBOL(ioremap); void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) - __alias(ioremap_cached); - -void __iomem *ioremap_cached(resource_size_t res_cookie, size_t size) { return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap_cache); -EXPORT_SYMBOL(ioremap_cached); void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) { diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 48c2888297dd..5d0d0f86e790 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -259,7 +259,7 @@ static struct mem_type mem_types[] __ro_after_init = { .prot_sect = PROT_SECT_DEVICE, .domain = DOMAIN_IO, }, - [MT_DEVICE_CACHED] = { /* ioremap_cached */ + [MT_DEVICE_CACHED] = { /* ioremap_cache */ .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB, diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 24ecf8d30a1e..8b3d7191e2b8 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -206,15 +206,11 @@ void __iomem *ioremap(resource_size_t res_cookie, size_t size) EXPORT_SYMBOL(ioremap); void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) - __alias(ioremap_cached); - -void __iomem *ioremap_cached(resource_size_t res_cookie, size_t size) { return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap_cache); -EXPORT_SYMBOL(ioremap_cached); void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) { diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 4fa5371bc662..2785da387c91 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -491,7 +491,7 @@ cpu_arm1020_name: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm1020_proc_info,#object __arm1020_proc_info: diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 5d8a8339e09a..e9ea237ed785 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -449,7 +449,7 @@ arm1020e_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm1020e_proc_info,#object __arm1020e_proc_info: diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index b3dd95c345e4..920c279e7879 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -443,7 +443,7 @@ arm1022_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm1022_proc_info,#object __arm1022_proc_info: diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index ac5afde12f35..0bdf25a95b10 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -138,7 +138,7 @@ ENTRY(arm1026_flush_kern_cache_all) mov ip, #0 __flush_whole_cache: #ifndef CONFIG_CPU_DCACHE_DISABLE -1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test, clean, invalidate bne 1b #endif tst r2, #VM_EXEC @@ -363,7 +363,7 @@ ENTRY(cpu_arm1026_switch_mm) #ifdef CONFIG_MMU mov r1, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE -1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test, clean, invalidate bne 1b #endif #ifndef CONFIG_CPU_ICACHE_DISABLE @@ -437,7 +437,7 @@ arm1026_crval: string cpu_arm1026_name, "ARM1026EJ-S" .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm1026_proc_info,#object __arm1026_proc_info: diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index c99d24363f32..39361e196d61 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -172,7 +172,7 @@ arm720_crval: * See <asm/procinfo.h> for a definition of this structure. */ - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index 1b4a3838393f..1a94bbf6e53f 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -128,7 +128,7 @@ __arm740_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm740_proc_info,#object __arm740_proc_info: .long 0x41807400 diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index 17a4687065c7..52b66cf0259e 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -72,7 +72,7 @@ __arm7tdmi_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ extra_hwcaps=0 diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 298c76b47749..31ac8acc34dc 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -434,7 +434,7 @@ arm920_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm920_proc_info,#object __arm920_proc_info: diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 824be3a0bc23..ca2c7ca8af21 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -412,7 +412,7 @@ arm922_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm922_proc_info,#object __arm922_proc_info: diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index d40cff8f102c..a381a0c9f109 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -477,7 +477,7 @@ arm925_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index f3cd08f353f0..1ba253c2bce1 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -131,7 +131,7 @@ __flush_whole_cache: #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache #else -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate bne 1b #endif tst r2, #VM_EXEC @@ -358,7 +358,7 @@ ENTRY(cpu_arm926_switch_mm) mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache #else @ && 'Clean & Invalidate whole DCache' -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate bne 1b #endif mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache @@ -460,7 +460,7 @@ arm926_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm926_proc_info,#object __arm926_proc_info: diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index 1c26d991386d..4b8a00220cc9 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -340,7 +340,7 @@ __arm940_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm940_proc_info,#object __arm940_proc_info: diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 2dc1c75a4fd4..555becf9c758 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -395,7 +395,7 @@ __arm946_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm946_proc_info,#object __arm946_proc_info: .long 0x41009460 diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index 913c06e590af..ef517530130b 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -66,7 +66,7 @@ __arm9tdmi_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req .type __\name\()_proc_info, #object diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S index 8120b6f4dbb8..dddf833fe000 100644 --- a/arch/arm/mm/proc-fa526.S +++ b/arch/arm/mm/proc-fa526.S @@ -185,7 +185,7 @@ fa526_cr1_set: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __fa526_proc_info,#object __fa526_proc_info: diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index bb6dc34d42a3..b12b76bc8d30 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -571,7 +571,7 @@ feroceon_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index f08308578885..d47d6c5cee63 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -416,7 +416,7 @@ mohawk_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __88sv331x_proc_info,#object __88sv331x_proc_info: diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index d5bc5d702563..baba503ba816 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -196,7 +196,7 @@ sa110_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __sa110_proc_info,#object __sa110_proc_info: diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index be7b611c76c7..75ebacc8e4e5 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -239,7 +239,7 @@ sa1100_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index c1c85eb3484f..1dd0d5ca27da 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -261,7 +261,7 @@ v6_crval: string cpu_elf_name, "v6" .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" /* * Match any ARMv6 processor core. diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 9a07916af8dd..c0fbfca5da8b 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/arm-smccc.h> #include <linux/kernel.h> -#include <linux/psci.h> #include <linux/smp.h> #include <asm/cp15.h> @@ -65,6 +64,9 @@ static void cpu_v7_spectre_init(void) break; #ifdef CONFIG_ARM_PSCI + case ARM_CPU_PART_BRAHMA_B53: + /* Requires no workaround */ + break; default: /* Other ARM CPUs require no workaround */ if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) @@ -75,26 +77,20 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A72: { struct arm_smccc_res res; - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - break; + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if ((int)res.a0 != 0) + return; - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - break; + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: per_cpu(harden_branch_predictor_fn, cpu) = call_hvc_arch_workaround_1; cpu_do_switch_mm = cpu_v7_hvc_switch_mm; spectre_v2_method = "hypervisor"; break; - case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - break; + case SMCCC_CONDUIT_SMC: per_cpu(harden_branch_predictor_fn, cpu) = call_smc_arch_workaround_1; cpu_do_switch_mm = cpu_v7_smc_switch_mm; diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index c4e8006a1a8c..48e0ef6f0dcc 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -644,7 +644,7 @@ __v7_setup_stack: string cpu_elf_name, "v7" .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" /* * Standard v7 proc info content diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 1448f144e7fb..84459c1d31b8 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -93,7 +93,7 @@ ENTRY(cpu_cm7_proc_fin) ret lr ENDPROC(cpu_cm7_proc_fin) - .section ".init.text", #alloc, #execinstr + .section ".init.text", "ax" __v7m_cm7_setup: mov r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP) @@ -132,13 +132,11 @@ __v7m_setup_cont: dsb mov r6, lr @ save LR ldr sp, =init_thread_union + THREAD_START_SP - stmia sp, {r0-r3, r12} cpsie i svc #0 1: cpsid i - ldr r0, =exc_ret - orr lr, lr, #EXC_RET_THREADMODE_PROCESSSTACK - str lr, [r0] + /* Calculate exc_ret */ + orr r10, lr, #EXC_RET_THREADMODE_PROCESSSTACK ldmia sp, {r0-r3, r12} str r5, [r12, #11 * 4] @ restore the original SVC vector entry mov lr, r6 @ restore LR @@ -179,7 +177,7 @@ ENDPROC(__v7m_setup) string cpu_elf_name "v7m" string cpu_v7m_name "ARMv7-M" - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro __v7m_proc name, initfunc, cache_fns = nop_cache_fns, hwcaps = 0, proc_fns = v7m_processor_functions .long 0 /* proc_info_list.__cpu_mm_mmu_flags */ diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index 1ac0fbbe9f12..42eaecc43cfe 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -496,7 +496,7 @@ xsc3_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index bdb2b7749b03..18ac5a1f8922 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -610,7 +610,7 @@ xscale_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache .type __\name\()_proc_info,#object diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index 9a6e4923bd69..563440315acd 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -89,7 +89,7 @@ void pxa_ssp_free(struct ssp_device *ssp) ssp->use_count--; ssp->label = NULL; } else - dev_err(&ssp->pdev->dev, "device already free\n"); + dev_err(ssp->dev, "device already free\n"); mutex_unlock(&ssp_lock); } EXPORT_SYMBOL(pxa_ssp_free); @@ -118,7 +118,7 @@ static int pxa_ssp_probe(struct platform_device *pdev) if (ssp == NULL) return -ENOMEM; - ssp->pdev = pdev; + ssp->dev = dev; ssp->clk = devm_clk_get(dev, NULL); if (IS_ERR(ssp->clk)) diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c index 1d1fa068d228..1602f6dc900b 100644 --- a/arch/arm/plat-samsung/devs.c +++ b/arch/arm/plat-samsung/devs.c @@ -1010,9 +1010,9 @@ void __init dwc2_hsotg_set_platdata(struct dwc2_hsotg_plat *pd) npd = s3c_set_platdata(pd, sizeof(*npd), &s3c_device_usb_hsotg); if (!npd->phy_init) - npd->phy_init = s5p_usb_phy_init; + npd->phy_init = s3c_usb_phy_init; if (!npd->phy_exit) - npd->phy_exit = s5p_usb_phy_exit; + npd->phy_exit = s3c_usb_phy_exit; } #endif /* CONFIG_S3C_DEV_USB_HSOTG */ diff --git a/arch/arm/plat-samsung/include/plat/usb-phy.h b/arch/arm/plat-samsung/include/plat/usb-phy.h index 94da89ecbd3b..759d66a0773a 100644 --- a/arch/arm/plat-samsung/include/plat/usb-phy.h +++ b/arch/arm/plat-samsung/include/plat/usb-phy.h @@ -7,7 +7,7 @@ #ifndef __PLAT_SAMSUNG_USB_PHY_H #define __PLAT_SAMSUNG_USB_PHY_H __FILE__ -extern int s5p_usb_phy_init(struct platform_device *pdev, int type); -extern int s5p_usb_phy_exit(struct platform_device *pdev, int type); +extern int s3c_usb_phy_init(struct platform_device *pdev, int type); +extern int s3c_usb_phy_exit(struct platform_device *pdev, int type); #endif /* __PLAT_SAMSUNG_USB_PHY_H */ diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 87b7769214e0..0fda344beb0b 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -1,7 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 + +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32 +include $(srctree)/lib/vdso/Makefile + hostprogs-y := vdsomunge -obj-vdso := vgettimeofday.o datapage.o +obj-vdso := vgettimeofday.o datapage.o note.o # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.so.raw vdso.lds @@ -24,7 +30,11 @@ CFLAGS_REMOVE_vdso.o = -pg # Force -O2 to avoid libgcc dependencies CFLAGS_REMOVE_vgettimeofday.o = -pg -Os +ifeq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o = -O2 +else +CFLAGS_vgettimeofday.o = -O2 -include $(c-gettimeofday-y) +endif # Disable gcov profiling for VDSO code GCOV_PROFILE := n @@ -37,7 +47,7 @@ $(obj)/vdso.o : $(obj)/vdso.so # Link rule for the .so file $(obj)/vdso.so.raw: $(obj)/vdso.lds $(obj-vdso) FORCE - $(call if_changed,ld) + $(call if_changed,vdsold_and_vdso_check) $(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/vdsomunge FORCE $(call if_changed,vdsomunge) @@ -47,6 +57,10 @@ $(obj)/%.so: OBJCOPYFLAGS := -S $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) +# Actual build commands +quiet_cmd_vdsold_and_vdso_check = LD $@ + cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check) + quiet_cmd_vdsomunge = MUNGE $@ cmd_vdsomunge = $(objtree)/$(obj)/vdsomunge $< $@ diff --git a/arch/arm/vdso/note.c b/arch/arm/vdso/note.c new file mode 100644 index 000000000000..eff5bf9efb8b --- /dev/null +++ b/arch/arm/vdso/note.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2012-2018 ARM Limited + * + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> +#include <linux/build-salt.h> + +ELFNOTE32("Linux", 0, LINUX_VERSION_CODE); +BUILD_SALT; diff --git a/arch/arm/vdso/vdso.lds.S b/arch/arm/vdso/vdso.lds.S index 73cf205b003e..165d1d2eb76b 100644 --- a/arch/arm/vdso/vdso.lds.S +++ b/arch/arm/vdso/vdso.lds.S @@ -71,6 +71,8 @@ VERSION global: __vdso_clock_gettime; __vdso_gettimeofday; + __vdso_clock_getres; + __vdso_clock_gettime64; local: *; }; } diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c index d1fdbb12760a..1976c6f325a4 100644 --- a/arch/arm/vdso/vgettimeofday.c +++ b/arch/arm/vdso/vgettimeofday.c @@ -1,259 +1,34 @@ // SPDX-License-Identifier: GPL-2.0-only /* + * ARM userspace implementations of gettimeofday() and similar. + * * Copyright 2015 Mentor Graphics Corporation. */ - -#include <linux/compiler.h> -#include <linux/hrtimer.h> #include <linux/time.h> -#include <asm/barrier.h> -#include <asm/bug.h> -#include <asm/cp15.h> -#include <asm/page.h> -#include <asm/unistd.h> -#include <asm/vdso_datapage.h> - -#ifndef CONFIG_AEABI -#error This code depends on AEABI system call conventions -#endif - -extern struct vdso_data *__get_datapage(void); - -static notrace u32 __vdso_read_begin(const struct vdso_data *vdata) -{ - u32 seq; -repeat: - seq = READ_ONCE(vdata->seq_count); - if (seq & 1) { - cpu_relax(); - goto repeat; - } - return seq; -} - -static notrace u32 vdso_read_begin(const struct vdso_data *vdata) -{ - u32 seq; - - seq = __vdso_read_begin(vdata); - - smp_rmb(); /* Pairs with smp_wmb in vdso_write_end */ - return seq; -} +#include <linux/types.h> -static notrace int vdso_read_retry(const struct vdso_data *vdata, u32 start) +int __vdso_clock_gettime(clockid_t clock, + struct old_timespec32 *ts) { - smp_rmb(); /* Pairs with smp_wmb in vdso_write_begin */ - return vdata->seq_count != start; + return __cvdso_clock_gettime32(clock, ts); } -static notrace long clock_gettime_fallback(clockid_t _clkid, - struct timespec *_ts) +int __vdso_clock_gettime64(clockid_t clock, + struct __kernel_timespec *ts) { - register struct timespec *ts asm("r1") = _ts; - register clockid_t clkid asm("r0") = _clkid; - register long ret asm ("r0"); - register long nr asm("r7") = __NR_clock_gettime; - - asm volatile( - " swi #0\n" - : "=r" (ret) - : "r" (clkid), "r" (ts), "r" (nr) - : "memory"); - - return ret; + return __cvdso_clock_gettime(clock, ts); } -static notrace int do_realtime_coarse(struct timespec *ts, - struct vdso_data *vdata) +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) { - u32 seq; - - do { - seq = vdso_read_begin(vdata); - - ts->tv_sec = vdata->xtime_coarse_sec; - ts->tv_nsec = vdata->xtime_coarse_nsec; - - } while (vdso_read_retry(vdata, seq)); - - return 0; + return __cvdso_gettimeofday(tv, tz); } -static notrace int do_monotonic_coarse(struct timespec *ts, - struct vdso_data *vdata) +int __vdso_clock_getres(clockid_t clock_id, + struct old_timespec32 *res) { - struct timespec tomono; - u32 seq; - - do { - seq = vdso_read_begin(vdata); - - ts->tv_sec = vdata->xtime_coarse_sec; - ts->tv_nsec = vdata->xtime_coarse_nsec; - - tomono.tv_sec = vdata->wtm_clock_sec; - tomono.tv_nsec = vdata->wtm_clock_nsec; - - } while (vdso_read_retry(vdata, seq)); - - ts->tv_sec += tomono.tv_sec; - timespec_add_ns(ts, tomono.tv_nsec); - - return 0; -} - -#ifdef CONFIG_ARM_ARCH_TIMER - -static notrace u64 get_ns(struct vdso_data *vdata) -{ - u64 cycle_delta; - u64 cycle_now; - u64 nsec; - - isb(); - cycle_now = read_sysreg(CNTVCT); - - cycle_delta = (cycle_now - vdata->cs_cycle_last) & vdata->cs_mask; - - nsec = (cycle_delta * vdata->cs_mult) + vdata->xtime_clock_snsec; - nsec >>= vdata->cs_shift; - - return nsec; -} - -static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata) -{ - u64 nsecs; - u32 seq; - - do { - seq = vdso_read_begin(vdata); - - if (!vdata->tk_is_cntvct) - return -1; - - ts->tv_sec = vdata->xtime_clock_sec; - nsecs = get_ns(vdata); - - } while (vdso_read_retry(vdata, seq)); - - ts->tv_nsec = 0; - timespec_add_ns(ts, nsecs); - - return 0; -} - -static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata) -{ - struct timespec tomono; - u64 nsecs; - u32 seq; - - do { - seq = vdso_read_begin(vdata); - - if (!vdata->tk_is_cntvct) - return -1; - - ts->tv_sec = vdata->xtime_clock_sec; - nsecs = get_ns(vdata); - - tomono.tv_sec = vdata->wtm_clock_sec; - tomono.tv_nsec = vdata->wtm_clock_nsec; - - } while (vdso_read_retry(vdata, seq)); - - ts->tv_sec += tomono.tv_sec; - ts->tv_nsec = 0; - timespec_add_ns(ts, nsecs + tomono.tv_nsec); - - return 0; -} - -#else /* CONFIG_ARM_ARCH_TIMER */ - -static notrace int do_realtime(struct timespec *ts, struct vdso_data *vdata) -{ - return -1; -} - -static notrace int do_monotonic(struct timespec *ts, struct vdso_data *vdata) -{ - return -1; -} - -#endif /* CONFIG_ARM_ARCH_TIMER */ - -notrace int __vdso_clock_gettime(clockid_t clkid, struct timespec *ts) -{ - struct vdso_data *vdata; - int ret = -1; - - vdata = __get_datapage(); - - switch (clkid) { - case CLOCK_REALTIME_COARSE: - ret = do_realtime_coarse(ts, vdata); - break; - case CLOCK_MONOTONIC_COARSE: - ret = do_monotonic_coarse(ts, vdata); - break; - case CLOCK_REALTIME: - ret = do_realtime(ts, vdata); - break; - case CLOCK_MONOTONIC: - ret = do_monotonic(ts, vdata); - break; - default: - break; - } - - if (ret) - ret = clock_gettime_fallback(clkid, ts); - - return ret; -} - -static notrace long gettimeofday_fallback(struct timeval *_tv, - struct timezone *_tz) -{ - register struct timezone *tz asm("r1") = _tz; - register struct timeval *tv asm("r0") = _tv; - register long ret asm ("r0"); - register long nr asm("r7") = __NR_gettimeofday; - - asm volatile( - " swi #0\n" - : "=r" (ret) - : "r" (tv), "r" (tz), "r" (nr) - : "memory"); - - return ret; -} - -notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) -{ - struct timespec ts; - struct vdso_data *vdata; - int ret; - - vdata = __get_datapage(); - - ret = do_realtime(&ts, vdata); - if (ret) - return gettimeofday_fallback(tv, tz); - - if (tv) { - tv->tv_sec = ts.tv_sec; - tv->tv_usec = ts.tv_nsec / 1000; - } - if (tz) { - tz->tz_minuteswest = vdata->tz_minuteswest; - tz->tz_dsttime = vdata->tz_dsttime; - } - - return ret; + return __cvdso_clock_getres_time32(clock_id, res); } /* Avoid unresolved references emitted by GCC */ diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 38fa917c8585..d40e9e5fc52b 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -15,6 +15,7 @@ #include <xen/interface/grant_table.h> #include <xen/interface/memory.h> #include <xen/page.h> +#include <xen/xen-ops.h> #include <xen/swiotlb-xen.h> #include <asm/cacheflush.h> @@ -70,20 +71,20 @@ static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op) * pfn_valid returns true the pages is local and we can use the native * dma-direct functions, otherwise we call the Xen specific version. */ -void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle, - phys_addr_t paddr, size_t size, enum dma_data_direction dir) +void xen_dma_sync_for_cpu(dma_addr_t handle, phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { if (pfn_valid(PFN_DOWN(handle))) - arch_sync_dma_for_cpu(dev, paddr, size, dir); + arch_sync_dma_for_cpu(paddr, size, dir); else if (dir != DMA_TO_DEVICE) dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL); } -void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle, - phys_addr_t paddr, size_t size, enum dma_data_direction dir) +void xen_dma_sync_for_device(dma_addr_t handle, phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { if (pfn_valid(PFN_DOWN(handle))) - arch_sync_dma_for_device(dev, paddr, size, dir); + arch_sync_dma_for_device(paddr, size, dir); else if (dir == DMA_FROM_DEVICE) dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL); else @@ -133,7 +134,7 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) return; } -int __init xen_mm_init(void) +static int __init xen_mm_init(void) { struct gnttab_cache_flush cflush; if (!xen_initial_domain()) |